fix: gardener dupe detection — strip series prefixes (LLM seed, Push3 evolution)
This commit is contained in:
parent
174187f6a6
commit
d9de5b3708
1 changed file with 3 additions and 3 deletions
|
|
@@ -78,9 +78,9 @@ DUPES=""
|
||||||
while IFS=$'\t' read -r num1 title1; do
|
while IFS=$'\t' read -r num1 title1; do
|
||||||
while IFS=$'\t' read -r num2 title2; do
|
while IFS=$'\t' read -r num2 title2; do
|
||||||
[ "$num1" -ge "$num2" ] && continue
|
[ "$num1" -ge "$num2" ] && continue
|
||||||
# Normalize: lowercase, strip common prefixes, collapse whitespace
|
# Normalize: lowercase, strip prefixes + series names, collapse whitespace
|
||||||
t1=$(echo "$title1" | tr '[:upper:]' '[:lower:]' | sed 's/^feat:\|^fix:\|^refactor://;s/[^a-z0-9 ]//g;s/ */ /g')
|
t1=$(echo "$title1" | tr '[:upper:]' '[:lower:]' | sed 's/^feat:\|^fix:\|^refactor://;s/llm seed[^—]*—\s*//;s/push3 evolution[^—]*—\s*//;s/[^a-z0-9 ]//g;s/ */ /g')
|
||||||
t2=$(echo "$title2" | tr '[:upper:]' '[:lower:]' | sed 's/^feat:\|^fix:\|^refactor://;s/[^a-z0-9 ]//g;s/ */ /g')
|
t2=$(echo "$title2" | tr '[:upper:]' '[:lower:]' | sed 's/^feat:\|^fix:\|^refactor://;s/llm seed[^—]*—\s*//;s/push3 evolution[^—]*—\s*//;s/[^a-z0-9 ]//g;s/ */ /g')
|
||||||
# Count shared words (>60% overlap = suspect)
|
# Count shared words (>60% overlap = suspect)
|
||||||
WORDS1=$(echo "$t1" | tr ' ' '\n' | sort -u)
|
WORDS1=$(echo "$t1" | tr ' ' '\n' | sort -u)
|
||||||
WORDS2=$(echo "$t2" | tr ' ' '\n' | sort -u)
|
WORDS2=$(echo "$t2" | tr ' ' '\n' | sort -u)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue