Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
for key, freq in vocab.items():
try:
term, sense = split_key(key)
except ValueError:
continue
term = term.split("_")[-1]
by_word[term.lower()].append((freq, key))
too_similar = []
for values in by_word.values():
if len(values) >= 2:
values.sort(reverse=True)
freq1, key1 = values[0]
vector1 = vectors[key1]
for freq2, key2 in values[1:]:
vector2 = vectors[key2]
sim = cosine_similarity(vector1, vector2)
if sim >= (1 - min_distance):
too_similar.append(key2)
return too_similar