Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_dsd(self):
    # Build the DSD dataset wrapper and read it through its pandas loader.
    dataset = DSD()
    frame = dataset.load_with_pandas()

    # The loaded table must have 99 rows and exactly these three columns.
    expected_columns = ['word1', 'word2', 'similarity']
    self.assertEqual(len(frame), 99)
    self.assertListEqual(list(frame.columns), expected_columns)

    # words() is expected to return a collection of 197 entries.
    self.assertEqual(len(dataset.words()), 197)
from danlp.datasets import WordSim353Da, DSD
from danlp.models.embeddings import AVAILABLE_EMBEDDINGS, load_wv_with_gensim
import tabulate
def load_wv_models():
    """Lazily yield ``(name, model)`` pairs for every available embedding.

    Each name in ``AVAILABLE_EMBEDDINGS`` is handed to
    ``load_wv_with_gensim`` only when the consumer requests the next item,
    so a single model is loaded per iteration step.
    """
    for embedding_name in AVAILABLE_EMBEDDINGS:
        loaded_vectors = load_wv_with_gensim(embedding_name)
        yield embedding_name, loaded_vectors
# Benchmark script: run every available word-embedding model against the two
# word-pair datasets (WS353 and DSD) via `evaluate_word_pairs`.
ws353 = WordSim353Da()
dsd = DSD()
# Not filled within the lines visible here -- presumably collects per-model
# result rows further down; confirm against the rest of the script.
data = []
for model_name, wv in load_wv_models():
# Report the DSD words whose lowercased form is missing from the model vocab.
# NOTE(review): `wv.vocab` looks like the pre-4.0 gensim KeyedVectors API --
# confirm against the pinned gensim version.
print("DSD words not in vocab of {}: {}".format(model_name, [w for w in dsd.words() if w.lower() not in wv.vocab]))
# The DSD pair file is read as tab-delimited.
correlation_on_dsd = wv.evaluate_word_pairs(dsd.file_path, delimiter="\t")
# Index 1 is used as the Spearman result and index 2 as the OOV figure --
# presumably gensim's (pearson, spearman, oov_ratio) return tuple; confirm.
spearman_rho_dsd = correlation_on_dsd[1].correlation
oov_dsd = correlation_on_dsd[2]
# Same report and evaluation for WS353, whose pair file is comma-delimited.
print("WS353 words not in vocab of {}: {}".format(model_name, [w for w in ws353.words() if w.lower() not in wv.vocab]))
correlation_on_ws353 = wv.evaluate_word_pairs(ws353.file_path, delimiter=',')
spearman_rho_ws353 = correlation_on_ws353[1].correlation
oov_ws353 = correlation_on_ws353[2]