How to use the danlp.datasets.DSD class in danlp

To help you get started, we’ve selected a few danlp examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alexandrainst / danlp / tests / test_datasets.py View on Github external
def test_dsd(self):
        """Check the length, column names and word count of the DSD dataset."""
        dataset = DSD()
        frame = dataset.load_with_pandas()
        expected_columns = ['word1', 'word2', 'similarity']

        # The loaded frame must have length 99 and exactly these columns, in order.
        self.assertEqual(len(frame), 99)
        self.assertListEqual(list(frame.columns), expected_columns)
        # words() must return a collection of 197 entries.
        self.assertEqual(len(dataset.words()), 197)
github alexandrainst / danlp / examples / benchmarks / wordembeddings_benchmarks.py View on Github external
from danlp.datasets import WordSim353Da, DSD
from danlp.models.embeddings import AVAILABLE_EMBEDDINGS, load_wv_with_gensim
import tabulate


def load_wv_models():
    """Lazily yield ``(name, loaded_model)`` for each entry of AVAILABLE_EMBEDDINGS.

    Each model is loaded with ``load_wv_with_gensim`` only when the consumer
    advances the generator, so a single model is loaded per iteration step.
    """
    for embedding_name in AVAILABLE_EMBEDDINGS:
        word_vectors = load_wv_with_gensim(embedding_name)
        yield embedding_name, word_vectors


# Instantiate the two datasets used below; each exposes words() and file_path.
ws353 = WordSim353Da()
dsd = DSD()

# Result accumulator, presumably one row per model for a final table -- TODO confirm.
data = []

# Run the same two evaluations for every (name, vectors) pair from load_wv_models().
for model_name, wv in load_wv_models():

    # Report DSD words whose lowercased form is missing from the model vocabulary.
    print("DSD words not in vocab of {}: {}".format(model_name, [w for w in dsd.words() if w.lower() not in wv.vocab]))

    # Evaluate on the DSD file, which is read here as tab-delimited.
    correlation_on_dsd = wv.evaluate_word_pairs(dsd.file_path, delimiter="\t")
    # NOTE(review): index 1 is used as the Spearman result and index 2 as the
    # out-of-vocabulary ratio, going by the variable names -- confirm against
    # the gensim evaluate_word_pairs return value.
    spearman_rho_dsd = correlation_on_dsd[1].correlation
    oov_dsd = correlation_on_dsd[2]

    # Same report and evaluation for WordSim353Da, whose file is read as comma-delimited.
    print("WS353 words not in vocab of {}: {}".format(model_name, [w for w in ws353.words() if w.lower() not in wv.vocab]))
    correlation_on_ws353 = wv.evaluate_word_pairs(ws353.file_path, delimiter=',')
    spearman_rho_ws353 = correlation_on_ws353[1].correlation
    oov_ws353 = correlation_on_ws353[2]