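# Imports assumed by the snippets below: the module paths follow DaNLP's
# published layout (danlp.download, danlp.models, danlp.datasets), but they
# are a sketch and may differ between DaNLP, flair and gensim versions.
import spacy
from flair.data import Sentence
from gensim.models.keyedvectors import FastTextKeyedVectors
from danlp.datasets import EuroparlSentiment1
from danlp.download import DEFAULT_CACHE_DIR, MODELS, download_dataset, download_model, _unzip_process_func
from danlp.models import load_flair_ner_model, load_flair_pos_model
from danlp.models.embeddings import AVAILABLE_EMBEDDINGS, AVAILABLE_SUBWORD_EMBEDDINGS, load_wv_with_gensim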
def test_flair_tagger(self):
# Download model beforehand
download_model('flair.ner', DEFAULT_CACHE_DIR, process_func=_unzip_process_func, verbose=True)
print("Downloaded the flair model")
# Load the NER tagger using the DaNLP wrapper
flair_model = load_flair_ner_model()
# Using the flair NER tagger
sentence = Sentence('jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen')
flair_model.predict(sentence)
expected_string = "jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen "
self.assertEqual(sentence.to_tagged_string(), expected_string)
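# Usage sketch, not part of the original test: the predicted entities can also
# be read programmatically as spans instead of via to_tagged_string().
# (get_spans/.tag match the flair 0.4.x API that DaNLP targeted; newer flair
# versions expose labels via get_labels/get_label instead.)
for entity in sentence.get_spans('ner'):
    print(entity.text, entity.tag)  # e.g. "Jens-Peter E. Hansen" tagged PER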
def setUp(self):
# Register a smaller test embedding in the model registry. (The method header
# was truncated in this excerpt and is reconstructed here; the dict's 'url'
# entry was also cut off and is left out rather than guessed.)
MODELS['wiki.da.small.wv'] = {
'vocab_size': 5000,
'dimensions': 300,
'md5_checksum': 'fcaa981a613b325ae4dc61aba235aa82',
'size': 5594508,
'file_extension': '.bin'
}
AVAILABLE_EMBEDDINGS.append('wiki.da.small.wv')
self.embeddings_for_testing = [
'wiki.da.small.wv',
'dslreddit.da.wv'
]
# Let's download the models and unzip them
for emb in self.embeddings_for_testing:
download_model(emb, process_func=_unzip_process_func)
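# Hedged counterpart, not in the original excerpt: removing the injected test
# entry again keeps the module-level registries clean for other test classes.
def tearDown(self):
    del MODELS['wiki.da.small.wv']
    AVAILABLE_EMBEDDINGS.remove('wiki.da.small.wv')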
def test_download_fails_with_wrong_title(self):
with self.assertRaises(ValueError):
download_model('do.not.exists.wv')
with self.assertRaises(ValueError):
download_dataset('do.not.exists.zip')
def test_fasttext_embeddings(self):
# First we will add a smaller test embedding to the model registry
MODELS['ddt.swv'] = {
'url': 'https://danlp.s3.eu-central-1.amazonaws.com/test-models/ddt.swv.zip',
'vocab_size': 5000,
'dimensions': 100,
'md5_checksum': 'c50c61e1b434908e2732c80660abf8bf',
'size': 741125088,
'file_extension': '.bin'
}
AVAILABLE_SUBWORD_EMBEDDINGS.append('ddt.swv')
download_model('ddt.swv', process_func=_unzip_process_func)
fasttext_embeddings = load_wv_with_gensim('ddt.swv')
self.assertEqual(type(fasttext_embeddings), FastTextKeyedVectors)
# The word is not in the vocab
self.assertNotIn('institutmedarbejdskontrakt', fasttext_embeddings.vocab)
# However, we can still get an embedding because of subword units
self.assertEqual(fasttext_embeddings['institutmedarbejdskontrakt'].size, 100)
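# Usage sketch, not part of the original test: gensim's KeyedVectors API can
# also query nearest neighbours, and fastText composes a vector for the
# out-of-vocabulary compound from its subword n-grams before comparing.
print(fasttext_embeddings.most_similar('institutmedarbejdskontrakt', topn=3))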
def test_flair_tagger(self):
# Download model beforehand
download_model('flair.pos', DEFAULT_CACHE_DIR, process_func=_unzip_process_func, verbose=True)
print("Downloaded the flair model")
# Load the POS tagger using the DaNLP wrapper
flair_model = load_flair_pos_model()
# Using the flair POS tagger
sentence = Sentence('jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen')
flair_model.predict(sentence)
expected_string = "jeg hopper på en bil som er " \
" rød sammen med Jens-Peter E. Hansen "
self.assertEqual(sentence.to_tagged_string(), expected_string)
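# Usage sketch, not part of the original test: the prediction can also be read
# per token (get_tag matches the flair 0.4.x API; newer versions use get_label).
for token in sentence.tokens:
    print(token.text, token.get_tag('pos').value)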
def test_download(self):
# Download model beforehand
model_path = download_model('spacy', DEFAULT_CACHE_DIR,
process_func=_unzip_process_func,
verbose=True)
info = spacy.info(model_path)
self.assertListEqual(info['pipeline'], ['tagger', 'parser', 'ner'])
self.assertEqual(info['lang'], 'da')
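# Usage sketch, not part of the original test: the downloaded directory is a
# regular spaCy model and can be loaded and run directly.
nlp = spacy.load(model_path)
doc = nlp('jeg hopper på en bil som er rød')
print([(token.text, token.pos_) for token in doc])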
def test_europarlsentiment1(self):
eusent = EuroparlSentiment1()
df = eusent.load_with_pandas()
self.assertEqual(len(df), 184)
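# Usage sketch, not part of the original test: load_with_pandas returns a
# regular pandas DataFrame, so the sentences and sentiment scores can be
# inspected with the usual accessors.
print(df.head())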