Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def setUp(self):
    """Store the expected DDT split sizes and load the dataset before each test."""
    # Expected sizes of the train / dev / test splits; train_len is compared
    # against the number of training sentences in the spaCy test.
    self.train_len = 4383
    self.dev_len = 564
    self.test_len = 565

    self.ddt = DDT()  # Load dataset
def test_ddt_dataset_with_spacy(self):
    """Check that DDT loads as a spaCy ``GoldCorpus`` with the expected
    number of training sentences."""
    # Reuse the dataset instance created in setUp instead of building a
    # second one.
    corpus = self.ddt.load_with_spacy()

    # Index 1 of every train tuple holds that paragraph's sentences; sum
    # their counts lazily rather than materialising intermediate lists.
    num_sents_train = sum(
        len(paragraph[1]) for paragraph in corpus.train_tuples
    )

    self.assertIsInstance(corpus, GoldCorpus)
    self.assertEqual(self.train_len, num_sents_train)
# temporarily omitted due to changes in storage
def is_misc(ent: str) -> bool:
    """Return whether the entity tag ``ent`` ends with ``'MISC'``.

    Args:
        ent: An entity tag string, e.g. ``'B-MISC'`` or ``'O'``.

    Returns:
        ``True`` if the last four characters of ``ent`` are ``'MISC'``;
        ``False`` otherwise, including for tags shorter than four characters.
    """
    # str.endswith is already False for strings shorter than the suffix, so
    # no explicit length guard is required.
    return ent.endswith('MISC')
def remove_miscs(se: list):
    """Replace every MISC entity tag with ``'O'``.

    Args:
        se: A list of sentences, each an iterable of entity tag strings.

    Returns:
        A new list of lists shaped like ``se`` in which each tag that
        ``is_misc`` accepts has been replaced by ``'O'``. ``se`` itself is
        left unmodified.
    """
    return [
        ['O' if is_misc(entity) else entity for entity in entities]
        for entities in se
    ]
# Fetch the DaNE data through the DDT loader using its predefined splits;
# only the third element (bound to `test`) is kept.
_, _, test = DDT().load_as_simple_ner(predefined_splits=True)
sentences_tokens, sentences_entities = test

# Map MISC tags to 'O' so that comparisons are fair
sentences_entities = remove_miscs(sentences_entities)

# Size of the loaded split: total token count and number of sentences
num_tokens = sum(map(len, sentences_tokens))
num_sentences = len(sentences_tokens)
def benchmark_polyglot_mdl():
"""
Running ployglot requires these packages:
# Morfessor==2.0.6
# PyICU==2.4.2
# pycld2==0.41
# polyglot