def test_flair_tagger(self):
# Download model beforehand
download_model('flair.ner', DEFAULT_CACHE_DIR, process_func=_unzip_process_func, verbose=True)
print("Downloaded the flair model")
# Load the NER tagger using the DaNLP wrapper
flair_model = load_flair_ner_model()
# Use the flair NER tagger on an example sentence
sentence = Sentence('jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen')
flair_model.predict(sentence)
expected_string = "jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen "
self.assertEqual(sentence.to_tagged_string(), expected_string)
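# A minimal usage sketch of the same wrapper outside the test harness,
# assuming load_flair_ner_model and flair's Sentence are importable as in
# the test module above:
from danlp.models import load_flair_ner_model
from flair.data import Sentence

tagger = load_flair_ner_model()  # downloads 'flair.ner' on first call
sent = Sentence('Mette Frederiksen bor i København')
tagger.predict(sent)
print(sent.to_tagged_string())   # tokens interleaved with their NER tags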
def test_download_fails_with_wrong_title(self):
with self.assertRaises(ValueError):
download_model('do.not.exists.wv')
with self.assertRaises(ValueError):
download_dataset('do.not.exists.zip')
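# Sketch: the ValueError above comes from the name lookup in the model
# registry, so a caller can guard with a membership test instead of
# catching the exception (the registry import path is an assumption):
from danlp.download import MODELS

name = 'wiki.da.wv'  # assumed registry key, shown only for illustration
if name in MODELS:
    download_model(name)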
def test_fasttext_embeddings(self):
# First we add a smaller set of test embeddings to the registry of available models
MODELS['ddt.swv'] = {
'url': 'https://danlp.s3.eu-central-1.amazonaws.com/test-models/ddt.swv.zip',
'vocab_size': 5000,
'dimensions': 100,
'md5_checksum': 'c50c61e1b434908e2732c80660abf8bf',
'size': 741125088,
'file_extension': '.bin'
}
AVAILABLE_SUBWORD_EMBEDDINGS.append('ddt.swv')
download_model('ddt.swv', process_func=_unzip_process_func)
fasttext_embeddings = load_wv_with_gensim('ddt.swv')
self.assertEqual(type(fasttext_embeddings), FastTextKeyedVectors)
# The word is not in the vocab
self.assertNotIn('institutmedarbejdskontrakt', fasttext_embeddings.vocab)
# However we can get an embedding because of subword units
self.assertEqual(fasttext_embeddings['institutmedarbejdskontrakt'].size, 100)
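# Sketch of using the loaded subword vectors beyond the assertions above:
# any gensim FastTextKeyedVectors instance supports vector lookup for
# out-of-vocabulary words and similarity queries (the words are illustrative).
vector = fasttext_embeddings['arbejdsmarkedsuddannelse']  # OOV, composed from character n-grams
neighbours = fasttext_embeddings.most_similar('bil', topn=3)
print(vector.shape, neighbours)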
def __init__(self, cache_dir=DEFAULT_CACHE_DIR, verbose=False):
from transformers import BertTokenizer, BertForSequenceClassification
# Download the pretrained models if they are not cached, and get their local paths
path_emotion = download_model('bert.emotion', cache_dir,
process_func=_unzip_process_func,
verbose=verbose)
path_emotion = os.path.join(path_emotion, 'bert.emotion')
path_reject = download_model('bert.noemotion', cache_dir,
process_func=_unzip_process_func,
verbose=verbose)
path_reject = os.path.join(path_reject, 'bert.noemotion')
# load the models
self.tokenizer_reject = BertTokenizer.from_pretrained(path_reject)
self.model_reject = BertForSequenceClassification.from_pretrained(path_reject)
self.tokenizer = BertTokenizer.from_pretrained(path_emotion)
self.model = BertForSequenceClassification.from_pretrained(path_emotion)
# load the class names mapping
self.categories = {5: 'Foragt/Modvilje', 2: 'Forventning/Interesse',
0: 'Glæde/Sindsro', 3: 'Overrasket/Målløs',
1: 'Tillid/Accept',
4: 'Vrede/Irritation', 6: 'Sorg/Trist',
7: 'Frygt/Bekymret'}
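# Hedged sketch of how two-stage prediction could work with the models
# loaded above: the 'bert.noemotion' model first decides whether the text
# carries emotion at all, and only then does the emotion model pick one of
# the eight classes. This helper is illustrative, not the library's API;
# the meaning of the reject label index and transformers v4-style outputs
# (.logits) are assumptions.
import torch

def predict_emotion(clf, sentence: str) -> str:
    inputs = clf.tokenizer_reject(sentence, return_tensors='pt')
    with torch.no_grad():
        reject = clf.model_reject(**inputs).logits.argmax(dim=-1).item()
    if reject == 1:  # assumption: index 1 means 'no emotion'
        return 'No emotion'
    inputs = clf.tokenizer(sentence, return_tensors='pt')
    with torch.no_grad():
        pred = clf.model(**inputs).logits.argmax(dim=-1).item()
    return clf.categories[pred]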
def __init__(self, cache_dir=DEFAULT_CACHE_DIR, verbose=False):
from transformers import AutoModelForTokenClassification
from transformers import AutoTokenizer
# Download the pretrained model if it is not cached, and get its local path
weights_path = download_model('bert.ner', cache_dir,
process_func=_unzip_process_func,
verbose=verbose)
self.label_list = ["O", "B-MISC", "I-MISC", "B-PER", "I-PER", "B-ORG",
"I-ORG", "B-LOC", "I-LOC"]
self.model = AutoModelForTokenClassification.from_pretrained(weights_path)
self.tokenizer = AutoTokenizer.from_pretrained(weights_path)
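# Hedged sketch of token classification with the model loaded above; the
# decoding is simplified (no wordpiece re-alignment) and only illustrates
# the flow from text to BIO labels. transformers v4-style outputs assumed.
import torch

def tag_tokens(ner, sentence: str):
    inputs = ner.tokenizer(sentence, return_tensors='pt')
    with torch.no_grad():
        logits = ner.model(**inputs).logits
    predictions = logits.argmax(dim=-1)[0].tolist()
    tokens = ner.tokenizer.convert_ids_to_tokens(inputs['input_ids'][0].tolist())
    return [(tok, ner.label_list[p]) for tok, p in zip(tokens, predictions)]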
:param direction: 'fwd', 'bwd' or 'bi' for forward, backward or stacked bidirectional flair embeddings
:param word_embeddings: if True, the Danish fastText word embeddings are stacked as well
:param cache_dir: the directory for storing cached models
:param verbose: whether to print download progress
"""
from flair.embeddings import FlairEmbeddings
from flair.embeddings import WordEmbeddings
from flair.embeddings import StackedEmbeddings
embeddings = []
if word_embeddings:
fasttext_embedding = WordEmbeddings('da')
embeddings.append(fasttext_embedding)
if direction == 'bi' or direction == 'fwd':
fwd_weight_path = download_model('flair.fwd', cache_dir,
verbose=verbose,
process_func=_unzip_process_func)
embeddings.append(FlairEmbeddings(fwd_weight_path))
if direction == 'bi' or direction == 'bwd':
bwd_weight_path = download_model('flair.bwd', cache_dir,
verbose=verbose,
process_func=_unzip_process_func)
embeddings.append(FlairEmbeddings(bwd_weight_path))
if len(embeddings) == 1:
return embeddings[0]
return StackedEmbeddings(embeddings=embeddings)
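# Usage sketch for the loader above. The enclosing function's name is not
# visible in this excerpt; it is assumed here to be DaNLP's
# load_context_embeddings_with_flair.
from flair.data import Sentence

embeddings = load_context_embeddings_with_flair(direction='bi', word_embeddings=True)
sent = Sentence('det var en rigtig god bog')
embeddings.embed(sent)          # writes vectors onto each token in-place
print(sent[0].embedding.shape)  # concatenation of the stacked embeddings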
spacy_model_dir = os.path.join(cache_dir, pretrained_embedding + ".spacy")
if os.path.isdir(spacy_model_dir):
# Return spaCy model if spaCy model dir exists
return spacy.load(spacy_model_dir)
bin_file_path = os.path.join(cache_dir, pretrained_embedding + ".bin")
if os.path.isfile(bin_file_path):
# The raw .bin embeddings are already cached; skip the download and
# convert them to a spaCy model directly. The process function expects
# the temporary-file path that download_model would normally hand it.
model_info = MODELS[pretrained_embedding]
model_info['name'] = pretrained_embedding
_process_embeddings_for_spacy(bin_file_path[:-4] + ".tmp", model_info)
else:
# No local copy: download and convert to a spaCy model in one step
download_model(pretrained_embedding, cache_dir,
_process_embeddings_for_spacy, verbose=True,
file_extension='.spacy')
return spacy.load(spacy_model_dir)
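# Usage sketch for the spaCy loader above, assuming it is exposed as
# load_with_spacy and that 'wiki.da.wv' is a registered embedding name
# (both are assumptions, not visible in this excerpt):
nlp = load_with_spacy('wiki.da.wv')
doc = nlp('jeg kører i en rød bil')
print(doc[5].text, doc[5].vector[:5])  # the token's embedding vector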