from typing import Tuple

from flair.data import TaggedCorpus
from flair.data_fetcher import NLPTask, NLPTaskDataFetcher
from flair.embeddings import DocumentRNNEmbeddings, WordEmbeddings
from flair.models.text_regression_model import TextRegressor
from flair.trainers import ModelTrainer

def init(tasks_base_path) -> Tuple[TaggedCorpus, TextRegressor, ModelTrainer]:
    # load the regression corpus and build document embeddings:
    # a small RNN over GloVe word embeddings
    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.REGRESSION, tasks_base_path)
    glove_embedding: WordEmbeddings = WordEmbeddings("glove")
    document_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [glove_embedding], 128, 1, False, 64, False, False
    )
    model = TextRegressor(document_embeddings)
    trainer = ModelTrainer(model, corpus)
    return corpus, model, trainer
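A minimal usage sketch (not part of the original test; the dataset path, output directory, and hyperparameters are illustrative assumptions): the returned trainer can be run directly on the loaded corpus.

corpus, model, trainer = init("resources/tasks")  # hypothetical dataset location
trainer.train(
    "results/regression",  # output directory (assumption)
    learning_rate=0.1,
    mini_batch_size=16,
    max_epochs=3,
)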
from flair.data import Sentence, segtok_tokenizer

def test_sentence_to_original_text():
    # to_original_text() should reproduce the input exactly,
    # both with and without the segtok tokenizer
    text = ": nation on"
    sentence = Sentence(text, use_tokenizer=segtok_tokenizer)
    assert text == sentence.to_original_text()

    text = "I love Berlin."
    sentence = Sentence(text)
    assert text == sentence.to_original_text()

    text = 'Schartau sagte dem " Tagesspiegel " vom Freitag , Fischer sei " in einer Weise aufgetreten , die alles andere als überzeugend war " .'
    sentence = Sentence(text)
    assert text == sentence.to_original_text()

    sentence = Sentence(text, use_tokenizer=segtok_tokenizer)
    assert text == sentence.to_original_text()
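For contrast, an illustrative sketch (not one of the original assertions): to_tokenized_string() joins tokens with single spaces, so it differs from the exact input that to_original_text() recovers.

sentence = Sentence("I love Berlin.", use_tokenizer=segtok_tokenizer)
print(sentence.to_tokenized_string())  # 'I love Berlin .'
print(sentence.to_original_text())     # 'I love Berlin.'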
import shutil
from pathlib import Path

from flair.data import Dictionary, Sentence
from flair.embeddings import CharLMEmbeddings
from flair.models import LanguageModel
from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus

# load the default character dictionary
dictionary: Dictionary = Dictionary.load('chars')

# init forward LM with 128 hidden states and 1 layer
language_model: LanguageModel = LanguageModel(dictionary, is_forward_lm=True, hidden_size=128, nlayers=1)

# get the example corpus and process at character level in forward direction
corpus: TextCorpus = TextCorpus(str(Path(__file__).parent / 'resources/corpora/lorem_ipsum'),
                                dictionary,
                                language_model.is_forward_lm,
                                character_level=True)

# train the language model
trainer: LanguageModelTrainer = LanguageModelTrainer(language_model, corpus)
trainer.train('./results', sequence_length=10, mini_batch_size=10, max_epochs=5)

# use the character LM as embeddings to embed the example sentence 'I love Berlin'
char_lm_embeddings = CharLMEmbeddings('./results/best-lm.pt')
sentence = Sentence('I love Berlin')
char_lm_embeddings.embed(sentence)
print(sentence[1].embedding.size())

# clean up results directory
shutil.rmtree('./results', ignore_errors=True)
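A follow-on sketch (my addition, assuming the snippet above has run and char_lm_embeddings is still in memory): the trained character LM can be combined with classic word embeddings via StackedEmbeddings.

from flair.embeddings import StackedEmbeddings, WordEmbeddings

stacked = StackedEmbeddings([WordEmbeddings('glove'), char_lm_embeddings])
sentence = Sentence('I love Berlin')
stacked.embed(sentence)
# each token now carries the GloVe dimensions plus the char-LM hidden states
print(sentence[1].embedding.size())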
from flair.data import Sentence
from flair.embeddings import CamembertEmbeddings

def embed_sentence(
    sentence: str,
    pooling_operation: str,
    layers: str = "1",
    use_scalar_mix: bool = False,
) -> Sentence:
    # embed a single sentence with CamemBERT embeddings from the given layer(s)
    embeddings = CamembertEmbeddings(
        pretrained_model_name_or_path=camembert_model,
        layers=layers,
        pooling_operation=pooling_operation,
        use_scalar_mix=use_scalar_mix,
    )
    flair_sentence = Sentence(sentence)
    embeddings.embed(flair_sentence)
    return flair_sentence
from flair.data import Sentence
from flair.embeddings import OpenAIGPTEmbeddings

def embed_sentence(
    sentence: str,
    pooling_operation: str,
    layers: str = "1",
    use_scalar_mix: bool = False,
) -> Sentence:
    # embed a single sentence with OpenAI GPT embeddings from the given layer(s)
    embeddings = OpenAIGPTEmbeddings(
        pretrained_model_name_or_path=gpt_model,
        layers=layers,
        pooling_operation=pooling_operation,
        use_scalar_mix=use_scalar_mix,
    )
    flair_sentence = Sentence(sentence)
    embeddings.embed(flair_sentence)
    return flair_sentence
from flair.data import Sentence
from flair.embeddings import RoBERTaEmbeddings

def embed_sentence(
    sentence: str,
    pooling_operation: str,
    layers: str = "1",
    use_scalar_mix: bool = False,
) -> Sentence:
    # embed a single sentence with RoBERTa embeddings from the given layer(s)
    embeddings = RoBERTaEmbeddings(
        pretrained_model_name_or_path=roberta_model,
        layers=layers,
        pooling_operation=pooling_operation,
        use_scalar_mix=use_scalar_mix,
    )
    flair_sentence = Sentence(sentence)
    embeddings.embed(flair_sentence)
    return flair_sentence
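A usage sketch for the helper above (my example, assuming roberta_model names a valid checkpoint such as 'roberta-base'): embed one sentence and inspect the per-token vectors.

embedded = embed_sentence("I love Berlin .", pooling_operation="first")
for token in embedded:
    print(token.text, token.embedding.size())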
import flair.datasets
from flair.data import Sentence, segtok_tokenizer

def test_sentence_to_real_string(tasks_base_path):
    sentence: Sentence = Sentence("I love Berlin.", use_tokenizer=segtok_tokenizer)
    assert "I love Berlin." == sentence.to_plain_string()

    corpus = flair.datasets.GERMEVAL(base_path=tasks_base_path)

    sentence = corpus.train[0]
    assert (
        'Schartau sagte dem " Tagesspiegel " vom Freitag , Fischer sei " in einer Weise aufgetreten , die alles andere als überzeugend war " .'
        == sentence.to_tokenized_string()
    )
    assert (
        'Schartau sagte dem "Tagesspiegel" vom Freitag, Fischer sei "in einer Weise aufgetreten, die alles andere als überzeugend war".'
        == sentence.to_plain_string()
    )
from flair.data import Sentence
from flair.embeddings import OpenAIGPT2Embeddings

def embed_sentence(
    sentence: str,
    pooling_operation: str,
    layers: str = "1",
    use_scalar_mix: bool = False,
) -> Sentence:
    # embed a single sentence with GPT-2 embeddings from the given layer(s)
    embeddings = OpenAIGPT2Embeddings(
        pretrained_model_name_or_path=gpt_model,
        layers=layers,
        pooling_operation=pooling_operation,
        use_scalar_mix=use_scalar_mix,
    )
    flair_sentence = Sentence(sentence)
    embeddings.embed(flair_sentence)
    return flair_sentence
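The four helpers above differ only in which embedding class they construct. A consolidation sketch of my own (embed_sentence_with and its parameters are hypothetical names, not part of the original code) passes the class and model name in instead:

from typing import Type

from flair.data import Sentence
from flair.embeddings import TokenEmbeddings

def embed_sentence_with(
    embedding_class: Type[TokenEmbeddings],
    model_name: str,
    sentence: str,
    pooling_operation: str,
    layers: str = "1",
    use_scalar_mix: bool = False,
) -> Sentence:
    # build the requested transformer embeddings and embed one sentence
    embeddings = embedding_class(
        pretrained_model_name_or_path=model_name,
        layers=layers,
        pooling_operation=pooling_operation,
        use_scalar_mix=use_scalar_mix,
    )
    flair_sentence = Sentence(sentence)
    embeddings.embed(flair_sentence)
    return flair_sentence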