# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_text_corpus_character_level_loading(
    resources_path, results_base_path, tasks_base_path
):
    """Check that TextCorpus loads all three splits of the example corpus.

    NOTE(review): this function was originally also named
    ``test_train_resume_language_model_training`` — identical to the test
    defined immediately below it in this file, so the later definition
    shadowed this one and pytest never collected it. Renamed to match what
    it actually exercises (character-level corpus loading).
    """
    # get default character dictionary
    dictionary: Dictionary = Dictionary.load("chars")

    # get the example corpus and process at character level in forward direction
    corpus: TextCorpus = TextCorpus(
        resources_path / "corpora/lorem_ipsum",
        dictionary,
        forward=True,
        character_level=True,
    )

    # all three splits must be present; the example corpus has two train files
    assert corpus.test is not None
    assert corpus.train is not None
    assert corpus.valid is not None
    assert len(corpus.train) == 2
def test_train_resume_language_model_training(
    resources_path, results_base_path, tasks_base_path
):
    # NOTE(review): this test is truncated in this chunk — the
    # ``LanguageModelTrainer(`` call on the last line is never closed, and the
    # actual train / checkpoint / resume steps implied by the test name are
    # missing. Restore the remainder before relying on this test.

    # get default dictionary
    dictionary: Dictionary = Dictionary.load("chars")

    # init forward LM with 128 hidden states and 1 layer
    language_model: LanguageModel = LanguageModel(
        dictionary, is_forward_lm=True, hidden_size=128, nlayers=1
    )

    # get the example corpus and process at character level in forward direction
    corpus: TextCorpus = TextCorpus(
        resources_path / "corpora/lorem_ipsum",
        dictionary,
        language_model.is_forward_lm,
        character_level=True,
    )

    # train the language model
    trainer: LanguageModelTrainer = LanguageModelTrainer(
def test_dictionary_get_idx_for_item():
    """A known item maps back to its 0-based insertion index (no <unk> entry)."""
    vocab: Dictionary = Dictionary(add_unk=False)
    for label in ("class_1", "class_2", "class_3"):
        vocab.add_item(label)

    # second item added -> index 1
    assert vocab.get_idx_for_item("class_2") == 1
def test_dictionary_get_items_without_unk():
    """Items come back exactly as inserted when no <unk> symbol is added."""
    labels = ["class_1", "class_2", "class_3"]

    vocab: Dictionary = Dictionary(add_unk=False)
    for label in labels:
        vocab.add_item(label)

    retrieved = vocab.get_items()
    assert len(retrieved) == 3
    # same items, same insertion order
    for expected, actual in zip(labels, retrieved):
        assert expected == actual
def test_dictionary_save_and_load():
    """Round-trip: a saved dictionary reloads with the same size and items.

    Fix: the cleanup ``os.remove`` now runs in a ``finally`` block, so the
    temporary ``dictionary.txt`` file is removed even when an assertion
    fails (previously a failing assert skipped the cleanup and left the
    file behind, polluting later test runs).
    """
    dictionary: Dictionary = Dictionary(add_unk=False)
    dictionary.add_item("class_1")
    dictionary.add_item("class_2")
    dictionary.add_item("class_3")

    file_path = "dictionary.txt"
    dictionary.save(file_path)
    try:
        loaded_dictionary = dictionary.load_from_file(file_path)

        assert len(dictionary) == len(loaded_dictionary)
        assert len(dictionary.get_items()) == len(loaded_dictionary.get_items())
    finally:
        # clean up file even if an assertion above failed
        os.remove(file_path)
def test_train_language_model(results_base_path, resources_path):
    # NOTE(review): this test is truncated in this chunk — the
    # ``LanguageModelTrainer(`` call on the last line is never closed and the
    # training / assertion section is missing. Restore the remainder before
    # relying on this test.

    # get default dictionary
    dictionary: Dictionary = Dictionary.load("chars")

    # init forward LM with 128 hidden states and 1 layer
    language_model: LanguageModel = LanguageModel(
        dictionary, is_forward_lm=True, hidden_size=128, nlayers=1
    )

    # get the example corpus and process at character level in forward direction
    corpus: TextCorpus = TextCorpus(
        resources_path / "corpora/lorem_ipsum",
        dictionary,
        language_model.is_forward_lm,
        character_level=True,
    )

    # train the language model
    trainer: LanguageModelTrainer = LanguageModelTrainer(
def __init__(self, document_embeddings: flair.embeddings.DocumentEmbeddings):
    """Build a regression head on top of the given document embeddings.

    Delegates to the base text classifier with an empty label dictionary
    and single-label mode, then replaces the classification loss with
    mean-squared error for regression targets.
    """
    empty_label_dictionary = flair.data.Dictionary()
    super(TextRegressor, self).__init__(
        document_embeddings=document_embeddings,
        label_dictionary=empty_label_dictionary,
        multi_label=False,
    )

    log.info("Using REGRESSION - experimental")

    # regression objective: mean squared error instead of classification loss
    self.loss_function = nn.MSELoss()
# NOTE(review): truncated fragment — the enclosing ``def __init__(`` header is
# missing from this chunk. The parameter list and body below belong to the
# constructor of a character-level embedding module (bidirectional char LSTM).
self,
path_to_char_dict: str = None,  # path to a saved Dictionary file; None -> built-in "common-chars"
char_embedding_dim: int = 25,   # size of each character embedding vector
hidden_size_char: int = 25,     # hidden size of the char LSTM (per direction)
):
    """Uses the default character dictionary if none provided."""
    super().__init__()
    self.name = "Char"
    # embeddings are recomputed each time rather than cached
    self.static_embeddings = False

    # use list of common characters if none provided
    if path_to_char_dict is None:
        self.char_dictionary: Dictionary = Dictionary.load("common-chars")
    else:
        self.char_dictionary: Dictionary = Dictionary.load_from_file(
            path_to_char_dict
        )

    self.char_embedding_dim: int = char_embedding_dim
    self.hidden_size_char: int = hidden_size_char

    # one embedding row per character in the dictionary
    self.char_embedding = torch.nn.Embedding(
        len(self.char_dictionary.item2idx), self.char_embedding_dim
    )
    # bidirectional LSTM over the character sequence of each token
    self.char_rnn = torch.nn.LSTM(
        self.char_embedding_dim,
        self.hidden_size_char,
        num_layers=1,
        bidirectional=True,
    )

    # output length = forward + backward hidden states
    self.__embedding_length = self.char_embedding_dim * 2