def test_convert_featurizer_output_shape():
from rasa.nlu.featurizers.convert_featurizer import ConveRTFeaturizer
td = training_data.load_data("data/examples/rasa/demo-rasa.json")
convert_featurizer = ConveRTFeaturizer()
convert_featurizer.train(td, config=None)
text_features_dim = np.array(
[
example.get("text_features").shape[0]
for example in td.intent_examples
if example.get("text_features") is not None
]
)
    response_features_dim = np.array(
        [
            example.get("response_features").shape[0]
            for example in td.intent_examples
            if example.get("response_features") is not None
        ]
    )
    # ConveRT produces 1024-dimensional sentence encodings, so every
    # featurized example should report the same dimensionality
    assert np.all(text_features_dim == 1024)
    assert np.all(response_features_dim == 1024)
def test_dialogflow_data():
td = training_data.load_data("data/examples/dialogflow/")
assert not td.is_empty()
assert len(td.entity_examples) == 5
assert len(td.intent_examples) == 24
assert len(td.training_examples) == 24
assert len(td.lookup_tables) == 2
assert td.intents == {"affirm", "goodbye", "hi", "inform"}
assert td.entities == {"cuisine", "location"}
non_trivial_synonyms = {k: v for k, v in td.entity_synonyms.items() if k != v}
assert non_trivial_synonyms == {
"mexico": "mexican",
"china": "chinese",
"india": "indian",
}
# The order changes based on different computers hence the grouping
assert {td.lookup_tables[0]["name"], td.lookup_tables[1]["name"]} == {
"location",
def test_training_data_conversion(
    tmpdir, data_file, gold_standard_file, output_format, language
):
    # NOTE: the snippet began mid-function; the header above is reconstructed
    # from the parameters used in the body (tmpdir is the pytest fixture)
    out_path = tmpdir.join("rasa_nlu_data.json")
convert_training_data(data_file, out_path.strpath, output_format, language)
td = training_data.load_data(out_path.strpath, language)
assert td.entity_examples != []
assert td.intent_examples != []
gold_standard = training_data.load_data(gold_standard_file, language)
cmp_message_list(td.entity_examples, gold_standard.entity_examples)
cmp_message_list(td.intent_examples, gold_standard.intent_examples)
assert td.entity_synonyms == gold_standard.entity_synonyms
    # convert the converted file back to the original
    # format and perform the same checks
rto_path = tmpdir.join("data_in_original_format.txt")
convert_training_data(out_path.strpath, rto_path.strpath, "json", language)
rto = training_data.load_data(rto_path.strpath, language)
cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
assert gold_standard.entity_synonyms == rto.entity_synonyms
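# cmp_message_list is used above but not shown in this snippet. The following
# is a minimal hypothetical sketch of such a helper, assuming each message
# exposes a .text attribute and an .as_dict() serialization:
def cmp_message_list(first_messages, second_messages):
    assert len(first_messages) == len(second_messages), "unequal message list lengths"
    # compare order-independently by sorting on the message text
    for first, second in zip(
        sorted(first_messages, key=lambda m: m.text),
        sorted(second_messages, key=lambda m: m.text),
    ):
        assert first.as_dict() == second.as_dict()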
def test_count_vector_featurizer_char_intent_featurizer():
from rasa.nlu.featurizers.count_vectors_featurizer import CountVectorsFeaturizer
ftr = CountVectorsFeaturizer({"min_ngram": 1, "max_ngram": 2, "analyzer": "char"})
td = training_data.load_data("data/examples/rasa/demo-rasa.json")
ftr.train(td, config=None)
    intent_features_exist = np.array(
        [
            example.get("intent_features") is not None
            for example in td.intent_examples
        ]
    )
    # the char analyzer applies to message text only, so no intent
    # features should have been set
    assert not any(intent_features_exist)
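    # Complementary check (a sketch, not part of the original test): even with
    # the char analyzer, text features are assumed to be set on every example
    text_features_exist = np.array(
        [example.get("text_features") is not None for example in td.intent_examples]
    )
    assert all(text_features_exist)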
def test_section_value_with_delimiter():
td_section_with_delimiter = training_data.load_data(
"data/test/markdown_single_sections/section_with_delimiter.md"
)
assert td_section_with_delimiter.entity_synonyms == {"10:00 am": "10:00"}
def test_markdown_single_sections():
td_regex_only = training_data.load_data(
"data/test/markdown_single_sections/regex_only.md"
)
assert td_regex_only.regex_features == [{"name": "greet", "pattern": r"hey[^\s]*"}]
td_syn_only = training_data.load_data(
"data/test/markdown_single_sections/synonyms_only.md"
)
assert td_syn_only.entity_synonyms == {"Chines": "chinese", "Chinese": "chinese"}
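# For reference, file contents in Rasa's Markdown training data format that
# would produce the assertions above (a sketch of the assumed test fixtures):
#
#   # regex_only.md
#   ## regex:greet
#   - hey[^\s]*
#
#   # synonyms_only.md
#   ## synonym:chinese
#   - Chines
#   - Chinese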
def train_test(td_file, config_file, model_dir, key="company", noise=0.1):
    """Trains a model on an 80/20 train-test split of the training data,
    perturbing the test split with noise before evaluation."""
td = load_data(td_file)
trainer = Trainer(config.load(config_file))
train, test = td.train_test_split(train_frac=0.8)
test = add_noise(test, key, noise=noise)
trainer.train(train)
tmp_fname = "data/tmp/temp_test.json"
model_loc = trainer.persist(model_dir)
with open(tmp_fname, "w", encoding="utf8") as f:
f.write(test.as_json())
evaluate_model(tmp_fname, model_loc)
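# add_noise is referenced above but not defined in this snippet. A hypothetical
# sketch, assuming the helper corrupts a fraction of the annotated entity
# values for the given key in the test split:
import random

def add_noise(td, key, noise=0.1):
    for example in td.training_examples:
        for entity in example.get("entities", []):
            if entity.get("entity") == key and random.random() < noise:
                # scramble the entity value to simulate noisy annotations
                scrambled = list(entity["value"])
                random.shuffle(scrambled)
                entity["value"] = "".join(scrambled)
    return td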
def convert_training_data(
data_file: Text, out_file: Text, output_format: Text, language: Text
):
if not os.path.exists(data_file):
print_error(
"Data file '{}' does not exist. Provide a valid NLU data file using "
"the '--data' argument.".format(data_file)
)
return
if output_format == "json":
td = training_data.load_data(data_file, language)
output = td.nlu_as_json(indent=2)
elif output_format == "md":
td = training_data.load_data(data_file, language)
output = td.nlu_as_markdown()
else:
print_error(
"Did not recognize output format. Supported output formats: 'json' and "
"'md'. Specify the desired output format with '--format'."
)
return
write_to_file(out_file, output)
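# A minimal usage sketch (paths are hypothetical): convert a Markdown NLU file
# to JSON. Per the branches above, output_format must be "json" or "md".
convert_training_data(
    data_file="data/nlu.md",
    out_file="data/nlu_converted.json",
    output_format="json",
    language="en",
)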
def train_model(td_file, config_file, model_dir):
    """Trains a model using the given training data and config."""
td = load_data(td_file)
trainer = Trainer(config.load(config_file))
trainer.train(td)
# creates model and returns the path to this model for evaluation
model_loc = trainer.persist(model_dir)
return model_loc
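# Usage sketch (paths are hypothetical): train a model and keep the returned
# location for a later evaluation step, mirroring train_test above.
model_location = train_model(
    "data/examples/rasa/demo-rasa.json",
    "config.yml",
    "models/",
)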