Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Fit using a custom set of features, including a custom feature extractor.
This is only for advanced users.
>>> clf.fit(features={
'in-gaz': {},  # gazetteer features
'contrived': lambda exa, res: {'contrived': len(exa.text) == 26}
})
"""
# create model with given params
model_config = self._get_model_config(**kwargs)
model = create_model(model_config)
if not label_set:
label_set = model_config.train_label_set
label_set = label_set if label_set else DEFAULT_TRAIN_SET_REGEX
new_hash = self._get_model_hash(model_config, queries, label_set)
cached_model = self._resource_loader.hash_to_model_path.get(new_hash)
if incremental_timestamp and cached_model:
logger.info("No need to fit. Loading previous model.")
self.load(cached_model)
return
queries, classes = self._get_queries_and_labels(queries, label_set)
if not queries:
logger.warning(
"Could not fit model since no relevant examples were found. "
'Make sure the labeled queries for training are placed in "%s" '
"files in your MindMeld project.",
"model_settings": {"classifier_type": "logreg"},
"params": {"fit_intercept": True, "C": 100},
"features": {
"bag-of-words": {"lengths": [1]},
"freq": {"bins": 5},
"length": {},
},
}
)
model = TextModel(config)
examples = [q.query for q in self.labeled_data]
labels = [q.intent for q in self.labeled_data]
model.initialize_resources(resource_loader, examples, labels)
model.fit(examples, labels)
assert model.predict([markup.load_query("hi").query]) == "greet"
assert model.predict([markup.load_query("bye").query]) == "exit"
def test_load_special_chars_5(query_factory):
    """Tests loading a query with special characters.

    The markup contains a free-standing comma and nests a
    ``{8pm|sys_time}`` annotation inside an outer ``|range`` annotation.
    The test checks that exactly one top-level entity is loaded and that
    its span and normalized text both refer to "8pm".
    """
    text = "what christmas movies are , showing at {{8pm|sys_time}|range}"

    processed_query = markup.load_query(text, query_factory)

    assert len(processed_query.entities) == 1

    entity = processed_query.entities[0]

    # Spans are offsets into the markup-free text with an inclusive end.
    # "what christmas movies are , showing at " is 39 characters long, so
    # "8pm" occupies offsets 39..41 -- the previously asserted 42..44 did not
    # match this literal.
    # NOTE(review): if the literal was meant to contain additional
    # whitespace, restore it and adjust this span together.
    assert entity.span == Span(39, 41)
    assert entity.normalized_text == "8pm"
def test_dump_role(query_factory):
    """Tests dumping a basic query with an entity with a role.

    Two ``sys_time`` entities carrying different roles are attached to the
    query, and three dump variants are checked:

    * default: entity type and role are both written out;
    * ``no_role=True``: only the entity type is written out;
    * ``no_role=True, no_entity=True``: the plain query text comes back.
    """
    query_text = "What stores are open between 3 and 5"
    query = query_factory.create_query(query_text)
    entities = [
        # "3" sits at offset 29 and "5" at offset 35 of the query text.
        QueryEntity.from_query(
            query, Span(29, 29), entity_type="sys_time", role="open_hours"
        ),
        QueryEntity.from_query(
            query, Span(35, 35), entity_type="sys_time", role="close_hours"
        ),
    ]
    processed_query = ProcessedQuery(query, entities=entities)

    markup_text = (
        "What stores are open between {3|sys_time|open_hours} and "
        "{5|sys_time|close_hours}"
    )
    entity_text = "What stores are open between {3|sys_time} and {5|sys_time}"

    assert markup.dump_query(processed_query) == markup_text
    assert markup.dump_query(processed_query, no_role=True) == entity_text
    # The parenthesised assert was left unterminated, which made the rest of
    # the file unparsable; the expression itself is unchanged.
    assert (
        markup.dump_query(processed_query, no_role=True, no_entity=True) == query_text
    )
def test_dump_group_nested(query_factory):
"""Tests dumping a query with nested entity groups"""
query_text = "Order one large Tesora with medium cream and medium sugar"
query = query_factory.create_query(query_text)
entities = [
QueryEntity.from_query(query, Span(6, 8), entity_type="quantity"),
QueryEntity.from_query(query, Span(10, 14), entity_type="size"),
QueryEntity.from_query(query, Span(16, 21), entity_type="product"),
QueryEntity.from_query(query, Span(28, 33), entity_type="size"),
QueryEntity.from_query(query, Span(35, 39), entity_type="option"),
QueryEntity.from_query(query, Span(45, 50), entity_type="size"),
QueryEntity.from_query(query, Span(52, 56), entity_type="option"),
]
entities[4] = entities[4].with_children((entities[3],))
entities[6] = entities[6].with_children((entities[5],))
entities[2] = entities[2].with_children(
(entities[0], entities[1], entities[4], entities[6])
)
processed_query = ProcessedQuery(query, entities=entities)
markup_text = (
"Order [{one|quantity} {large|size} {Tesora|product} with [{medium|size} "
"{cream|option}|option] and [{medium|size} {sugar|option}|option]|product]"
def test_dump_group_nested_2(query_factory):
"""Tests dumping a query with nested entity groups"""
query_text = "Can I get one curry sauce with my rice ball with house salad"
query = query_factory.create_query(query_text)
entities = [
QueryEntity.from_query(
query, Span(10, 12), entity_type="sys_number", role="quantity"
),
QueryEntity.from_query(query, Span(14, 24), entity_type="option"),
QueryEntity.from_query(query, Span(34, 59), entity_type="dish"),
]
entities[1] = entities[1].with_children((entities[0],))
entities[2] = entities[2].with_children((entities[1],))
processed_query = ProcessedQuery(query, entities=entities)
markup_text = (
"Can I get [[{one|sys_number|quantity} {curry sauce|option}|option] "
"with my {rice ball with house salad|dish}|dish]"
)
entity_text = (
"Can I get {one|sys_number|quantity} {curry sauce|option} "
"with my {rice ball with house salad|dish}"
def test_dump_multi_nested(query_factory):
    """Tests dumping a query with multiple nested system entities.

    A single ``price`` entity covering offsets 15..51 of the query carries
    two nested ``sys_number`` children. The dumped markup is expected to be
    identical with and without ``no_group=True``.
    """
    query_text = "show me houses between 600,000 and 1,000,000 dollars"
    query = query_factory.create_query(query_text)

    # Child spans are passed together with parent_offset=15, which is the
    # start of the parent span used below.
    nested_numbers = [
        NestedEntity.from_query(
            query, Span(first, last), parent_offset=15, entity_type="sys_number"
        )
        for first, last in ((8, 14), (20, 28))
    ]

    # NOTE(review): this entity text places "dollars" differently from the
    # query slice 15..51 ("between 600,000 and 1,000,000 dollars") -- confirm
    # whether that is intentional.
    price = Entity(
        "between 600,000 dollars and 1,000,000",
        "price",
        value={"children": nested_numbers},
    )
    price_in_query = QueryEntity.from_query(query, Span(15, 51), entity=price)
    processed_query = ProcessedQuery(query, entities=[price_in_query])

    expected = (
        "show me houses {between {600,000|sys_number} and "
        "{1,000,000|sys_number} dollars|price}"
    )

    # Same expectation for the default dump and for the group-less dump.
    for dump_options in ({}, {"no_group": True}):
        assert markup.dump_query(processed_query, **dump_options) == expected
def test_dump_group_nested(query_factory):
"""Tests dumping a query with nested entity groups"""
query_text = "Order one large Tesora with medium cream and medium sugar"
query = query_factory.create_query(query_text)
entities = [
QueryEntity.from_query(query, Span(6, 8), entity_type="quantity"),
QueryEntity.from_query(query, Span(10, 14), entity_type="size"),
QueryEntity.from_query(query, Span(16, 21), entity_type="product"),
QueryEntity.from_query(query, Span(28, 33), entity_type="size"),
QueryEntity.from_query(query, Span(35, 39), entity_type="option"),
QueryEntity.from_query(query, Span(45, 50), entity_type="size"),
QueryEntity.from_query(query, Span(52, 56), entity_type="option"),
]
entities[4] = entities[4].with_children((entities[3],))
entities[6] = entities[6].with_children((entities[5],))
entities[2] = entities[2].with_children(
(entities[0], entities[1], entities[4], entities[6])
)
processed_query = ProcessedQuery(query, entities=entities)
markup_text = (
"Order [{one|quantity} {large|size} {Tesora|product} with [{medium|size} "
"{cream|option}|option] and [{medium|size} {sugar|option}|option]|product]"
)
entity_text = (
"Order {one|quantity} {large|size} {Tesora|product} with {medium|size} "
"{cream|option} and {medium|size} {sugar|option}"
def test_load_group(query_factory):
    """Tests loading a query with an entity group.

    The markup groups a size and an option under a product. After loading,
    the three entities are checked for text, type and span, and for the
    parent/children links between the product and its two dependents.
    """
    text = "a [{large|size} {latte|product} with {nonfat milk|option}|product] please"

    loaded = markup.load_query(text, query_factory)
    found = loaded.entities

    assert len(found) == 3
    size, product, option = found[0], found[1], found[2]

    # Dependent that precedes the head of the group.
    assert size.text == "large"
    assert size.entity.type == "size"
    assert size.span == Span(2, 6)
    assert size.parent == product

    # Head of the group; it owns both dependents.
    assert product.text == "latte"
    assert product.entity.type == "product"
    assert product.span == Span(8, 12)
    assert product.children == (size, option)

    # Dependent that follows the head of the group.
    assert option.text == "nonfat milk"
    assert option.entity.type == "option"
    assert option.span == Span(19, 29)
    assert option.parent == product
norm_text = query.normalized_text[norm_span.start : norm_span.end + 1]
proc_span = query.transform_span(
norm_span, TEXT_FORM_NORMALIZED, TEXT_FORM_PROCESSED
)
proc_text = query.processed_text[proc_span.start : proc_span.end + 1]
raw_span = query.transform_span(norm_span, TEXT_FORM_NORMALIZED, TEXT_FORM_RAW)
raw_text = query.text[raw_span.start : raw_span.end + 1]
assert norm_text == "test one"
assert proc_span == raw_span
assert proc_text == raw_text
assert raw_span == Span(0, 8)
assert raw_text == "Test: One"