Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_dump_entities(query_factory):
    """Checks how ``markup.dump_query`` renders entity annotations.

    Two scenarios run back to back:

    1. A query carrying two flat entities (a store name and a time).
    2. A query carrying one ``price`` entity whose value holds two nested
       ``sys_number`` children.

    In both scenarios ``no_entity=True`` must give back the raw query text.
    """
    # --- Scenario 1: two flat entities -------------------------------------
    raw_text = "When does the Elm Street store close on Monday?"
    expected_markup = (
        "When does the {Elm Street|store_name} store close on {Monday|sys_time}?"
    )
    query = query_factory.create_query(raw_text)
    store_entity = QueryEntity.from_query(
        query, Span(14, 23), entity_type="store_name"
    )
    time_entity = QueryEntity.from_query(query, Span(40, 45), entity_type="sys_time")
    annotated = ProcessedQuery(query, entities=[store_entity, time_entity])

    assert markup.dump_query(annotated) == expected_markup
    assert markup.dump_query(annotated, no_entity=True) == raw_text

    # --- Scenario 2: a price entity with nested numeric bounds -------------
    raw_text = "show me houses between 600,000 and 1,000,000 dollars"
    expected_markup = (
        "show me houses {between {600,000|sys_number} and "
        "{1,000,000|sys_number} dollars|price}"
    )
    query = query_factory.create_query(raw_text)
    # Both bounds are built with the same parent offset (15), which is also
    # where the enclosing price span starts.
    bounds = [
        NestedEntity.from_query(
            query, Span(start, end), parent_offset=15, entity_type="sys_number"
        )
        for start, end in ((8, 14), (20, 28))
    ]
    price = Entity(
        "between 600,000 dollars and 1,000,000",
        "price",
        value={"children": bounds},
    )
    price_entity = QueryEntity.from_query(query, Span(15, 51), entity=price)
    annotated = ProcessedQuery(query, entities=[price_entity])

    # no_group is expected to leave this output unchanged.
    assert markup.dump_query(annotated) == expected_markup
    assert markup.dump_query(annotated, no_group=True) == expected_markup
    assert markup.dump_query(annotated, no_entity=True) == raw_text
def test_bootstrap_query_no_entity(query_factory):
""""Tests bootstrap output for a query without entities"""
query_text = "cancel the timer"
query = query_factory.create_query(query_text)
confidence = {
"domains": {"times_and_dates": 0.95, "espionage": 0.05},
"intents": {"stop_timer": 0.9, "start_timer": 0.07, "cut_blue_wire": 0.03},
"entities": [],
"roles": [],
}
processed_query = ProcessedQuery(
query,
domain="times_and_dates",
intent="stop_timer",
entities=[],
confidence=confidence,
)
bootstrap_data = markup.bootstrap_query_row(processed_query, show_confidence=True)
expected_data = {
"query": "cancel the timer",
"domain": "times_and_dates",
"domain_conf": 0.95,
"intent": "stop_timer",
"intent_conf": 0.9,
"entity_conf": 1.0,
"role_conf": 1.0,
entities = [
QueryEntity.from_query(query, Span(6, 8), entity_type="quantity"),
QueryEntity.from_query(query, Span(10, 14), entity_type="size"),
QueryEntity.from_query(query, Span(16, 21), entity_type="product"),
QueryEntity.from_query(query, Span(28, 33), entity_type="size"),
QueryEntity.from_query(query, Span(35, 39), entity_type="option"),
QueryEntity.from_query(query, Span(45, 50), entity_type="size"),
QueryEntity.from_query(query, Span(52, 56), entity_type="option"),
]
entities[4] = entities[4].with_children((entities[3],))
entities[6] = entities[6].with_children((entities[5],))
entities[2] = entities[2].with_children(
(entities[0], entities[1], entities[4], entities[6])
)
processed_query = ProcessedQuery(query, entities=entities)
markup_text = (
"Order [{one|quantity} {large|size} {Tesora|product} with [{medium|size} "
"{cream|option}|option] and [{medium|size} {sugar|option}|option]|product]"
)
entity_text = (
"Order {one|quantity} {large|size} {Tesora|product} with {medium|size} "
"{cream|option} and {medium|size} {sugar|option}"
)
group_text = (
"Order [one large Tesora with [medium "
"cream|option] and [medium sugar|option]|product]"
)
assert markup.dump_query(processed_query) == markup_text
assert markup.dump_query(processed_query, no_group=True) == entity_text
def test_dump_role(query_factory):
    """Checks ``markup.dump_query`` output for entities that carry roles.

    The default dump must include the role after the entity type,
    ``no_role=True`` must drop it, and ``no_role=True`` combined with
    ``no_entity=True`` must give back the raw query text.
    """
    raw_text = "What stores are open between 3 and 5"
    with_roles = (
        "What stores are open between {3|sys_time|open_hours} and "
        "{5|sys_time|close_hours}"
    )
    without_roles = "What stores are open between {3|sys_time} and {5|sys_time}"

    query = query_factory.create_query(raw_text)
    # Each entity covers a single character, so its span starts and ends at
    # the same position.
    role_entities = [
        QueryEntity.from_query(
            query, Span(position, position), entity_type="sys_time", role=role
        )
        for position, role in ((29, "open_hours"), (35, "close_hours"))
    ]
    annotated = ProcessedQuery(query, entities=role_entities)

    assert markup.dump_query(annotated) == with_roles
    assert markup.dump_query(annotated, no_role=True) == without_roles

    plain_dump = markup.dump_query(annotated, no_role=True, no_entity=True)
    assert plain_dump == raw_text
def test_query_cache_has_the_correct_format(kwik_e_mart_app_path):
    """Checks the layout of the query cache stored for the Kwik-E-Mart app.

    The cache file is loaded with ``joblib``. The loaded object may either be
    the cache mapping itself or wrap it under a ``"cached_queries"`` key;
    both layouts are accepted.

    Args:
        kwik_e_mart_app_path: Path of the app directory that
            ``QUERY_CACHE_RELATIVE_PATH`` is joined onto.
    """
    query_cache_location = os.path.join(kwik_e_mart_app_path, QUERY_CACHE_RELATIVE_PATH)
    versioned_data = joblib.load(query_cache_location)
    query_cache = (
        versioned_data["cached_queries"]
        if "cached_queries" in versioned_data
        else versioned_data
    )

    # Entries are keyed by a three-part tuple; the first two parts match the
    # entry's domain and intent (third part is presumably the query text).
    cache_key = ("store_info", "help", "User manual")
    assert cache_key in query_cache

    # Look the entry up once instead of repeating the lookup per assertion.
    cached_query = query_cache[cache_key]
    assert cached_query.domain == "store_info"
    assert cached_query.intent == "help"
    # isinstance is the idiomatic type check and also accepts subclasses.
    assert isinstance(cached_query, ProcessedQuery)
confidence = (
{"entities": entity_confidence, "roles": role_confidence} if verbose else {}
)
if using_nbest_transcripts:
return ProcessedQuery(
query[0],
entities=processed_entities,
confidence=confidence,
nbest_transcripts_queries=query,
nbest_transcripts_entities=entities,
nbest_aligned_entities=aligned_entities,
)
return ProcessedQuery(
query[0], entities=processed_entities, confidence=confidence
)
entity_confidence, entities = self._get_pred_entities(
query, dynamic_resource=dynamic_resource, verbose=verbose
)
aligned_entities = self._align_entities(entities)
processed_entities, role_confidence = self._process_entities(
query, entities, aligned_entities, verbose
)
confidence = (
{"entities": entity_confidence, "roles": role_confidence} if verbose else {}
)
if using_nbest_transcripts:
return ProcessedQuery(
query[0],
entities=processed_entities,
confidence=confidence,
nbest_transcripts_queries=query,
nbest_transcripts_entities=entities,
nbest_aligned_entities=aligned_entities,
)
return ProcessedQuery(
query[0], entities=processed_entities, confidence=confidence
)
intent (str, optional): The name of the intent annotated for the query.
is_gold (bool, optional): True if the markup passed in is a reference,
human-labeled example. Defaults to False.
query_options (dict, optional): A dict containing options for creating
a Query, such as `language`, `time_zone` and `timestamp`
Returns:
ProcessedQuery: a processed query
"""
query_factory = query_factory or QueryFactory.create_query_factory()
query_options = query_options or {}
_, query, entities = process_markup(
markup, query_factory=query_factory, query_options=query_options
)
return ProcessedQuery(
query, domain=domain, intent=intent, entities=entities, is_gold=is_gold
)