How to use the mindmeld.models.helpers.register_query_feature function in mindmeld

To help you get started, we’ve selected a few mindmeld examples that show popular ways the register_query_feature function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@register_query_feature(feature_name="word-shape")
def extract_word_shape(lengths=(1,), **args):
    """
    Extracts word shape for ngrams of specified lengths.

    Args:
        lengths (list of int): The ngram length

    Returns:
        (function) An feature extraction function that takes a query and \
            returns ngrams of word shapes, for n of specified lengths.
    """
    del args

    def word_shape_basic(token):
        # example: option --> xxxxx+, 123 ---> ddd, call --> xxxx
        shape = ["d" if character.isdigit() else "x" for character in token]
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@register_query_feature(feature_name="sys-candidates")
def extract_sys_candidates(entities=None, **args):
    """
    Return an extractor for features based on a heuristic guess of numeric \
        candidates in the current query.

    Returns:
            (function) The feature extractor.
     """
    del args
    entities = entities or DEFAULT_SYS_ENTITIES

    def _extractor(query, resources):
        del resources
        system_entities = query.get_system_entity_candidates(list(entities))
        sys_ent_counter = Counter()
        for entity in system_entities:
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@register_query_feature(feature_name="exact")
@requires(QUERY_FREQ_RSC)
def extract_query_string(scaling=1000, **args):
    """
    Extract whole query string as a feature.

    Returns:
        (function) A feature extraction function that takes a query and \
            returns the whole query string for exact matching

    """

    def _extractor(query, resources):
        query_key = "<{}>".format(query.normalized_text)
        if query_key in resources[QUERY_FREQ_RSC]:
            return {"exact|query:{}".format(query_key): scaling}
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@register_query_feature(feature_name="bag-of-words-seq")
@requires(WORD_NGRAM_FREQ_RSC)
def extract_bag_of_words_features(
    ngram_lengths_to_start_positions, thresholds=(0,), **args
):
    """Returns a bag-of-words feature extractor.

    Args:
        ngram_lengths_to_start_positions (dict)
        thresholds (int): Cut off value to include word in n-gram vocab

    Returns:
        (function) The feature extractor.
    """
    threshold_list = list(thresholds)
    word_thresholds = threshold_list + [0] * (
        len(ngram_lengths_to_start_positions.keys()) - len(threshold_list)
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@register_query_feature(feature_name="in-gaz")
@requires(GAZETTEER_RSC)
def extract_in_gaz_feature(scaling=1, **args):
    """Returns a feature extractor that generates a set of features indicating the presence
    of query n-grams in different entity gazetteers. Used by the domain and intent classifiers
    when the 'in-gaz' feature is specified in the config.

    Args:
        scaling (int): A multiplicative scale factor to the ``ratio_pop`` and ``ratio`` features of
        the in-gaz feature set.

    Returns:
        function: Returns an extractor function
    """
    del args

    def _extractor(query, resources):
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@register_query_feature(feature_name="length")
def extract_length(**args):
    """
    Extract length measures (tokens and chars; linear and log) on whole query.

    Returns:
        (function) A feature extraction function that takes a query and \
            returns number of tokens and characters on linear and log scales
    """
    del args

    def _extractor(query, resources):
        del resources
        tokens = len(query.normalized_tokens)
        chars = len(query.normalized_text)
        return {
            "tokens": tokens,
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@register_query_feature(feature_name="char-ngrams-seq")
@requires(CHAR_NGRAM_FREQ_RSC)
def extract_char_ngrams_features(
    ngram_lengths_to_start_positions, thresholds=(0,), **args
):
    """Returns a character n-gram feature extractor.

        Args:
            ngram_lengths_to_start_positions (dict):
            The window of tokens to be considered relative to the
            current token while extracting char n-grams
            thresholds (int): Cut off value to include word in n-gram vocab

        Returns:
            (function) The feature extractor.
        """
    del args
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@register_query_feature(feature_name="in-gaz-ngram-seq")
@requires(GAZETTEER_RSC)
def extract_in_gaz_ngram_features(**args):
    """Returns a feature extractor for surrounding ngrams in gazetteers
    """
    del args

    def _extractor(query, resources):
        def get_ngram_gaz_features(query, gazes, entity_type):
            tokens = query.normalized_tokens
            feat_seq = [{} for _ in tokens]

            for i, _ in enumerate(feat_seq):
                feat_prefix = "in_gaz|type:{}|ngram".format(entity_type)

                # entity PMI and conditional prob
                p_total = (
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@register_query_feature(feature_name="gaz-freq")
@requires(GAZETTEER_RSC)
@requires(WORD_FREQ_RSC)
def extract_gaz_freq(**args):
    """
    Extract frequency bin features for each gazetteer

    Returns:
        (function): A feature extraction function that returns the log of the \
            count of query tokens within each gazetteer's frequency bins.
    """
    del args

    def _extractor(query, resources):
        tokens = query.normalized_tokens
        freq_features = defaultdict(int)
github cisco / mindmeld / mindmeld / models / query_features.py View on Github external
@register_query_feature(feature_name="enable-stemming")
@requires(ENABLE_STEMMING)
def enabled_stemming(**args):
    """Build the placeholder extractor registered as ``enable-stemming``.

    The returned extractor generates no features of its own. The function
    is decorated with ``requires(ENABLE_STEMMING)``; presumably that
    declaration is what actually switches stemming on elsewhere in the
    pipeline -- confirm against the ``requires`` helper.

    Args:
        **args: Accepted and discarded; no keyword argument is used.

    Returns:
        (function) An extractor taking ``(query, resources)`` that ignores
            both arguments and returns ``None``.
    """
    # Keyword arguments are accepted but deliberately unused.
    del args

    def _extractor(query, resources):
        # Deliberate no-op: drop both inputs and produce nothing.
        del query, resources
        return None

    return _extractor