How to use the deeppavlov.core.models.estimator.Estimator class in deeppavlov

To help you get started, we’ve selected a few deeppavlov examples, based on popular ways it is used in public projects.

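Before walking through the examples, it helps to see the shape of the interface they all share: Estimator is DeepPavlov's base class for components that must be fitted on data before they can be used for inference. The following is a minimal, hypothetical sketch (the class name and its logic are invented for illustration), assuming the usual contract: fit for training, __call__ for inference, and save/load for serialization via save_path/load_path.

from collections import Counter
from typing import List

from deeppavlov.core.models.estimator import Estimator


class WordCounter(Estimator):
    """Hypothetical estimator that memorizes token frequencies (illustration only)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)  # handles save_path / load_path
        self.counts = Counter()

    def fit(self, tokens: List[str]) -> None:
        # called by the training pipeline on the data fields named in `fit_on`
        self.counts.update(tokens)

    def __call__(self, batch: List[str]) -> List[int]:
        # inference: look up the learned frequency of every token in the batch
        return [self.counts[token] for token in batch]

    def save(self) -> None:
        self.save_path.write_text('\n'.join(f'{w}\t{c}' for w, c in self.counts.items()))

    def load(self) -> None:
        for line in self.load_path.read_text().splitlines():
            word, count = line.split('\t')
            self.counts[word] = int(count)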

github deepmipt / DeepPavlov / deeppavlov / core / commands / train.py View on GitHub
def fit_chainer(config: dict, iterator: BasicDatasetIterator) -> Chainer:

    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, vocabs=[], mode='train')
        if 'fit_on' in component_config:
            component: Estimator

            preprocessed = chainer(*iterator.iter_all('train'), to_return=component_config['fit_on'])
            if len(component_config['fit_on']) == 1:
                preprocessed = [preprocessed]
            else:
                preprocessed = zip(*preprocessed)
            component.fit(*preprocessed)
            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            chainer.append(c_in, c_out, component, in_y, main)
    return chainer
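The logic above is driven entirely by the pipeline config: any component whose config contains a fit_on key is treated as an Estimator, fitted on the named data fields, and saved before being appended to the chainer. A hypothetical config fragment in the shape this function expects (the registered component name my_vocab is invented):

chainer_config = {
    'in': ['x'],
    'in_y': ['y'],
    'out': ['y_predicted'],
    'pipe': [
        {
            'name': 'my_vocab',   # hypothetical registered Estimator
            'fit_on': ['x'],      # triggers component.fit(...) in fit_chainer
            'in': ['x'],
            'out': ['x_ids'],
        },
    ],
}
config = {'chainer': chainer_config}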
github deepmipt / DeepPavlov / deeppavlov / core / commands / train.py View on GitHub
def fit_chainer(config: dict, iterator) -> Chainer:
    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, mode='train')
        if 'fit_on' in component_config:
            component: Estimator

            preprocessed = chainer(*iterator.get_instances('train'), to_return=component_config['fit_on'])
            if len(component_config['fit_on']) == 1:
                preprocessed = [preprocessed]
            else:
                preprocessed = zip(*preprocessed)
            component.fit(*preprocessed)
            component.save()

        if 'fit_on_batch' in component_config:
            component: Estimator
            component.fit_batches(iterator, config['train']['batch_size'])
            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
            chainer.append(component, c_in, c_out, in_y, main)
    return chainer
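This later revision adds a second hook: a component marked with fit_on_batch is trained through fit_batches, which receives the iterator itself and the configured batch size instead of pre-collected data. A rough, hypothetical sketch of such an estimator, assuming the iterator exposes gen_batches the way DeepPavlov's data iterators do:

from typing import List

from deeppavlov.core.models.estimator import Estimator


class BatchFittedCounter(Estimator):
    """Hypothetical estimator fitted batch-by-batch (illustration only)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.seen = 0

    def fit_batches(self, iterator, batch_size: int) -> None:
        # consume the training data one batch at a time instead of all at once
        for x_batch, y_batch in iterator.gen_batches(batch_size, data_type='train'):
            self.seen += len(x_batch)

    def __call__(self, batch: List[str]) -> List[int]:
        return [self.seen] * len(batch)

    def save(self) -> None:
        self.save_path.write_text(str(self.seen))

    def load(self) -> None:
        self.seen = int(self.load_path.read_text())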
github deepmipt / DeepPavlov / deeppavlov / models / preprocessors / personachat_preprocessor.py View on GitHub
                        for j, token in enumerate(utt[-self.token_limit:]):
                            for k, char in enumerate(token[:self.char_limit]):
                                utt_idxs[b, i, j, k] = char
            elif len(get_shape(batch)) == 3:
                utt_idxs = np.zeros([len(batch), self.token_limit, self.char_limit], dtype=np.int32)
                for i, utt in enumerate(batch):
                    for j, token in enumerate(utt[-self.token_limit:]):
                        for k, char in enumerate(token[:self.char_limit]):
                            utt_idxs[i, j, k] = char
            else:
                raise RuntimeError("Unsupported batch shape")

        return utt_idxs

@register('personachat_vocab')
class PersonachatEmbedder(Estimator):
    # TODO: refactor to merge this code with SQuAD embedder
    def __init__(self, emb_folder, emb_url, save_path, load_path,
                 x_len_limit, persona_len_limit, y_len_limit, char_limit, level='token', *args, **kwargs):
        self.emb_folder = expand_path(emb_folder)
        self.level = level
        self.emb_url = emb_url
        self.emb_file_name = Path(emb_url).name
        self.save_path = expand_path(save_path)
        self.load_path = expand_path(load_path)
        self.x_len_limit = x_len_limit
        self.persona_len_limit = persona_len_limit
        self.y_len_limit = y_len_limit
        self.char_limit = char_limit
        self.loaded = False

        self.NULL = "<NULL>"
github deepmipt / DeepPavlov / deeppavlov / core / data / sqlite_database.py View on GitHub
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sqlite3
from logging import getLogger
from typing import List, Dict

from deeppavlov.core.common.registry import register
from deeppavlov.core.models.estimator import Estimator

log = getLogger(__name__)


@register('sqlite_database')
class Sqlite3Database(Estimator):
    """
    Loads and trains sqlite table of any items (with name ``table_name``
    and path ``save_path``).

    Primary (unique) keys must be specified; all other keys are inferred from the data.
    Batch here is a list of dictionaries, where each dictionary corresponds to an item.
    If an item doesn't contain values for all keys, then missing values will be stored
    with ``unknown_value``.

    Parameters:
        save_path: sqlite database path.
        primary_keys: list of table primary keys' names.
        keys: all table keys' names.
        table_name: name of the sqlite table.
        unknown_value: value assigned to missing item values.
        **kwargs: parameters passed to parent
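Taking that docstring at face value, a batch is just a list of item dictionaries. A hypothetical usage sketch (the path, table name and fields are invented; the call at the end assumes the component queries the table by the given key values, which matches how database components are used in DeepPavlov's goal-oriented bot configs):

db = Sqlite3Database(save_path='my_table.sqlite',
                     primary_keys=['name'],
                     table_name='restaurants')

# fit() stores the items; non-primary keys are inferred from the data
db.fit([{'name': 'Golden Curry', 'food': 'indian'},
        {'name': 'The Ivy', 'food': 'british'}])
db.save()

matches = db([{'food': 'indian'}])  # assumed lookup by field value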
github deepmipt / DeepPavlov / deeppavlov / models / preprocessors / siamese_preprocessor.py View on GitHub
from logging import getLogger
from typing import List, Union, Iterable, Optional

import numpy as np

from deeppavlov.core.commands.utils import expand_path
from deeppavlov.core.common.registry import register
from deeppavlov.core.data.utils import zero_pad_truncate
from deeppavlov.core.models.component import Component
from deeppavlov.core.models.estimator import Estimator

log = getLogger(__name__)


@register('siamese_preprocessor')
class SiamesePreprocessor(Estimator):
    """ Preprocessing of data samples containing text strings to feed them in a siamese network.

    First ``num_context_turns`` strings in each data sample corresponds to the dialogue ``context``
    and the rest string(s) in the sample is (are) ``response(s)``.

    Args:
        save_path: The parameter is only needed to initialize the base class
            :class:`~deeppavlov.core.models.serializable.Serializable`.
        load_path: The parameter is only needed to initialize the base class
            :class:`~deeppavlov.core.models.serializable.Serializable`.
        max_sequence_length: A maximum length of text sequences in tokens.
            Longer sequences will be truncated and shorter ones will be padded.
        dynamic_batch:  Whether to use dynamic batching. If ``True``, the maximum length of a sequence for a batch
            will be equal to the maximum of all sequences lengths from this batch,
            but not higher than ``max_sequence_length``.
        padding: Padding. Possible values are ``pre`` and ``post``.
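The padding and max_sequence_length arguments describe standard pad-or-truncate behavior; the actual work is done by zero_pad_truncate, imported above. Purely as an illustration of the pre/post semantics (this is not the DeepPavlov implementation):

from typing import List

import numpy as np


def pad_or_truncate(batch: List[List[int]], max_len: int, padding: str = 'post') -> np.ndarray:
    """Simplified stand-in showing 'pre' vs 'post' padding with truncation."""
    out = np.zeros((len(batch), max_len), dtype=np.int32)
    for i, seq in enumerate(batch):
        seq = seq[:max_len]                    # truncate sequences that are too long
        if padding == 'post':
            out[i, :len(seq)] = seq            # zeros go at the end
        else:
            out[i, max_len - len(seq):] = seq  # zeros go at the beginning
    return out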
github deepmipt / DeepPavlov / deeppavlov / models / preprocessors / ubuntu_preprocessor.py View on GitHub
import numpy as np
from typing import List, Union, Iterable

from deeppavlov.core.commands.utils import expand_path
from deeppavlov.core.common.log import get_logger
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.component import Component
from deeppavlov.core.models.estimator import Estimator
from deeppavlov.core.data.utils import zero_pad_truncate

log = get_logger(__name__)


@register('ubuntu_preprocessor')
class UbuntuPreprocessor(Estimator):
    """ Preprocessing of data samples from Ubuntu Dialogue Corpus v1/v2 dataset
     to feed them in SMN or DAM ranking neural models.

    First ``num_context_turns`` strings in each data sample corresponds to the dialogue ``context``
    and the rest string(s) in the sample is (are) ``response(s)``.

    Args:
        save_path: The parameter is only needed to initialize the base class
            :class:`~deeppavlov.core.models.serializable.Serializable`.
        load_path: The parameter is only needed to initialize the base class
            :class:`~deeppavlov.core.models.serializable.Serializable`.
        max_sequence_length: A maximum length of text sequences in tokens.
            Longer sequences will be truncated and shorter ones will be padded.
        dynamic_batch:  Whether to use dynamic batching. If ``True``, the maximum length of a sequence for a batch
            will be equal to the maximum of all sequences lengths from this batch,
            but not higher than ``max_sequence_length``.
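The dynamic_batch option described here trades fixed-size tensors for tighter ones: the pad length becomes the length of the longest sequence actually present in the batch, capped at max_sequence_length. The length choice reduces to a one-liner (illustrative only):

def effective_length(batch, max_sequence_length: int, dynamic_batch: bool) -> int:
    # with dynamic batching, pad only up to the longest sequence in this batch
    if dynamic_batch:
        return min(max(len(seq) for seq in batch), max_sequence_length)
    return max_sequence_length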
github deepmipt / DeepPavlov / deeppavlov / models / seq2seq_go_bot / features.py View on GitHub
# See the License for the specific language governing permissions and
# limitations under the License.

import json
from typing import List
from logging import getLogger

import numpy as np

from deeppavlov.core.models.estimator import Estimator


log = getLogger()


class StateFeaturizer(Estimator):

    def __init__(self, dontcare_value: str = None, **kwargs) -> None:
        super().__init__(**kwargs)
        self.dontcare_value = dontcare_value
        self.dim = 2 if dontcare_value else 1
        self.len = 0
        self.keys = []
        if self.load_path.exists():
            self.load()

    @classmethod
    def _get_depth(cls, d: dict) -> int:
        # look only at the first key: nesting depth is assumed uniform across the dict
        for k in d.keys():
            if isinstance(d[k], dict):
                return cls._get_depth(d[k]) + 1
            break
github deepmipt / DeepPavlov / deeppavlov / models / preprocessors / squad_preprocessor.py View on GitHub
                for idx, sp in enumerate(span):
                    # keep token indices whose character span overlaps the answer span
                    if not (ans_end <= sp[0] or ans_st >= sp[1]):
                        answer_span.append(idx)
                if len(answer_span) != 0:
                    y1, y2 = answer_span[0], answer_span[-1]
                else:
                    # answer not found in context
                    y1, y2 = -1, -1
                start[-1].append(y1)
                end[-1].append(y2)
                answers[-1].append(ans)
        return answers, start, end


@register('squad_vocab_embedder')
class SquadVocabEmbedder(Estimator):
    """ SquadVocabEmbedder is used to build tokens/chars vocabulary and embedding matrix.

        It extracts tokens/chars from the dataset and looks for pretrained embeddings.

        Params:
            emb_folder: path to download pretrained embeddings
            emb_url: link to pretrained embeddings
            save_path: extracted embeddings save path
            load_path: extracted embeddings load path
            context_limit: max context length in tokens
            question_limit: max question length in tokens
            char_limit: max number of characters in token
            level: token or char
        """

    def __init__(self, emb_folder: str, emb_url: str, save_path: str, load_path: str,
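Conceptually, a vocab embedder of this kind maps every token (or character) to an index and assembles an embedding matrix whose rows are taken from the pretrained vectors when available. A purely illustrative sketch of that assembly (names and the fallback initialization are invented, not the SquadVocabEmbedder internals):

import numpy as np


def build_embedding_matrix(vocab, pretrained, emb_dim):
    """Illustration: row i of the matrix embeds the token with index i."""
    token2idx = {token: idx for idx, token in enumerate(vocab)}
    matrix = np.random.normal(scale=0.1, size=(len(vocab), emb_dim))
    for token, idx in token2idx.items():
        if token in pretrained:          # use the pretrained vector when one exists
            matrix[idx] = pretrained[token]
    return token2idx, matrix.astype(np.float32)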
github deepmipt / DeepPavlov / deeppavlov / models / morpho_tagger / unimorph.py View on GitHub
from collections import defaultdict
import ujson as json
from pathlib import Path
from typing import List, Tuple

import numpy as np

from deeppavlov.core.models.estimator import Estimator
from deeppavlov.core.common.registry import register

@register("unimorph_vectorizer")
class UnimorphDictionaryVectorizer(Estimator):

    def __init__(self, save_path, load_path, use_last_word=False,
                 use_suffixes=False, min_suffix_count=10, max_suffix_length=5, **kwargs):
        load_path = Path(load_path).with_suffix(".json")
        save_path = Path(save_path).with_suffix(".json")
        super().__init__(save_path=save_path, load_path=load_path, **kwargs)
        self.use_last_word = use_last_word
        self.use_suffixes = use_suffixes
        self.min_suffix_count = min_suffix_count
        self.max_suffix_length = max_suffix_length
        if self.load_path.exists():
            self.load()

    @property
    def pos_number(self):
github deepmipt / DeepPavlov / deeppavlov / models / classifiers / sklearn_classifiers.py View on GitHub
                log.warning("initializing `{}` from scratch".format('LogisticRegression'))
                self.model = LogisticRegression(self.penalty, self.dual, self.tol, self.C, self.fit_intercept,
                                                self.intercept_scaling, self.class_weight, self.random_state,
                                                self.solver, self.max_iter, self.multi_class, self.verbose,
                                                self.warm_start, self.n_jobs)
        else:
            log.warning("No `load_path` is provided for {0}. "
                        "Initializing `{0}` from scratch".format(self.__class__.__name__))
            self.model = LogisticRegression(self.penalty, self.dual, self.tol, self.C, self.fit_intercept,
                                            self.intercept_scaling, self.class_weight, self.random_state,
                                            self.solver, self.max_iter, self.multi_class, self.verbose,
                                            self.warm_start, self.n_jobs)


@register("support_vector_classifier")
class Svm(Estimator):
    """
    The class implements the Support Vector Classifier from the sklearn library.

    Args:
        save_path (str): save path
        load_path (str): load path
        mode: train/infer trigger
        **kwargs: additional arguments

    Attributes:
        model: Support Vector Classifier class from sklearn
    """

    def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4, C=1.0, multi_class='ovr',
                 fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=None,
                 max_iter=1000, **kwargs) -> None: