How to use deeppavlov - 10 common examples

To help you get started, we’ve selected a few deeppavlov examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github deepmipt / DeepPavlov / deeppavlov / evolve.py View on Github external
Args:
        population: list of dictionaries - configs of current population
        evolution: ParamsEvolution
        gpus: list of given devices (list of integers)

    Returns:
        None
    """
    population_size = len(population)
    for k in range(population_size // len(gpus) + 1):
        procs = []
        for j in range(len(gpus)):
            i = k * len(gpus) + j
            if i < population_size:
                save_path = expand_path(
                    evolution.get_value_from_config(parse_config(population[i]),
                                                    evolution.path_to_models_save_path))

                save_path.mkdir(parents=True, exist_ok=True)
                f_name = save_path / "config.json"
                save_json(population[i], f_name)

                with save_path.joinpath('out.txt').open('w', encoding='utf8') as outlog,\
                        save_path.joinpath('err.txt').open('w', encoding='utf8') as errlog:
                    env = dict(os.environ)
                    if len(gpus) > 1 or gpus[0] != -1:
                        env['CUDA_VISIBLE_DEVICES'] = str(gpus[j])

                    procs.append(Popen("{} -m deeppavlov train {}".format(sys.executable, str(f_name)),
                                       shell=True, stdout=outlog, stderr=errlog, env=env))
        for j, proc in enumerate(procs):
            i = k * len(gpus) + j
github deepmipt / DeepPavlov / deeppavlov / models / classifiers / intents / test_metrics.py View on Github external
def main(config_name='config_infer.json'):

    # K.clear_session()

    with open(config_name) as f:
        config = json.load(f)

    # Reading datasets from files
    reader_config = config['dataset_reader']
    reader = REGISTRY[reader_config['name']]
    data = reader.read(reader_config['data_path'])

    # Building dict of datasets
    dataset_config = config['dataset']
    dataset = from_params(REGISTRY[dataset_config['name']],
                          dataset_config, data=data)

    # Merging train and valid dataset for further split on train/valid
    # dataset.merge_data(fields_to_merge=['train', 'valid'], new_field='train')
    # dataset.split_data(field_to_split='train', new_fields=['train', 'valid'], proportions=[0.9, 0.1])

    preproc_config = config['preprocessing']
    preproc = from_params(REGISTRY[preproc_config['name']],
                          preproc_config)
    # dataset = preproc.preprocess(dataset=dataset, data_type='train')
    # dataset = preproc.preprocess(dataset=dataset, data_type='valid')
    dataset = preproc.preprocess(dataset=dataset, data_type='test')

    # Extracting unique classes
github deepmipt / DeepPavlov / deeppavlov / models / sklearn / sklearn_component.py View on Github external
else:
            y_ = None

        try:
            log.info("Fitting model {}".format(self.model_name))
            self.model.fit(x_features, y_)
        except TypeError or ValueError:
            try:
                if issparse(x_features):
                    log.info("Converting input for model {} to dense array".format(self.model_name))
                    self.model.fit(x_features.todense(), y_)
                else:
                    log.info("Converting input for model {} to sparse array".format(self.model_name))
                    self.model.fit(csr_matrix(x_features), y_)
            except:
                raise ConfigError("Can not fit on the given data".format(self.model_name))

        return
github deepmipt / DeepPavlov / deeppavlov / models / dp_assistant / states_parser.py View on Github external
utterances_history.append(utterance['text'])
                annotations_history.append(utterance['annotations'])

            last_utterances.append(utterances_history[-1])
            utterances_histories.append(utterances_history)
            last_annotations.append(annotations_history[-1])
            annotations_histories.append(annotations_history)

            dialog_ids.append(dialog['id'])
            user_ids.append(dialog['user']['id'])

        return last_utterances, last_annotations, utterances_histories, annotations_histories, dialog_ids, user_ids


@register('annotations_parser')
class AnnotationsParser(Component):
    """ Inputs utterance annotations and gets recursive values.

    Example:
        > parser = AnnotaionsParser(keys=['ner.tokens', 'ner.tags'])
        > parser([{'ner': {'tokens': ['I'], 'tags': ['O']}}])
        [['I']], [['O']]
    """

    def __init__(self, keys, **kwargs):
        """Prepare the parser by splitting each dotted key path into segments.

        Args:
            keys: iterable of dot-separated key paths, e.g. ``'ner.tokens'``.
            **kwargs: unused; accepted so config-driven construction can pass
                extra parameters without error.
        """
        parsed_paths = []
        for key_path in keys:
            parsed_paths.append(key_path.split('.'))
        self.keys = parsed_paths

    def __call__(self, annotations: List[dict]) -> List[List]:
        ann_values = [[]] * len(self.keys)
        for ann in annotations:
            for i, key_rec in enumerate(self.keys):
                val = ann
github deepmipt / DeepPavlov / deeppavlov / models / bidirectional_lms / elmo_bilm.py View on Github external
import logging

from deeppavlov.core.commands.utils import expand_path
# from deeppavlov.core.common.log import get_logger
from deeppavlov.core.common.registry import register
from deeppavlov.core.data.utils import zero_pad
from deeppavlov.core.models.component import Component
from deeppavlov.core.models.tf_backend import TfModelMeta

from deeppavlov.models.bidirectional_lms.elmo.utils import load_model, load_options_latest_checkpoint
from deeppavlov.models.bidirectional_lms.elmo.data import InferBatcher

log = logging.getLogger(__name__)

@register('elmo_bilm')
class ELMoEmbedder(Component, metaclass=TfModelMeta):
    """

    """
    def __init__(self, model_dir: str, forward_direction_sequence: bool = True, backward_direction_sequence: bool = True,
                 pad_zero: bool = False, max_token: Optional[int] = None, mini_batch_size: int = 32, **kwargs) -> None:

        self.model_dir = model_dir if '://' in model_dir else str(expand_path(model_dir))

        self.forward_direction_sequence = forward_direction_sequence
        self.backward_direction_sequence = backward_direction_sequence
        if not (self.forward_direction_sequence or self.backward_direction_sequence):
            log.error(f'At least one direction sequence of forward_direction_sequence or backward_direction_sequence'\
                      ' must be equal to True.')
            sys.exit(1)

        self.pad_zero = pad_zero
github deepmipt / DeepPavlov / deeppavlov / core / commands / train.py View on Github external
def fit_chainer(config: dict, iterator: BasicDatasetIterator) -> Chainer:

    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, vocabs=[], mode='train')
        if 'fit_on' in component_config:
            component: Estimator

            preprocessed = chainer(*iterator.iter_all('train'), to_return=component_config['fit_on'])
            if len(component_config['fit_on']) == 1:
                preprocessed = [preprocessed]
            else:
                preprocessed = zip(*preprocessed)
            component.fit(*preprocessed)
            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
github deepmipt / DeepPavlov / deeppavlov / core / commands / train.py View on Github external
kwargs = {k: v for k, v in reader_config.items() if k not in ['name', 'data_path']}
    data = reader.read(data_path, **kwargs)

    iterator_config = config['dataset_iterator']
    iterator: BasicDatasetIterator = from_params(iterator_config, data=data)

    if 'chainer' in config:
        model = fit_chainer(config, iterator)
    else:
        vocabs = config.get('vocabs', {})
        for vocab_param_name, vocab_config in vocabs.items():
            v: Estimator = from_params(vocab_config, mode='train')
            vocabs[vocab_param_name] = _fit(v, iterator)

        model_config = config['model']
        model = from_params(model_config, vocabs=vocabs, mode='train')

    train_config = {
        'metrics': ['accuracy'],

        'validate_best': True,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(zip(train_config['metrics'],
                                 get_metrics_by_names(train_config['metrics'])))
github deepmipt / DeepPavlov / deeppavlov / pipeline_manager / train.py View on Github external
module_name, cls_name = c.split(':')
                reader = getattr(importlib.import_module(module_name), cls_name)()
            except ValueError:
                e = ConfigError('Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                                .format(c))
                log.exception(e)
                raise e
        else:
            reader = get_model(reader_config.pop('name'))()
        data_path = expand_path(reader_config.pop('data_path', ''))
        data = reader.read(data_path, **reader_config)
    else:
        log.warning("No dataset reader is provided in the JSON config.")

    iterator_config = config['dataset_iterator']
    iterator: Union[DataLearningIterator, DataFittingIterator] = from_params(iterator_config,
                                                                             data=data)

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': to_validate,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    metrics_functions = list(zip(train_config['metrics'], get_metrics_by_names(train_config['metrics'])))

    if to_train:
github deepmipt / DeepPavlov / deeppavlov / core / commands / train.py View on Github external
reader_config = config['dataset_reader']
    reader = get_model(reader_config['name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    kwargs = {k: v for k, v in reader_config.items() if k not in ['name', 'data_path']}
    data = reader.read(data_path, **kwargs)

    iterator_config = config['dataset_iterator']
    iterator: BasicDatasetIterator = from_params(iterator_config, data=data)

    if 'chainer' in config:
        model = fit_chainer(config, iterator)
    else:
        vocabs = config.get('vocabs', {})
        for vocab_param_name, vocab_config in vocabs.items():
            v: Estimator = from_params(vocab_config, mode='train')
            vocabs[vocab_param_name] = _fit(v, iterator)

        model_config = config['model']
        model = from_params(model_config, vocabs=vocabs, mode='train')

    train_config = {
        'metrics': ['accuracy'],

        'validate_best': True,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')
github deepmipt / DeepPavlov / deeppavlov / core / commands / train.py View on Github external
_fit_batches(model, iterator, train_config)
        elif callable(getattr(model, 'fit', None)):
            _fit(model, iterator, train_config)
        elif not isinstance(model, Chainer):
            log.warning('Nothing to train')

        model.destroy()

    res = {}

    if train_config['validate_best'] or train_config['test_best']:
        # try:
        #     model_config['load_path'] = model_config['save_path']
        # except KeyError:
        #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
        model = build_model_from_config(config, load_trained=True)
        log.info('Testing the best saved model')

        if train_config['validate_best']:
            report = {
                'valid': _test_model(model, metrics_functions, iterator,
                                     train_config.get('batch_size', -1), 'valid',
                                     show_examples=train_config['show_examples'])
            }

            res['valid'] = report['valid']['metrics']

            print(json.dumps(report, ensure_ascii=False))

        if train_config['test_best']:
            report = {
                'test': _test_model(model, metrics_functions, iterator,