Distribute training of the population configs across the given GPU devices, one subprocess per config.
Args:
population: list of dictionaries - configs of current population
evolution: ParamsEvolution
gpus: list of given devices (list of integers)
Returns:
None
"""
# NOTE(review): fragment of a population-training loop — the enclosing `def`
# and the tail of the final `for` loop are outside this chunk, so this code
# cannot be restructured or executed in isolation; comments only.
population_size = len(population)
# Process the population in rounds of up to len(gpus) parallel jobs; the +1
# covers the final, possibly partial, round.
for k in range(population_size // len(gpus) + 1):
    procs = []
    for j in range(len(gpus)):
        i = k * len(gpus) + j  # global index of the candidate config
        if i < population_size:  # last round may be only partially filled
            save_path = expand_path(
                evolution.get_value_from_config(parse_config(population[i]),
                evolution.path_to_models_save_path))
            save_path.mkdir(parents=True, exist_ok=True)
            # Persist this candidate's config so the subprocess can train it.
            f_name = save_path / "config.json"
            save_json(population[i], f_name)
            # Redirect the child's stdout/stderr into per-candidate log files.
            with save_path.joinpath('out.txt').open('w', encoding='utf8') as outlog,\
                    save_path.joinpath('err.txt').open('w', encoding='utf8') as errlog:
                env = dict(os.environ)
                # gpus == [-1] appears to mean "CPU only"; otherwise pin one
                # device per job via CUDA_VISIBLE_DEVICES.
                if len(gpus) > 1 or gpus[0] != -1:
                    env['CUDA_VISIBLE_DEVICES'] = str(gpus[j])
                # NOTE(review): shell=True with an interpolated path — f_name is
                # locally constructed, but quoting (shlex.quote) or an argv list
                # would be safer if save paths can contain spaces/metacharacters.
                procs.append(Popen("{} -m deeppavlov train {}".format(sys.executable, str(f_name)),
                                   shell=True, stdout=outlog, stderr=errlog, env=env))
    # Presumably waits on each spawned process — the loop body is truncated
    # in this chunk after the index computation.
    for j, proc in enumerate(procs):
        i = k * len(gpus) + j
def main(config_name='config_infer.json'):
    """Build the dataset and preprocessing pipeline described by a JSON config.

    NOTE(review): this definition is truncated in this chunk — it continues
    past the visible lines — so documentation covers only what is shown.

    Args:
        config_name: path to a JSON pipeline config file.
    """
    # K.clear_session()
    with open(config_name) as f:
        config = json.load(f)

    # Reading datasets from files
    reader_config = config['dataset_reader']
    # REGISTRY presumably maps registered component names to classes — confirm.
    reader = REGISTRY[reader_config['name']]
    data = reader.read(reader_config['data_path'])

    # Building dict of datasets
    dataset_config = config['dataset']
    dataset = from_params(REGISTRY[dataset_config['name']],
                          dataset_config, data=data)

    # Merging train and valid dataset for further split on train/valid
    # dataset.merge_data(fields_to_merge=['train', 'valid'], new_field='train')
    # dataset.split_data(field_to_split='train', new_fields=['train', 'valid'], proportions=[0.9, 0.1])

    preproc_config = config['preprocessing']
    preproc = from_params(REGISTRY[preproc_config['name']],
                          preproc_config)
    # dataset = preproc.preprocess(dataset=dataset, data_type='train')
    # dataset = preproc.preprocess(dataset=dataset, data_type='valid')
    # Only the test split is preprocessed here (inference script).
    dataset = preproc.preprocess(dataset=dataset, data_type='test')

    # Extracting unique classes
# NOTE(review): fragment of a model-fitting method — the `if` matching this
# `else:` and the enclosing `def` (which binds `self`, `x_features`) are
# outside this chunk.
else:
    y_ = None
try:
    log.info("Fitting model {}".format(self.model_name))
    self.model.fit(x_features, y_)
# NOTE(review): BUG — `except TypeError or ValueError` evaluates the boolean
# expression first and therefore catches ONLY TypeError; ValueError is never
# caught. Should be `except (TypeError, ValueError):`.
except TypeError or ValueError:
    try:
        # Retry with the opposite sparsity: some sklearn estimators accept
        # only dense input, others only sparse.
        if issparse(x_features):
            log.info("Converting input for model {} to dense array".format(self.model_name))
            self.model.fit(x_features.todense(), y_)
        else:
            log.info("Converting input for model {} to sparse array".format(self.model_name))
            self.model.fit(csr_matrix(x_features), y_)
    # NOTE(review): bare `except:` also swallows KeyboardInterrupt/SystemExit;
    # and the message below calls .format() with no placeholder, so
    # self.model_name is silently dropped from the error text.
    except:
        raise ConfigError("Can not fit on the given data".format(self.model_name))
return
# NOTE(review): tail of a dialog-flattening helper — the loops that bind
# `utterance`/`dialog` and the accumulator lists are defined before this
# chunk begins, so indentation here cannot be reconstructed reliably.
utterances_history.append(utterance['text'])
annotations_history.append(utterance['annotations'])
# Record both the latest item and the full running history for each dialog.
last_utterances.append(utterances_history[-1])
utterances_histories.append(utterances_history)
last_annotations.append(annotations_history[-1])
annotations_histories.append(annotations_history)
dialog_ids.append(dialog['id'])
user_ids.append(dialog['user']['id'])
# Six parallel lists, one entry per dialog processed.
return last_utterances, last_annotations, utterances_histories, annotations_histories, dialog_ids, user_ids
@register('annotations_parser')
class AnnotationsParser(Component):
    """ Inputs utterance annotations and gets recursive values.

    Example:
        > parser = AnnotationsParser(keys=['ner.tokens', 'ner.tags'])
        > parser([{'ner': {'tokens': ['I'], 'tags': ['O']}}])
        [['I']], [['O']]

    """

    def __init__(self, keys, **kwargs):
        # Pre-split each dotted key path once, e.g. 'ner.tokens' -> ['ner', 'tokens'].
        self.keys = [k.split('.') for k in keys]

    def __call__(self, annotations: List[dict]) -> List[List]:
        # NOTE(review): BUG — `[[]] * n` repeats the SAME list object n times,
        # so every slot of ann_values aliases one list and any append shows up
        # in all slots. Should be `[[] for _ in self.keys]`. Flagged only,
        # because the rest of this method is cut off in this chunk.
        ann_values = [[]] * len(self.keys)
        for ann in annotations:
            for i, key_rec in enumerate(self.keys):
                val = ann
import logging

from deeppavlov.core.commands.utils import expand_path
# from deeppavlov.core.common.log import get_logger
from deeppavlov.core.common.registry import register
from deeppavlov.core.data.utils import zero_pad
from deeppavlov.core.models.component import Component
from deeppavlov.core.models.tf_backend import TfModelMeta
from deeppavlov.models.bidirectional_lms.elmo.utils import load_model, load_options_latest_checkpoint
from deeppavlov.models.bidirectional_lms.elmo.data import InferBatcher

# Module-level logger named after this module.
log = logging.getLogger(__name__)
@register('elmo_bilm')
class ELMoEmbedder(Component, metaclass=TfModelMeta):
    """ELMo bidirectional language-model embedder.

    NOTE(review): class body is truncated in this chunk; only part of
    __init__ is visible.
    """

    def __init__(self, model_dir: str, forward_direction_sequence: bool = True, backward_direction_sequence: bool = True,
                 pad_zero: bool = False, max_token: Optional[int] = None, mini_batch_size: int = 32, **kwargs) -> None:
        # A remote location ('://' in the string) is kept as-is; local paths
        # are expanded to absolute form.
        self.model_dir = model_dir if '://' in model_dir else str(expand_path(model_dir))
        self.forward_direction_sequence = forward_direction_sequence
        self.backward_direction_sequence = backward_direction_sequence
        # At least one of the two directions must be enabled.
        if not (self.forward_direction_sequence or self.backward_direction_sequence):
            # NOTE(review): sys.exit(1) inside __init__ kills the whole process;
            # raising ConfigError would be the library-consistent way to fail.
            log.error(f'At least one direction sequence of forward_direction_sequence or backward_direction_sequence'\
                      ' must be equal to True.')
            sys.exit(1)
        self.pad_zero = pad_zero
        # NOTE(review): __init__ continues past the visible lines
        # (max_token, mini_batch_size, etc. are handled later).
def fit_chainer(config: dict, iterator: BasicDatasetIterator) -> Chainer:
    """Build a Chainer pipeline from config, fitting every 'fit_on' component.

    NOTE(review): this definition is truncated in this chunk — it continues
    past the visible lines (the 'in' branch body is cut off).
    """
    chainer_config: dict = config['chainer']
    chainer = Chainer(chainer_config['in'], chainer_config['out'], chainer_config.get('in_y'))
    for component_config in chainer_config['pipe']:
        component = from_params(component_config, vocabs=[], mode='train')

        if 'fit_on' in component_config:
            component: Estimator
            # Run the partially-built chainer over all 'train' data to obtain
            # the inputs this estimator is fitted on.
            preprocessed = chainer(*iterator.iter_all('train'), to_return=component_config['fit_on'])
            # With a single fit_on target the call returns one sequence;
            # wrap it so fit(*preprocessed) unpacks uniformly below.
            if len(component_config['fit_on']) == 1:
                preprocessed = [preprocessed]
            else:
                preprocessed = zip(*preprocessed)
            component.fit(*preprocessed)
            component.save()

        if 'in' in component_config:
            c_in = component_config['in']
            c_out = component_config['out']
            in_y = component_config.get('in_y', None)
            main = component_config.get('main', False)
# NOTE(review): fragment of a training entry point — `config`, `reader_config`,
# `reader` and `data_path` are bound before this chunk begins.
kwargs = {k: v for k, v in reader_config.items() if k not in ['name', 'data_path']}
data = reader.read(data_path, **kwargs)

iterator_config = config['dataset_iterator']
iterator: BasicDatasetIterator = from_params(iterator_config, data=data)

# A 'chainer' section builds and fits the whole pipeline; otherwise the
# legacy vocabs-then-model path is used.
if 'chainer' in config:
    model = fit_chainer(config, iterator)
else:
    vocabs = config.get('vocabs', {})
    for vocab_param_name, vocab_config in vocabs.items():
        v: Estimator = from_params(vocab_config, mode='train')
        vocabs[vocab_param_name] = _fit(v, iterator)
    model_config = config['model']
    model = from_params(model_config, vocabs=vocabs, mode='train')

# Defaults applied when the config lacks a 'train' section.
train_config = {
    'metrics': ['accuracy'],
    'validate_best': True,
    'test_best': True
}

try:
    train_config.update(config['train'])
except KeyError:
    log.warning('Train config is missing. Populating with default values')

# Pair each metric name with its resolved callable.
metrics_functions = list(zip(train_config['metrics'],
                             get_metrics_by_names(train_config['metrics'])))
# NOTE(review): fragment — the `try:` matching the `except ValueError` below
# and the `if` headers matching the two `else:` clauses are outside this
# chunk. `c` is presumably a 'module.submodules:ClassName' string from the
# dataset_reader config — confirm against the missing lines.
module_name, cls_name = c.split(':')
reader = getattr(importlib.import_module(module_name), cls_name)()
except ValueError:
    e = ConfigError('Expected class description in a `module.submodules:ClassName` form, but got `{}`'
                    .format(c))
    log.exception(e)
    raise e
else:
    # Resolve a registered reader by name instead of an explicit class path.
    reader = get_model(reader_config.pop('name'))()
    data_path = expand_path(reader_config.pop('data_path', ''))
    data = reader.read(data_path, **reader_config)
else:
    log.warning("No dataset reader is provided in the JSON config.")

iterator_config = config['dataset_iterator']
iterator: Union[DataLearningIterator, DataFittingIterator] = from_params(iterator_config,
                                                                         data=data)
# NOTE(review): fragment of a train/evaluate entry point — the enclosing `def`
# (binding `to_validate`, `to_train`, `config`) is outside this chunk, and the
# chunk ends mid-expression.
# Defaults applied when the config lacks a 'train' section.
train_config = {
    'metrics': ['accuracy'],
    'validate_best': to_validate,
    'test_best': True
}

try:
    train_config.update(config['train'])
except KeyError:
    log.warning('Train config is missing. Populating with default values')

metrics_functions = list(zip(train_config['metrics'], get_metrics_by_names(train_config['metrics'])))

if to_train:
    reader_config = config['dataset_reader']
    reader = get_model(reader_config['name'])()
    data_path = expand_path(reader_config.get('data_path', ''))
    kwargs = {k: v for k, v in reader_config.items() if k not in ['name', 'data_path']}
    data = reader.read(data_path, **kwargs)

    iterator_config = config['dataset_iterator']
    iterator: BasicDatasetIterator = from_params(iterator_config, data=data)

    if 'chainer' in config:
        model = fit_chainer(config, iterator)
    else:
        vocabs = config.get('vocabs', {})
        for vocab_param_name, vocab_config in vocabs.items():
            v: Estimator = from_params(vocab_config, mode='train')
            vocabs[vocab_param_name] = _fit(v, iterator)
        model_config = config['model']
        model = from_params(model_config, vocabs=vocabs, mode='train')

    train_config = {
        'metrics': ['accuracy'],
        'validate_best': True,
        'test_best': True
    }

    try:
        train_config.update(config['train'])
    except KeyError:
        log.warning('Train config is missing. Populating with default values')

    # NOTE(review): the `if` guard matching the `elif`s below (presumably
    # something like `if callable(getattr(model, 'train_on_batch', None)):`)
    # is not present in this chunk of the file.
    _fit_batches(model, iterator, train_config)
elif callable(getattr(model, 'fit', None)):
    _fit(model, iterator, train_config)
elif not isinstance(model, Chainer):
    log.warning('Nothing to train')

# Free the trained model's resources before reloading the best checkpoint.
model.destroy()

res = {}

if train_config['validate_best'] or train_config['test_best']:
    # try:
    #     model_config['load_path'] = model_config['save_path']
    # except KeyError:
    #     log.warning('No "save_path" parameter for the model, so "load_path" will not be renewed')
    model = build_model_from_config(config, load_trained=True)
    log.info('Testing the best saved model')

    if train_config['validate_best']:
        # NOTE(review): train_config['show_examples'] is accessed by key but is
        # not in the defaults above — raises KeyError when the config has no
        # 'train' section; .get('show_examples', False) would match the
        # batch_size handling.
        report = {
            'valid': _test_model(model, metrics_functions, iterator,
                                 train_config.get('batch_size', -1), 'valid',
                                 show_examples=train_config['show_examples'])
        }
        res['valid'] = report['valid']['metrics']
        print(json.dumps(report, ensure_ascii=False))

    if train_config['test_best']:
        report = {
            'test': _test_model(model, metrics_functions, iterator,