Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Args:
population: list of dictionaries - configs of current population
evolution: ParamsEvolution
gpus: list of given devices (list of integers)
Returns:
None
"""
population_size = len(population)
for k in range(population_size // len(gpus) + 1):
procs = []
for j in range(len(gpus)):
i = k * len(gpus) + j
if i < population_size:
save_path = expand_path(
evolution.get_value_from_config(parse_config(population[i]),
evolution.path_to_models_save_path))
save_path.mkdir(parents=True, exist_ok=True)
f_name = save_path / "config.json"
save_json(population[i], f_name)
with save_path.joinpath('out.txt').open('w', encoding='utf8') as outlog,\
save_path.joinpath('err.txt').open('w', encoding='utf8') as errlog:
env = dict(os.environ)
if len(gpus) > 1 or gpus[0] != -1:
env['CUDA_VISIBLE_DEVICES'] = str(gpus[j])
procs.append(Popen("{} -m deeppavlov train {}".format(sys.executable, str(f_name)),
shell=True, stdout=outlog, stderr=errlog, env=env))
for j, proc in enumerate(procs):
i = k * len(gpus) + j
param_value_search = value['search_choice']
param_names.append(param_name)
param_values.append(param_value_search)
# find optimal params
if args.search_type == 'grid':
# generate parameter combinations for grid search
combinations = list(product(*param_values))
# calculate cv scores
scores = []
for comb in combinations:
config = deepcopy(config_init)
for param_path, param_value in zip(param_paths, comb):
params_helper.insert_value_or_dict_into_config(config, param_path, param_value)
config = parse_config(config)
if (n_folds is not None) | is_loo:
# CV for model evaluation
score_dict = calc_cv_score(config, data=data, n_folds=n_folds, is_loo=is_loo)
score = score_dict[next(iter(score_dict))]
else:
# train/valid for model evaluation
data_to_evaluate = data.copy()
if len(data_to_evaluate['valid']) == 0:
data_to_evaluate['train'], data_to_evaluate['valid'] = train_test_split(data_to_evaluate['train'],
test_size=0.2)
iterator = get_iterator_from_config(config, data_to_evaluate)
score = train_evaluate_model_from_config(config, iterator=iterator)['valid'][target_metric]
scores.append(score)
evolution.basic_config, "validate_best"))[0]
+ ["validate_best"])
test_best = evolution.get_value_from_config(evolution.basic_config,
list(evolution.find_model_path(
evolution.basic_config, "test_best"))[0]
+ ["test_best"])
if (not validate_best) and test_best:
log.info("Validate_best is set to False. Tuning parameters on test")
elif (not validate_best) and (not test_best):
raise ConfigError("Validate_best and test_best are set to False. Can not evolve.")
population_metrics = {}
for m in considered_metrics:
population_metrics[m] = []
for i in range(population_size):
logpath = expand_path(evolution.get_value_from_config(parse_config(population[i]),
evolution.path_to_models_save_path)
) / "out.txt"
reports_data = logpath.read_text(encoding='utf8').splitlines()[-2:]
reports = []
for j in range(2):
try:
reports.append(json.loads(reports_data[j]))
except:
pass
val_results = {}
test_results = {}
for m in considered_metrics:
val_results[m] = None
test_results[m] = None
if len(reports) == 2 and "valid" in reports[0].keys() and "test" in reports[1].keys():
def build_model(config: Union[str, Path, dict], mode: str = 'infer',
load_trained: bool = False, download: bool = False,
serialized: Optional[bytes] = None) -> Chainer:
"""Build and return the model described in corresponding configuration file."""
config = parse_config(config)
if serialized:
serialized: list = pickle.loads(serialized)
if download:
deep_download(config)
import_packages(config.get('metadata', {}).get('imports', []))
model_config = config['chainer']
model = Chainer(model_config['in'], model_config['out'], model_config.get('in_y'))
for component_config in model_config['pipe']:
if load_trained and ('fit_on' in component_config or 'in_y' in component_config):
try:
def predict_with_model(config_path: [Path, str]) -> List[Optional[List[str]]]:
"""Returns predictions of morphotagging model given in config :config_path:.
Args:
config_path: a path to config
Returns:
a list of morphological analyses for each sentence. Each analysis is either a list of tags
or a list of full CONLL-U descriptions.
"""
config = parse_config(config_path)
reader_config = config['dataset_reader']
reader = get_model(reader_config['class_name'])()
data_path = expand_path(reader_config.get('data_path', ''))
read_params = {k: v for k, v in reader_config.items() if k not in ['class_name', 'data_path']}
data: Dict = reader.read(data_path, **read_params)
iterator_config = config['dataset_iterator']
iterator: MorphoTaggerDatasetIterator = from_params(iterator_config, data=data)
model = build_model(config, load_trained=True)
answers = [None] * len(iterator.test)
batch_size = config['predict'].get("batch_size", -1)
for indexes, (x, _) in iterator.gen_batches(
batch_size=batch_size, data_type="test", shuffle=False, return_indexes=True):
y = model(x)
def get_server_params(server_config_path, model_config):
    """Assemble server parameters for a model from the server config file.

    Starts from the ``common_defaults`` section of the server config. When the
    model config contains ``metadata.labels.server_utils`` and that tag has an
    entry under ``model_defaults``, every truthy value of that entry overrides
    the corresponding common default.

    Args:
        server_config_path: location of the server config, handed to ``read_json``.
        model_config: model configuration, handed to ``parse_config``.

    Returns:
        The ``common_defaults`` mapping of the loaded server config, updated
        in place with the model-specific overrides.
    """
    server_settings = read_json(server_config_path)
    parsed_model_config = parse_config(model_config)

    params = server_settings['common_defaults']

    # No server tag in the model config: common defaults are used as they are.
    if not check_nested_dict_keys(parsed_model_config, ['metadata', 'labels', 'server_utils']):
        return params

    tag = parsed_model_config['metadata']['labels']['server_utils']
    if tag in server_settings['model_defaults']:
        overrides = server_settings['model_defaults'][tag]
        for key, value in overrides.items():
            # Falsy entries (empty string, None, 0, ...) do not replace the default.
            if value:
                params[key] = value

    return params
def get_config_downloads(config: Union[str, Path, dict]) -> Set[Tuple[str, Path]]:
config = parse_config(config)
downloads = set()
if 'metadata' in config and 'download' in config['metadata']:
for resource in config['metadata']['download']:
if isinstance(resource, str):
resource = {
'url': resource
}
url = resource['url']
dest = expand_path(resource.get('subdir', ''))
downloads.add((url, dest))
config_references = [expand_path(config_ref) for config_ref in get_all_elems_from_json(config, 'config_path')]
def __init__(self, config_path: Union[str, Dict, Path]) -> None:
    """
    Load the experiment configuration and copy its ``pipeline_search`` settings onto the instance.

    Args:
        config_path: either a path (``str`` or ``Path``) that is read with ``read_json``,
            or an already loaded configuration that is used as is. In both cases the
            result is passed through ``parse_config``.
    """
    self.exp_config = read_json(config_path) if isinstance(config_path, (str, Path)) else config_path
    self.exp_config = parse_config(self.exp_config)

    # Every option below lives in the 'pipeline_search' section of the experiment config.
    settings = self.exp_config['pipeline_search']

    self.exp_name = settings.get('exp_name', 'experiment')
    # Falls back to today's date when the config does not pin one.
    self.date = settings.get('date', datetime.now().strftime('%Y-%m-%d'))
    self.info = settings.get('info')
    self.root = expand_path(settings.get('root', '~/.deeppavlov/experiments'))
    self.plot = settings.get('plot', False)
    self.save_best = settings.get('save_best', False)
    self.do_test = settings.get('do_test', False)

    self.search_type = settings.get('search_type', 'random')
    self.sample_num = settings.get('sample_num', 10)
    self.target_metric = settings.get('target_metric')
    self.multiprocessing = settings.get('multiprocessing', True)
    self.max_num_workers = settings.get('max_num_workers')
def get_model(model_config):
    """Build a model from a configuration and expose its main component.

    Args:
        model_config: configuration accepted by ``parse_config``.

    Returns:
        A 3-tuple ``(main_component, chainer, config)``: the value of
        ``get_main_component()`` called on the built model, the object
        returned by ``build_model``, and the parsed configuration.
    """
    parsed_config = parse_config(model_config)
    pipeline = build_model(parsed_config)
    main_component = pipeline.get_main_component()
    return main_component, pipeline, parsed_config
def __init__(self, config_file, *args, **kwargs):
    """Build the wrapped model from a config with its final pipe component removed.

    Args:
        config_file: configuration accepted by ``parse_config``.
        *args: accepted but not used here.
        **kwargs: accepted but not used here.
    """
    parsed = parse_config(config_file)
    chainer_section = parsed["chainer"]

    # Discard the last component of the pipeline and make "y_predicted"
    # the only output of the chainer.
    pipe = chainer_section["pipe"]
    pipe.pop()
    chainer_section["out"] = ["y_predicted"]

    self.model = build_model(parsed)