def setUp(self):
unittest.TestCase.setUp(self)
self.rh = RunHistory()
self.cs = get_config_space()
self.config1 = Configuration(self.cs,
values={'a': 0, 'b': 100})
self.config2 = Configuration(self.cs,
values={'a': 100, 'b': 0})
self.config3 = Configuration(self.cs,
values={'a': 100, 'b': 100})
self.scen = Scenario({"cutoff_time": 2, 'cs': self.cs,
"run_obj": 'runtime',
"output_dir": ''})
self.stats = Stats(scenario=self.scen)
self.stats.start_timing()
self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
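# A minimal usage sketch (not part of the original test class): recording a run for one of
# the configurations created in setUp; the cost/time/seed values are purely illustrative.
def test_add_run_sketch(self):
    self.rh.add(config=self.config1, cost=10.0, time=20.0,
                status=StatusType.SUCCESS, instance_id=1, seed=12345)
    self.assertEqual(len(self.rh.data), 1)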
def test_choose_next_higher_budget(self):
seed = 42
config = self.scenario.cs.sample_configuration  # kept as a callable; config() samples a fresh configuration below
rh = RunHistory()
rh.add(config=config(), cost=1, time=10, instance_id=None,
seed=1, budget=1, additional_info=None, status=StatusType.SUCCESS)
rh.add(config=config(), cost=2, time=10, instance_id=None,
seed=1, budget=2, additional_info=None, status=StatusType.SUCCESS)
rh.add(config=config(), cost=3, time=10, instance_id=None,
seed=1, budget=2, additional_info=None, status=StatusType.SUCCESS)
rh.add(config=config(), cost=4, time=10, instance_id=None,
seed=1, budget=3, additional_info=None, status=StatusType.SUCCESS)
smbo = SMAC4AC(self.scenario, rng=seed, runhistory=rh).solver
smbo.epm_chooser.min_samples_model = 2
# Return two configurations evaluated with budget==2
x, y = smbo.epm_chooser._collect_data_to_train_model()
self.assertListEqual(list(y.flatten()), [2, 3])
self.assertEqual(x.shape[0], 2)
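# With min_samples_model = 2, budget 3 has only a single run, so the chooser falls back to
# the highest budget that has at least two runs (budget 2) and trains on exactly those two
# runs, whose costs are 2 and 3.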
def test_epm_reuse_rf(self):
""" if no runhistory is passed to epm, but there was a model trained
before, that model should be reused! (if reuse_epm flag is set) """
scen = Scenario(self.scen_fn, cmd_options={'run_obj': 'quality'})
scen.feature_array = None
validator = Validator(scen, self.trajectory)
old_rh = RunHistory(average_cost)
for config in [e["incumbent"] for e in self.trajectory]:
old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0',
seed=127)
self.assertIsInstance(validator.validate_epm(runhistory=old_rh), RunHistory)
self.assertIsInstance(validator.validate_epm(output_fn="test/test_files/validation/"),
RunHistory)
self.assertRaises(ValueError, validator.validate_epm, reuse_epm=False)
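# validate_epm() returns a RunHistory with estimated costs; the second call above passes no
# runhistory and therefore relies on reusing the model trained in the first call, while
# disabling reuse_epm without providing data raises the ValueError asserted last.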
req_opts.add_argument("--verbose_level", default=logging.INFO,
choices=["INFO", "DEBUG"],
help="random seed")
req_opts.add_argument("--save_fn", default="fw_importance.pdf",
help="file name of saved plot")
args_ = parser.parse_args()
logging.basicConfig(level=args_.verbose_level)
# if args_.verbose_level == "DEBUG":
# logging.parent.level = 10
scen = Scenario(args_.scenario_file)
hist = RunHistory()
for runhist_fn in args_.runhistory:
hist.update_from_json(fn=runhist_fn, cs=scen.cs)
fws = ForwardSelection(scenario=scen,
runhistory=hist)
fws.run(save_fn=args_.save_fn)
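# Hypothetical invocation (the script name is assumed; the scenario/runhistory argument
# names are inferred from the attribute accesses above and may differ in the real script):
#   python forward_selection.py --scenario_file scenario.txt \
#       --runhistory runhistory.json --save_fn fw_importance.pdf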
not c.lower() in self.valid_values and
not c in parameters]
for c in set(self.valid_values).intersection(set(data.columns)):
# Cast to numeric
data[c] = data[c].apply(pd.to_numeric, errors='ignore')
data, id_to_config = self.extract_configs(data, cs, id_to_config)
data, id_to_inst_feats = self.extract_instances(data, feature_names,
instance_features)
self.logger.debug("Found: seed=%s, cost=%s, time=%s, status=%s, budget=%s",
'seed' in data.columns, 'cost' in data.columns,
'time' in data.columns, 'status' in data.columns, 'budget' in data.columns)
# Create RunHistory
rh = RunHistory()
def add_to_rh(row):
new_status = self._interpret_status(row['status']) if 'status' in row else StatusType.SUCCESS
rh.add(config=id_to_config[row['config_id']],
cost=row['cost'],
time=row['time'] if 'time' in row else -1,
status=new_status,
instance_id=row['instance_id'] if 'instance_id' in row else None,
seed=row['seed'] if 'seed' in row else None,
budget=row['budget'] if 'budget' in row else 0,
additional_info=None,
origin=DataOrigin.INTERNAL)
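# The DataFrame must provide at least 'config_id' and 'cost' per row; 'time', 'status',
# 'instance_id', 'seed' and 'budget' are optional and fall back to the defaults above.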
data.apply(add_to_rh, axis=1)
return rh
#
# We could simply modify the scenario-object, stored in
# 'smac.solver.scenario' and start optimization again:
# smac.solver.scenario.ta_run_limit = 50
# smac.optimize()
# Or, to show the whole process of recovering a SMAC-run from the output
# directory, create a new scenario with an extended budget:
new_scenario = Scenario(orig_scen_dict,
cmd_options={'runcount_limit': 50, # overwrite these args
'output_dir' : 'restored'})
# We load the runhistory, ...
rh_path = os.path.join(old_output_dir, "runhistory.json")
runhistory = RunHistory(aggregate_func=None)
runhistory.load_json(rh_path, new_scenario.cs)
# ... stats, ...
stats_path = os.path.join(old_output_dir, "stats.json")
stats = Stats(new_scenario)
stats.load(stats_path)
# ... and trajectory.
traj_path = os.path.join(old_output_dir, "traj_aclib2.json")
trajectory = TrajLogger.read_traj_aclib_format(
fn=traj_path, cs=new_scenario.cs)
incumbent = trajectory[-1]["incumbent"]
# Now we can initialize SMAC with the recovered objects and restore the
# state where we left off. By providing stats and a restore_incumbent, SMAC
# automatically detects the intention of restoring a state.
smac = SMAC(scenario=new_scenario,
runhistory=runhistory,
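# (snippet truncated here; per the comment above, the SMAC constructor also receives the
# recovered stats and the incumbent via restore_incumbent before the call is closed)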
tae_kwargs: Optional[dict]
arguments passed to constructor of '~tae'
"""
self.logger = logging.getLogger(
self.__module__ + "." + self.__class__.__name__)
self.n_iterations = n_iterations
self.scenario = scenario
self.run_id, self.rng = get_rng(rng, run_id, self.logger)
self.kwargs = kwargs
self.output_dir = None
self.top_dir = None
self.solver = None
self.portfolio = None
self.rh = RunHistory(average_cost)
self._tae = tae
self._tae_kwargs = tae_kwargs
if incs_per_round <= 0:
self.logger.warning('Invalid value in %s: %d. Setting to 1', 'incs_per_round', incs_per_round)
self.incs_per_round = max(incs_per_round, 1)
if n_optimizers <= 0:
self.logger.warning('Invalid value in %s: %d. Setting to 1', 'n_optimizers', n_optimizers)
self.n_optimizers = max(n_optimizers, 1)
self.val_set = self._get_validation_set(val_set)
self.cost_per_inst = {}
self.optimizer = None
self.portfolio_cost = None
# max_runs_epm is the maximum total number of runs considered for the epm, to limit the maximum possible number of configs
max_configs = int(self.max_runs_epm / (len(scenario.train_insts) + len(scenario.test_insts)))
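# e.g. (hypothetical numbers): with max_runs_epm == 10000 and 40 train + 10 test instances,
# max_configs == 10000 // 50 == 200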
if len(all_configs) > max_configs:
self.logger.debug("Limiting number of configs to train epm from %d to %d (based on max runs %d) and "
"choosing the ones with the most runs (for parallel coordinates)",
len(all_configs), max_configs, self.max_runs_epm)
# sort descending so that the configurations with the most runs are kept
all_configs = sorted(all_configs,
key=lambda c: len(original_rh.get_runs_for_config(c, only_max_observed_budget=False)),
reverse=True)
all_configs = all_configs[:max_configs]
if default not in all_configs:
all_configs = [default] + all_configs
if incumbent not in all_configs:
all_configs.append(incumbent)
# Get costs for those configurations
epm_rh = RunHistory()
epm_rh.update(validated_rh)
if scenario.feature_dict: # if instances are available
epm_rh.update(timing(validator.validate_epm)(all_configs, 'train+test', 1, runhistory=validated_rh))
config_to_cost = OrderedDict({c: epm_rh.get_cost(c) for c in all_configs})
data = OrderedDict()
data['cost'] = list(config_to_cost.values())
for hp in self.runscontainer.scenario.cs.get_hyperparameter_names():
data[hp] = np.array([c[hp] #if hp in c.get_dictionary() and not isinstance(c[hp], str) else np.nan
for c in config_to_cost.keys()])
df = pd.DataFrame(data=data)
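# df has one row per configuration: the 'cost' column from epm_rh plus one column per hyperparameter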
return df
self.logger.debug("Ranges: %s", str(ranges))
for r in range(1, len(ranges)):
if ranges[r] <= ranges[r-1]:
if ranges[r-1] + 1 >= len(as_list):
raise RuntimeError("There was a problem with the quantiles of the configuration footprint. "
"Please report this Error on \"https://github.com/automl/CAVE/issues\" and provide the debug.txt-file.")
ranges[r] = ranges[r-1] + 1
self.logger.debug("Fixed ranges to: %s", str(ranges))
# Sanity check
if ranges[0] != 0 or ranges[-1] != len(as_list) or len(ranges) != quantiles + 1:
raise RuntimeError("Sanity check on range-creation in configurator footprint went wrong. "
"Please report this Error on \"https://github.com/automl/CAVE/issues\" and provide the debug.txt-file.")
tmp_rh = RunHistory(average_cost)
for i, j in zip(ranges[:-1], ranges[1:]):
for idx in range(i, j):
k, v = as_list[idx]
tmp_rh.add(config=rh.ids_config[k.config_id],
cost=v.cost, time=v.time, status=v.status,
instance_id=k.instance_id, seed=k.seed,
additional_info=v.additional_info)
if timestamps:
labels.append("{0:.2f}".format(timestamps[j - 1]))
r_p_q_p_c.append([len(tmp_rh.get_runs_for_config(c)) for c in conf_list])
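# r_p_q_p_c: per quantile, the run count for each configuration in conf_list; counts are
# cumulative across quantiles because tmp_rh is never reset inside the loop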
self.logger.debug("Labels: " + str(labels))
return labels, r_p_q_p_c