def test_selector_assert_equivalent() -> None:
    select1 = tools.Selector(columns=["a", "b"], data=[[0, 1], [2, 3]])
    select2 = tools.Selector(columns=["b", "a"], data=[[3, 2], [1, 0]])
    select3 = tools.Selector(columns=["a", "b"], data=[[0, 5], [2, 3]])
    select1.assert_equivalent(select2)
    np.testing.assert_raises(AssertionError, select1.assert_equivalent, select3)
def test_xp_plotter() -> None:
    opt = "OnePlusOneOptimizer"
    df = tools.Selector.read_csv(Path(__file__).parent / "sphere_perf_example.csv").select(optimizer_name=[opt])
    data = plotting.XpPlotter.make_data(df)
    # check data
    testing.assert_set_equal(data.keys(), {opt})
    testing.assert_set_equal(data[opt].keys(), {"budget", "loss", "loss_std", "num_eval"})
    np.testing.assert_almost_equal(data[opt]["budget"], [200, 400, 800])
    np.testing.assert_almost_equal(data[opt]["loss"], [0.4811605, 0.3920045, 0.14778369])
    np.testing.assert_almost_equal(data[opt]["loss_std"], [0.83034832, 0.73255529, 0.18551625])
    # plot
    with patch("matplotlib.pyplot.Figure.tight_layout"):  # avoid warning message
        plotter = plotting.XpPlotter(data, title="Title")
    with patch("matplotlib.pyplot.Figure.savefig"):
        plotter.save("should_not_exist.png")
random_seed = np.random.randint(1000)
signature = inspect.signature(maker)
if not signature.parameters:
    return  # not designed to be seedable
# draw twice with the same seed and once with a different one
results = []
algo = "OnePlusOne"  # for simplifying the test
for seed in [random_seed, random_seed, random_seed + 1]:
    xps = list(itertools.islice(maker(seed), 0, 8))
    for xp in xps:
        if isinstance(xp.function, rl.agents.TorchAgentFunction):
            xp.function._num_test_evaluations = 1  # patch for faster evaluation
    simplified = [
        Experiment(xp.function, algo, budget=2, num_workers=min(2, xp.optimsettings.num_workers), seed=xp.seed)
        for xp in xps
    ]
    np.random.shuffle(simplified)  # compute in any order
    selector = Selector(data=[xp.run() for xp in simplified])
    results.append(Selector(selector.loc[:, ["loss", "seed"]]))  # elapsed_time can vary...
results[0].assert_equivalent(results[1], f"Non-identical outputs for seed={random_seed}")
np.testing.assert_raises(
    AssertionError, results[1].assert_equivalent, results[2], f"Identical output with different seeds (seed={random_seed})"
)
def _make_winners_df(df: pd.DataFrame, all_optimizers: List[str]) -> tools.Selector:
    """Finds the mean loss over all runs for each optimizer, and creates a matrix
    winner_ij = 1 if opt_i is better (lower loss) than opt_j (and 0.5 for ties)
    """
    if not isinstance(df, tools.Selector):
        df = tools.Selector(df)
    all_optim_set = set(all_optimizers)
    assert all(x in all_optim_set for x in df.unique("optimizer_name"))
    assert all(x in df.columns for x in ["optimizer_name", "loss"])
    winners = tools.Selector(index=all_optimizers, columns=all_optimizers, data=0.0)
    grouped = df.loc[:, ["optimizer_name", "loss"]].groupby(["optimizer_name"]).mean()
    df_optimizers = list(grouped.index)
    values = np.array(grouped)
    diffs = values - values.T
    # winner_ij = 1 means opt_i beats opt_j (beating means getting a lower loss/regret)
    winners.loc[df_optimizers, df_optimizers] = (diffs < 0) + 0.5 * (diffs == 0)
    return winners
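A minimal sketch of the resulting matrix for two optimizers, using hypothetical data and assuming this module's imports (pandas as pd, tools.Selector) are available:

toy = pd.DataFrame({
    "optimizer_name": ["OptA", "OptA", "OptB", "OptB"],
    "loss": [0.1, 0.3, 0.5, 0.7],
})
winners = _make_winners_df(toy, ["OptA", "OptB"])
# OptA's mean loss (0.2) is lower than OptB's (0.6), so:
#   winners.loc["OptA", "OptB"] == 1.0 and winners.loc["OptB", "OptA"] == 0.0
# The diagonal is 0.5 since each optimizer ties with itself.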
if "error" not in df.columns: # backward compatibility
return df # type: ignore
# errors with no recommendation
errordf = df.select(error=lambda x: isinstance(x, str) and x, loss=np.isnan)
for row in errordf.itertuples():
print(f'Removing "{row.optimizer_name}" with dimension {row.dimension}: got error "{row.error}".')
# error with recoreded recommendation
handlederrordf = df.select(error=lambda x: isinstance(x, str) and x, loss=lambda x: not np.isnan(x))
for row in handlederrordf.itertuples():
print(
f'Keeping non-optimal recommendation of "{row.optimizer_name}" ' f'with dimension {row.dimension} which raised "{row.error}".'
)
err_inds = set(errordf.index)
output = df.loc[[i for i in df.index if i not in err_inds], [c for c in df.columns if c != "error"]]
assert not output.loc[:, "loss"].isnull().values.any(), "Some nan values remain while there should not be any!"
output = tools.Selector(output.reset_index(drop=True))
return output # type: ignore
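A minimal sketch of this filtering on hypothetical data, assuming the fragment above is the body of the remove_errors helper called later in this file:

toy = tools.Selector({
    "optimizer_name": ["OptA", "OptB", "OptC"],
    "dimension": [2, 2, 2],
    "loss": [0.5, float("nan"), 0.7],
    "error": ["", "RuntimeError", "RuntimeError"],
})
cleaned = remove_errors(toy)
# OptB (error and no recorded recommendation) is dropped, OptC (error but a recorded loss) is kept,
# and the "error" column does not appear in the output.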
Example
-------
df.select(column1=["a", "b"])
will return a new Selector with rows having either "a" or "b" as value in column1
"""
df = self
for name, criterion in kwargs.items():
    if isinstance(criterion, abc.Iterable) and not isinstance(criterion, str):
        selected = df.loc[:, name].isin(criterion)
    elif callable(criterion):
        selected = [bool(criterion(x)) for x in df.loc[:, name]]
    else:
        selected = df.loc[:, name].isin([criterion])
    df = df.loc[selected, :]
return Selector(df)
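A minimal usage sketch (hypothetical data) of the three kinds of criteria handled above: an iterable (membership test), a callable (predicate on each value), and a plain value (equality):

sel = Selector(columns=["optimizer_name", "budget"],
               data=[["OnePlusOne", 100], ["CMA", 200], ["CMA", 400]])
by_list = sel.select(optimizer_name=["CMA"])        # iterable: keeps the two CMA rows
by_func = sel.select(budget=lambda b: b >= 200)     # callable: keeps budgets 200 and 400
by_value = sel.select(optimizer_name="OnePlusOne")  # single value: keeps the first row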
else:  # computation was started but interrupted (eg: KeyboardInterrupt)
    if xp != self._current_experiment:
        warnings.warn(f"Could not resume unfinished xp: {self._current_experiment}")
        self._current_experiment = xp
    else:
        opt = self._current_experiment._optimizer
        if opt is not None:
            print(f"Resuming existing experiment from iteration {opt.num_ask}.", flush=True)
self._current_experiment.run()
summary = self._current_experiment.get_description()
if process_function is not None:
    process_function(self, self._current_experiment)
self.summaries.append(summary)
self._current_experiment = None
print(f"Finished {indstr}", flush=True)
return tools.Selector(data=self.summaries)
"""
assert xpaxis in ["budget", "pseudotime"]
df = remove_errors(df)
df.loc[:, "loss"] = pd.to_numeric(df.loc[:, "loss"])
df = tools.Selector(df.fillna("N-A")) # remove NaN in non score values
assert not any("Unnamed: " in x for x in df.columns), f"Remove the unnamed index column: {df.columns}"
assert "error " not in df.columns, f"Remove error rows before plotting"
required = {"optimizer_name", "budget", "loss", "elapsed_time", "elapsed_budget"}
missing = required - set(df.columns)
assert not missing, f"Missing fields: {missing}"
output_folder = Path(output_folder)
os.makedirs(output_folder, exist_ok=True)
# check which descriptors do vary
descriptors = sorted(set(df.columns) - (required | {"seed", "pseudotime"})) # all other columns are descriptors
to_drop = [x for x in descriptors if len(df.unique(x)) == 1]
df = tools.Selector(df.loc[:, [x for x in df.columns if x not in to_drop]])
descriptors = sorted(set(df.columns) - (required | {"seed", "pseudotime"})) # now those should be actual interesting descriptors
print(f"Descriptors: {descriptors}")
print("# Fight plots")
#
# fight plot
# choice of the combination variables to fix
fight_descriptors = descriptors + ["budget"] # budget can be used as a descriptor for fight plots
combinable = [x for x in fight_descriptors if len(df.unique(x)) > 1] # should be all now
num_rows = 6
# For the competence map case we must consider pairs of attributes, hence maxcomb_size >= 2.
# A competence map shows for each value of each of two attributes which algorithm was best.
if competencemaps:
max_combsize = max(max_combsize, 2)
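# Illustration (hypothetical values): with combinable = ["budget", "dimension"] and
# max_combsize = 2, the loop below fixes each subset of descriptors in turn:
#     (), ("budget",), ("dimension",), ("budget", "dimension")
# so the competence-map case gets the pairwise combinations it needs.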
for fixed in list(
    itertools.chain.from_iterable(itertools.combinations(combinable, order) for order in range(max_combsize + 1))
):
    orders = [len(c) for c in df.unique(fixed)]