# Random hyperparameter search over a three-step pipeline, persisting trials to
# a JSON repository. Module paths assume the neuraxle 0.3.x layout.
import numpy as np

from neuraxle.hyperparams.distributions import RandInt
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.metaopt.auto_ml import AutoMLSequentialWrapper, HyperparamsJSONRepository
from neuraxle.metaopt.random import RandomSearch
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import MultiplyByN


def test_automl_sequential_wrapper(tmpdir):
    # Given: a toy task where each step multiplies its input by a tunable integer.
    data_inputs = np.array(range(100))
    expected_outputs = np.array(range(100, 200))

    hyperparameter_space = HyperparameterSpace({
        'multiplication_1__multiply_by': RandInt(1, 3),
        'multiplication_2__multiply_by': RandInt(1, 3),
        'multiplication_3__multiply_by': RandInt(1, 3),
    })

    pipeline = Pipeline([
        ('multiplication_1', MultiplyByN()),
        ('multiplication_2', MultiplyByN()),
        ('multiplication_3', MultiplyByN())
    ], cache_folder=tmpdir).set_hyperparams_space(hyperparameter_space)

    auto_ml = RandomSearch(pipeline, hyperparams_repository=HyperparamsJSONRepository(tmpdir), n_iter=100)

    # When: fit runs the search, then the best found pipeline is used to predict.
    auto_ml: AutoMLSequentialWrapper = auto_ml.fit(data_inputs, expected_outputs)
    best_model: Pipeline = auto_ml.get_best_model()
    predicted_outputs = best_model.transform(data_inputs)
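
# The flat keys above address nested steps with double underscores; the same
# space written as a nested dict would look like this (illustration only, not
# part of the original test):
# {
#     'multiplication_1': {'multiply_by': RandInt(1, 3)},
#     'multiplication_2': {'multiply_by': RandInt(1, 3)},
#     'multiplication_3': {'multiply_by': RandInt(1, 3)},
# }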


# Imports and fixtures for the hyperparameter getter/setter tests below.
# LogUniform, Pipeline, NumpyTranspose, AddFeatures and ModelStacking are added
# here because the tests use them; their module paths assume neuraxle 0.3.x.
from neuraxle.base import MetaStepMixin, BaseStep, NonFittableMixin, NonTransformableMixin
from neuraxle.hyperparams.distributions import RandInt, Boolean, LogUniform
from neuraxle.hyperparams.space import HyperparameterSpace, HyperparameterSamples
from neuraxle.pipeline import Pipeline
from neuraxle.steps.loop import StepClonerForEachDataInput
from neuraxle.steps.numpy import NumpyTranspose
from neuraxle.union import AddFeatures, ModelStacking
from testing.test_pipeline import SomeStep

SOME_STEP_HP_KEY = 'somestep_hyperparam'
RAND_INT_SOME_STEP = RandInt(-10, 0)
RAND_INT_STEP_CLONER = RandInt(0, 10)
META_STEP_HP = 'metastep_hyperparam'
SOME_STEP_HP = "SomeStep__somestep_hyperparam"
META_STEP_HP_VALUE = 1
SOME_STEP_HP_VALUE = 2

HYPE_SPACE = HyperparameterSpace({
    "a__test": Boolean()
})

HYPE_SAMPLE = HyperparameterSamples({
    "a__test": True
})
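

# The meta-step tests below instantiate SomeMetaStepMixin, whose definition is
# not included in this snippet. A minimal stand-in consistent with the imports
# above could look like this (an assumed reconstruction, not the original class):
class SomeMetaStepMixin(NonTransformableMixin, NonFittableMixin, MetaStepMixin, BaseStep):
    def __init__(self, wrapped: BaseStep):
        BaseStep.__init__(self)
        MetaStepMixin.__init__(self, wrapped)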


def test_hyperparam_space():
    p = Pipeline([
        AddFeatures([
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"n_components": RandInt(1, 5)}))
        ]),
        ModelStacking([
            SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"n_estimators": RandInt(1, 1000)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)})),
            SomeStep(hyperparams_space=HyperparameterSpace({"max_depth": RandInt(1, 100)}))
        ],
            joiner=NumpyTranspose(),
            judge=SomeStep(hyperparams_space=HyperparameterSpace({"alpha": LogUniform(0.1, 10.0)}))
        )
    ])

    # Sample the space (set_hyperparams expects sampled values, not
    # distributions), then check that sibling steps of the same class are
    # disambiguated with numeric name suffixes.
    rvsed = p.get_hyperparams_space().rvs()
    p.set_hyperparams(rvsed)

    hyperparams = p.get_hyperparams()

    assert 'AddFeatures__SomeStep__n_components' in hyperparams.keys()
    assert 'AddFeatures__SomeStep1__n_components' in hyperparams.keys()
    assert 'ModelStacking__SomeStep__n_estimators' in hyperparams.keys()
    assert 'ModelStacking__SomeStep1__n_estimators' in hyperparams.keys()


def test_step_cloner_should_get_hyperparams_space():
    p = StepClonerForEachDataInput(SomeStep())

    p.set_hyperparams_space(HyperparameterSpace({
        META_STEP_HP: RAND_INT_STEP_CLONER,
        SOME_STEP_HP: RAND_INT_SOME_STEP
    }))

    hyperparams_space = p.get_hyperparams_space()

    assert hyperparams_space[META_STEP_HP] == RAND_INT_STEP_CLONER
    assert hyperparams_space[SOME_STEP_HP] == RAND_INT_SOME_STEP
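
    # Sampling the space then yields one flat value per level (an added
    # illustration; rvs() is used the same way in the later examples):
    samples = hyperparams_space.rvs()
    assert 0 <= samples[META_STEP_HP] <= 10
    assert -10 <= samples[SOME_STEP_HP] <= 0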


RAND_INT_META_STEP = RandInt(0, 10)


def test_meta_step_mixin_should_get_hyperparams():
    p = SomeMetaStepMixin(SomeStep())

    p.set_hyperparams(HyperparameterSamples({
        META_STEP_HP: META_STEP_HP_VALUE,
        SOME_STEP_HP: SOME_STEP_HP_VALUE
    }))

    hyperparams = p.get_hyperparams()

    assert hyperparams[META_STEP_HP] == META_STEP_HP_VALUE
    assert hyperparams[SOME_STEP_HP] == SOME_STEP_HP_VALUE


def test_meta_step_mixin_should_set_hyperparams():
    # Body reconstructed to mirror the getter test above; the original snippet
    # was truncated at this point.
    p = SomeMetaStepMixin(SomeStep())

    p.set_hyperparams(HyperparameterSamples({
        META_STEP_HP: META_STEP_HP_VALUE,
        SOME_STEP_HP: SOME_STEP_HP_VALUE
    }))

    hyperparams = p.get_hyperparams()

    assert hyperparams[META_STEP_HP] == META_STEP_HP_VALUE
    assert hyperparams[SOME_STEP_HP] == SOME_STEP_HP_VALUE


# Boston housing example: scikit-learn models wrapped as neuraxle steps, with
# hyperparameter spaces attached at pipeline definition time (neuraxle module
# paths as in 0.3.x).
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_boston
from sklearn.decomposition import PCA, FastICA
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from neuraxle.hyperparams.distributions import Boolean, LogUniform, RandInt
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import NumpyTranspose
from neuraxle.steps.sklearn import SKLearnWrapper
from neuraxle.union import AddFeatures, ModelStacking


def main():
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline
    # definition, but they could already be set within the classes at their
    # definition if using custom classes, or defined after declaring the
    # pipeline using a flat dict or a nested dict (see the sketch at the end of
    # this function).
    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600), "max_depth": RandInt(1, 10),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("Meta-fitting on train:")
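
    # The original example continues by meta-fitting the pipeline on X_train /
    # y_train (truncated here). As the note above mentions, the space could
    # instead be attached after declaring the pipeline, using flat
    # double-underscore keys (an illustrative sketch, not part of the original):
    #     p.set_hyperparams_space(HyperparameterSpace({
    #         'AddFeatures__SKLearnWrapper_PCA__n_components': RandInt(1, 3),
    #     }))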


# Setting a hyperparameter space on a pipeline after declaring it, using flat
# double-underscore keys, then sampling it. Identity is assumed importable from
# neuraxle.base (0.3.x layout); the other imports repeat those above.
from neuraxle.base import Identity
from neuraxle.steps.numpy import MultiplyByN


def main():
    p = Pipeline([
        ('step1', MultiplyByN()),
        ('step2', MultiplyByN()),
        Pipeline([
            Identity(),
            Identity(),
            SKLearnWrapper(PCA(n_components=4))
        ])
    ])

    p.set_hyperparams_space(HyperparameterSpace({
        'step1__multiply_by': RandInt(42, 50),
        'step2__multiply_by': RandInt(-10, 0),
        'Pipeline__SKLearnWrapper_PCA__n_components': RandInt(2, 3)
    }))

    samples = p.get_hyperparams_space().rvs()
    p.set_hyperparams(samples)

    samples = p.get_hyperparams()

    assert 42 <= samples['step1__multiply_by'] <= 50
    assert -10 <= samples['step2__multiply_by'] <= 0
    assert samples['Pipeline__SKLearnWrapper_PCA__n_components'] in [2, 3]
    assert p['Pipeline']['SKLearnWrapper_PCA'].get_wrapped_sklearn_predictor().n_components in [2, 3]


# RandInt's narrowing method (shown out of its class context in this snippet):
def narrow_space_from_best_guess(self, best_guess, kept_space_ratio: float = 0.5) -> HyperparameterDistribution:
    """
    Will narrow the underlying distribution towards the best guess.

    :param best_guess: the integer value towards which we want to narrow down the space; it should lie within the distribution's current bounds.
    :param kept_space_ratio: what proportion of the space is kept. Default is to keep half the space (0.5).
    :return: a new HyperparameterDistribution that has been narrowed down.
    """
    lost_space_ratio = 1.0 - kept_space_ratio
    new_min_included = round(self.min_included * kept_space_ratio + best_guess * lost_space_ratio)
    new_max_included = round(self.max_included * kept_space_ratio + best_guess * lost_space_ratio)
    if new_max_included <= new_min_included or kept_space_ratio == 0.0:
        return FixedHyperparameter(best_guess).was_narrowed_from(kept_space_ratio, self)
    return RandInt(new_min_included, new_max_included).was_narrowed_from(kept_space_ratio, self)
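
# Worked example of the narrowing arithmetic above (an added sketch; note that
# Python's round() rounds halves to the nearest even number):
#   RandInt(0, 10).narrow_space_from_best_guess(5, kept_space_ratio=0.5)
#   new_min_included = round(0 * 0.5 + 5 * 0.5)  = round(2.5) = 2
#   new_max_included = round(10 * 0.5 + 5 * 0.5) = round(7.5) = 8
# i.e. the space narrows from [0, 10] to [2, 8] around the best guess, and a
# kept_space_ratio of 0.0 would collapse it to FixedHyperparameter(5).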