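# the parent flow and its 'lr' component are uploaded in a single publish
# call, so their upload dates should be identical (msg aids debugging)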
self.assertEqual(
flow.upload_date,
flow.components['lr'].upload_date,
msg=(
flow.name,
flow.flow_id,
flow.components['lr'].name, flow.components['lr'].flow_id,
),
)
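# publish a first flow; the sentinel appended to the flow name keeps it
# unique, so the test does not collide with flows already on the server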
clf1 = sklearn.tree.DecisionTreeClassifier(max_depth=2)
flow1 = self.extension.model_to_flow(clf1)
flow1, sentinel = self._add_sentinel_to_flow_name(flow1, None)
flow1.publish()
TestBase._mark_entity_for_removal('flow', (flow1.flow_id, flow1.name))
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow1.flow_id))
# In order to assign different upload times to the flows!
time.sleep(1)
clf2 = sklearn.ensemble.VotingClassifier(
[('dt', sklearn.tree.DecisionTreeClassifier(max_depth=2))])
flow2 = self.extension.model_to_flow(clf2)
flow2, _ = self._add_sentinel_to_flow_name(flow2, sentinel)
flow2.publish()
TestBase._mark_entity_for_removal('flow', (flow2.flow_id, flow2.name))
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow2.flow_id))
# If one component was published before the other, the components in
# the flow should have different upload dates
self.assertNotEqual(flow2.upload_date, flow2.components['dt'].upload_date)
language='English',
licence='MIT',
default_target_attribute='col_{}'.format(data.shape[1] - 1),
row_id_attribute=None,
ignore_attribute=None,
citation='None',
attributes=attributes,
data=data,
version_label='test',
original_data_url='http://openml.github.io/openml-python',
paper_url='http://openml.github.io/openml-python'
)
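# publish() uploads the dataset to the test server, which assigns it an id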
dataset.publish()
TestBase._mark_entity_for_removal('data', dataset.id)
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], dataset.id))
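# round trip: the ARFF served back by the server should be identical to
# the locally generated one, and be stored in the dense 'arff' format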
self.assertEqual(
_get_online_dataset_arff(dataset.id),
dataset._dataset,
"Uploaded arff does not match original one"
)
self.assertEqual(
_get_online_dataset_format(dataset.id),
'arff',
"Wrong format for dataset"
)
def test_sklearn_to_flow_list_of_lists(self):
from sklearn.preprocessing import OrdinalEncoder
ordinal_encoder = OrdinalEncoder(categories=[[0, 1], [0, 1]])
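# categories is a list of lists, a parameter type the flow serializer
# must round-trip intact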
extension = openml.extensions.sklearn.SklearnExtension()
# Test serialization works
flow = extension.model_to_flow(ordinal_encoder)
# Test flow is accepted by server
self._add_sentinel_to_flow_name(flow)
flow.publish()
TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id))
# Test deserialization works
server_flow = openml.flows.get_flow(flow.flow_id, reinstantiate=True)
self.assertEqual(server_flow.parameters['categories'], '[[0, 1], [0, 1]]')
self.assertEqual(server_flow.model.categories, flow.model.categories)
contributor=None,
collection_date=None,
language='English',
licence=None,
default_target_attribute='y',
row_id_attribute=None,
ignore_attribute=None,
citation=None,
attributes=column_names,
data=sparse_data,
version_label='test',
)
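# publish the sparse dataset; the server should store it as sparse ARFF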
xor_dataset.publish()
TestBase._mark_entity_for_removal('data', xor_dataset.id)
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], xor_dataset.id))
self.assertEqual(
_get_online_dataset_arff(xor_dataset.id),
xor_dataset._dataset,
"Uploaded ARFF does not match original one"
)
self.assertEqual(
_get_online_dataset_format(xor_dataset.id),
'sparse_arff',
"Wrong format for dataset"
)
def _existing_setup_exists(self, classif):
flow = self.extension.model_to_flow(classif)
flow.name = 'TEST%s%s' % (get_sentinel(), flow.name)
flow.publish()
TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id))
# although the flow exists, we can be sure there are no
# setups (yet) as it hasn't been run
setup_id = openml.setups.setup_exists(flow)
self.assertFalse(setup_id)
# now run the flow on an easy task:
task = openml.tasks.get_task(115) # diabetes
run = openml.runs.run_flow_on_task(flow, task)
# spoof flow id, otherwise the sentinel is ignored
run.flow_id = flow.flow_id
run.publish()
TestBase._mark_entity_for_removal('run', run.run_id)
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id))
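# After the run is published, the flow's hyperparameter configuration is
# registered as a setup on the server. A minimal sketch of the follow-up
# check, assuming setup_exists now returns the new setup's id:
setup_id = openml.setups.setup_exists(flow)
self.assertGreater(setup_id, 0)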
('hotencoding', sklearn.preprocessing.OneHotEncoder(**ohe_params)),
(
'variancethreshold',
sklearn.feature_selection.VarianceThreshold(),
),
('classifier', sklearn.tree.DecisionTreeClassifier())
]
complicated = sklearn.pipeline.Pipeline(steps=steps)
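# round trip both a plain estimator and a composite pipeline: publish,
# redownload, and verify that flow_exists finds the published flow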
for classifier in [nb, complicated]:
flow = self.extension.model_to_flow(classifier)
flow, _ = self._add_sentinel_to_flow_name(flow, None)
# publish the flow
flow = flow.publish()
TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id))
# redownload the flow
flow = openml.flows.get_flow(flow.flow_id)
# check that flow_exists can find it
downloaded_flow_id = openml.flows.flow_exists(
flow.name,
flow.external_version,
)
self.assertEqual(downloaded_flow_id, flow.flow_id)
flow.publish()
# Not collecting flow_id for deletion since this is a test for failed upload
self.assertEqual(api_call_mock.call_count, 1)
self.assertEqual(get_flow_mock.call_count, 1)
self.assertEqual(flow_exists_mock.call_count, 1)
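# simulate a server-side flow whose name differs by one character; the
# next publish should then fail with a consistency error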
flow_copy = copy.deepcopy(flow)
flow_copy.name = flow_copy.name[:-1]
get_flow_mock.return_value = flow_copy
flow_exists_mock.return_value = 1
with self.assertRaises(ValueError) as context_manager:
flow.publish()
TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id))
fixture = (
"The flow on the server is inconsistent with the local flow. "
"The server flow ID is 1. Please check manually and remove "
"the flow if necessary! Error is:\n"
"'Flow sklearn.ensemble.forest.RandomForestClassifier: "
"values for attribute 'name' differ: "
"'sklearn.ensemble.forest.RandomForestClassifier'"
"\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'"
)
self.assertEqual(context_manager.exception.args[0], fixture)
self.assertEqual(get_flow_mock.call_count, 2)
def test_upload_dataset_with_url(self):
dataset = OpenMLDataset(
"%s-UploadTestWithURL" % self._get_sentinel(),
"test",
data_format="arff",
version=1,
url="https://www.openml.org/data/download/61/dataset_61_iris.arff",
)
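# no data file is uploaded; the server fetches the ARFF from the given URL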
dataset.publish()
TestBase._mark_entity_for_removal('data', dataset.dataset_id)
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], dataset.dataset_id))
self.assertIsInstance(dataset.dataset_id, int)
# from the past
try:
# in case the run does not exist yet
run = openml.runs.run_model_on_task(
model=clf,
task=task,
avoid_duplicate_runs=True,
)
self.assertEqual(
len(run.trace.trace_iterations),
num_iterations * num_folds,
)
run = run.publish()
TestBase._mark_entity_for_removal('run', run.run_id)
TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
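# block until the evaluation engine has processed the run; the second
# argument bounds how long we wait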
self._wait_for_processed_run(run.run_id, 200)
run_id = run.run_id
except openml.exceptions.OpenMLRunsExistError as e:
# The only error we expect; the test should fail on anything else.
run_ids = [int(run_id) for run_id in e.run_ids]
self.assertGreater(len(run_ids), 0)
run_id = random.choice(list(run_ids))
# now the actual unit test ...
run_trace = openml.runs.get_run_trace(run_id)
self.assertEqual(len(run_trace.trace_iterations), num_iterations * num_folds)