Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def versioned_pickle_data_set(filepath_pkl, load_version, save_version):
    """Create a ``PickleLocalDataSet`` bound to an explicit version.

    Args:
        filepath_pkl: Location handed to the data set as ``filepath``.
        load_version: First argument passed to ``Version``.
        save_version: Second argument passed to ``Version``.

    Returns:
        The newly constructed ``PickleLocalDataSet``.
    """
    requested_version = Version(load_version, save_version)
    return PickleLocalDataSet(filepath=filepath_pkl, version=requested_version)
def test_version_str_repr(self, load_version, save_version):
    """Check the string form of plain and versioned ``JSONDataSet`` objects.

    The file path, the class name and the word "protocol" must show up for
    both instances; the version details must show up only for the instance
    that was built with a ``Version``.
    """
    path = "test.json"
    plain = JSONDataSet(filepath=path)
    versioned = JSONDataSet(
        filepath=path, version=Version(load_version, save_version)
    )

    plain_text = str(plain)
    assert path in plain_text
    assert "version" not in plain_text

    versioned_text = str(versioned)
    assert path in versioned_text
    expected_version = "version=Version(load={}, save='{}')".format(
        load_version, save_version
    )
    assert expected_version in versioned_text

    assert "JSONDataSet" in versioned_text
    assert "JSONDataSet" in plain_text
    assert "protocol" in versioned_text
    assert "protocol" in plain_text
def test_version_str_repr(self, load_version, save_version):
    """Check the string form of plain and versioned ``ExcelLocalDataSet`` objects.

    The file path must appear for both instances, while the version details
    must appear only for the instance that was built with a ``Version``.
    """
    path = "test.xlsx"
    plain = ExcelLocalDataSet(filepath=path)
    versioned = ExcelLocalDataSet(
        filepath=path, version=Version(load_version, save_version)
    )

    plain_text = str(plain)
    assert path in plain_text
    assert "version" not in plain_text

    versioned_text = str(versioned)
    assert path in versioned_text
    expected_version = "version=Version(load={}, save='{}')".format(
        load_version, save_version
    )
    assert expected_version in versioned_text
def versioned_csv_data_set(filepath_csv, load_version, save_version):
    """Create a ``CSVDataSet`` bound to an explicit version.

    Args:
        filepath_csv: Location handed to the data set as ``filepath``.
        load_version: First argument passed to ``Version``.
        save_version: Second argument passed to ``Version``.

    Returns:
        The newly constructed ``CSVDataSet``.
    """
    requested_version = Version(load_version, save_version)
    return CSVDataSet(filepath=filepath_csv, version=requested_version)
def test_sequential_load_from_disk(
    self, dummy_dataframe, filepath, versioned_csv_data_set
):
    """Save the same frame under two versions, then load via the fixture.

    After loading, the last load version reported by
    ``versioned_csv_data_set`` must equal the second save version.
    """
    earlier_stamp = "2019-01-01T23.00.00.000Z"
    later_stamp = "2019-01-01T23.59.59.999Z"

    # Write both versions in order, each through a fresh data set instance.
    for stamp in (earlier_stamp, later_stamp):
        writer = CSVLocalDataSet(
            filepath=filepath,
            save_args={"sep": ","},
            version=Version(None, stamp),
        )
        writer.save(dummy_dataframe)

    versioned_csv_data_set.load()
    assert versioned_csv_data_set.get_last_load_version() == later_stamp
# NOTE(review): these look like class-level attributes whose class header is
# outside this excerpt (presumably the DummyContext instantiated further
# down) -- TODO confirm.
project_name = "bob"
# Takes its value from ``__version__``, which is defined outside this excerpt.
project_version = __version__
def _get_pipelines(self) -> Dict[str, Pipeline]:
    """Return a registry holding a single ``"__default__"`` pipeline.

    That pipeline consists of one node built from ``identity`` with
    ``"cars"`` and ``"boats"`` as its second and third arguments.
    """
    cars_to_boats = node(identity, "cars", "boats")
    default_pipeline = Pipeline([cars_to_boats])
    return {"__default__": default_pipeline}
# NOTE(review): the enclosing test function header is outside this excerpt;
# ``mocker``, ``tmp_path`` and ``dummy_dataframe`` are presumably pytest
# fixtures -- TODO confirm.
# Patch ``logging.config.dictConfig`` before the context is created.
mocker.patch("logging.config.dictConfig")
dummy_context = DummyContext(str(tmp_path))
filepath = str(dummy_context.project_path / "cars.csv")
# First save: an all-zero frame stored under the first generated timestamp.
old_save_version = generate_timestamp()
old_df = pd.DataFrame({"col1": [0, 0], "col2": [0, 0], "col3": [0, 0]})
old_csv_data_set = CSVLocalDataSet(
filepath=filepath,
save_args={"sep": ","},
version=Version(None, old_save_version),
)
old_csv_data_set.save(old_df)
# Second save: ``dummy_dataframe`` stored at the same filepath under a second
# generated timestamp.
new_save_version = generate_timestamp()
new_csv_data_set = CSVLocalDataSet(
filepath=filepath,
save_args={"sep": ","},
version=Version(None, new_save_version),
)
new_csv_data_set.save(dummy_dataframe)
# Run with the "cars" input pinned to the first save version.
load_versions = {"cars": old_save_version}
dummy_context.run(load_versions=load_versions)
# "boats" must equal the first frame rather than ``dummy_dataframe``.
assert not dummy_context.catalog.load("boats").equals(dummy_dataframe)
assert dummy_context.catalog.load("boats").equals(old_df)
def versioned_parquet_data_set(data_path, load_version, save_version):
    """Create a ``ParquetLocalDataSet`` bound to an explicit version.

    Args:
        data_path: Location handed to the data set as ``filepath``.
        load_version: First argument passed to ``Version``.
        save_version: Second argument passed to ``Version``.

    Returns:
        The newly constructed ``ParquetLocalDataSet``.
    """
    requested_version = Version(load_version, save_version)
    return ParquetLocalDataSet(filepath=data_path, version=requested_version)
def versioned_txt_data_set(filepath_txt, load_version, save_version):
    """Create a ``TextLocalDataSet`` bound to an explicit version.

    Args:
        filepath_txt: Location handed to the data set as ``filepath``.
        load_version: First argument passed to ``Version``.
        save_version: Second argument passed to ``Version``.

    Returns:
        The newly constructed ``TextLocalDataSet``.
    """
    requested_version = Version(load_version, save_version)
    return TextLocalDataSet(filepath=filepath_txt, version=requested_version)
def test_version_str_repr(self, load_version, save_version):
    """Check the string form of plain and versioned ``CSVS3DataSet`` objects.

    ``FILENAME`` must appear for both instances, while the version details
    must appear only for the instance that was built with a ``Version``.
    """
    plain = CSVS3DataSet(filepath=FILENAME, bucket_name=BUCKET_NAME)
    versioned = CSVS3DataSet(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        version=Version(load_version, save_version),
    )

    plain_text = str(plain)
    assert FILENAME in plain_text
    assert "version" not in plain_text

    versioned_text = str(versioned)
    assert FILENAME in versioned_text
    expected_version = "version=Version(load={}, save='{}')".format(
        load_version, save_version
    )
    assert expected_version in versioned_text
def test_version_str_repr(self, load_version, save_version):
    """Check the string form of plain and versioned ``HDFLocalDataSet`` objects.

    The file path must appear for both instances, while the version details
    must appear only for the instance that was built with a ``Version``.
    """
    path = "test.hdf"
    plain = HDFLocalDataSet(filepath=path, key="test_hdf")
    versioned = HDFLocalDataSet(
        filepath=path,
        key="test_hdf",
        version=Version(load_version, save_version),
    )

    plain_text = str(plain)
    assert path in plain_text
    assert "version" not in plain_text

    versioned_text = str(versioned)
    assert path in versioned_text
    expected_version = "version=Version(load={}, save='{}')".format(
        load_version, save_version
    )
    assert expected_version in versioned_text