How to use the kedro.io.MemoryDataSet class in kedro

To help you get started, we’ve selected a few kedro examples that show popular ways MemoryDataSet is used in public projects.

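As a quick orientation before the examples, here is a minimal sketch of the core MemoryDataSet API (save, load, exists), based on the test patterns shown below:

from kedro.io import MemoryDataSet

# Start empty, then save an object and read it back.
data_set = MemoryDataSet()
assert not data_set.exists()      # nothing stored yet
data_set.save({"data": 42})       # keeps the object in memory
assert data_set.exists()
assert data_set.load() == {"data": 42}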

github quantumblacklabs/kedro/tests/runner/test_parallel_runner.py
def test_node_returning_none(self):
        pipeline = Pipeline([node(identity, "A", "B"), node(return_none, "B", "C")])
        catalog = DataCatalog({"A": MemoryDataSet("42")})
        pattern = "Saving `None` to a `DataSet` is not allowed"
        with pytest.raises(DataSetError, match=pattern):
            ParallelRunner().run(pipeline, catalog)
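
The DataSetError in this test comes from the data set layer itself rather than the runner; a minimal sketch of the same check in isolation, reusing the error message asserted above:

import pytest
from kedro.io import DataSetError, MemoryDataSet

# Saving `None` is rejected by every kedro data set, MemoryDataSet included.
with pytest.raises(DataSetError, match="Saving `None`"):
    MemoryDataSet().save(None)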
github quantumblacklabs/kedro/tests/io/test_data_catalog.py
def memory_catalog():
    ds1 = MemoryDataSet({"data": 42})
    ds2 = MemoryDataSet([1, 2, 3, 4, 5])
    return DataCatalog({"ds1": ds1, "ds2": ds2})
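
Treating the fixture above as a plain function, the registered in-memory data sets load like any other catalog entry; a brief usage sketch:

catalog = memory_catalog()
assert catalog.load("ds1") == {"data": 42}
assert catalog.load("ds2") == [1, 2, 3, 4, 5]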
github quantumblacklabs/kedro/tests/contrib/io/pyspark/test_memory_data_set.py
def test_save_modify_original_data(spark_data_frame):
    """Check that the data set object is not updated when the original
    SparkDataFrame is changed."""
    memory_data_set = MemoryDataSet()
    memory_data_set.save(spark_data_frame)
    spark_data_frame = _update_spark_df(spark_data_frame, 1, 1, "new value")

    assert not _check_equals(memory_data_set.load(), spark_data_frame)
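
The same isolation holds for non-Spark data: by default MemoryDataSet stores a copy of what you save, so mutating the original object afterwards does not change what load() returns. A minimal pandas sketch of the idea:

import pandas as pd
from kedro.io import MemoryDataSet

df = pd.DataFrame({"col1": [1, 2]})
memory_data_set = MemoryDataSet()
memory_data_set.save(df)        # stored as a copy, not a reference
df.loc[0, "col1"] = 99          # mutate the original afterwards
assert memory_data_set.load().loc[0, "col1"] == 1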
github quantumblacklabs/kedro/tests/runner/test_sequential_runner.py
def conflicting_feed_dict(pandas_df_feed_dict):
    ds1 = MemoryDataSet({"data": 0})
    ds3 = pandas_df_feed_dict["ds3"]
    return {"ds1": ds1, "ds3": ds3}
github quantumblacklabs/kedro/tests/io/test_memory_data_set.py
def test_exists(self, new_data):
        """Test `exists` method invocation"""
        data_set = MemoryDataSet()
        assert not data_set.exists()

        data_set.save(new_data)
        assert data_set.exists()
github quantumblacklabs/kedro/tests/runner/test_parallel_runner.py
def test_memory_data_set_input(self, fan_out_fan_in):
        pipeline = Pipeline([fan_out_fan_in])
        catalog = DataCatalog({"A": MemoryDataSet("42")})
        result = ParallelRunner().run(pipeline, catalog)
        assert "Z" in result
        assert len(result["Z"]) == 3
        assert result["Z"] == ("42", "42", "42")
github quantumblacklabs/kedro/tests/contrib/io/pyspark/test_memory_data_set.py
def memory_data_set(spark_data_frame):
    return MemoryDataSet(data=spark_data_frame)
github quantumblacklabs/kedro/kedro/runner/sequential_runner.py
def create_default_data_set(self, ds_name: str) -> AbstractDataSet:
        """Factory method for creating the default data set for the runner.

        Args:
            ds_name: Name of the missing data set

        Returns:
            An instance of an implementation of AbstractDataSet to be used
            for all unregistered data sets.

        """
        return MemoryDataSet()
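
This factory is why a kedro pipeline can run without registering its intermediate data sets: the runner backs any unregistered name with a fresh MemoryDataSet. A minimal sketch of that fallback, assuming a trivial identity node:

from kedro.io import DataCatalog, MemoryDataSet
from kedro.pipeline import Pipeline, node
from kedro.runner import SequentialRunner

def identity(x):
    return x

# "B" and "C" are not registered in the catalog, so the runner creates
# default MemoryDataSet instances for them via create_default_data_set.
pipeline = Pipeline([node(identity, "A", "B"), node(identity, "B", "C")])
catalog = DataCatalog({"A": MemoryDataSet(42)})
result = SequentialRunner().run(pipeline, catalog)
assert result["C"] == 42   # free outputs are returned by run()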
github Minyus/causallift/src/causallift/causal_lift.py
if self.runner is None:
    self.df = bundle_train_and_test_data(args_raw, train_df, test_df)
    self.args = impute_cols_features(args_raw, self.df)
    self.args = schedule_propensity_scoring(self.args, self.df)
    self.treatment_fractions = treatment_fractions_(self.args, self.df)
    if self.args.need_propensity_scoring:
        self.propensity_model = fit_propensity(self.args, self.df)
        self.df = estimate_propensity(self.args, self.df, self.propensity_model)

if self.runner:
    self.kedro_context.catalog.add_feed_dict(
        {
            "train_df": MemoryDataSet(train_df),
            "test_df": MemoryDataSet(test_df),
            "args_raw": MemoryDataSet(args_raw),
        },
        replace=True,
    )
    self.kedro_context.catalog.add_feed_dict(dataset_catalog, replace=True)

    self.kedro_context.run(tags=["011_bundle_train_and_test_data"])
    self.df = self.kedro_context.catalog.load("df_00")

    self.kedro_context.run(
        tags=[
            "121_prepare_args",
            "131_treatment_fractions_",
            "141_initialize_model",
        ]
    )
    self.args = self.kedro_context.catalog.load("args")
github quantumblacklabs/kedro/features/steps/pipeline_steps.py
def set_catalog(context, key1, key2, key3, key4):
    ds1 = pd.DataFrame({"col1": [1, 2], "col2": [3, 4], "col3": [5, 6]})
    ds2 = pd.DataFrame({"col1": [9, 8], "col2": [7, 6], "col3": [5, 4]})
    context.catalog = DataCatalog(
        {
            key1: MemoryDataSet(ds1),
            key2: MemoryDataSet(),
            key3: MemoryDataSet(ds2),
            key4: MemoryDataSet(),
        }
    )