How to use the pandera.Int data type in pandera

To help you get started, we’ve selected a few pandera examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pandera-dev / pandera / tests / test_pandera.py View on Github external
def test_dataframe_checks():
    """Build a four-column schema with two dataframe-level checks, plus a sample frame.

    NOTE(review): this excerpt ends once the sample frame is constructed; no
    ``validate`` call or assertion is visible here.
    """
    column_specs = {
        "col1": Column(Int),
        "col2": Column(Float),
        "col3": Column(String),
        "col4": Column(String),
    }
    # Each check's lambda indexes its argument by column name and compares
    # two columns with each other.
    frame_checks = [
        Check(lambda df: df["col1"] < df["col2"]),
        Check(lambda df: df["col3"] == df["col4"]),
    ]
    schema = DataFrameSchema(columns=column_specs, checks=frame_checks)

    sample_values = {
        "col1": [1, 2, 3],
        "col2": [2.0, 3.0, 4.0],
        "col3": ["foo", "bar", "baz"],
        "col4": ["foo", "bar", "baz"],
    }
    df = pd.DataFrame(sample_values)
github pandera-dev / pandera / tests / test_decorators.py View on Github external
def test_check_function_decorator_transform():
    """Test that transformer argument is in effect in check_input decorator."""

    in_schema = DataFrameSchema(
        {"column1": Column(Int)},
        transformer=lambda df: df.assign(column2="foo"))
    out_schema = DataFrameSchema(
        {"column1": Column(Int),
         "column2": Column(String)})

    @check_input(in_schema)
    @check_output(out_schema)
    def func_input_transform1(df):
        return df

    result1 = func_input_transform1(pd.DataFrame({"column1": [1, 2, 3]}))
    assert "column2" in result1

    @check_input(in_schema, 1)
    @check_output(out_schema, 1)
    def func_input_transform2(_, df):
github pandera-dev / pandera / tests / test_schemas.py View on Github external
def test_tail_dataframe_schema():
    """Validate a frame with and without the ``tail`` argument.

    ``col1`` holds 0..99 followed by -1..-1000, and the column check is
    ``s < 0``. Validation with ``tail=1000`` is expected to succeed and return
    a frame equal to the input; validation without ``tail`` is expected to
    raise ``errors.SchemaError``.
    """
    non_negative_prefix = list(range(100))
    negative_suffix = list(range(-1, -1001, -1))
    df = pd.DataFrame({"col1": non_negative_prefix + negative_suffix})

    is_negative = Check(lambda s: s < 0)
    schema = DataFrameSchema(columns={"col1": Column(Int, is_negative)})

    # Validating with tail of 1000 should pass
    tail_validated = schema.validate(df, tail=1000)
    assert tail_validated.equals(df)

    # Without ``tail`` the same schema is expected to reject the frame.
    with pytest.raises(errors.SchemaError):
        schema.validate(df)
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def test_column():
    """Validate a single ``Int`` column against an element-wise ``x > 0`` check."""
    is_positive = Check(lambda x: x > 0, element_wise=True)
    schema = DataFrameSchema({"a": Column(Int, is_positive)})

    data = pd.DataFrame({"a": [1, 2, 3]})
    validated = schema.validate(data)

    # ``validate`` is expected to hand back a DataFrame.
    assert isinstance(validated, pd.DataFrame)
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def init_schema_element_wise():
    """Construct, and discard, a schema whose ``col1`` check is element-wise and grouped.

    NOTE(review): the schema is neither returned nor stored, so this helper
    appears to exist only for what happens during construction — confirm
    against the caller.
    """
    # Columns are built in the same order as the dict literal would build them.
    grouped_column = Column(Int, [
        Check(
            lambda s: s["foo"] > 10,
            element_wise=True,
            groupby=["col2"],
        ),
    ])
    label_column = Column(String, Check(lambda s: s.isin(["foo", "bar"])))

    DataFrameSchema({"col1": grouped_column, "col2": label_column})
github pandera-dev / pandera / tests / test_hypotheses.py View on Github external
def test_dataframe_hypothesis_checks():

    df = pd.DataFrame({
        "col1": range(100, 201),
        "col2": range(0, 101),
    })

    hypothesis_check_schema = DataFrameSchema(
        columns={
            "col1": Column(Int),
            "col2": Column(Int),
        },
        checks=[
            # two-sample test
            Hypothesis(
                test=stats.ttest_ind,
                samples=["col1", "col2"],
                relationship=lambda stat, pvalue, alpha=0.01: (
                    stat > 0 and pvalue / 2 < alpha
                ),
                relationship_kwargs={"alpha": 0.5},
            ),
            # one-sample test
            Hypothesis(
                test=stats.ttest_1samp,
                samples=["col1"],
github pandera-dev / pandera / tests / test_schemas.py View on Github external
def test_dataframe_schema():
    schema = DataFrameSchema(
        {
            "a": Column(Int,
                        Check(lambda x: x > 0, element_wise=True)),
            "b": Column(Float,
                        Check(lambda x: 0 <= x <= 10, element_wise=True)),
            "c": Column(String,
                        Check(lambda x: set(x) == {"x", "y", "z"})),
            "d": Column(Bool,
                        Check(lambda x: x.mean() > 0.5)),
            "e": Column(Category,
                        Check(lambda x: set(x) == {"c1", "c2", "c3"})),
            "f": Column(Object,
                        Check(lambda x: x.isin([(1,), (2,), (3,)]))),
            "g": Column(DateTime,
                        Check(lambda x: x >= pd.Timestamp("2015-01-01"),
                              element_wise=True)),
            "i": Column(Timedelta,
                        Check(lambda x: x < pd.Timedelta(10, unit="D"),
github pandera-dev / pandera / tests / test_checks.py View on Github external
def test_vectorized_checks():
    """Exercise a non-element-wise check on a ``SeriesSchema``.

    The check compares ``s.value_counts()`` with 2. A series whose values each
    appear twice is expected to validate; a series of distinct values is
    expected to raise ``errors.SchemaError``.
    """
    occurs_twice = Check(lambda s: s.value_counts() == 2, element_wise=False)
    schema = SeriesSchema(Int, occurs_twice)

    paired_values = pd.Series([1, 1, 2, 2, 3, 3])
    validated_series = schema.validate(paired_values)
    assert isinstance(validated_series, pd.Series)

    # error case
    distinct_values = pd.Series([1, 2, 3])
    with pytest.raises(errors.SchemaError):
        schema.validate(distinct_values)
github pandera-dev / pandera / tests / test_checks.py View on Github external
"col2": Column(String, Check(lambda s: s.isin(["foo", "bar"]))),
    })

    df = pd.DataFrame({
        "col1": [7, 8, 9, 11, 12, 13],
        "col2": ["bar", "bar", "bar", "foo", "foo", "foo"],
    })

    validated_df = schema.validate(df)
    assert isinstance(validated_df, pd.DataFrame)
    assert len(validated_df.columns) == 2
    assert set(validated_df.columns) == {"col1", "col2"}

    # raise KeyError when groups does not include a particular group name
    schema_fail_key_error = DataFrameSchema({
        "col1": Column(Int, [
            Check(lambda s: s["bar"] > 10, groupby="col2", groups="foo"),
        ]),
        "col2": Column(String, Check(lambda s: s.isin(["foo", "bar"]))),
    })
    with pytest.raises(KeyError, match="^'bar'"):
        schema_fail_key_error.validate(df)

    # raise KeyError when the group does not exist in the groupby column when
    # referenced in the Check function
    schema_fail_nonexistent_key_in_fn = DataFrameSchema({
        "col1": Column(Int, [
            Check(lambda s: s["baz"] > 10, groupby="col2", groups=["foo"]),
        ]),
        "col2": Column(String, Check(lambda s: s.isin(["foo", "bar"]))),
    })
    with pytest.raises(KeyError, match="^'baz'"):
github pandera-dev / pandera / tests / test_hypotheses.py View on Github external
samples=["col1"],
                relationship=lambda stat, pvalue, alpha=0.01: (
                    stat > 0 and pvalue / 2 < alpha
                ),
                test_kwargs={"popmean": 50},
                relationship_kwargs={"alpha": 0.01},
            ),
        ]
    )

    hypothesis_check_schema.validate(df)

    # raise error when using groupby
    hypothesis_check_schema_groupby = DataFrameSchema(
        columns={
            "col1": Column(Int),
            "col2": Column(Int),
        },
        checks=[
            # two-sample test
            Hypothesis(
                test=stats.ttest_ind,
                samples=["col1", "col2"],
                groupby="col3",
                relationship=lambda stat, pvalue, alpha=0.01: (
                    stat > 0 and pvalue / 2 < alpha
                ),
                relationship_kwargs={"alpha": 0.5},
            ),
        ]
    )
    with pytest.raises(errors.SchemaDefinitionError):