How to use the pandera.Hypothesis.two_sample_ttest function in pandera

To help you get started, we’ve selected a few pandera examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pandera-dev / pandera / tests / test_hypotheses.py View on Github external
def test_hypothesis():
    # Example df for tests:
    df = (
        pd.DataFrame({
            "height_in_feet": [6.5, 7, 6.1, 5.1, 4],
            "sex": ["M", "M", "F", "F", "F"]
        })
    )

    # Initialise the different ways of calling a test:
    schema_pass_ttest_on_alpha_val_1 = DataFrameSchema({
        "height_in_feet": Column(Float, [
            Hypothesis.two_sample_ttest(
                sample1="M",
                sample2="F",
                groupby="sex",
                relationship="greater_than",
                alpha=0.5),
        ]),
        "sex": Column(String)
    })

    schema_pass_ttest_on_alpha_val_2 = DataFrameSchema({
        "height_in_feet": Column(Float, [
            Hypothesis(test=stats.ttest_ind,
                       samples=["M", "F"],
                       groupby="sex",
                       relationship="greater_than",
                       relationship_kwargs={"alpha": 0.5}
github pandera-dev / pandera / tests / test_pandera.py View on Github external
def test_hypothesis():
    # Example df for tests:
    df = (
        pd.DataFrame({
            "height_in_feet": [6.5, 7, 6.1, 5.1, 4],
            "sex": ["M", "M", "F", "F", "F"]
        })
    )

    # Initialise the different ways of calling a test:
    schema_pass_ttest_on_alpha_val_1 = DataFrameSchema({
        "height_in_feet": Column(Float, [
            Hypothesis.two_sample_ttest(
                sample1="M",
                sample2="F",
                groupby="sex",
                relationship="greater_than",
                alpha=0.5),
        ]),
        "sex": Column(String)
    })

    schema_pass_ttest_on_alpha_val_2 = DataFrameSchema({
        "height_in_feet": Column(Float, [
            Hypothesis(test=stats.ttest_ind,
                       samples=["M", "F"],
                       groupby="sex",
                       relationship="greater_than",
                       relationship_kwargs={"alpha": 0.5}
github pandera-dev / pandera / tests / test_pandera.py View on Github external
Hypothesis.two_sample_ttest(
                    sample1="M",
                    sample2="F",
                    groupby="sex",
                    relationship=relationship,
                    alpha=0.5),
            ]),
            "sex": Column(String)
        })
        assert isinstance(schema, DataFrameSchema)

    for relationship in ["foo", "bar", 1, 2, 3, None]:
        with pytest.raises(errors.SchemaError):
            DataFrameSchema({
                "height_in_feet": Column(Float, [
                    Hypothesis.two_sample_ttest(
                        sample1="M",
                        sample2="F",
                        groupby="sex",
                        relationship=relationship,
                        alpha=0.5),
                ]),
                "sex": Column(String)
            })
github pandera-dev / pandera / tests / test_hypotheses.py View on Github external
Hypothesis.two_sample_ttest(
                    sample1="M",
                    sample2="F",
                    groupby="sex",
                    relationship=relationship,
                    alpha=0.5),
            ]),
            "sex": Column(String)
        })
        assert isinstance(schema, DataFrameSchema)

    for relationship in ["foo", "bar", 1, 2, 3, None]:
        with pytest.raises(errors.SchemaError):
            DataFrameSchema({
                "height_in_feet": Column(Float, [
                    Hypothesis.two_sample_ttest(
                        sample1="M",
                        sample2="F",
                        groupby="sex",
                        relationship=relationship,
                        alpha=0.5),
                ]),
                "sex": Column(String)
            })
github pandera-dev / pandera / tests / test_pandera.py View on Github external
),
                relationship_kwargs={"alpha": 0.5}
            )
        ]),
        "sex": Column(String),
    })

    # Check that all four happy-path schemas validate successfully:
    schema_pass_ttest_on_alpha_val_1.validate(df)
    schema_pass_ttest_on_alpha_val_2.validate(df)
    schema_pass_ttest_on_alpha_val_3.validate(df)
    schema_pass_ttest_on_custom_relationship.validate(df)

    schema_fail_ttest_on_alpha_val_1 = DataFrameSchema({
        "height_in_feet": Column(Float, [
            Hypothesis.two_sample_ttest(
                sample1="M",
                sample2="F",
                groupby="sex",
                relationship="greater_than",
                alpha=0.05),
        ]),
        "sex": Column(String)
    })

    schema_fail_ttest_on_alpha_val_2 = DataFrameSchema({
        "height_in_feet": Column(Float, [
            Hypothesis(test=stats.ttest_ind,
                       samples=["M", "F"],
                       groupby="sex",
                       relationship="greater_than",
                       relationship_kwargs={"alpha": 0.05}),
github pandera-dev / pandera / tests / test_hypotheses.py View on Github external
),
                relationship_kwargs={"alpha": 0.5}
            )
        ]),
        "sex": Column(String),
    })

    # Check that all four happy-path schemas validate successfully:
    schema_pass_ttest_on_alpha_val_1.validate(df)
    schema_pass_ttest_on_alpha_val_2.validate(df)
    schema_pass_ttest_on_alpha_val_3.validate(df)
    schema_pass_ttest_on_custom_relationship.validate(df)

    schema_fail_ttest_on_alpha_val_1 = DataFrameSchema({
        "height_in_feet": Column(Float, [
            Hypothesis.two_sample_ttest(
                sample1="M",
                sample2="F",
                groupby="sex",
                relationship="greater_than",
                alpha=0.05),
        ]),
        "sex": Column(String)
    })

    schema_fail_ttest_on_alpha_val_2 = DataFrameSchema({
        "height_in_feet": Column(Float, [
            Hypothesis(test=stats.ttest_ind,
                       samples=["M", "F"],
                       groupby="sex",
                       relationship="greater_than",
                       relationship_kwargs={"alpha": 0.05}),