How to use the pandera.errors module in pandera

To help you get started, we’ve selected a few pandera examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pandera-dev / pandera / tests / test_hypotheses.py View on Github external
schema_fail_ttest_on_alpha_val_3 = DataFrameSchema({
        "height_in_feet": Column(Float, [
            Hypothesis.two_sample_ttest(
                sample1="M",
                sample2="F",
                groupby="sex",
                relationship="greater_than",
                alpha=0.05),
        ]),
        "sex": Column(String)
    })

    with pytest.raises(errors.SchemaError):
        schema_fail_ttest_on_alpha_val_1.validate(df)
    with pytest.raises(errors.SchemaError):
        schema_fail_ttest_on_alpha_val_2.validate(df)
    with pytest.raises(errors.SchemaError):
        schema_fail_ttest_on_alpha_val_3.validate(df)
github pandera-dev / pandera / tests / test_schemas.py View on Github external
"b": [1.1, 2.5, 9.9],
            "c": ["z", "y", "x"],
            "d": [True, True, False],
            "e": pd.Series(["c2", "c1", "c3"], dtype="category"),
            "f": [(3,), (2,), (1,)],
            "g": [pd.Timestamp("2015-02-01"),
                  pd.Timestamp("2015-02-02"),
                  pd.Timestamp("2015-02-03")],
            "i": [pd.Timedelta(1, unit="D"),
                  pd.Timedelta(5, unit="D"),
                  pd.Timedelta(9, unit="D")]
        })
    assert isinstance(schema.validate(df), pd.DataFrame)

    # error case
    with pytest.raises(errors.SchemaError):
        schema.validate(df.drop("a", axis=1))

    with pytest.raises(errors.SchemaError):
        schema.validate(df.assign(a=[-1, -2, -1]))

    # check that a schema error is raised when 'a' is converted to float
    # while the schema says int
    with pytest.raises(errors.SchemaError):
        schema.validate(df.assign(a=[1.7, 2.3, 3.1]))
github pandera-dev / pandera / tests / test_checks.py View on Github external
# can't use groupby in Checks where element_wise == True
    with pytest.raises(
            errors.SchemaInitError,
            match=r"^Cannot use groupby when element_wise=True."):
        init_schema_element_wise()

    # raise errors.SchemaInitError even when the schema doesn't specify column
    # key for groupby column
    def init_schema_no_groupby_column():
        """Build a schema whose check groups by a column the schema lacks.

        The only declared column is "col1"; its check groups by "col2",
        which has no entry in the schema's column mapping.
        """
        grouped_check = Check(lambda s: s["foo"] > 10, groupby=["col2"])
        column_spec = Column(Int, [grouped_check])
        DataFrameSchema({"col1": column_spec})

    with pytest.raises(errors.SchemaInitError):
        init_schema_no_groupby_column()

    # can't use groupby argument in SeriesSchema or Index objects
    for SchemaClass in [SeriesSchema, Index]:
        with pytest.raises(
                errors.SchemaInitError,
                match="^Cannot use groupby checks with"):
            SchemaClass(Int, Check(lambda s: s["bar"] == 1, groupby="foo"))
github pandera-dev / pandera / tests / test_decorators.py View on Github external
def test_check_function_decorator_errors():
    """Test that the check_input and check_output decorators error properly."""
    # case 1: checks that the input and output decorators error when different
    # types are passed in and out
    @check_input(DataFrameSchema({"column1": Column(Int)}))
    @check_output(DataFrameSchema({"column2": Column(Float)}))
    def test_func(df):
        return df

    with pytest.raises(
            errors.SchemaError,
            match=r"^error in check_input decorator of function"):
        test_func(pd.DataFrame({"column2": ["a", "b", "c"]}))

    with pytest.raises(
            errors.SchemaError,
            match=r"^error in check_output decorator of function"):
        test_func(pd.DataFrame({"column1": [1, 2, 3]}))

    # case 2: check that if the input decorator refers to an index that's not
    # in the function signature, it will fail in a way that's easy to interpret
    @check_input(DataFrameSchema({"column1": Column(Int)}), 1)
    def test_incorrect_check_input_index(df):
        return df

    with pytest.raises(
            errors.SchemaError,
github pandera-dev / pandera / pandera / hypotheses.py View on Github external
def _relationships(self, relationship: Union[str, Callable]):
        """Resolve ``relationship`` into a callable.

        :param relationship: either a callable, which is returned unchanged,
            or a string key that is looked up in ``self._RELATIONSHIPS``.
            The original documentation lists the available string keys as
            "greater_than", "less_than" and "not_equal".
        :returns: the callable to use as the relationship condition.
        :raises errors.SchemaError: if ``relationship`` is a string that is
            not a key of ``self._RELATIONSHIPS``.
        :raises ValueError: if ``relationship`` is neither a string nor a
            callable.
        """
        # Non-string inputs: accept callables as-is, reject anything else.
        if not isinstance(relationship, str):
            if callable(relationship):
                return relationship
            raise ValueError(
                "expected relationship to be str or callable, found %s"
                % type(relationship))

        # String inputs must name one of the built-in relationships.
        if relationship not in self._RELATIONSHIPS:
            raise errors.SchemaError(
                "The relationship %s isn't a built in method" % relationship)
        return self._RELATIONSHIPS[relationship]
github pandera-dev / pandera / pandera / schema_components.py View on Github external
... })
        >>>
        >>> schema.validate(pd.DataFrame({"column": ["foo", "bar"]}))
          column
        0    foo
        1    bar

        See :ref:`here` for more usage details.
        """
        super(Column, self).__init__(
            pandas_dtype, checks, nullable, allow_duplicates, coerce)
        self.required = required
        self.pandas_dtype = pandas_dtype

        if coerce and pandas_dtype is None:
            raise errors.SchemaInitError(
                "Must specify dtype if coercing a Column's type")
github pandera-dev / pandera / pandera / decorators.py View on Github external
)
                    )
        elif isinstance(obj_getter, str):
            if obj_getter in kwargs:
                kwargs[obj_getter] = schema.validate(kwargs[obj_getter])
            else:
                arg_spec_args = _get_fn_argnames(fn)
                args_dict = OrderedDict(
                    zip(arg_spec_args, args))
                args_dict[obj_getter] = schema.validate(args_dict[obj_getter])
                args = list(args_dict.values())
        elif obj_getter is None:
            try:
                args[0] = schema.validate(
                    args[0], head, tail, sample, random_state)
            except errors.SchemaError as e:
                raise errors.SchemaError(
                    "error in check_input decorator of function '%s': %s" %
                    (fn.__name__, e))
        else:
            raise ValueError(
                "obj_getter is unrecognized type: %s" % type(obj_getter))
        return fn(*args, **kwargs)
github pandera-dev / pandera / pandera / hypotheses.py View on Github external
def prepare_dataframe_input(self, dataframe: pd.DataFrame):
        """Select the sample columns of ``dataframe`` for a hypothesis check.

        :param dataframe: the dataframe being validated; it is indexed by
            each name in ``self.samples``.
        :returns: for a one-sample test, ``dataframe[self.samples[0]]``;
            otherwise the result of ``self._format_groupby_input`` applied
            to the list of ``(sample, dataframe[sample])`` pairs.
        :raises errors.SchemaDefinitionError: if ``self.groupby`` is set.
        """
        if self.groupby is not None:
            raise errors.SchemaDefinitionError(
                "`groupby` cannot be used for DataFrameSchema checks, must "
                "be used in Column checks.")

        if not self.is_one_sample_test:
            # Pair every sample name with its column before formatting.
            named_columns = []
            for sample_name in self.samples:
                named_columns.append((sample_name, dataframe[sample_name]))
            return self._format_groupby_input(named_columns, self.samples)

        return dataframe[self.samples[0]]