Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Schema whose "height_in_feet" column carries a two-sample t-test hypothesis:
# the column is grouped by "sex", sample "M" is compared against sample "F"
# with the "greater_than" relationship at alpha=0.05.
schema_fail_ttest_on_alpha_val_3 = DataFrameSchema({
"height_in_feet": Column(Float, [
Hypothesis.two_sample_ttest(
sample1="M",
sample2="F",
groupby="sex",
relationship="greater_than",
alpha=0.05),
]),
"sex": Column(String)
})
# Each of the three schemas is expected to reject `df` with a SchemaError.
# NOTE(review): schemas *_val_1 and *_val_2 and `df` are defined outside the
# visible snippet; their parameters cannot be confirmed from here.
with pytest.raises(errors.SchemaError):
schema_fail_ttest_on_alpha_val_1.validate(df)
with pytest.raises(errors.SchemaError):
schema_fail_ttest_on_alpha_val_2.validate(df)
with pytest.raises(errors.SchemaError):
schema_fail_ttest_on_alpha_val_3.validate(df)
"b": [1.1, 2.5, 9.9],
"c": ["z", "y", "x"],
"d": [True, True, False],
"e": pd.Series(["c2", "c1", "c3"], dtype="category"),
"f": [(3,), (2,), (1,)],
"g": [pd.Timestamp("2015-02-01"),
pd.Timestamp("2015-02-02"),
pd.Timestamp("2015-02-03")],
"i": [pd.Timedelta(1, unit="D"),
pd.Timedelta(5, unit="D"),
pd.Timedelta(9, unit="D")]
})
# success case: validating the unmodified dataframe returns a pandas DataFrame
assert isinstance(schema.validate(df), pd.DataFrame)
# error case
# dropping column "a" is expected to raise a SchemaError
with pytest.raises(errors.SchemaError):
schema.validate(df.drop("a", axis=1))
# replacing "a" with negative values is expected to raise a SchemaError
# NOTE(review): presumably a check on "a" rejects negatives -- the schema
# definition is outside the visible snippet, confirm there.
with pytest.raises(errors.SchemaError):
schema.validate(df.assign(a=[-1, -2, -1]))
# check that a SchemaError is raised when column "a" holds floats although
# the schema declares it as int
with pytest.raises(errors.SchemaError):
schema.validate(df.assign(a=[1.7, 2.3, 3.1]))
# can't use groupby in Checks where element_wise == True
# the error message must start with "Cannot use groupby when element_wise=True."
with pytest.raises(
errors.SchemaInitError,
match=r"^Cannot use groupby when element_wise=True."):
init_schema_element_wise()
# raise errors.SchemaInitError even when the schema doesn't specify column
# key for groupby column
def init_schema_no_groupby_column():
    """Build a schema whose check groups by a column the schema lacks.

    The only declared column is "col1", while its check groups by "col2".
    The constructed schema is discarded; the function exists so a caller
    can assert on the error raised during schema initialisation.
    """
    # The check groups by "col2", which is not a key of the schema below.
    groupby_check = Check(lambda s: s["foo"] > 10, groupby=["col2"])
    col1 = Column(Int, [groupby_check])
    DataFrameSchema({"col1": col1})
# building the schema above must fail at initialisation time
with pytest.raises(errors.SchemaInitError):
init_schema_no_groupby_column()
# can't use groupby argument in SeriesSchema or Index objects
# both classes are expected to raise a SchemaInitError whose message starts
# with "Cannot use groupby checks with"
for SchemaClass in [SeriesSchema, Index]:
with pytest.raises(
errors.SchemaInitError,
match="^Cannot use groupby checks with"):
SchemaClass(Int, Check(lambda s: s["bar"] == 1, groupby="foo"))
def test_check_function_decorator_errors():
"""Test that the check_input and check_output decorators error properly."""
# case 1: checks that the input and output decorators error when different
# types are passed in and out
@check_input(DataFrameSchema({"column1": Column(Int)}))
@check_output(DataFrameSchema({"column2": Column(Float)}))
def test_func(df):
return df
with pytest.raises(
errors.SchemaError,
match=r"^error in check_input decorator of function"):
test_func(pd.DataFrame({"column2": ["a", "b", "c"]}))
with pytest.raises(
errors.SchemaError,
match=r"^error in check_output decorator of function"):
test_func(pd.DataFrame({"column1": [1, 2, 3]}))
# case 2: check that if the input decorator refers to an index that's not
# in the function signature, it will fail in a way that's easy to interpret
@check_input(DataFrameSchema({"column1": Column(Int)}), 1)
def test_incorrect_check_input_index(df):
return df
with pytest.raises(
errors.SchemaError,
def _relationships(self, relationship: Union[str, Callable]):
    """Resolve a relationship specification into a callable.

    :param relationship: the relationship condition imposed on the
        hypothesis test. Either a callable, which is returned unchanged,
        or a string naming a built-in relationship, i.e. a key of
        ``self._RELATIONSHIPS`` (documented names: "greater_than",
        "less_than", "not_equal").
    :returns: the callable registered under the given name, or the
        callable that was passed in.
    :raises errors.SchemaError: if a string is given that is not a key of
        ``self._RELATIONSHIPS``.
    :raises ValueError: if ``relationship`` is neither a string nor a
        callable.
    """
    # Non-string input: accept callables as they are, reject the rest.
    if not isinstance(relationship, str):
        if callable(relationship):
            return relationship
        raise ValueError(
            "expected relationship to be str or callable, found %s" % type(
                relationship)
        )

    # String input: it must name one of the built-in relationships.
    if relationship in self._RELATIONSHIPS:
        return self._RELATIONSHIPS[relationship]
    raise errors.SchemaError(
        "The relationship %s isn't a built in method"
        % relationship)
... })
>>>
>>> schema.validate(pd.DataFrame({"column": ["foo", "bar"]}))
column
0 foo
1 bar
See :ref:`here` for more usage details.
"""
super(Column, self).__init__(
pandas_dtype, checks, nullable, allow_duplicates, coerce)
self.required = required
self.pandas_dtype = pandas_dtype
if coerce and pandas_dtype is None:
raise errors.SchemaInitError(
"Must specify dtype if coercing a Column's type")
)
)
elif isinstance(obj_getter, str):
if obj_getter in kwargs:
kwargs[obj_getter] = schema.validate(kwargs[obj_getter])
else:
arg_spec_args = _get_fn_argnames(fn)
args_dict = OrderedDict(
zip(arg_spec_args, args))
args_dict[obj_getter] = schema.validate(args_dict[obj_getter])
args = list(args_dict.values())
elif obj_getter is None:
try:
args[0] = schema.validate(
args[0], head, tail, sample, random_state)
except errors.SchemaError as e:
raise errors.SchemaError(
"error in check_input decorator of function '%s': %s" %
(fn.__name__, e))
else:
raise ValueError(
"obj_getter is unrecognized type: %s" % type(obj_getter))
return fn(*args, **kwargs)
def prepare_dataframe_input(self, dataframe: pd.DataFrame):
    """Select the sample column(s) a dataframe-level hypothesis check needs.

    :param dataframe: the object being validated; it is indexed by the
        names stored in ``self.samples``.
    :returns: for a one-sample test, ``dataframe[self.samples[0]]``;
        otherwise the result of ``self._format_groupby_input`` applied to
        the list of ``(sample_name, dataframe[sample_name])`` pairs and
        ``self.samples``.
    :raises errors.SchemaDefinitionError: if ``self.groupby`` is set.
    """
    # Guard: grouping is rejected outright for dataframe-level checks.
    if self.groupby is not None:
        raise errors.SchemaDefinitionError(
            "`groupby` cannot be used for DataFrameSchema checks, must "
            "be used in Column checks.")

    # One-sample test: only the first sample column is needed.
    if self.is_one_sample_test:
        first_sample = self.samples[0]
        return dataframe[first_sample]

    # Otherwise pair every sample name with its column, in order.
    named_columns = []
    for sample_name in self.samples:
        named_columns.append((sample_name, dataframe[sample_name]))
    return self._format_groupby_input(named_columns, self.samples)