Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
init_schema_element_wise()
# raise errors.SchemaInitError even when the schema doesn't specify column
# key for groupby column
def init_schema_no_groupby_column():
DataFrameSchema({
"col1": Column(Int, [
Check(lambda s: s["foo"] > 10, groupby=["col2"]),
]),
})
with pytest.raises(errors.SchemaInitError):
init_schema_no_groupby_column()
# can't use groupby argument in SeriesSchema or Index objects
for SchemaClass in [SeriesSchema, Index]:
with pytest.raises(
errors.SchemaInitError,
match="^Cannot use groupby checks with"):
SchemaClass(Int, Check(lambda s: s["bar"] == 1, groupby="foo"))
def test_check_groupby():
    """Validate ``col1`` with checks that are grouped by ``col2``.

    The same pair of conditions is declared with ``groupby`` given as a
    column name, as a list of column names, and as a callable that groups
    the dataframe itself. In the passing frame, rows labelled "foo" hold
    values above 10 and rows labelled "bar" hold values below 10.
    """
    def group_by_col2(frame):
        # Callable form of the groupby argument.
        return frame.groupby("col2")

    col1_checks = [
        Check(lambda groups: groups["foo"] > 10, groupby="col2"),
        Check(lambda groups: groups["bar"] < 10, groupby=["col2"]),
        Check(lambda groups: groups["foo"] > 10, groupby=group_by_col2),
        Check(lambda groups: groups["bar"] < 10, groupby=group_by_col2),
    ]
    col2_check = Check(lambda labels: labels.isin(["foo", "bar"]))

    schema = DataFrameSchema(
        columns={
            "col1": Column(Int, col1_checks),
            "col2": Column(String, col2_check),
        },
        index=Index(Int, name="data_id"),
    )

    passing_frame = pd.DataFrame(
        data={
            "col1": [7, 8, 9, 11, 12, 13],
            "col2": ["bar", "bar", "bar", "foo", "foo", "foo"],
        },
        index=pd.Series([1, 2, 3, 4, 5, 6], name="data_id"),
    )

    validated = schema.validate(passing_frame)
    assert isinstance(validated, pd.DataFrame)

    # Validation must hand back both columns and nothing else.
    column_names = validated.columns
    assert len(column_names) == 2
    assert set(column_names) == {"col1", "col2"}
# raise errors.SchemaError when Check fails
def test_multi_index_index():
schema = DataFrameSchema(
columns={
"column1": Column(Float, Check(lambda s: s > 0)),
"column2": Column(Float, Check(lambda s: s > 0)),
},
index=MultiIndex(
indexes=[
Index(Int,
Check(lambda s: (s < 5) & (s >= 0)),
name="index0"),
Index(String,
Check(lambda s: s.isin(["foo", "bar"])),
name="index1"),
]
)
)
df = pd.DataFrame(
data={
"column1": [0.1, 0.5, 123.1, 10.6, 22.31],
"column2": [0.1, 0.5, 123.1, 10.6, 22.31],
},
index=pd.MultiIndex.from_arrays(
[[0, 1, 2, 3, 4], ["foo", "bar", "foo", "bar", "foo"]],
def test_dataframe_schema_str_repr():
    """Check that the string forms of a schema describe its contents.

    Both ``str(schema)`` and ``repr(schema)`` must be strings containing the
    schema's class name, every column name, and the index name.
    """
    schema = DataFrameSchema(
        columns={
            "col1": Column(Int),
            "col2": Column(String),
            "col3": Column(DateTime),
        },
        index=Index(Int, name="my_index"),
    )
    # Go through the builtins instead of calling the dunder methods directly:
    # that is the route real callers take to obtain these strings.
    for rendered in (str(schema), repr(schema)):
        assert isinstance(rendered, str)
        assert schema.__class__.__name__ in rendered
        for name in ("col1", "col2", "col3", "my_index"):
            assert name in rendered
def test_index_schema():
    """Validate a column-less dataframe against an index-only schema.

    The index carries an element-wise bounds check (1 to 11 inclusive) and a
    whole-index check that its mean exceeds 1. An index of 1..10 must
    validate; an index of 1..19, which contains values above 11, must raise
    ``errors.SchemaError``.
    """
    index_checks = [
        Check(lambda value: 1 <= value <= 11, element_wise=True),
        Check(lambda idx: idx.mean() > 1),
    ]
    schema = DataFrameSchema(columns={}, index=Index(Int, index_checks))

    in_bounds = pd.DataFrame(index=range(1, 11), dtype="int64")
    validated = schema.validate(in_bounds)
    assert isinstance(validated, pd.DataFrame)

    out_of_bounds = pd.DataFrame(index=range(1, 20))
    with pytest.raises(errors.SchemaError):
        schema.validate(out_of_bounds)
def test_multi_index_index():
    """Validate a dataframe whose index is a two-level ``MultiIndex``.

    Level ``index0`` must hold integers in the half-open range [0, 5) and
    level ``index1`` must hold only "foo" or "bar"; both data columns must be
    strictly positive. The frame built here satisfies every check, so
    validation is expected to succeed and return a dataframe.
    """
    schema = DataFrameSchema(
        columns={
            "column1": Column(Float, Check(lambda s: s > 0)),
            "column2": Column(Float, Check(lambda s: s > 0)),
        },
        index=MultiIndex(
            indexes=[
                Index(Int,
                      Check(lambda s: (s < 5) & (s >= 0)),
                      name="index0"),
                Index(String,
                      Check(lambda s: s.isin(["foo", "bar"])),
                      name="index1"),
            ]
        )
    )

    df = pd.DataFrame(
        data={
            "column1": [0.1, 0.5, 123.1, 10.6, 22.31],
            "column2": [0.1, 0.5, 123.1, 10.6, 22.31],
        },
        index=pd.MultiIndex.from_arrays(
            [[0, 1, 2, 3, 4], ["foo", "bar", "foo", "bar", "foo"]],
            names=["index0", "index1"],
        )
    )

    # Actually run the schema against the frame: without this the test only
    # constructs objects and can never fail on a validation regression.
    validated_df = schema.validate(df)
    assert isinstance(validated_df, pd.DataFrame)
def test_dataframe_schema_str_repr():
    """Check that ``str()`` and ``repr()`` of a schema name its parts.

    Each rendering must be a string that mentions the schema's class name,
    the three column names, and the index name.
    """
    schema = DataFrameSchema(
        columns={
            "col1": Column(Int),
            "col2": Column(String),
            "col3": Column(DateTime),
        },
        index=Index(Int, name="my_index"),
    )
    expected_names = ("col1", "col2", "col3", "my_index")
    # Prefer the str()/repr() builtins over direct __str__()/__repr__()
    # calls; the builtins are the public way to request these renderings.
    for text in (str(schema), repr(schema)):
        assert isinstance(text, str)
        assert schema.__class__.__name__ in text
        for name in expected_names:
            assert name in text