Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_series_schema():
    """Validate integer and string series against ``SeriesSchema`` objects.

    The integer schema requires every element to lie in ``[0, 100]``; the
    string schema is nullable, coerced, and restricted to three known words.
    Values that break the integer schema must raise ``errors.SchemaError``.
    """
    int_schema = SeriesSchema(
        Int,
        Check(lambda x: 0 <= x <= 100, element_wise=True),
    )
    validated_ints = int_schema.validate(pd.Series([0, 30, 50, 100]))
    assert isinstance(validated_ints, pd.Series)

    str_schema = SeriesSchema(
        String,
        Check(lambda s: s.isin(["foo", "bar", "baz"])),
        nullable=True,
        coerce=True,
    )
    # The nullable schema is expected to accept both None and NaN entries.
    for missing in (None, np.nan):
        validated_strs = str_schema.validate(
            pd.Series(["foo", "bar", "baz", missing]))
        assert isinstance(validated_strs, pd.Series)

    # error cases
    for bad_value in (-1, 101, 50.1, "foo"):
        with pytest.raises(errors.SchemaError):
            int_schema.validate(pd.Series([bad_value]))
# Exercises an integer SeriesSchema whose element-wise check requires
# 0 <= x <= 100: valid data, failing values, duplicate values, a mismatched
# series name, and float data declared as integer.
# NOTE(review): this copy of the test is cut off -- the final `validate(`
# call at the end of the snippet is never closed.
def test_series_schema():
schema = SeriesSchema(
Int, Check(lambda x: 0 <= x <= 100, element_wise=True))
validated_series = schema.validate(pd.Series([0, 30, 50, 100]))
assert isinstance(validated_series, pd.Series)
# error cases
# each single-element series below is expected to raise SchemaError
for data in [-1, 101, 50.1, "foo"]:
with pytest.raises(errors.SchemaError):
schema.validate(pd.Series([data]))
# NOTE(review): the loop variable `data` is unused here; the TypeError class
# itself is passed to validate() -- presumably validate(data) was intended.
for data in [-1, {"a": 1}, -1.0]:
with pytest.raises(TypeError):
schema.validate(TypeError)
# the value 1 appears twice, which a schema with allow_duplicates=False
# is expected to reject
non_duplicate_schema = SeriesSchema(
Int, allow_duplicates=False)
with pytest.raises(errors.SchemaError):
non_duplicate_schema.validate(pd.Series([0, 1, 2, 3, 4, 1]))
# when series name doesn't match schema
named_schema = SeriesSchema(Int, name="my_series")
with pytest.raises(
errors.SchemaError,
match=r"^Expected .+ to have name"):
named_schema.validate(pd.Series(range(5), name="your_series"))
# when series floats are declared to be integer
with pytest.raises(
errors.SchemaError,
match=r"^after dropping null values, expected values in series"):
SeriesSchema(Int, nullable=True).validate(
def test_no_dtype_series():
    """Check null handling for a ``SeriesSchema`` built without a dtype.

    A non-nullable schema must accept a series without nulls, a nullable
    schema must accept a series containing ``None``, and a non-nullable
    schema must raise ``errors.SchemaError`` for a series containing ``None``.
    """
    schema = SeriesSchema(nullable=False)
    validated_series = schema.validate(pd.Series([0, 1, 2, 3, 4, 1]))
    assert isinstance(validated_series, pd.Series)

    schema = SeriesSchema(nullable=True)
    validated_series = schema.validate(pd.Series([0, 1, 2, None, 4, 1]))
    assert isinstance(validated_series, pd.Series)

    # Build the schema outside the ``raises`` block so that only the
    # validate() call can satisfy the expected SchemaError; an error raised
    # by the constructor should fail the test rather than pass it.
    schema = SeriesSchema(nullable=False)
    with pytest.raises(errors.SchemaError):
        schema.validate(pd.Series([0, 1, 2, None, 4, 1]))
def test_series_schema_multiple_validators():
    """Validate a series against a schema that carries two checks.

    The first check is element-wise (each value within ``[0, 50]``); the
    second operates on the whole series (at least one value equals 21).
    """
    checks = [
        Check(lambda x: 0 <= x <= 50, element_wise=True),
        Check(lambda s: (s == 21).any()),
    ]
    schema = SeriesSchema(Int, checks)

    passing = pd.Series([1, 5, 21, 50])
    assert isinstance(schema.validate(passing), pd.Series)

    # raise error if any of the validators fails
    # (this series has no element equal to 21)
    failing = pd.Series([1, 5, 20, 50])
    with pytest.raises(errors.SchemaError):
        schema.validate(failing)
def test_no_dtype_series():
    """Check null handling for a ``SeriesSchema`` built without a dtype.

    Covers three cases: a non-nullable schema with null-free data, a
    nullable schema with data containing ``None``, and a non-nullable schema
    with data containing ``None`` (which must raise ``errors.SchemaError``).
    """
    schema = SeriesSchema(nullable=False)
    validated_series = schema.validate(pd.Series([0, 1, 2, 3, 4, 1]))
    assert isinstance(validated_series, pd.Series)

    schema = SeriesSchema(nullable=True)
    validated_series = schema.validate(pd.Series([0, 1, 2, None, 4, 1]))
    assert isinstance(validated_series, pd.Series)

    # Keep the ``raises`` body down to the single call under test: the
    # schema is constructed first so a constructor failure cannot be
    # mistaken for the expected validation error.
    schema = SeriesSchema(nullable=False)
    with pytest.raises(errors.SchemaError):
        schema.validate(pd.Series([0, 1, 2, None, 4, 1]))
Int, allow_duplicates=False)
with pytest.raises(errors.SchemaError):
non_duplicate_schema.validate(pd.Series([0, 1, 2, 3, 4, 1]))
# when series name doesn't match schema
named_schema = SeriesSchema(Int, name="my_series")
with pytest.raises(
errors.SchemaError,
match=r"^Expected .+ to have name"):
named_schema.validate(pd.Series(range(5), name="your_series"))
# when series floats are declared to be integer
with pytest.raises(
errors.SchemaError,
match=r"^after dropping null values, expected values in series"):
SeriesSchema(Int, nullable=True).validate(
pd.Series([1.1, 2.3, 5.5, np.nan]))
# when series contains null values when schema is not nullable
with pytest.raises(
errors.SchemaError,
match=r"^non-nullable series .+ contains null values"):
SeriesSchema(Float, nullable=False).validate(
pd.Series([1.1, 2.3, 5.5, np.nan]))
# when series contains null values when schema is not nullable in addition
# to having the wrong data type
with pytest.raises(
errors.SchemaError,
match=(
r"^expected series '.+' to have type .+, got .+ and "
"non-nullable series contains null values")):
def test_series_schema():
    """Validate integer and string series against ``SeriesSchema`` objects.

    Checks that:

    * an integer schema with an element-wise ``0 <= x <= 100`` check accepts
      in-range data,
    * a nullable, coerced string schema accepts series whose missing entry
      is either ``None`` or ``np.nan``,
    * series holding out-of-range or wrongly typed values raise
      ``errors.SchemaError``,
    * inputs that are not a ``pd.Series`` raise ``TypeError``.
    """
    int_schema = SeriesSchema(
        Int, Check(lambda x: 0 <= x <= 100, element_wise=True))
    assert isinstance(
        int_schema.validate(pd.Series([0, 30, 50, 100])), pd.Series)

    str_schema = SeriesSchema(
        String, Check(lambda s: s.isin(["foo", "bar", "baz"])),
        nullable=True, coerce=True)
    assert isinstance(
        str_schema.validate(pd.Series(["foo", "bar", "baz", None])),
        pd.Series)
    assert isinstance(
        str_schema.validate(pd.Series(["foo", "bar", "baz", np.nan])),
        pd.Series)

    # error cases
    for data in [-1, 101, 50.1, "foo"]:
        with pytest.raises(errors.SchemaError):
            int_schema.validate(pd.Series([data]))

    # Pass each non-Series value itself. Previously the TypeError class was
    # passed on every iteration, so the listed inputs were never exercised.
    for data in [-1, {"a": 1}, -1.0]:
        with pytest.raises(TypeError):
            int_schema.validate(data)
def test_vectorized_checks():
    """Validate a series with a check applied to the whole series at once.

    The check (``element_wise=False``) compares the series' value counts
    to 2, so data where every value occurs exactly twice is expected to pass.
    """
    counts_equal_two = Check(
        lambda s: s.value_counts() == 2, element_wise=False)
    schema = SeriesSchema(Int, counts_equal_two)

    paired_values = pd.Series([1, 1, 2, 2, 3, 3])
    assert isinstance(schema.validate(paired_values), pd.Series)

    # error case: each value occurs only once
    unique_values = pd.Series([1, 2, 3])
    with pytest.raises(errors.SchemaError):
        schema.validate(unique_values)