Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_infinity_and_beyond():
df = pd.DataFrame(
[
{"a": np.inf, "b": np.inf, "expected": True},
{"a": -np.inf, "b": -np.inf, "expected": True},
{"a": -np.inf, "b": np.inf, "expected": False},
{"a": np.inf, "b": -np.inf, "expected": False},
{"a": 1, "b": 1, "expected": True},
{"a": 1, "b": 0, "expected": False},
]
)
actual_out = datacompy.columns_equal(df.a, df.b)
expect_out = df["expected"]
assert_series_equal(expect_out, actual_out, check_names=False)
data = """a|b|expected
Hi|Hi|True
Yo|Yo|True
Hey|Hey |False
résumé|resume|False
résumé|résumé|True
💩|💩|True
💩|🤔|False
| |True
| |False
datacompy|DataComPy|False
something||False
|something|False
||True"""
df = pd.read_csv(io.StringIO(data), sep="|")
actual_out = datacompy.columns_equal(df.a, df.b, rel_tol=0.2)
expect_out = df["expected"]
assert_series_equal(expect_out, actual_out, check_names=False)
def test_bad_date_columns():
"""If strings can't be coerced into dates then it should be false for the
whole column.
"""
df = pd.DataFrame(
[{"a": "2017-01-01", "b": "2017-01-01"}, {"a": "2017-01-01", "b": "217-01-01"}]
)
df["a_dt"] = pd.to_datetime(df["a"])
assert not datacompy.columns_equal(df.a_dt, df.b).any()
def test_date_columns_unequal():
"""I want datetime fields to match with dates stored as strings
"""
df = pd.DataFrame([{"a": "2017-01-01", "b": "2017-01-02"}, {"a": "2017-01-01"}])
df["a_dt"] = pd.to_datetime(df["a"])
df["b_dt"] = pd.to_datetime(df["b"])
assert datacompy.columns_equal(df.a, df.a_dt).all()
assert datacompy.columns_equal(df.b, df.b_dt).all()
assert datacompy.columns_equal(df.a_dt, df.a).all()
assert datacompy.columns_equal(df.b_dt, df.b).all()
assert not datacompy.columns_equal(df.b_dt, df.a).any()
assert not datacompy.columns_equal(df.a_dt, df.b).any()
assert not datacompy.columns_equal(df.a, df.b_dt).any()
assert not datacompy.columns_equal(df.b, df.a_dt).any()
|2017-01-01|False
||True"""
df = pd.read_csv(io.StringIO(data), sep="|")
# First compare just the strings
actual_out = datacompy.columns_equal(df.a, df.b, rel_tol=0.2, ignore_spaces=True)
expect_out = df["expected"]
assert_series_equal(expect_out, actual_out, check_names=False)
# Then compare converted to datetime objects
df["a"] = pd.to_datetime(df["a"])
df["b"] = pd.to_datetime(df["b"])
actual_out = datacompy.columns_equal(df.a, df.b, rel_tol=0.2, ignore_spaces=True)
expect_out = df["expected"]
assert_series_equal(expect_out, actual_out, check_names=False)
# and reverse
actual_out_rev = datacompy.columns_equal(df.b, df.a, rel_tol=0.2, ignore_spaces=True)
assert_series_equal(expect_out, actual_out_rev, check_names=False)
def test_rounded_date_columns():
"""If strings can't be coerced into dates then it should be false for the
whole column.
"""
df = pd.DataFrame(
[
{"a": "2017-01-01", "b": "2017-01-01 00:00:00.000000", "exp": True},
{"a": "2017-01-01", "b": "2017-01-01 00:00:00.123456", "exp": False},
{"a": "2017-01-01", "b": "2017-01-01 00:00:01.000000", "exp": False},
{"a": "2017-01-01", "b": "2017-01-01 00:00:00", "exp": True},
]
)
df["a_dt"] = pd.to_datetime(df["a"])
actual = datacompy.columns_equal(df.a_dt, df.b)
expected = df["exp"]
assert_series_equal(actual, expected, check_names=False)
def test_mixed_column():
df = pd.DataFrame(
[
{"a": "hi", "b": "hi", "expected": True},
{"a": 1, "b": 1, "expected": True},
{"a": np.inf, "b": np.inf, "expected": True},
{"a": Decimal("1"), "b": Decimal("1"), "expected": True},
{"a": 1, "b": "1", "expected": False},
{"a": 1, "b": "yo", "expected": False},
]
)
actual_out = datacompy.columns_equal(df.a, df.b)
expect_out = df["expected"]
assert_series_equal(expect_out, actual_out, check_names=False)
def test_date_columns_unequal():
"""I want datetime fields to match with dates stored as strings
"""
df = pd.DataFrame([{"a": "2017-01-01", "b": "2017-01-02"}, {"a": "2017-01-01"}])
df["a_dt"] = pd.to_datetime(df["a"])
df["b_dt"] = pd.to_datetime(df["b"])
assert datacompy.columns_equal(df.a, df.a_dt).all()
assert datacompy.columns_equal(df.b, df.b_dt).all()
assert datacompy.columns_equal(df.a_dt, df.a).all()
assert datacompy.columns_equal(df.b_dt, df.b).all()
assert not datacompy.columns_equal(df.b_dt, df.a).any()
assert not datacompy.columns_equal(df.a_dt, df.b).any()
assert not datacompy.columns_equal(df.a, df.b_dt).any()
assert not datacompy.columns_equal(df.b, df.a_dt).any()
def test_decimal_columns_equal_rel():
df = pd.DataFrame(
[
{"a": Decimal("1"), "b": Decimal("1"), "expected": True},
{"a": Decimal("1.3"), "b": Decimal("1.3"), "expected": True},
{"a": Decimal("1.000003"), "b": Decimal("1.000003"), "expected": True},
{"a": Decimal("1.000000004"), "b": Decimal("1.000000003"), "expected": True},
{"a": Decimal("1.3"), "b": Decimal("1.2"), "expected": False},
{"a": np.nan, "b": np.nan, "expected": True},
{"a": np.nan, "b": Decimal("1"), "expected": False},
{"a": Decimal("1"), "b": np.nan, "expected": False},
]
)
actual_out = datacompy.columns_equal(df.a, df.b, abs_tol=0.001)
expect_out = df["expected"]
assert_series_equal(expect_out, actual_out, check_names=False)