Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
|2017-01-01|False
||True"""
df = pd.read_csv(six.StringIO(data), sep="|")
# First compare just the strings
actual_out = utils.columns_equal(df.a, df.b, rel_tol=0.2, ignore_spaces=True, ignore_case=True)
expect_out = df["expected"]
assert_series_equal(expect_out, actual_out, check_names=False)
# Then compare converted to datetime objects
df["a"] = pd.to_datetime(df["a"])
df["b"] = pd.to_datetime(df["b"])
actual_out = utils.columns_equal(df.a, df.b, rel_tol=0.2, ignore_spaces=True)
expect_out = df["expected"]
assert_series_equal(expect_out, actual_out, check_names=False)
# and reverse
actual_out_rev = utils.columns_equal(df.b, df.a, rel_tol=0.2, ignore_spaces=True)
assert_series_equal(expect_out, actual_out_rev, check_names=False)
def test_decimal_columns_equal():
    """Compare two columns of ``Decimal`` values row by row.

    Every record holds the pair of values to compare together with the
    outcome the test expects for that row; the NaN rows cover both sides
    missing as well as only one side missing. No tolerance arguments are
    passed to ``columns_equal``.
    """
    records = [
        {"a": Decimal("1"), "b": Decimal("1"), "expected": True},
        {"a": Decimal("1.3"), "b": Decimal("1.3"), "expected": True},
        {"a": Decimal("1.000003"), "b": Decimal("1.000003"), "expected": True},
        {"a": Decimal("1.000000004"), "b": Decimal("1.000000003"), "expected": False},
        {"a": Decimal("1.3"), "b": Decimal("1.2"), "expected": False},
        {"a": np.nan, "b": np.nan, "expected": True},
        {"a": np.nan, "b": Decimal("1"), "expected": False},
        {"a": Decimal("1"), "b": np.nan, "expected": False},
    ]
    frame = pd.DataFrame(records)
    result = utils.columns_equal(frame.a, frame.b)
    assert_series_equal(frame["expected"], result, check_names=False)
def test_mixed_column():
    """Compare two columns whose rows mix strings, numbers and Decimals.

    Rows holding the same value on both sides are expected to match; the
    rows pairing the integer ``1`` with a string (``"1"`` or ``"yo"``) are
    expected not to match.
    """
    records = [
        {"a": "hi", "b": "hi", "expected": True},
        {"a": 1, "b": 1, "expected": True},
        {"a": np.inf, "b": np.inf, "expected": True},
        {"a": Decimal("1"), "b": Decimal("1"), "expected": True},
        {"a": 1, "b": "1", "expected": False},
        {"a": 1, "b": "yo", "expected": False},
    ]
    frame = pd.DataFrame(records)
    result = utils.columns_equal(frame.a, frame.b)
    assert_series_equal(frame["expected"], result, check_names=False)
def test_date_columns_unequal():
    """Check datetime columns against date strings, in both argument orders.

    A column must equal its own parsed (or unparsed) counterpart in every
    row, and must match the other column's counterpart in no row at all.
    The second record omits ``b``, so that column has a missing value.
    """
    frame = pd.DataFrame(
        [{"a": "2017-01-01", "b": "2017-01-02"}, {"a": "2017-01-01"}]
    )
    frame["a_dt"] = pd.to_datetime(frame["a"])
    frame["b_dt"] = pd.to_datetime(frame["b"])

    # String vs. datetime views of the same column, tried in both directions.
    matching_pairs = [("a", "a_dt"), ("b", "b_dt"), ("a_dt", "a"), ("b_dt", "b")]
    # Views of different columns, again in both directions.
    differing_pairs = [("b_dt", "a"), ("a_dt", "b"), ("a", "b_dt"), ("b", "a_dt")]

    for left, right in matching_pairs:
        assert utils.columns_equal(frame[left], frame[right]).all()
    for left, right in differing_pairs:
        assert not utils.columns_equal(frame[left], frame[right]).any()
def test_date_columns_unequal():
    """Check datetime columns against date strings, in both argument orders.

    A column must equal its own parsed (or unparsed) counterpart in every
    row, and must match the other column's counterpart in no row at all.
    The second record omits ``b``, so that column has a missing value.
    """
    frame = pd.DataFrame(
        [{"a": "2017-01-01", "b": "2017-01-02"}, {"a": "2017-01-01"}]
    )
    frame["a_dt"] = pd.to_datetime(frame["a"])
    frame["b_dt"] = pd.to_datetime(frame["b"])

    # String vs. datetime views of the same column, tried in both directions.
    matching_pairs = [("a", "a_dt"), ("b", "b_dt"), ("a_dt", "a"), ("b_dt", "b")]
    # Views of different columns, again in both directions.
    differing_pairs = [("b_dt", "a"), ("a_dt", "b"), ("a", "b_dt"), ("b", "a_dt")]

    for left, right in matching_pairs:
        assert utils.columns_equal(frame[left], frame[right]).all()
    for left, right in differing_pairs:
        assert not utils.columns_equal(frame[left], frame[right]).any()
def test_decimal_float_columns_equal():
    """Compare a column of ``Decimal`` values with a column of plain numbers.

    Each record pairs a ``Decimal`` (or NaN) in ``a`` with an int, float or
    NaN in ``b`` and records the outcome expected for that row. No tolerance
    arguments are passed to ``columns_equal``.
    """
    records = [
        {"a": Decimal("1"), "b": 1, "expected": True},
        {"a": Decimal("1.3"), "b": 1.3, "expected": True},
        {"a": Decimal("1.000003"), "b": 1.000003, "expected": True},
        {"a": Decimal("1.000000004"), "b": 1.000000003, "expected": False},
        {"a": Decimal("1.3"), "b": 1.2, "expected": False},
        {"a": np.nan, "b": np.nan, "expected": True},
        {"a": np.nan, "b": 1, "expected": False},
        {"a": Decimal("1"), "b": np.nan, "expected": False},
    ]
    frame = pd.DataFrame(records)
    result = utils.columns_equal(frame.a, frame.b)
    assert_series_equal(frame["expected"], result, check_names=False)
def test_rounded_date_columns():
    """Compare a datetime column with timestamp strings of varying precision.

    Column ``a`` holds a date string that is parsed into ``a_dt``; column
    ``b`` holds timestamp strings for the same day. A row is expected to
    match only when the string in ``b`` is exactly midnight, and to differ
    when it carries a non-zero seconds or fractional-seconds component.
    """
    records = [
        {"a": "2017-01-01", "b": "2017-01-01 00:00:00.000000", "exp": True},
        {"a": "2017-01-01", "b": "2017-01-01 00:00:00.123456", "exp": False},
        {"a": "2017-01-01", "b": "2017-01-01 00:00:01.000000", "exp": False},
        {"a": "2017-01-01", "b": "2017-01-01 00:00:00", "exp": True},
    ]
    frame = pd.DataFrame(records)
    frame["a_dt"] = pd.to_datetime(frame["a"])
    result = utils.columns_equal(frame.a_dt, frame.b)
    assert_series_equal(result, frame["exp"], check_names=False)
def test_mixed_column_with_ignore_spaces_and_case():
    """Compare mixed-type columns with ``ignore_spaces`` and ``ignore_case`` on.

    String rows that differ only by a trailing space and/or letter case are
    expected to match; rows pairing the integer ``1`` with a string are
    expected not to match.
    """
    records = [
        {"a": "hi", "b": "hi ", "expected": True},
        {"a": 1, "b": 1, "expected": True},
        {"a": np.inf, "b": np.inf, "expected": True},
        {"a": Decimal("1"), "b": Decimal("1"), "expected": True},
        {"a": 1, "b": "1 ", "expected": False},
        {"a": 1, "b": "yo ", "expected": False},
        {"a": "Hi", "b": "hI ", "expected": True},
        {"a": "HI", "b": "HI ", "expected": True},
        {"a": "hi", "b": "hi ", "expected": True},
    ]
    frame = pd.DataFrame(records)
    result = utils.columns_equal(
        frame.a, frame.b, ignore_spaces=True, ignore_case=True
    )
    assert_series_equal(frame["expected"], result, check_names=False)
def test_date_columns_unequal():
    """Check datetime columns against date strings, in both argument orders.

    A column must equal its own parsed (or unparsed) counterpart in every
    row, and must match the other column's counterpart in no row at all.
    The second record omits ``b``, so that column has a missing value.
    """
    frame = pd.DataFrame(
        [{"a": "2017-01-01", "b": "2017-01-02"}, {"a": "2017-01-01"}]
    )
    frame["a_dt"] = pd.to_datetime(frame["a"])
    frame["b_dt"] = pd.to_datetime(frame["b"])

    # String vs. datetime views of the same column, tried in both directions.
    matching_pairs = [("a", "a_dt"), ("b", "b_dt"), ("a_dt", "a"), ("b_dt", "b")]
    # Views of different columns, again in both directions.
    differing_pairs = [("b_dt", "a"), ("a_dt", "b"), ("a", "b_dt"), ("b", "a_dt")]

    for left, right in matching_pairs:
        assert utils.columns_equal(frame[left], frame[right]).all()
    for left, right in differing_pairs:
        assert not utils.columns_equal(frame[left], frame[right]).any()