How to use the datacompy.columns_equal function in datacompy

To help you get started, we’ve selected a few datacompy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github capitalone / datacompy / tests / test_core.py View on Github external
def test_infinity_and_beyond():
    """Compare columns containing positive and negative infinity.

    Rows with same-signed infinities are expected to match, rows with
    opposite-signed infinities are expected not to. Two finite rows (one
    equal, one unequal) are included alongside them.
    """
    rows = [
        {"a": np.inf, "b": np.inf, "expected": True},
        {"a": -np.inf, "b": -np.inf, "expected": True},
        {"a": -np.inf, "b": np.inf, "expected": False},
        {"a": np.inf, "b": -np.inf, "expected": False},
        {"a": 1, "b": 1, "expected": True},
        {"a": 1, "b": 0, "expected": False},
    ]
    frame = pd.DataFrame(rows)
    result = datacompy.columns_equal(frame["a"], frame["b"])
    # Series names are deliberately ignored; only the values are compared.
    assert_series_equal(frame["expected"], result, check_names=False)
github capitalone / datacompy / tests / test_core.py View on Github external
data = """a|b|expected
Hi|Hi|True
Yo|Yo|True
Hey|Hey |False
résumé|resume|False
résumé|résumé|True
💩|💩|True
💩|🤔|False
 | |True
  | |False
datacompy|DataComPy|False
something||False
|something|False
||True"""
    df = pd.read_csv(io.StringIO(data), sep="|")
    actual_out = datacompy.columns_equal(df.a, df.b, rel_tol=0.2)
    expect_out = df["expected"]
    assert_series_equal(expect_out, actual_out, check_names=False)
github capitalone / datacompy / tests / test_core.py View on Github external
def test_bad_date_columns():
    """A string column that can't be coerced to dates never matches.

    Column ``b`` holds one well-formed date string and one malformed-looking
    one ("217-01-01"). Compared against the datetime version of ``a``, no
    row at all may be reported as equal -- not even the well-formed one.
    """
    records = [
        {"a": "2017-01-01", "b": "2017-01-01"},
        {"a": "2017-01-01", "b": "217-01-01"},
    ]
    frame = pd.DataFrame(records)
    frame["a_dt"] = pd.to_datetime(frame["a"])
    matches = datacompy.columns_equal(frame["a_dt"], frame["b"])
    assert not matches.any()
github capitalone / datacompy / tests / test_core.py View on Github external
def test_date_columns_unequal():
    """Datetime columns should match the same dates stored as strings.

    Each string column is compared with its own datetime conversion (in both
    argument orders) and must match everywhere; each column is then compared
    with the *other* column's representation and must match nowhere.
    """
    frame = pd.DataFrame(
        [{"a": "2017-01-01", "b": "2017-01-02"}, {"a": "2017-01-01"}]
    )
    for name in ("a", "b"):
        frame[name + "_dt"] = pd.to_datetime(frame[name])

    # A column versus its own datetime conversion: every row must match.
    same_pairs = (("a", "a_dt"), ("b", "b_dt"), ("a_dt", "a"), ("b_dt", "b"))
    for left, right in same_pairs:
        assert datacompy.columns_equal(frame[left], frame[right]).all()

    # "a" versus "b" in mixed representations: no row may match.
    cross_pairs = (("b_dt", "a"), ("a_dt", "b"), ("a", "b_dt"), ("b", "a_dt"))
    for left, right in cross_pairs:
        assert not datacompy.columns_equal(frame[left], frame[right]).any()
github capitalone / datacompy / tests / test_core.py View on Github external
|2017-01-01|False
||True"""
    df = pd.read_csv(io.StringIO(data), sep="|")
    # First compare just the strings
    actual_out = datacompy.columns_equal(df.a, df.b, rel_tol=0.2, ignore_spaces=True)
    expect_out = df["expected"]
    assert_series_equal(expect_out, actual_out, check_names=False)

    # Then compare converted to datetime objects
    df["a"] = pd.to_datetime(df["a"])
    df["b"] = pd.to_datetime(df["b"])
    actual_out = datacompy.columns_equal(df.a, df.b, rel_tol=0.2, ignore_spaces=True)
    expect_out = df["expected"]
    assert_series_equal(expect_out, actual_out, check_names=False)
    # and reverse
    actual_out_rev = datacompy.columns_equal(df.b, df.a, rel_tol=0.2, ignore_spaces=True)
    assert_series_equal(expect_out, actual_out_rev, check_names=False)
github capitalone / datacompy / tests / test_core.py View on Github external
def test_rounded_date_columns():
    """Compare a datetime column against timestamp strings of varying precision.

    Column ``a`` is a plain date converted to datetime; column ``b`` stays a
    string carrying an explicit time of day. A row is expected to match only
    when the time part of ``b`` is exactly midnight; a fractional-second or
    one-second offset is expected to make it unequal.
    """
    rows = [
        {"a": "2017-01-01", "b": "2017-01-01 00:00:00.000000", "exp": True},
        {"a": "2017-01-01", "b": "2017-01-01 00:00:00.123456", "exp": False},
        {"a": "2017-01-01", "b": "2017-01-01 00:00:01.000000", "exp": False},
        {"a": "2017-01-01", "b": "2017-01-01 00:00:00", "exp": True},
    ]
    frame = pd.DataFrame(rows)
    frame["a_dt"] = pd.to_datetime(frame["a"])
    result = datacompy.columns_equal(frame["a_dt"], frame["b"])
    # Series names are deliberately ignored; only the values are compared.
    assert_series_equal(result, frame["exp"], check_names=False)
github capitalone / datacompy / tests / test_core.py View on Github external
def test_mixed_column():
    """Compare two columns that each hold a mix of value types.

    Rows pair up strings, ints, infinities and ``Decimal`` values. Identical
    values are expected to match; an int against a string -- even the string
    "1" against the int 1 -- is expected not to.
    """
    rows = [
        {"a": "hi", "b": "hi", "expected": True},
        {"a": 1, "b": 1, "expected": True},
        {"a": np.inf, "b": np.inf, "expected": True},
        {"a": Decimal("1"), "b": Decimal("1"), "expected": True},
        {"a": 1, "b": "1", "expected": False},
        {"a": 1, "b": "yo", "expected": False},
    ]
    frame = pd.DataFrame(rows)
    result = datacompy.columns_equal(frame["a"], frame["b"])
    # Series names are deliberately ignored; only the values are compared.
    assert_series_equal(frame["expected"], result, check_names=False)
github capitalone / datacompy / tests / test_core.py View on Github external
def test_date_columns_unequal():
    """Datetime fields should match with the same dates stored as strings.

    Every column must fully equal its own datetime/string counterpart, and
    must be fully unequal to the other column's counterpart, regardless of
    which side is passed first.
    """
    records = [{"a": "2017-01-01", "b": "2017-01-02"}, {"a": "2017-01-01"}]
    frame = pd.DataFrame(records)
    frame["a_dt"] = pd.to_datetime(frame["a"])
    frame["b_dt"] = pd.to_datetime(frame["b"])

    col_a, col_b = frame["a"], frame["b"]
    dt_a, dt_b = frame["a_dt"], frame["b_dt"]

    # String vs. datetime of the same column: all rows equal, either order.
    for first, second in [(col_a, dt_a), (col_b, dt_b), (dt_a, col_a), (dt_b, col_b)]:
        assert datacompy.columns_equal(first, second).all()

    # One column vs. the other column's counterpart: no rows equal.
    for first, second in [(dt_b, col_a), (dt_a, col_b), (col_a, dt_b), (col_b, dt_a)]:
        assert not datacompy.columns_equal(first, second).any()
github capitalone / datacompy / tests / test_core.py View on Github external
def test_decimal_columns_equal_rel():
    """Compare ``Decimal`` columns with a tolerance of 0.001.

    Identical decimals, and decimals differing only in the ninth decimal
    place, are expected to match; 1.3 vs 1.2 is expected not to. Two NaNs
    are expected to match each other, while NaN against a number is not.
    """
    # NOTE(review): the test name says "rel" but the tolerance passed below
    # is ``abs_tol`` -- confirm which one was intended.
    rows = [
        {"a": Decimal("1"), "b": Decimal("1"), "expected": True},
        {"a": Decimal("1.3"), "b": Decimal("1.3"), "expected": True},
        {"a": Decimal("1.000003"), "b": Decimal("1.000003"), "expected": True},
        {"a": Decimal("1.000000004"), "b": Decimal("1.000000003"), "expected": True},
        {"a": Decimal("1.3"), "b": Decimal("1.2"), "expected": False},
        {"a": np.nan, "b": np.nan, "expected": True},
        {"a": np.nan, "b": Decimal("1"), "expected": False},
        {"a": Decimal("1"), "b": np.nan, "expected": False},
    ]
    frame = pd.DataFrame(rows)
    result = datacompy.columns_equal(frame["a"], frame["b"], abs_tol=0.001)
    # Series names are deliberately ignored; only the values are compared.
    assert_series_equal(frame["expected"], result, check_names=False)