# Fixtures for datacompy's legacy SparkCompare API. The imports below assume
# a datacompy release that still exposes SparkCompare and NUMERIC_SPARK_TYPES
# at the package top level.
import pytest

import datacompy
from datacompy import SparkCompare

# Flag consumed by comparison1_fixture; assumed True as a reasonable default.
CACHE_INTERMEDIATES = True

# Deliberately negative rel_tol: SparkCompare validates its tolerances, so
# requesting this fixture is expected to raise ValueError
# (see test_negative_tolerances below).
@pytest.fixture
def comparison_neg_tol_fixture(base_tol, compare_both_tol, spark):
    return SparkCompare(
        spark,
        base_tol,
        compare_both_tol,
        join_columns=["account_identifier"],
        rel_tol=-0.2,
        abs_tol=0.01,
    )

@pytest.fixture
def comparison_decimal_fixture(base_decimal, compare_decimal, spark):
    return SparkCompare(spark, base_decimal, compare_decimal, join_columns=["acct"])

@pytest.fixture
def comparison_rel_tol_fixture(base_tol, compare_rel_tol, spark):
    return SparkCompare(
        spark, base_tol, compare_rel_tol, join_columns=["account_identifier"], rel_tol=0.1
    )

@pytest.fixture
def comparison1_fixture(base_df1, compare_df1, spark):
    return SparkCompare(
        spark, base_df1, compare_df1, join_columns=["acct"], cache_intermediates=CACHE_INTERMEDIATES
    )

@pytest.fixture
def comparison_abs_tol_fixture(base_tol, compare_abs_tol, spark):
    return SparkCompare(
        spark, base_tol, compare_abs_tol, join_columns=["account_identifier"], abs_tol=0.01
    )
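
# Illustrative sketch, not part of the original suite: the kind of tolerance
# rule the abs_tol/rel_tol fixtures above exercise. The formula here is the
# common combined-tolerance check (as in numpy.isclose); it is an assumption
# for illustration, not a claim about SparkCompare internals.
def test_tolerance_rule_sketch():
    base, compare = 100.00, 100.009
    abs_tol, rel_tol = 0.01, 0.0
    # 0.009 difference falls within the 0.01 absolute tolerance.
    assert abs(base - compare) <= abs_tol + rel_tol * abs(compare)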

@pytest.fixture
def comparison_known_diffs2(base_td, compare_source, spark):
    return SparkCompare(
        spark,
        base_td,
        compare_source,
        join_columns=[("acct", "ACCOUNT_IDENTIFIER"), ("acct_seq", "SEQ_NUMBER")],
        column_mapping=[("stat_cd", "STATC"), ("open_dt", "ACCOUNT_OPEN"), ("cd", "CODE")],
        known_differences=[
            {
                "name": "Left-padded, four-digit numeric code",
                "types": datacompy.NUMERIC_SPARK_TYPES,
                "transformation": "lpad(cast({input} AS bigint), 4, '0')",
            },
            {
                "name": "Null to *2",
                "types": ["string"],
                "transformation": "case when {input} is null then '*2' else {input} end",
            },
        ],
    )
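
# Illustrative sketch, not from the original suite: sanity-check the lpad
# expression used in the known-difference transformation above. Runs against
# the same `spark` session fixture the rest of this module relies on.
def test_lpad_known_difference_expression(spark):
    # Spark SQL's lpad left-pads the cast value to four characters with '0'.
    row = spark.sql("SELECT lpad(CAST(42 AS bigint), 4, '0') AS code").collect()[0]
    assert row["code"] == "0042"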

@pytest.fixture
def comparison4_fixture(base_df2, compare_df1, spark):
    return SparkCompare(
        spark,
        base_df2,
        compare_df1,
        join_columns=["acct"],
        column_mapping=[("super_duper_big_long_name", "name")],
    )

@pytest.fixture
def show_all_columns_and_match_rate_fixture(base_tol, compare_both_tol, spark):
    return SparkCompare(
        spark,
        base_tol,
        compare_both_tol,
        join_columns=["account_identifier"],
        show_all_columns=True,
        match_rates=True,
    )

def test_negative_tolerances(spark, base_tol, compare_both_tol):
    with pytest.raises(ValueError, match="Please enter positive valued tolerances"):
        comp = SparkCompare(
            spark,
            base_tol,
            compare_both_tol,
            join_columns=["account_identifier"],
            rel_tol=-0.2,
            abs_tol=0.01,
        )
        comp.report()  # not reached if the constructor raises

@pytest.fixture
def comparison2_fixture(base_df1, compare_df2, spark):
    return SparkCompare(spark, base_df1, compare_df2, join_columns=["acct"])
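
# Illustrative sketch, not upstream code: how these fixtures are typically
# consumed. Writing the report to an in-memory buffer keeps the smoke test
# quiet; report() accepting a file-like target is an assumption about the
# legacy SparkCompare API worth verifying against your datacompy version.
def test_comparison2_report_smoke(comparison2_fixture):
    import io

    out = io.StringIO()
    comparison2_fixture.report(file=out)
    assert out.getvalue()  # the comparison produced a non-empty report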