How to use the datacompy.sparkcompare.MatchType.MATCH.value enum value in datacompy

To help you get started, we’ve selected a few datacompy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github capitalone / datacompy / datacompy / sparkcompare.py View on Github external
def _print_num_of_rows_with_column_equality(self, myfile):
        """Write the "Row Comparison" section of the report to ``myfile``.

        Selects ``self.columns_compared`` from the joined dataframe, registers
        the result as the temp view ``matched_df``, and counts the rows in
        which every one of those columns equals ``MatchType.MATCH.value``.
        Two lines are then printed: the number of common rows that are not in
        that count, and the count itself.

        Args:
            myfile: File-like object passed to ``print(..., file=myfile)``.
        """
        # Restrict the joined dataframe to the compared columns and make it
        # queryable through Spark SQL.
        joined = self._get_or_create_joined_dataframe()
        flags_only = joined.select(*self.columns_compared)
        flags_only.createOrReplaceTempView("matched_df")

        # One "A.<column>=<match value>" predicate per compared column; a row
        # counts as fully equal only when all predicates hold.
        match_literal = str(MatchType.MATCH.value)
        predicates = []
        for column in self.columns_compared:
            predicates.append("A." + column + "=" + match_literal)
        sql_text = r"""SELECT count(*) AS row_count FROM matched_df A WHERE {}""".format(
            " AND ".join(predicates)
        )

        # The query yields a single row whose first field is the count.
        fully_equal = self.spark.sql(sql_text).head()[0]

        print("\n****** Row Comparison ******", file=myfile)
        # Rows common to both sides that are not fully equal.
        partially_unequal = self.common_row_count - fully_equal
        print(
            "Number of rows with some columns unequal: {}".format(partially_unequal),
            file=myfile,
        )
        print(
            "Number of rows with all columns equal: {}".format(fully_equal),
            file=myfile,
        )
github capitalone / datacompy / datacompy / sparkcompare.py View on Github external
+ kd["transformation"].format(new_input, input=new_input)
                            + ") = A.{name})"
                        )

        case_string = (
            "( CASE WHEN ("
            + " OR ".join(equal_comparisons)
            + ") THEN {match_success} WHEN ("
            + " OR ".join(known_diff_comparisons)
            + ") THEN {match_known_difference} ELSE {match_failure} END) "
            + "AS {name}, A.{name} AS {name}_base, B.{name} AS {name}_compare"
        )

        return case_string.format(
            name=name,
            match_success=MatchType.MATCH.value,
            match_known_difference=MatchType.KNOWN_DIFFERENCE.value,
            match_failure=MatchType.MISMATCH.value,
        )