How to use the pydruid.utils.dimensions.RegexExtraction function in pydruid

To help you get started, we’ve selected a few pydruid examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: apache / incubator-superset / tests / druid_func_tests.py (view on GitHub, external link)
def test_get_filters_extraction_fn_regex(self):
        """Check that a regex ``extractionFn`` is carried onto the built filter.

        A column whose dimension spec declares a ``regex`` extraction function
        is filtered with an ``in`` operator; the filter returned by
        ``DruidDatasource.get_filters`` must expose a ``RegexExtraction`` whose
        expression equals the one declared in the spec.
        """
        filters = [{"col": "buildPrefix", "val": ["22B"], "op": "in"}]
        dimension_spec = {
            "type": "extraction",
            "dimension": "build",
            "outputName": "buildPrefix",
            "outputType": "STRING",
            "extractionFn": {"type": "regex", "expr": "(^[0-9A-Za-z]{3})"},
        }
        # The column stores its dimension spec as a JSON string.
        spec_json = json.dumps(dimension_spec)
        col = DruidColumn(column_name="buildPrefix", dimension_spec_json=spec_json)
        column_dict = {"buildPrefix": col}

        f = DruidDatasource.get_filters(filters, [], column_dict)

        # Use the unittest assertion rather than a bare ``assert``: it is not
        # stripped under ``python -O``, reports the offending object on
        # failure, and matches the ``assertEqual`` call below.
        f_ext_fn = f.extraction_function
        self.assertIsInstance(f_ext_fn, RegexExtraction)

        # ``_expr`` is a private attribute of the extraction object; the test
        # reads it directly to compare against the declared expression.
        dim_ext_fn = dimension_spec["extractionFn"]
        self.assertEqual(dim_ext_fn["expr"], f_ext_fn._expr)
Source: apache / incubator-superset / superset / connectors / druid / models.py (view on GitHub, external link)
# Excerpt from the middle of a function (its ``def`` line is not shown here).
# When the dimension spec carries an "extractionFn" entry, build the matching
# extraction-function object from it, keyed on the entry's "type".
if dim_spec and "extractionFn" in dim_spec:
            # The spec's underlying "dimension" replaces any earlier value of ``col``.
            col = dim_spec["dimension"]
            fn = dim_spec["extractionFn"]
            ext_type = fn.get("type")
            # Only "lookup" specs whose nested lookup is of type "map" are handled
            # here; any other "lookup" variant falls through to the final ``else``
            # and is rejected as unsupported.
            if ext_type == "lookup" and fn["lookup"].get("type") == "map":
                # Optional settings: absent keys yield None / False / False.
                replace_missing_values = fn.get("replaceMissingValueWith")
                retain_missing_values = fn.get("retainMissingValue", False)
                injective = fn.get("isOneToOne", False)
                extraction_fn = MapLookupExtraction(
                    fn["lookup"]["map"],
                    replace_missing_values=replace_missing_values,
                    retain_missing_values=retain_missing_values,
                    injective=injective,
                )
            elif ext_type == "regex":
                # "expr" is required for regex specs (a missing key raises KeyError).
                extraction_fn = RegexExtraction(fn["expr"])
            elif ext_type == "registeredLookup":
                extraction_fn = RegisteredLookupExtraction(fn.get("lookup"))
            elif ext_type == "timeFormat":
                # Arguments are passed positionally: format, locale, time zone.
                extraction_fn = TimeFormatExtraction(
                    fn.get("format"), fn.get("locale"), fn.get("timeZone")
                )
            else:
                # NOTE(review): if the spec has no "type" key, ``ext_type`` is None
                # and the string concatenation below raises TypeError before this
                # Exception is built. The message is also concatenated before being
                # passed to ``_``, presumably a translation helper, so it would be
                # looked up with the type already appended; confirm.
                raise Exception(_("Unsupported extraction function: " + ext_type))
        # NOTE(review): when the ``if`` above is not taken, ``col`` and
        # ``extraction_fn`` must already be bound by code outside this excerpt;
        # confirm against the full function.
        return (col, extraction_fn)