How to use the pydruid.utils.aggregators.count function in pydruid

To help you get started, we’ve selected a few pydruid examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github apache / incubator-superset / superset / connectors / druid / models.py View on Github external
def values_for_column(self, column_name: str, limit: int = 10000) -> List:
    """Retrieve some values for the given column.

    Issues a TopN query against ``self.datasource_name`` with
    ``column_name`` as the dimension, ranked by a ``count`` aggregation and
    capped at ``limit`` entries. The queried interval starts at
    ``self.fetch_values_from`` when that attribute is set (otherwise at
    1970-01-01) and ends at the current time.

    :param column_name: dimension whose values are fetched
    :param limit: TopN threshold, i.e. the maximum number of values
    :returns: the ``column_name`` values found in the query result, or an
        empty list when the query produced no rows
    """
    # Hand the arguments to the logger instead of pre-formatting, so the
    # message is only rendered when the record is actually emitted.
    logging.info(
        "Getting values for columns [%s] limited to [%s]", column_name, limit
    )
    # TODO: Use Lexicographic TopNMetricSpec once supported by PyDruid
    if self.fetch_values_from:
        from_dttm = utils.parse_human_datetime(self.fetch_values_from)
    else:
        from_dttm = datetime(1970, 1, 1)

    qry = dict(
        datasource=self.datasource_name,
        granularity="all",
        intervals=from_dttm.isoformat() + "/" + datetime.now().isoformat(),
        aggregations=dict(count=count("count")),
        dimension=column_name,
        metric="count",
        threshold=limit,
    )

    client = self.cluster.get_pydruid_client()
    client.topn(**qry)
    df = client.export_pandas()

    # NOTE(review): export_pandas() appears to give back no DataFrame when
    # the result set is empty (confirm against the pinned pydruid version).
    # Indexing it would then raise, so report "no values" instead.
    if df is None or df.empty:
        return []
    return df[column_name].to_list()
github apache / incubator-superset / superset / models.py View on Github external
def values_for_column(self, column_name, from_dttm, to_dttm, limit=500):
    """Fetch the most frequent values of a column as a DataFrame.

    Both bounds get their ``tzinfo`` replaced with the configured
    ``DRUID_TZ`` before a TopN query (dimension ``column_name``, metric
    ``count``, threshold ``limit``) is run over the interval they span.

    :param column_name: dimension to query
    :param from_dttm: start of the queried interval
    :param to_dttm: end of the queried interval
    :param limit: TopN threshold
    :returns: the DataFrame exported by the PyDruid client
    :raises Exception: when no DataFrame, or an empty one, comes back
    """
    # TODO: switch to a lexicographic TopNMetricSpec once PyDruid has one
    start = from_dttm.replace(tzinfo=config.get("DRUID_TZ"))
    end = to_dttm.replace(tzinfo=config.get("DRUID_TZ"))

    topn_args = {
        "datasource": self.datasource_name,
        "granularity": "all",
        "intervals": '/'.join((start.isoformat(), end.isoformat())),
        "aggregations": {"count": count("count")},
        "dimension": column_name,
        "metric": "count",
        "threshold": limit,
    }

    druid = self.cluster.get_pydruid_client()
    druid.topn(**topn_args)
    frame = druid.export_pandas()

    # Happy path first; anything else means the query yielded nothing.
    if frame is not None and frame.size != 0:
        return frame
    raise Exception(_("No data was returned."))
github apache / incubator-superset / superset / models.py View on Github external
def values_for_column(self, column_name, from_dttm, to_dttm, limit=500):
    """Return a DataFrame holding some values of the given column.

    The interval bounds are re-stamped with the ``DRUID_TZ`` setting, then
    a TopN query on ``column_name`` ordered by a ``count`` aggregation and
    limited to ``limit`` entries is sent through the cluster's PyDruid
    client.

    :param column_name: dimension whose values are requested
    :param from_dttm: lower bound of the query interval
    :param to_dttm: upper bound of the query interval
    :param limit: maximum number of entries (TopN threshold)
    :returns: the query result exported as a pandas DataFrame
    :raises Exception: if the export is ``None`` or has size 0
    """
    # TODO: Use Lexicographic TopNMetricSpec once supported by PyDruid
    lower = from_dttm.replace(tzinfo=config.get("DRUID_TZ"))
    upper = to_dttm.replace(tzinfo=config.get("DRUID_TZ"))
    interval = "{}/{}".format(lower.isoformat(), upper.isoformat())

    query = dict(
        datasource=self.datasource_name,
        granularity="all",
        intervals=interval,
        aggregations={"count": count("count")},
        dimension=column_name,
        metric="count",
        threshold=limit,
    )

    pydruid_client = self.cluster.get_pydruid_client()
    pydruid_client.topn(**query)
    exported = pydruid_client.export_pandas()

    # Guard clause: bail out when the export carries no data at all.
    nothing_returned = exported is None or exported.size == 0
    if nothing_returned:
        raise Exception(_("No data was returned."))

    return exported