Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def values_for_column(self, column_name: str, limit: int = 10000) -> List:
"""Retrieve some values for the given column"""
logging.info(
"Getting values for columns [{}] limited to [{}]".format(column_name, limit)
)
# TODO: Use Lexicographic TopNMetricSpec once supported by PyDruid
if self.fetch_values_from:
from_dttm = utils.parse_human_datetime(self.fetch_values_from)
else:
from_dttm = datetime(1970, 1, 1)
qry = dict(
datasource=self.datasource_name,
granularity="all",
intervals=from_dttm.isoformat() + "/" + datetime.now().isoformat(),
aggregations=dict(count=count("count")),
dimension=column_name,
metric="count",
threshold=limit,
)
client = self.cluster.get_pydruid_client()
client.topn(**qry)
df = client.export_pandas()
return df[column_name].to_list()
def values_for_column(self,
column_name,
from_dttm,
to_dttm,
limit=500):
"""Retrieve some values for the given column"""
# TODO: Use Lexicographic TopNMeticSpec onces supported by PyDruid
from_dttm = from_dttm.replace(tzinfo=config.get("DRUID_TZ"))
to_dttm = to_dttm.replace(tzinfo=config.get("DRUID_TZ"))
qry = dict(
datasource=self.datasource_name,
granularity="all",
intervals=from_dttm.isoformat() + '/' + to_dttm.isoformat(),
aggregations=dict(count=count("count")),
dimension=column_name,
metric="count",
threshold=limit,
)
client = self.cluster.get_pydruid_client()
client.topn(**qry)
df = client.export_pandas()
if df is None or df.size == 0:
raise Exception(_("No data was returned."))
return df
def values_for_column(self,
column_name,
from_dttm,
to_dttm,
limit=500):
"""Retrieve some values for the given column"""
# TODO: Use Lexicographic TopNMetricSpec once supported by PyDruid
from_dttm = from_dttm.replace(tzinfo=config.get("DRUID_TZ"))
to_dttm = to_dttm.replace(tzinfo=config.get("DRUID_TZ"))
qry = dict(
datasource=self.datasource_name,
granularity="all",
intervals=from_dttm.isoformat() + '/' + to_dttm.isoformat(),
aggregations=dict(count=count("count")),
dimension=column_name,
metric="count",
threshold=limit,
)
client = self.cluster.get_pydruid_client()
client.topn(**qry)
df = client.export_pandas()
if df is None or df.size == 0:
raise Exception(_("No data was returned."))
return df