# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if agg is not None:
data = data.groupby(group_col + [x_col])
data = getattr(data, agg)().reset_index()
max_groups = 15
if len(data[group_col].drop_duplicates()) > max_groups:
msg = (
'Group ({}) contains more than {} unique values, please add additional filter'
' or else chart will be unreadable'
).format(', '.join(group_col), max_groups)
raise Exception(msg)
f = grid_formatter(
grid_columns(data[[x_col, y_col]]), overrides={'D': lambda f, i, c: f.add_timestamp(i, c)}, nan_display=None
)
y_fmt = next((fmt for _, name, fmt in f.fmts if name == y_col), None)
ret_data = dict(data={}, min=y_fmt(data[y_col].min(), None), max=y_fmt(data[y_col].max(), None))
dtypes = get_dtypes(data)
group_fmts = {c: find_dtype_formatter(dtypes[c]) for c in group_col}
for group_val, grp in data.groupby(group_col):
group_val = '/'.join([
group_fmts[gc](gv) for gv, gc in zip(make_list(group_val), group_col)
])
ret_data['data'][group_val] = f.format_lists(grp)
return ret_data
data = data[[x, y]].sort_values(x)
data.columns = [x_col, y_col]
if agg is not None:
data = data.groupby(x_col)
data = getattr(data, agg)().reset_index()
if any(data[x_col].duplicated()):
raise Exception('{} contains duplicates, please specify group or additional filtering'.format(x))
f = grid_formatter(
def build_dtypes_state(data):
"""
Helper function to build globally managed state pertaining to a D-Tale instances columns & data types
:param data: dataframe to build data type information for
:type data: :class:`pandas:pandas.DataFrame`
:return: a list of dictionaries containing column names, indexes and data types
"""
dtypes = get_dtypes(data)
mins = data.min().to_dict()
maxs = data.max().to_dict()
def _format_dtype(col_index, col):
dtype = dtypes[col]
dtype_data = dict(name=col, dtype=dtype, index=col_index)
if classify_type(dtype) == 'F' and not data[col].isnull().all(): # floats
dtype_data['min'] = mins[col]
dtype_data['max'] = maxs[col]
return dtype_data
return [_format_dtype(i, c) for i, c in enumerate(data.columns)]
# }    (stray brace — extraction residue, not part of any visible definition)
# """  (stray docstring delimiter — extraction residue)
try:
data = DATA[data_id]
additional_aggs = None
dtype = next((dtype_info['dtype'] for dtype_info in DTYPES[data_id] if dtype_info['name'] == column), None)
if classify_type(dtype) in ['I', 'F']:
additional_aggs = ['sum', 'median', 'mode', 'var', 'sem', 'skew', 'kurt']
desc = load_describe(data[column], additional_aggs=additional_aggs)
return_data = dict(describe=desc, success=True)
uniq_vals = data[column].unique()
if 'unique' not in return_data['describe']:
return_data['describe']['unique'] = json_int(len(uniq_vals), as_string=True)
if len(uniq_vals) <= 100:
uniq_f = find_dtype_formatter(get_dtypes(data)[column])
return_data['uniques'] = dict(
data=[uniq_f(u, nan_display='N/A') for u in uniq_vals],
top=False
)
else: # get top 100 most common values
uniq_vals = data[column].value_counts().sort_values(ascending=False).head(100).index.values
uniq_f = find_dtype_formatter(get_dtypes(data)[column])
return_data['uniques'] = dict(
data=[uniq_f(u, nan_display='N/A') for u in uniq_vals],
top=True
)
return jsonify(return_data)
except BaseException as e:
return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))