Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _view(args, cell):
    """Show the leading rows of a CSV source as a table or as a profile.

    Args:
      args: mapping of options; the 'input', 'count' and 'profile' entries are read.
      cell: optional cell text. When non-empty it is parsed with
          _utils.parse_config, and a 'columns' entry (comma-separated names) is
          used as the header list passed to the CSV browse call.

    Returns:
      The result of _utils.profile_df(...) when args['profile'] is truthy,
      otherwise an IPython HTML object holding the table rendered without its
      index column.
    """
    source = datalab.data.Csv(args['input'])
    # A falsy 'count' (None, 0, '') falls back to five rows.
    row_limit = int(args['count'] or 5)

    column_names = None
    if cell:
        settings = _utils.parse_config(cell, IPython.get_ipython().user_ns)
        if 'columns' in settings:
            column_names = [name.strip() for name in settings['columns'].split(',')]

    frame = pd.DataFrame(source.browse(row_limit, column_names))

    if not args['profile']:
        return IPython.core.display.HTML(frame.to_html(index=False))

    # TODO(gram): We need to generate a schema and type-convert the columns before this
    # will be useful for CSV
    return _utils.profile_df(frame)
first_row/count parameters).
Raises:
Exception if the request could not be fulfilled.
"""
ipy = IPython.get_ipython()
if env is None:
env = {}
env.update(ipy.user_ns)
if isinstance(source, basestring):
source = datalab.utils.get_item(ipy.user_ns, source, source)
if isinstance(source, basestring):
source = datalab.bigquery.Table(source)
if isinstance(source, types.ModuleType) or isinstance(source, datalab.data.SqlStatement):
source = datalab.bigquery.Query(source, values=env)
if isinstance(source, list):
if len(source) == 0:
return _get_data_from_empty_list(source, fields, first_row, count, schema)
elif isinstance(source[0], dict):
return _get_data_from_list_of_dicts(source, fields, first_row, count, schema)
elif isinstance(source[0], list):
return _get_data_from_list_of_lists(source, fields, first_row, count, schema)
else:
raise Exception("To get tabular data from a list it must contain dictionaries or lists.")
elif isinstance(source, pandas.DataFrame):
return _get_data_from_dataframe(source, fields, first_row, count, schema)
elif (isinstance(source, google.datalab.bigquery.Query) or
isinstance(source, google.datalab.bigquery.Table)):
return google.datalab.utils.commands._utils.get_data(
else:
define_wild_match = define_wild_re.match(line)
if define_wild_match:
raise Exception('Expected "DEFINE QUERY "')
if last_def >= 0:
# We were in a query so save this tail query.
query = '\n'.join([line for line in lines[last_def:] if len(line)]).strip()
statement = datalab.data.SqlStatement(query, module)
module.__dict__[name] = statement
module.__dict__[datalab.data._utils._SQL_MODULE_LAST] = statement
if code is None:
code = ''
module.__dict__[datalab.data._utils._SQL_MODULE_ARGPARSE] = _arguments(code, module)
return module.__dict__.get(datalab.data._utils._SQL_MODULE_LAST, None)
context = datalab.context.Context.default()
self._context = context
self._api = _api.Api(context)
self._data_sources = data_sources
self._udfs = udfs
if data_sources is None:
data_sources = {}
self._results = None
self._code = None
self._imports = []
if values is None:
values = kwargs
self._sql = datalab.data.SqlModule.expand(sql, values)
# We need to take care not to include the same UDF code twice so we use sets.
udfs = set(udfs if udfs else [])
for value in list(values.values()):
if isinstance(value, _udf.UDF):
udfs.add(value)
included_udfs = set([])
tokens = datalab.data.tokenize(self._sql)
udf_dict = {udf.name: udf for udf in udfs}
for i, token in enumerate(tokens):
# Find the preceding and following non-whitespace tokens
prior = i - 1
while prior >= 0 and tokens[prior].isspace():
prior -= 1
if define_match or select_match or standard_sql_match:
# If this is the first query, get the preceding Python code.
if code is None:
code = ('\n'.join(lines[:i])).strip()
if len(code):
code += '\n'
elif last_def >= 0:
# This is not the first query, so gather the previous query text.
query = '\n'.join([line for line in lines[last_def:i] if len(line)]).strip()
if select_match and name != datalab.data._utils._SQL_MODULE_MAIN and len(query) == 0:
# Avoid DEFINE query name\nSELECT ... being seen as an empty DEFINE followed by SELECT
continue
# Save the query
statement = datalab.data.SqlStatement(query, module)
module.__dict__[name] = statement
# And set the 'last' query to be this too
module.__dict__[datalab.data._utils._SQL_MODULE_LAST] = statement
# Get the query name and strip off our syntactic sugar if appropriate.
if define_match:
name = define_match.group(1)
lines[i] = define_match.group(2)
else:
name = datalab.data._utils._SQL_MODULE_MAIN
# Save the starting line index of the new query
last_def = i
else:
define_wild_match = define_wild_re.match(line)
if define_wild_match:
name = define_match.group(1)
lines[i] = define_match.group(2)
else:
name = datalab.data._utils._SQL_MODULE_MAIN
# Save the starting line index of the new query
last_def = i
else:
define_wild_match = define_wild_re.match(line)
if define_wild_match:
raise Exception('Expected "DEFINE QUERY "')
if last_def >= 0:
# We were in a query so save this tail query.
query = '\n'.join([line for line in lines[last_def:] if len(line)]).strip()
statement = datalab.data.SqlStatement(query, module)
module.__dict__[name] = statement
module.__dict__[datalab.data._utils._SQL_MODULE_LAST] = statement
if code is None:
code = ''
module.__dict__[datalab.data._utils._SQL_MODULE_ARGPARSE] = _arguments(code, module)
return module.__dict__.get(datalab.data._utils._SQL_MODULE_LAST, None)