def _create_udf():
  inputs = [('field1', 'string'), ('field2', 'integer')]
  outputs = [('output1', 'integer'), ('output2', 'string')]
  impl = 'function(r,emit) { emit({output1: r.field2, output2: r.field1 }); }'
  udf = datalab.bigquery.UDF(inputs, outputs, 'foo', impl)
  return udf
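def _example_udf_usage():
  # Hedged usage sketch, not part of the original suite: attach the UDF built
  # by _create_udf to a query via the udf= keyword, the same pattern that
  # test_udf_expansion below exercises. The UDF is named 'foo', so the SQL
  # invokes foo(...); 'my_dataset.my_table' is a hypothetical table name.
  udf = TestCases._create_udf()
  return datalab.bigquery.Query('SELECT * FROM foo(my_dataset.my_table)',
                                udf=udf, context=TestCases._create_context())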
def test_parse_named_tuple_name(self):
  dataset = TestCases._create_dataset(datalab.bigquery._utils.DatasetName('test', 'requestlogs'))
  self._check_name_parts(dataset)
def test_udf_expansion(self):
  sql = 'SELECT * FROM udf(source)'
  udf = datalab.bigquery.UDF('inputs', [('foo', 'string'), ('bar', 'integer')], 'udf', 'code')
  context = TestCases._create_context()
  query = datalab.bigquery.Query(sql, udf=udf, context=context)
  self.assertEqual('SELECT * FROM (SELECT foo, bar FROM udf(source))', query.sql)

  # Alternate form: pass a list of UDFs via the udfs= keyword.
  query = datalab.bigquery.Query(sql, udfs=[udf], context=context)
  self.assertEqual('SELECT * FROM (SELECT foo, bar FROM udf(source))', query.sql)
def test_view_result(self, mock_api_tables_get, mock_api_jobs_get, mock_api_jobs_query_results,
                     mock_api_insert_query, mock_api_tabledata_list, mock_api_tables_insert):
  # Stub out every API call the View will make while creating and querying.
  mock_api_insert_query.return_value = TestCases._create_insert_done_result()
  mock_api_tables_insert.return_value = TestCases._create_tables_insert_success_result()
  mock_api_jobs_query_results.return_value = {'jobComplete': True}
  mock_api_tables_get.return_value = TestCases._create_tables_get_result()
  mock_api_jobs_get.return_value = {'status': {'state': 'DONE'}}
  mock_api_tabledata_list.return_value = TestCases._create_single_row_result()

  name = 'test:testds.testView0'
  sql = 'select * from test:testds.testTable0'
  view = datalab.bigquery.View(name, TestCases._create_context())
  view.create(sql)
  results = view.results()

  self.assertEqual(1, results.length)
  first_result = results[0]
  self.assertEqual('value1', first_result['field1'])
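def _example_view_usage():
  # Hedged sketch (assumption, not in the original suite): the same View flow
  # as test_view_result but without mocks. All names are hypothetical;
  # create(sql) defines the view and results() runs its query.
  view = datalab.bigquery.View('myproject:mydataset.myview')
  view.create('SELECT field1 FROM mydataset.mytable')
  return view.results()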
          'legacy' : Use BigQuery's legacy SQL dialect.
          'standard' : Use BigQuery's standard SQL (beta), which is
              compliant with the SQL 2011 standard.
      billing_tier: Limits the billing tier for this job. Queries that have
          resource usage beyond this tier will fail (without incurring a
          charge). If unspecified, this will be set to your project default.
          This can also be used to override your project-wide default billing
          tier on a per-query basis.
    Returns:
      A parsed result object.
    Raises:
      Exception if there is an error performing the operation.
    """
    url = Api._ENDPOINT + (Api._JOBS_PATH % (self._project_id, ''))
    if dialect is None:
      dialect = datalab.bigquery.Dialect.default().bq_dialect
    data = {
      'kind': 'bigquery#job',
      'configuration': {
        'query': {
          'query': sql,
          'useQueryCache': use_cache,
          'allowLargeResults': allow_large_results,
          'useLegacySql': dialect == 'legacy'
        },
        'dryRun': dry_run,
        'priority': 'BATCH' if batch else 'INTERACTIVE',
      },
    }
    query_config = data['configuration']['query']
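# Worked example (illustration only, not library code): the dialect string
# chosen above maps directly onto the useLegacySql flag in the request payload.
for dialect in ('legacy', 'standard'):
  payload = {'configuration': {'query': {'useLegacySql': dialect == 'legacy'}}}
  print(dialect, '->', payload['configuration']['query']['useLegacySql'])
# legacy -> True
# standard -> False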
def sample(self, n):
  """Samples data into a Pandas DataFrame. Note that it calls BigQuery, so it
  will incur cost.

  Args:
    n: number of rows to sample. Note that the number of rows returned is
        approximate.
  Returns:
    A DataFrame containing the sampled data.
  Raises:
    ValueError if n is larger than the number of rows in the source.
  """
  source = self._query or self._table
  total = list(bq.Query('select count(*) from (%s)' % source).results()[0].values())[0]
  if n > total:
    raise ValueError('sample larger than population')
  sampling = bq.Sampling.random(n * 100.0 / float(total))
  sample = bq.Query(source).sample(sampling=sampling)
  df = sample.to_dataframe()
  return df
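# Worked example (illustration only): for a hypothetical source with
# total = 2,000,000 rows and n = 1,000 requested rows, the sampling argument
# computed above is n * 100.0 / total = 0.05 percent, so
# bq.Sampling.random(0.05) keeps roughly 1,000 rows.
n, total = 1000, 2000000
print(n * 100.0 / float(total))  # 0.05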
def _resolve_table(v, format, delta):
  try:
    when = _date(v, delta)
    v = time.strftime(format, when.timetuple())
  except Exception:
    pass  # v is not a date value; treat it as a literal table name
  return datalab.bigquery.Table(v)
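# Hedged sketch of the date-substitution pattern above, using only the stdlib.
# _date is datalab-internal; datetime.strptime stands in for it here (an
# assumption), and the '20160101' value and 'logs_%Y%m%d' format are
# hypothetical.
import time
from datetime import datetime

def _example_resolve_name(v, format):
  try:
    when = datetime.strptime(v, '%Y%m%d')
    v = time.strftime(format, when.timetuple())
  except Exception:
    pass
  return v

print(_example_resolve_name('20160101', 'logs_%Y%m%d'))  # logs_20160101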
def _set_bq_dialect(bq_dialect):
  datalab.bigquery.Dialect.default().set_bq_dialect(bq_dialect)
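# Usage sketch (assumption): switch the process-wide default to standard SQL,
# using the 'legacy'/'standard' values documented above. Queries built without
# an explicit dialect then pick this up via Dialect.default().bq_dialect, as
# jobs_insert_query does above.
_set_bq_dialect('standard')
assert datalab.bigquery.Dialect.default().bq_dialect == 'standard'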