How to use the datalab.bigquery module in datalab

To help you get started, we’ve selected a few datalab.bigquery examples based on popular ways it is used in public projects.


github googledatalab / pydatalab / legacy_tests / bigquery / udf_tests.py
def _create_udf():
    # Builds a JavaScript UDF named 'foo' that swaps its two input fields.
    inputs = [('field1', 'string'), ('field2', 'integer')]
    outputs = [('output1', 'integer'), ('output2', 'string')]
    impl = 'function(r,emit) { emit({output1: r.field2, output2: r.field1 }); }'
    udf = datalab.bigquery.UDF(inputs, outputs, 'foo', impl)
    return udf
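In a notebook, the same constructor feeds straight into a query. A minimal sketch, assuming a default context is configured and using a hypothetical UDF name and table:

import datalab.bigquery as bq

inputs = [('field1', 'string'), ('field2', 'integer')]
outputs = [('output1', 'integer'), ('output2', 'string')]
impl = 'function(r,emit) { emit({output1: r.field2, output2: r.field1 }); }'

# 'swap' and mydataset.mytable are placeholders; Query expands the UDF call
# it finds in the SQL (see the query_tests.py example below).
udf = bq.UDF(inputs, outputs, 'swap', impl)
query = bq.Query('SELECT * FROM swap([mydataset.mytable])', udf=udf)
results = query.results()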
github googledatalab / pydatalab / legacy_tests / bigquery / dataset_tests.py
def test_parse_named_tuple_name(self):
    dataset = TestCases._create_dataset(datalab.bigquery._utils.DatasetName('test', 'requestlogs'))
    self._check_name_parts(dataset)
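The test name tells us DatasetName is a named tuple pairing a project id with a dataset id. A minimal sketch of constructing one directly (hedged: _utils is a private module, so this can break across pydatalab versions):

import datalab.bigquery

# 'test' is the project id, 'requestlogs' the dataset id.
name = datalab.bigquery._utils.DatasetName('test', 'requestlogs')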
github googledatalab / pydatalab / legacy_tests / bigquery / query_tests.py
def test_udf_expansion(self):
    sql = 'SELECT * FROM udf(source)'
    udf = datalab.bigquery.UDF('inputs', [('foo', 'string'), ('bar', 'integer')], 'udf', 'code')
    context = TestCases._create_context()
    query = datalab.bigquery.Query(sql, udf=udf, context=context)
    self.assertEqual('SELECT * FROM (SELECT foo, bar FROM udf(source))', query.sql)

    # Alternate form
    query = datalab.bigquery.Query(sql, udfs=[udf], context=context)
    self.assertEqual('SELECT * FROM (SELECT foo, bar FROM udf(source))', query.sql)
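The assertions make the design visible: Query rewrites the udf(source) call into a subselect that projects the UDF's declared output columns. A minimal sketch of inspecting that expansion, reusing the placeholder arguments from the test and assuming a default context:

import datalab.bigquery as bq

udf = bq.UDF('inputs', [('foo', 'string'), ('bar', 'integer')], 'udf', 'code')
query = bq.Query('SELECT * FROM udf(source)', udf=udf)
print(query.sql)  # SELECT * FROM (SELECT foo, bar FROM udf(source))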
github googledatalab / pydatalab / legacy_tests / bigquery / view_tests.py
def test_view_result(self, mock_api_tables_get, mock_api_jobs_get, mock_api_jobs_query_results,
                       mock_api_insert_query, mock_api_tabledata_list, mock_api_tables_insert):

    mock_api_insert_query.return_value = TestCases._create_insert_done_result()
    mock_api_tables_insert.return_value = TestCases._create_tables_insert_success_result()
    mock_api_jobs_query_results.return_value = {'jobComplete': True}
    mock_api_tables_get.return_value = TestCases._create_tables_get_result()
    mock_api_jobs_get.return_value = {'status': {'state': 'DONE'}}
    mock_api_tabledata_list.return_value = TestCases._create_single_row_result()

    name = 'test:testds.testView0'
    sql = 'select * from test:testds.testTable0'
    view = datalab.bigquery.View(name, TestCases._create_context())
    view.create(sql)
    results = view.results()

    self.assertEqual(1, results.length)
    first_result = results[0]
    self.assertEqual('value1', first_result['field1'])
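Stripped of the API mocks, the flow is: construct a View by its qualified name, create it from a SQL definition, then query it. A minimal sketch with hypothetical names, assuming the explicit context argument can be omitted when a default context is configured:

import datalab.bigquery as bq

view = bq.View('myproject:mydataset.myview')
view.create('SELECT * FROM myproject:mydataset.mytable')

results = view.results()   # runs a query against the view
print(results.length)      # row count, as asserted in the test
first_row = results[0]     # rows are indexable and expose fields by name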
github googledatalab / pydatalab / datalab / bigquery / _api.py
      dialect : {'legacy', 'standard'}
          'legacy' : Use BigQuery's legacy SQL dialect.
          'standard' : Use BigQuery's standard SQL (beta), which is
              compliant with the SQL 2011 standard.
      billing_tier: Limits the billing tier for this job. Queries that have resource
          usage beyond this tier will fail (without incurring a charge). If unspecified, this
          will be set to your project default. This can also be used to override your
          project-wide default billing tier on a per-query basis.
    Returns:
      A parsed result object.
    Raises:
      Exception if there is an error performing the operation.
    """
    url = Api._ENDPOINT + (Api._JOBS_PATH % (self._project_id, ''))

    if dialect is None:
        dialect = datalab.bigquery.Dialect.default().bq_dialect

    data = {
        'kind': 'bigquery#job',
        'configuration': {
            'query': {
                'query': sql,
                'useQueryCache': use_cache,
                'allowLargeResults': allow_large_results,
                'useLegacySql': dialect == 'legacy'
            },
            'dryRun': dry_run,
            'priority': 'BATCH' if batch else 'INTERACTIVE',
        },
    }

    query_config = data['configuration']['query']
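To make the mapping concrete, this is the payload the code above builds for a hypothetical batch dry run of 'SELECT 1' with dialect='standard', use_cache=True, and allow_large_results=False:

data = {
    'kind': 'bigquery#job',
    'configuration': {
        'query': {
            'query': 'SELECT 1',
            'useQueryCache': True,       # use_cache=True
            'allowLargeResults': False,  # allow_large_results=False
            'useLegacySql': False        # dialect == 'standard'
        },
        'dryRun': True,                  # dry_run=True
        'priority': 'BATCH',             # batch=True
    },
}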
github googledatalab / pydatalab / datalab / ml / _dataset.py
def sample(self, n):
    """Samples data into a Pandas DataFrame. Note that it calls BigQuery so it will
       incur cost.
    Args:
      n: number of sampled counts. Note that the number of counts returned is approximated.
    Returns:
      A dataframe containing sampled data.
    Raises:
      Exception if n is larger than number of rows.
    """
    source = self._query or self._table
    # dict_values is not indexable on Python 3, so materialize it first.
    count_row = bq.Query('select count(*) from (%s)' % source).results()[0]
    total = list(count_row.values())[0]
    if n > total:
      raise ValueError('sample larger than population')
    sampling = bq.Sampling.random(n*100.0/float(total))
    sample = bq.Query(source).sample(sampling=sampling)
    df = sample.to_dataframe()
    return df
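The same primitives work outside of DataSet. A minimal sketch against a hypothetical table; Sampling.random takes the percentage of rows to keep, as the n*100.0/total expression above shows:

import datalab.bigquery as bq

sampling = bq.Sampling.random(1.0)   # keep roughly 1% of rows
sample = bq.Query('SELECT * FROM [mydataset.mytable]').sample(sampling=sampling)
df = sample.to_dataframe()           # materialize the sample as a Pandas DataFrame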
github googledatalab / pydatalab / datalab / data / commands / _sql.py
def _resolve_table(v, format, delta):
  try:
    # If v parses as a date, expand any strftime placeholders in the name.
    when = _date(v, delta)
    v = time.strftime(format, when.timetuple())
  except Exception:
    pass  # not a date; use the name unchanged
  return datalab.bigquery.Table(v)
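A hedged sketch of the behavior this enables: a table reference containing strftime placeholders resolves to a concrete daily table. The equivalent by hand, with hypothetical names:

import time
import datalab.bigquery

# What _resolve_table does once _date() has produced the anchor date.
when = time.localtime()
table = datalab.bigquery.Table(time.strftime('mydataset.logs_%Y%m%d', when))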
github googledatalab / pydatalab / datalab / kernel / __init__.py
def _set_bq_dialect(bq_dialect):
    datalab.bigquery.Dialect.default().set_bq_dialect(bq_dialect)
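This kernel hook simply delegates to the Dialect singleton, which the _api.py excerpt above falls back to when no dialect is passed. A minimal sketch of flipping the session-wide default:

import datalab.bigquery as bq

bq.Dialect.default().set_bq_dialect('standard')  # later queries use standard SQL
print(bq.Dialect.default().bq_dialect)           # 'standard'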