How to use the starthinker.util.google_api.API_BigQuery function in starthinker

To help you get started, we've selected a few examples based on popular ways API_BigQuery is used in public projects. All of the snippets below come from starthinker/util/bigquery/__init__.py in google/starthinker.

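API_BigQuery wraps the BigQuery discovery client: you chain resource and method calls exactly as on a googleapiclient service object, then finish with execute(). As the examples below show, execute() also accepts run=False (return the underlying request without sending it) and iterate=False (return the raw response rather than an iterator). A minimal sketch of the pattern, with hypothetical identifiers:

from starthinker.util.google_api import API_BigQuery

auth = 'user'  # assumption: a starthinker auth mode, typically 'user' or 'service'

# chain resource and method calls, then execute()
table = API_BigQuery(auth).tables().get(
  projectId='my-project',  # hypothetical project, dataset, and table
  datasetId='my_dataset',
  tableId='my_table'
).execute()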

Example 1: google/starthinker, starthinker/util/bigquery/__init__.py

  # excerpt: the snippet opens inside a load-job request body; the closing
  # braces imply this nesting, with earlier fields truncated
  body = {
    'configuration': {
      'load': {
        # ... destination table and other load fields truncated in this excerpt ...
        'sourceUris': [
          'gs://%s' % path.replace(':', '/'),
        ],
      }
    }
  }

  if schema:
    body['configuration']['load']['schema'] = { 'fields':schema }
    body['configuration']['load']['autodetect'] = False

  if structure == 'CSV':
    body['configuration']['load']['sourceFormat'] = 'CSV'
    body['configuration']['load']['skipLeadingRows'] = skip_rows

  job = API_BigQuery(auth).jobs().insert(projectId=project_id, body=body).execute()
  if wait:
    try: job_wait(auth, job)
    except Exception as e: print('BIGQUERY SKIPPING: %s, %s' % (path, str(e)))
  else:
    return job
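This excerpt shows the basic load-from-Cloud-Storage pattern: build a load configuration pointing at a gs:// URI, submit it with jobs().insert(), and either block on completion (wait=True, which logs and swallows failures and returns None) or hand the raw job dict back to the caller to wait on later.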

Example 2: google/starthinker, starthinker/util/bigquery/__init__.py

    # excerpt: the snippet opens inside a load-job request body; earlier
    # fields are truncated
    body = {
      'configuration': {
        'load': {
          # ... destination table, source format, and disposition fields
          #     truncated in this excerpt ...
          'ignoreUnknownValues': True,
        }
      }
    }
 
    if schema:
      body['configuration']['load']['schema'] = { 'fields':schema }
      body['configuration']['load']['autodetect'] = False

    if disposition == 'WRITE_APPEND':
      body['configuration']['load']['autodetect'] = False

    if source_format == 'CSV':
      body['configuration']['load']['skipLeadingRows'] = skip_rows

    # execute(run=False) returns the underlying request object so the caller
    # can drive the resumable media upload itself via next_chunk()
    job = API_BigQuery(auth).jobs().insert(projectId=project.id, body=body, media_body=media).execute(run=False)

    response = None
    while response is None:
      status, response = job.next_chunk()
      if project.verbose and status: print("Uploaded %d%%." % int(status.progress() * 100))
    if project.verbose: print("Uploaded 100%")

    # pass the completed job resource to job_wait rather than re-executing
    # the insert, which would start the upload over
    if wait: job_wait(auth, response)
    else: return response

  # no rows to load: if disposition is WRITE_TRUNCATE, clear the table instead
  elif disposition == 'WRITE_TRUNCATE':
    if project.verbose: print("BIGQUERY: No data, clearing table.")

    body = {
      "tableReference": {
        # ... remaining fields, and the call that recreates the empty
        #     table, are truncated in this excerpt ...
      }
    }

Example 3: google/starthinker, starthinker/util/bigquery/__init__.py

  if not billing_project_id:
    billing_project_id = project_id

  query = ('DROP TABLE `'
    + project_id + '.' + dataset_id + '.' + table_id + '`')

  body = {
    "kind": "bigquery#queryRequest",
    'query': query,
    'defaultDataset': {
      'datasetId' : dataset_id,
    },
    'useLegacySql': False,
  }

  job_wait(auth, API_BigQuery(auth).jobs().query(projectId=billing_project_id, body=body).execute())
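Because the DROP TABLE statement is assembled by string concatenation, the project, dataset, and table identifiers must come from trusted input. The same jobs().query pattern works for any standard-SQL statement; a minimal sketch, reusing the auth and billing project above with a hypothetical DDL statement:

body = {
  'kind': 'bigquery#queryRequest',
  'query': 'CREATE TABLE `my-project.my_dataset.new_table` (id INT64)',  # hypothetical
  'useLegacySql': False,
}
job_wait(auth, API_BigQuery(auth).jobs().query(projectId=billing_project_id, body=body).execute())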

Example 4: google/starthinker, starthinker/util/bigquery/__init__.py

import sys
from time import sleep

# import paths as used throughout the starthinker repo
from starthinker.util.google_api import API_BigQuery, API_Retry
from starthinker.util.project import project


def job_wait(auth, job):
  if job:
    if project.verbose: print('BIGQUERY JOB WAIT:', job['jobReference']['jobId'])

    request = API_BigQuery(auth).jobs().get(
      projectId=job['jobReference']['projectId'],
      jobId=job['jobReference']['jobId']
    )
  
    while True:
      sleep(5)
      if project.verbose: print('.', end='')
      sys.stdout.flush()
      result = API_Retry(request)
      if 'errors' in result['status']:
        raise Exception('BigQuery Job Error: %s' % ' '.join([e['message'] for e in result['status']['errors']]))
      elif 'errorResult' in result['status']: 
        raise Exception('BigQuery Job Error: %s' % result['status']['errorResult']['message'])
      elif result['status']['state'] == 'DONE':
        if project.verbose: print('JOB COMPLETE:', result['id'])
        break
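A typical pairing, shown here as a hypothetical sketch: insert a job without waiting, then block on it with job_wait when the result is actually needed.

job = API_BigQuery(auth).jobs().insert(projectId=project_id, body=body).execute()
# ... other work ...
job_wait(auth, job)  # polls every 5 seconds; raises on any job error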

Example 5: google/starthinker, starthinker/util/bigquery/__init__.py

  body = {
    "configuration": {  # jobs().insert expects the copy spec nested under 'configuration'
      "copy": {
        "sourceTable": {
          "projectId": from_project,
          "datasetId": from_dataset,
          "tableId": from_table
        },
        "destinationTable": {
          "projectId": to_project,
          "datasetId": to_dataset,
          "tableId": to_table
        }
      }
    }
  }

  job_wait(auth, API_BigQuery(auth).jobs().insert(projectId=project.id, body=body).execute())
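Copy jobs are asynchronous like load jobs, so the example hands the inserted job straight to job_wait. The projectId passed to jobs().insert is the project the job runs (and is billed) under, which need not match the source or destination tables' projects.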

Example 6: google/starthinker, starthinker/util/bigquery/__init__.py

def datasets_access(auth, project_id, dataset_id, role='READER', emails=[], groups=[], views=[]):

  if emails or groups or views:
    access = API_BigQuery(auth).datasets().get(projectId=project_id, datasetId=dataset_id).execute()["access"]

    # grant the role to individual users
    for email in emails:
      access.append({
        "userByEmail":email,
        "role":role,
      })

    # grant the role to Google groups
    for group in groups:
      access.append({
        "groupByEmail":group,
        "role":role,
      })

    # grant the role to authorized views
    for view in views:
      # ... truncated in this excerpt: the view entries are appended here,
      #     and the amended `access` list is then written back to the dataset ...
      pass
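A hypothetical call, granting read access to one user and one group (all identifiers invented for illustration):

datasets_access(
  'service',      # auth mode
  'my-project',   # hypothetical project
  'my_dataset',   # hypothetical dataset
  role='READER',
  emails=['analyst@example.com'],
  groups=['team@example.com'],
)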

Example 7: google/starthinker, starthinker/util/bigquery/__init__.py

"dryRun": False,
    "useQueryCache": True,
    "useLegacySql": legacy
  }

  if row_max: body['maxResults'] = row_max

  if dataset_id:
    body['defaultDataset'] = {
      "projectId": project_id,
      "datasetId": dataset_id
    }

  # wait for query to complete

  response = API_BigQuery(auth).jobs().query(projectId=project_id, body=body).execute()
  while not response['jobComplete']:
    sleep(5)
    response = API_BigQuery(auth).jobs().getQueryResults(projectId=project_id, jobId=response['jobReference']['jobId']).execute(iterate=False)

  # fetch query results

  row_count = 0
  while 'rows' in response:
    converters = _build_converter_array(response.get('schema', None), None, len(response['rows'][0].get('f')))

    for row in response['rows']:
      yield [converters[i](next(iter(r.values()))) for i, r in enumerate(row['f'])] # may break if we attempt nested reads
      row_count += 1

    # the API returns the continuation key as 'pageToken'; break once the
    # last page is consumed, otherwise this loop never terminates
    if 'pageToken' in response:
      response = API_BigQuery(auth).jobs().getQueryResults(projectId=project_id, jobId=response['jobReference']['jobId'], pageToken=response['pageToken']).execute(iterate=False)
    else:
      break
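The generator yields one list per row, passing each field through a schema-aware converter. The enclosing function's signature is truncated in this excerpt; it appears to correspond to starthinker's query_to_rows, so a consumption loop would look roughly like this (name, signature, and identifiers assumed):

for row in query_to_rows('user', 'my-project', 'my_dataset', 'SELECT * FROM `my_dataset.my_table`'):
  print(row)  # a list of converted field values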

Example 8: google/starthinker, starthinker/util/bigquery/__init__.py

def table_get(auth, project_id, dataset_id, table_id):
  return API_BigQuery(auth).tables().get(projectId=project_id, datasetId=dataset_id, tableId=table_id).execute()

Example 9: google/starthinker, starthinker/util/bigquery/__init__.py

def table_to_schema(auth, project_id, dataset_id, table_id):
  if project.verbose: print('TABLE SCHEMA:', project_id, dataset_id, table_id)
  return API_BigQuery(auth).tables().get(projectId=project_id, datasetId=dataset_id, tableId=table_id).execute()['schema']
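
Both getters are thin wrappers over tables().get(); table_to_schema simply plucks the 'schema' field from the table resource. Hypothetical usage:

table = table_get('user', 'my-project', 'my_dataset', 'my_table')
fields = table_to_schema('user', 'my-project', 'my_dataset', 'my_table')['fields']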

Example 10: google/starthinker, starthinker/util/bigquery/__init__.py

def table_create(auth, project_id, dataset_id, table_id, is_time_partition=False):

  body = {
    "tableReference": {
      "projectId": project_id,
      "tableId": table_id,
      "datasetId": dataset_id,
    }
  }

  if is_time_partition:
    body['timePartitioning'] = {
      "type": "DAY"
    }

  API_BigQuery(auth).tables().insert(projectId=project_id, datasetId=dataset_id, body=body).execute()
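
A hypothetical call creating a day-partitioned table; note the body carries no schema, so the table is created without one:

table_create('service', 'my-project', 'my_dataset', 'my_partitioned_table', is_time_partition=True)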