'sourceUris': [
'gs://%s' % path.replace(':', '/'),
],
}
}
}
if schema:
body['configuration']['load']['schema'] = { 'fields':schema }
body['configuration']['load']['autodetect'] = False
if structure == 'CSV':
body['configuration']['load']['sourceFormat'] = 'CSV'
body['configuration']['load']['skipLeadingRows'] = skip_rows
job = API_BigQuery(auth).jobs().insert(projectId=project_id, body=body).execute()
if wait:
try: job_wait(auth, job)
except Exception as e: print('BIGQUERY SKIPPING: %s, %s' % (path, str(e)))
else:
return job
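# A sketch of what the full load-job body above plausibly looks like once the truncated
# head of the dict is included. dataset_id, table_id, and disposition are assumed
# parameters of the enclosing function; the field names follow the documented
# BigQuery jobs.insert load configuration.
example_load_body = {
  'configuration': {
    'load': {
      'destinationTable': {
        'projectId': project_id,
        'datasetId': dataset_id,
        'tableId': table_id,
      },
      'sourceFormat': 'CSV',
      'writeDisposition': disposition,   # e.g. 'WRITE_TRUNCATE' or 'WRITE_APPEND'
      'skipLeadingRows': skip_rows,
      'autodetect': True,                # switched to False when an explicit schema is set
      'sourceUris': [
        'gs://%s' % path.replace(':', '/'),
      ],
    }
  }
}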
'ignoreUnknownValues': True,
}
}
}
if schema:
body['configuration']['load']['schema'] = { 'fields':schema }
body['configuration']['load']['autodetect'] = False
if disposition == 'WRITE_APPEND':
body['configuration']['load']['autodetect'] = False
if source_format == 'CSV':
body['configuration']['load']['skipLeadingRows'] = skip_rows
# run=False defers execution so the resumable upload can be streamed with next_chunk()
# below; media is a resumable MediaIoBaseUpload (see the sketch after this fragment)
job = API_BigQuery(auth).jobs().insert(projectId=project.id, body=body, media_body=media).execute(run=False)
response = None
while response is None:
status, response = job.next_chunk()
if project.verbose and status: print("Uploaded %d%%." % int(status.progress() * 100))
if project.verbose: print("Uploaded 100%")
if wait: job_wait(auth, response)
else: return response
# no rows to load: with WRITE_TRUNCATE, recreate the table empty
elif disposition == 'WRITE_TRUNCATE':
if project.verbose: print("BIGQUERY: No data, clearing table.")
# field names assume the enclosing function's project_id / dataset_id / table_id parameters
body = {
  "tableReference": {
    "projectId": project_id,
    "datasetId": dataset_id,
    "tableId": table_id,
  }
}
if not billing_project_id:
billing_project_id = project_id
query = ('DROP TABLE `'
+ project_id + '.' + dataset_id + '.' + table_id + '` ')
body = {
"kind": "bigquery#queryRequest",
'query': query,
'defaultDataset': {
'datasetId' : dataset_id,
},
'useLegacySql': False,
}
job_wait(auth, API_BigQuery(auth).jobs().query(projectId=billing_project_id, body=body).execute())
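# The DROP TABLE statement above errors if the table is missing; two softer options,
# assuming the same API_BigQuery wrapper: use 'DROP TABLE IF EXISTS ...' in the query,
# or call the tables.delete endpoint directly (returns immediately, no job to wait on).
API_BigQuery(auth).tables().delete(
  projectId=project_id,
  datasetId=dataset_id,
  tableId=table_id
).execute()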
def job_wait(auth, job):
if job:
if project.verbose: print('BIGQUERY JOB WAIT:', job['jobReference']['jobId'])
request = API_BigQuery(auth).jobs().get(
projectId=job['jobReference']['projectId'],
jobId=job['jobReference']['jobId']
)
while True:
sleep(5)
if project.verbose: print('.', end='')
sys.stdout.flush()
result = API_Retry(request)
if 'errors' in result['status']:
raise Exception('BigQuery Job Error: %s' % ' '.join([e['message'] for e in result['status']['errors']]))
elif 'errorResult' in result['status']:
raise Exception('BigQuery Job Error: %s' % result['status']['errorResult']['message'])
elif result['status']['state'] == 'DONE':
if project.verbose: print('JOB COMPLETE:', result['id'])
break
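# Hedged usage sketch for job_wait: insert a job and block until it reports DONE.
# The query body and project_id value here are illustrative placeholders.
example_job = API_BigQuery(auth).jobs().insert(
  projectId=project_id,
  body={'configuration': {'query': {'query': 'SELECT 1', 'useLegacySql': False}}}
).execute()
job_wait(auth, example_job)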
body = {
  "configuration": {
    "copy": {
      "sourceTable": {
        "projectId": from_project,
        "datasetId": from_dataset,
        "tableId": from_table
      },
      "destinationTable": {
        "projectId": to_project,
        "datasetId": to_dataset,
        "tableId": to_table
      }
    }
  }
}
job_wait(auth, API_BigQuery(auth).jobs().insert(projectId=project.id, body=body).execute())
def datasets_access(auth, project_id, dataset_id, role='READER', emails=[], groups=[], views=[]):
if emails or groups or views:
access = API_BigQuery(auth).datasets().get(projectId=project_id, datasetId=dataset_id).execute()["access"]
# if emails
for email in emails:
access.append({
"userByEmail":email,
"role":role,
})
# if groups
for group in groups:
access.append({
"groupByEmail":group,
"role":role,
})
# if views (assumed format: each view is a dict with 'dataset' and 'table' keys)
for view in views:
  access.append({
    "view": {
      "projectId": project_id,
      "datasetId": view['dataset'],
      "tableId": view['table'],
    }
  })
# write the updated access list back to the dataset
API_BigQuery(auth).datasets().patch(projectId=project_id, datasetId=dataset_id, body={'access': access}).execute()
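# Hedged usage sketch for datasets_access: grant READER on a dataset to a user and a
# group; the auth mode, project, dataset, and addresses are placeholders.
datasets_access(
  'service',
  'my-project',
  'my_dataset',
  role='READER',
  emails=['analyst@example.com'],
  groups=['team@example.com']
)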
"dryRun": False,
"useQueryCache": True,
"useLegacySql": legacy
}
if row_max: body['maxResults'] = row_max
if dataset_id:
body['defaultDataset'] = {
"projectId": project_id,
"datasetId": dataset_id
}
# wait for query to complete
response = API_BigQuery(auth).jobs().query(projectId=project_id, body=body).execute()
while not response['jobComplete']:
sleep(5)
response = API_BigQuery(auth).jobs().getQueryResults(projectId=project_id, jobId=response['jobReference']['jobId']).execute(iterate=False)
# fetch query results
row_count = 0
while 'rows' in response:
converters = _build_converter_array(response.get('schema', None), None, len(response['rows'][0].get('f')))
for row in response['rows']:
yield [converters[i](next(iter(r.values()))) for i, r in enumerate(row['f'])] # may break if we attempt nested reads
row_count += 1
if 'pageToken' in response:
  response = API_BigQuery(auth).jobs().getQueryResults(projectId=project_id, jobId=response['jobReference']['jobId'], pageToken=response['pageToken']).execute(iterate=False)
else:
  break
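# The paginated loop above sits inside a row-yielding generator; assuming it is exposed
# as query_to_rows(auth, project_id, dataset_id, query, ...) (name and argument order
# are assumptions), draining it into a CSV file looks like this:
import csv

with open('results.csv', 'w', newline='') as out:
  writer = csv.writer(out)
  for row in query_to_rows('service', 'my-project', 'my_dataset', 'SELECT 1 AS value'):
    writer.writerow(row)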
def table_get(auth, project_id, dataset_id, table_id):
return API_BigQuery(auth).tables().get(projectId=project_id, datasetId=dataset_id, tableId=table_id).execute()
def table_to_schema(auth, project_id, dataset_id, table_id):
if project.verbose: print('TABLE SCHEMA:', project_id, dataset_id, table_id)
return API_BigQuery(auth).tables().get(projectId=project_id, datasetId=dataset_id, tableId=table_id).execute()['schema']
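# For reference, the 'schema' dict returned above follows the standard BigQuery table
# schema shape; the field entries below are illustrative.
example_schema = {
  'fields': [
    {'name': 'id', 'type': 'INTEGER', 'mode': 'REQUIRED'},
    {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
  ]
}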
def table_create(auth, project_id, dataset_id, table_id, is_time_partition=False):
body = {
"tableReference": {
"projectId": project_id,
"tableId": table_id,
"datasetId": dataset_id,
}
}
if is_time_partition:
body['timePartitioning'] = {
"type": "DAY"
}
API_BigQuery(auth).tables().insert(projectId=project_id, datasetId=dataset_id, body=body).execute()
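# Hedged usage sketch: create a day-partitioned table, then read it back with table_get;
# the auth mode, project, and dataset names are placeholders.
table_create('service', 'my-project', 'my_dataset', 'my_table', is_time_partition=True)
print(table_get('service', 'my-project', 'my_dataset', 'my_table')['timePartitioning'])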