rows = []
with b.source_fs.open('source_schema.csv', encoding='utf8') as f:
    r = csv.reader(f)
    headers = next(r)
    for row in r:
        d = dict(zip(headers, row))
        d['dest_header'] = 'X' + d['source_header']
        rows.append(d)

# Fails with: TypeError: must be unicode, not str
# with b.source_fs.open('source_schema.csv', 'w', encoding='utf8') as f:
path = b.source_fs.getsyspath('source_schema.csv')
with open(path, 'w') as f:
    w = csv.DictWriter(f, fieldnames=headers)
    w.writeheader()
    for row in rows:
        w.writerow(row)

b.sync_in()

self.assertEqual([u'int', u'float', u'string', u'time', u'date'],
                 [c.source_header for c in b.dataset.source_table('types1').columns])

b.clean_ingested()
b.ingest(tables=['types'])

self.assertEqual([u'int', u'float', u'string', u'time', u'date'],
                 [c.source_header for c in b.dataset.source_table('types1').columns])
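The commented-out open() call above fails under Python 2 because the stdlib csv module writes byte strings, which a text-mode file opened with encoding='utf8' rejects. A minimal alternative sketch, assuming the unicodecsv package used in the later examples and assuming source_fs.open accepts binary mode:

import unicodecsv

# Sketch only: hand unicodecsv a binary file object and let it do the encoding.
with b.source_fs.open('source_schema.csv', 'wb') as f:
    w = unicodecsv.DictWriter(f, fieldnames=headers, encoding='utf-8')
    w.writeheader()
    for row in rows:
        w.writerow(row)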
lookup_key = self.strip_hash.sub('', attrs['xlink:href'])
if self.lookup['thoroughfare'].get(lookup_key) is not None:
    self.object['street'] = self.lookup['thoroughfare'].get(lookup_key)
elif self.lookup['admin'].get(lookup_key) is not None:
    self.object['admin'] = self.lookup['admin'].get(lookup_key)
elif self.lookup['postal'].get(lookup_key) is not None:
    self.object['postcode'] = self.lookup['postal'].get(lookup_key)

# detect SRS, create CSV writer if necessary
if name == 'gml:Point':
    self.srs = attrs.get('srsName', None)
    if self.srs is not None:
        self.srs = self.srs.split(':')[-1]
        if self.srs not in self.writers:
            self.writers[self.srs] = csv.DictWriter(
                open(self.out_dir + 'es-%s.csv' % self.srs, 'a'),
                ('lon', 'lat', 'number', 'street', 'postcode', 'admin'))
            self.writers[self.srs].writeheader()
def generate_csv(self, sheet, row_set, temp_dir):
    out_path = safe_filename(row_set.name, extension='csv')
    out_path = join_path(temp_dir, out_path)
    offset, headers = headers_guess(row_set.sample)
    row_set.register_processor(headers_processor(headers))
    row_set.register_processor(offset_processor(offset + 1))
    with open(out_path, 'w') as fh:
        writer = None
        for row in row_set:
            try:
                if writer is None:
                    writer = DictWriter(fh, [c.column for c in row])
                    writer.writeheader()
                data = {c.column: string_value(c.value) for c in row}
                writer.writerow(data)
            except Exception as ex:
                log.exception(ex)
    child_id = join_path(self.result.id, row_set.name)
    self.manager.handle_child(self.result, out_path,
                              id=child_id,
                              title=row_set.name,
                              mime_type='text/csv')
geojson = args[0]
if geojson.startswith('http'):
    fh = urllib2.urlopen(geojson)
else:
    fh = open(args[0])
self.data = json.load(fh)
suc_dir = os.path.join(settings.DATASETS_ROOT, 'sucursales')
if not os.path.exists(suc_dir):
    os.makedirs(suc_dir)

FILENAME = self.FILENAME % datetime.now().strftime("%Y-%m-%d-%H%M%S")
FILENAME = os.path.join(suc_dir, FILENAME)

writer = unicodecsv.DictWriter(open(FILENAME, 'wb'),
                               fieldnames=self.get_columnas())
writer.writeheader()

# progress-bar label 'Convirtiendo' is Spanish for 'Converting'
bar = Bar('Convirtiendo ', suffix='%(percent)d%%')
for feature in bar.iter(self.entrada()):
    sucursal = self.parse_sucursal(feature)
    writer.writerow(sucursal)
    raise NoRecordsFoundError
else:
    # we should only do it for count == 1 but eh.
    csv_buf = getbuff()

    if only_last:
        pifilter = [pifilter[0]]

    rows = [pi.to_dict_for_mongo() for pi in pifilter]

    if headers_to_use is None:
        headers_to_use = [key for key in rows[0].keys()
                          if not key.startswith('_')]

    w = unicodecsv.DictWriter(csv_buf, fieldnames=headers_to_use,
                              extrasaction='ignore',
                              lineterminator='\n',
                              encoding='utf-8')
    if with_header:
        w.writeheader()
    w.writerows(rows)
    csv_buf.flush()

    if not csv_buf.len:
        raise NoRecordsFoundError

    return csv_buf.getvalue()
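The csv_buf.len check above relies on Python 2's StringIO objects, which expose a .len attribute. A rough standalone equivalent with the standard io.BytesIO (the helper name is made up; NoRecordsFoundError is the same exception the snippet raises):

import io
import unicodecsv

def rows_to_csv_bytes(rows, fieldnames, with_header=True):
    # Hypothetical sketch of the block above with io.BytesIO instead of getbuff().
    buf = io.BytesIO()
    w = unicodecsv.DictWriter(buf, fieldnames=fieldnames,
                              extrasaction='ignore',
                              lineterminator='\n',
                              encoding='utf-8')
    if with_header:
        w.writeheader()
    w.writerows(rows)
    if not buf.getvalue():
        raise NoRecordsFoundError
    return buf.getvalue()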
# check if bom should be generated
ubom = kwargs.pop('ubom', None)
if ubom is None:
    ubom = False
else:
    ubom = int(ubom[0])

cstart = start

if ubom:
    LOG.debug('BOM')
    yield '\xef\xbb\xbf'  # UTF-8 byte-order mark
with _buffer() as current_line:
    w = unicodecsv.DictWriter(
        current_line,
        fieldnames=columns,
        encoding='utf-8'
    )
    if header:
        w.writeheader()
    yield current_line.getvalue()

while cstart < (start + num):
    ilist = zrange(feed, cstart,
                   cstart - 1 + min(start + num - cstart, FEED_INTERVAL))
    for indicator in ilist:
        v = SR.hget(feed + '.value', indicator)
        v = None if v is None else json.loads(v)
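The snippet above streams CSV from a generator by rendering each piece into a small buffer and yielding its contents. A minimal self-contained sketch of that pattern (Python 2, as in the original; io.BytesIO stands in for the _buffer() helper, and the function name is made up):

import io
import unicodecsv

def stream_csv(rows, columns, header=True, with_bom=False):
    # Hypothetical standalone version of the streaming pattern above.
    if with_bom:
        yield '\xef\xbb\xbf'  # UTF-8 byte-order mark
    buf = io.BytesIO()
    w = unicodecsv.DictWriter(buf, fieldnames=columns,
                              extrasaction='ignore', encoding='utf-8')
    if header:
        w.writeheader()
        yield buf.getvalue()
    for row in rows:
        buf.seek(0)
        buf.truncate()
        w.writerow(row)
        yield buf.getvalue()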
if subjurisdiction.url not in self.no_precinct_urls:
    url_paths.append({
        'date': election['start_date'],
        'office': '',
        'race_type': election['race_type'],
        'party': '',
        'special': election['special'],
        'url': subjurisdiction.report_url(fmt),
        'reporting_level': 'precinct',
        'jurisdiction': subjurisdiction.name,
    })

with open(url_paths_filename, 'wb') as f:
    fieldnames = ['date', 'office', 'race_type', 'party',
                  'special', 'url', 'reporting_level', 'jurisdiction']
    writer = unicodecsv.DictWriter(f, fieldnames)
    writer.writeheader()
    writer.writerows(url_paths)

return url_paths
    'denominator_column_id',
    'topics'
]
table_csv = unicodecsv.DictWriter(table_file, table_metadata_fieldnames)
table_csv.writeheader()

with open("%s/census_column_metadata.csv" % root_dir, 'wb') as column_file:
    column_metadata_fieldnames = [
        'table_id',
        'line_number',
        'column_id',
        'column_title',
        'indent',
        'parent_column_id'
    ]
    column_csv = unicodecsv.DictWriter(column_file, column_metadata_fieldnames)
    column_csv.writeheader()

    for table_id, table in sorted(tables.items()):
        if not table:
            # don't write out a table that was marked to be skipped on purpose
            continue

        columns = table.pop('columns')
        table_csv.writerow(table)
        for column in sorted(columns, key=lambda a: a['column_id']):
            column_csv.writerow(column)
dataset_ref = client.dataset(dataset_id)
table_ref = dataset_ref.table(table_id)
table = client.get_table(table_ref)
fieldnames = [schema.name for schema in table.schema]

query = ('SELECT * FROM `unpaywall-bhd.{}` '.format(bq_tablename))
query_job = client.query(
    query,
    # Location must match that of the dataset(s) referenced in the query.
    location='US')  # API request - starts the query
rows = list(query_job)

with open(temp_data_filename, 'wb') as f:
    # delimiter workaround from https://stackoverflow.com/questions/43048618/csv-reader-refuses-tab-delimiter?noredirect=1&lq=1#comment73182042_43048618
    writer = unicodecsv.DictWriter(f, fieldnames=fieldnames, delimiter=str(u'\t').encode('utf-8'))
    if header:
        writer.writeheader()
    for row in rows:
        writer.writerow(dict(zip(fieldnames, row)))

print('Saved {} rows from {}.'.format(len(rows), bq_tablename))
return fieldnames
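To read the exported file back, the same tab delimiter applies; a minimal sketch assuming the temp_data_filename written above and the unicodecsv package:

import unicodecsv

with open(temp_data_filename, 'rb') as f:
    reader = unicodecsv.DictReader(f, delimiter=str(u'\t').encode('utf-8'),
                                   encoding='utf-8')
    rows = list(reader)  # each row is a dict keyed by the header written above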