Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_utf8(self):
    """
    Check that a UTF-8 encoded CSV is read row by row as expected.

    The first three rows of ``examples/test_utf8.csv`` are compared, in
    order, against the expected values (the last one contains a non-ASCII
    character).
    """
    expected_rows = (
        ['a', 'b', 'c'],
        ['1', '2', '3'],
        ['4', '5', u'ʤ'],
    )
    with open('examples/test_utf8.csv') as handle:
        rows = csvkit.CSVKitReader(handle, encoding='utf-8')
        for expected in expected_rows:
            self.assertEqual(next(rows), expected)
def get_headers(self, csv_path):
    """
    Read the first row of the csv at ``csv_path`` and return it as the
    list of column headers.
    """
    with open(csv_path, 'r') as source:
        return next(CSVKitReader(source))
def car_wash(clean_data, file_name):
    """
    Write tab-delimited text out as a fully quoted CSV file.

    Each line of ``clean_data`` is parsed as one tab-delimited record and
    written to ``<file_name lowercased>.csv`` inside ``clean_data_dir``.
    Before parsing, every line is decoded as ASCII with the ``replace``
    error handler and re-encoded as UTF-8, so bytes that are not valid
    ASCII are substituted rather than raising.

    Both the output file and the in-memory input buffer are closed even
    if a line fails to decode or parse.
    """
    new_csv_name = file_name.lower() + '.csv'
    new_csv_path = os.path.join(clean_data_dir, new_csv_name)
    infile = StringIO(clean_data)
    try:
        # The context manager guarantees the output file is flushed and
        # closed when a row raises part-way through the input.
        with open(new_csv_path, 'wb') as outfile:
            writer = CSVKitWriter(outfile, quoting=csv.QUOTE_ALL)
            for line in infile:
                cleaned = line.decode("ascii", "replace").encode('utf-8')
                # Each line is parsed on its own so one record is emitted
                # per input line.
                reader = CSVKitReader(StringIO(cleaned), delimiter='\t')
                # next() works on both Python 2 and Python 3 iterators,
                # unlike the Python 2-only .next() method.
                writer.writerow(next(reader))
    finally:
        infile.close()
def main(self):
    """
    Filter the selected columns of the input CSV and write the result.

    Reads the header row from ``self.args.file``, resolves
    ``self.args.columns`` against it, maps every selected column index to
    ``self.args.expr`` and streams the rows through ``sed.CsvFilter`` into
    ``self.output_file``. The header row is written through unchanged.
    """
    reader = CSVKitReader(self.args.file, **self.reader_kwargs)
    # next() works on both Python 2 and Python 3 iterators, unlike the
    # Python 2-only .next() method.
    cnames = next(reader)
    cids = parse_column_identifiers(self.args.columns, cnames, self.args.zero_based)
    mods = {idx: self.args.expr for idx in cids}
    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    # The header row was already consumed above, hence header=False.
    filtered = sed.CsvFilter(reader, mods, header=False)
    output.writerow(cnames)
    for row in filtered:
        output.writerow(row)
# Fragment of a dataset import task; the enclosing function header and the
# remainder of the read loop are not visible in this excerpt.
# Mark the dataset's current task as started before doing any work.
task_status = dataset.current_task
task_status.begin(ugettext('Preparing to import'))
# Count the lines of the uploaded file up front.
# NOTE(review): presumably used for progress reporting later on; confirm.
line_count = self._count_lines(upload.get_path())
# Stop early if the task was aborted while the lines were being counted.
if self.is_aborted():
# NOTE(review): 'preperation' is misspelled in this runtime message.
task_status.abort('Aborted during preperation')
log.warning('Import aborted, dataset_slug: %s' % dataset_slug)
return
# NOTE(review): the file is opened without a context manager; confirm that
# it is closed further down, outside this excerpt.
f = open(upload.get_path(), 'r')
reader = CSVKitReader(f, encoding=upload.encoding, **upload.dialect_as_parameters())
# Discard the first row (presumably the header row; confirm).
reader.next()
add_buffer = []
data_typer = DataTyper(dataset.column_schema)
throttle = config_value('PERF', 'TASK_THROTTLE')
i = 0
while True:
# The row number which is about to be read, for error handling and indexing
i += 1
try:
row = reader.next()
except StopIteration:
# No row was read on this pass, so undo the increment made above.
i -= 1
def guess_column_types(path, dialect, sample_size, encoding='utf-8'):
    """
    Guess column types based on a sample of data.

    Reads the header row of the CSV at ``path``, passes up to
    ``sample_size`` following rows to ``normalize_table`` and converts the
    types it reports into type names.

    Returns a list with one entry per header column: the ``__name__`` of
    the guessed type, or ``None`` where the guessed type is ``NoneType``
    or where no type was reported for the column.
    """
    with open(path, 'r') as f:
        reader = CSVKitReader(f, encoding=encoding, **dialect)
        # next() works on both Python 2 and Python 3 iterators, unlike the
        # Python 2-only .next() method.
        headers = next(reader)
        sample = islice(reader, sample_size)
        normal_types, normal_values = normalize_table(sample)

    type_names = [
        None if t is NoneType else t.__name__
        for t in normal_types
    ]

    # If a final column had no values csvkit will have dropped it
    type_names.extend([None] * (len(headers) - len(type_names)))

    # Hand the result back to the caller; without this the computed names
    # are discarded and the function yields None.
    return type_names
def csv_sample_data(f, dialect, sample_size=settings.PANDA_SAMPLE_DATA_ROWS):
    """
    Return up to ``sample_size`` data rows from an open CSV file.

    ``f`` is handed to ``CSVKitReader`` together with the ``dialect``
    keyword arguments. The first row is skipped as the header row and the
    following rows are returned as a list. An input with no rows at all
    yields an empty list.
    """
    reader = CSVKitReader(f, **dialect)
    # Skip headers. The default keeps an empty input from leaking
    # StopIteration out of this function; the builtin next() also works on
    # both Python 2 and Python 3 iterators.
    next(reader, None)
    return list(islice(reader, sample_size))
# Fragment of a view that creates users from CSV text submitted in a POST
# field; the enclosing function header and the end of the loop body are not
# visible in this excerpt.
user_data = request.POST.get('user-data', '')
# Reject an empty submission before attempting to parse anything.
if not user_data:
raise Exception(_('No user data provided.'))
# Keep the submitted text in the context.
# NOTE(review): presumably so the form can be re-rendered with it; confirm.
context['user_data'] = user_data
# Detect the CSV dialect; a decode failure is reported as unsupported
# (non-UTF-8) input.
try:
csv_dialect = csvkit_sniff(user_data)
except UnicodeDecodeError:
raise Exception(_('Only UTF-8 data is supported.'))
# A falsy result from the sniffer is treated as unrecognisable data.
if not csv_dialect:
raise Exception(_('Unable to determine the format of the data you entered. Please ensure it is valid CSV data.'))
reader = CSVKitReader(StringIO(user_data), dialect=csv_dialect)
# Counter initialised here; its use is outside this excerpt.
emails = 0
# Every row must have exactly four columns. They are used below as
# row[0] = email, row[1] = password, row[2] = first name, row[3] = last name.
# NOTE(review): i is zero-based, so the first row is reported as "Row 0".
for i, row in enumerate(reader):
if len(row) < 4:
raise Exception(_('Row %i has less than 4 columns.') % i)
if len(row) > 4:
raise Exception(_('Row %i has more than 4 columns.') % i)
# Refuse to create a user whose email address is already registered.
if UserProxy.objects.filter(email=row[0]).count():
raise Exception(_('User "%s" already exists') % row[0])
# row[0] is passed twice; an empty row[1] is passed as None.
# NOTE(review): presumably the arguments are (username, email, password);
# confirm against create_user.
user = UserProxy.objects.create_user(row[0], row[0], row[1] or None)
user.is_active = bool(row[1]) # active if a password is provided
user.first_name = row[2]
user.last_name = row[3]