How to use unicodecsv - 10 common examples

To help you get started, we’ve selected a few unicodecsv examples based on popular ways it is used in public projects.
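
unicodecsv is a drop-in replacement for Python 2's csv module that reads and writes unicode transparently: open the file in binary mode, pass an encoding, and it encodes or decodes each field for you. Before the project examples, here is a minimal round-trip sketch (the file name and sample values are ours, not from any project below):

import unicodecsv as csv

# Write unicode rows; unicodecsv encodes them on the way out.
with open('example.csv', 'wb') as f:
    writer = csv.writer(f, encoding='utf-8')
    writer.writerow([u'name', u'city'])
    writer.writerow([u'Åsa', u'Göteborg'])

# Read them back; each field is decoded to unicode.
with open('example.csv', 'rb') as f:
    reader = csv.reader(f, encoding='utf-8')
    assert next(reader) == [u'name', u'city']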

github biocommons / hgvs / tests / test_hgvs_variantmapper_gcp.py
import unicodecsv as csv

def gcp_file_reader(fn):
    """Yield tab-delimited records, skipping rows whose id starts with '#'."""
    rdr = csv.DictReader(open(fn, 'r'), delimiter=str('\t'))
    for rec in rdr:
        if rec['id'].startswith('#'):
            continue
        yield rec
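
The delimiter=str('\t') cast is worth noting: on Python 2 the csv dialect parameters must be byte strings, so the explicit str() keeps the call working in modules that enable from __future__ import unicode_literals. Stripped down to just that pattern (hypothetical file name):

import unicodecsv as csv

# str() forces the delimiter to a byte string on Python 2, which the
# underlying csv dialect machinery requires.
rdr = csv.DictReader(open('variants.tsv', 'rb'), delimiter=str('\t'))
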
github CivicSpleen / ambry / test / bundles / cde.ca.gov / api-combined / bundle.py
try:
    code_set = self.filesystem.read_yaml(
        self.config.build.codes_file.format(table_name))

except IOError:
    # For the first run, when the field analysis hasn't yet been done.
    from collections import defaultdict
    datatypes = defaultdict(lambda: {'type': 'varchar'})
    code_set = defaultdict(lambda: [])

with self.session as s:
    table = self.schema.add_table('api_{}'.format(group))

    with open(self.filesystem.path('meta',
              'most_recent_fields_{}.csv'.format(group))) as f:
        reader = csv.DictReader(f)

        for row in reader:

            # The 'id' column becomes the integer primary key.
            pk = row['name'] == 'id'

            datatype = datatypes[row['name']]['type']

            c = self.schema.add_column(
                table, row['name'],
                description=row['description'],
                datatype=datatype if not pk else 'integer',
                is_primary_key=pk,
                data={'codes': ','.join(code_set[row['name']])
                      if row['name'] in code_set else None})
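
Note that csv.DictReader(f) is used here without an encoding argument; unicodecsv defaults to utf-8, so this is equivalent to passing encoding='utf-8' explicitly. In isolation (hypothetical file and columns, mirroring the loop above):

import unicodecsv as csv

with open('most_recent_fields.csv', 'rb') as f:
    reader = csv.DictReader(f)  # encoding defaults to 'utf-8'
    for row in reader:
        print(row['name'])  # each value is decoded to unicode
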
github CivicSpleen / ambry / test / functional / test_schema.py
rows = []
with b.source_fs.open('source_schema.csv', encoding='utf8') as f:
    r = csv.reader(f)
    headers = next(r)

    for row in r:
        d = dict(zip(headers, row))
        d['dest_header'] = 'X' + d['source_header']
        rows.append(d)

# Fails with: TypeError: must be unicode, not str
# with b.source_fs.open('source_schema.csv', 'w', encoding='utf8') as f:

path = b.source_fs.getsyspath('source_schema.csv')
with open(path, 'w') as f:
    w = csv.DictWriter(f, fieldnames=headers)
    w.writeheader()
    for row in rows:
        w.writerow(row)

b.sync_in()

self.assertEqual([u'int', u'float', u'string', u'time', u'date'],
                 [c.source_header for c in b.dataset.source_table('types1').columns])

b.clean_ingested()
b.ingest(tables=['types'])

self.assertEqual([u'int', u'float', u'string', u'time', u'date'],
                 [c.source_header for c in b.dataset.source_table('types1').columns])
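
The commented-out failure above is telling: on Python 2, a file opened through an encoding wrapper expects unicode strings, while unicodecsv writes already-encoded byte strings, hence the TypeError. The fix, as in the test, is to give unicodecsv a plain byte-oriented file and let it do the encoding itself. A minimal sketch of that arrangement (hypothetical fieldnames):

import unicodecsv as csv

# Binary mode: unicodecsv handles the encoding, not the file object.
with open('source_schema.csv', 'wb') as f:
    w = csv.DictWriter(f, fieldnames=['source_header', 'dest_header'],
                       encoding='utf-8')
    w.writeheader()
    w.writerow({'source_header': u'née', 'dest_header': u'Xnée'})
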
github apache / airflow / tests / operators / test_operators.py
try:
    conn.execute("""
        INSERT INTO {} VALUES (
            '{}', '{}'
        )
    """.format(mysql_table, *db_record))

    from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
    import unicodecsv as csv
    op = MySqlToHiveTransfer(
        task_id='test_m2h',
        hive_cli_conn_id='hive_cli_default',
        sql="SELECT * FROM {}".format(mysql_table),
        hive_table=hive_table,
        recreate=True,
        delimiter=",",
        quoting=csv.QUOTE_NONE,
        quotechar='',
        escapechar='@',
        dag=self.dag)
    op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    from airflow.hooks.hive_hooks import HiveServer2Hook
    hive_hook = HiveServer2Hook()
    result = hive_hook.get_records("SELECT * FROM {}".format(hive_table))
    self.assertEqual(result[0], db_record)
finally:
    with hook.get_conn() as conn:
        conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
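
The unicodecsv-specific detail here is quoting=csv.QUOTE_NONE with an escapechar: Hive's plain-text storage doesn't interpret CSV quoting, so the transfer writes unquoted fields and escapes any embedded delimiter instead. That writer configuration in isolation (hypothetical staging file):

import unicodecsv as csv

with open('hive_load.csv', 'wb') as f:
    w = csv.writer(f, encoding='utf-8',
                   quoting=csv.QUOTE_NONE, escapechar='@')
    # The embedded comma is written as '@,' rather than quoted.
    w.writerow([u'a,b', u'c'])
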
github BarraQDA / nvivotools / editNode.py
              verbosity, no_comments,
              comments, **dummy):

    try:

        # Read and skip comments at start of CSV file.
        csvcomments = ''
        if infile:
            csvFile = file(infile, 'r')

            while True:
                line = csvFile.readline()
                if line[:1] == '#':
                    csvcomments += line
                else:
                    csvfieldnames = next(unicodecsv.reader([line]))
                    break

        if not no_comments:
            logfilename = outfile.rsplit('.',1)[0] + '.log'
            if os.path.isfile(logfilename):
                incomments = open(logfilename, 'r').read()
            else:
                incomments = ''
            logfile = open(logfilename, 'w')
            logfile.write(comments)
            logfile.write(csvcomments)
            logfile.write(incomments)
            logfile.close()

        norm = NVivoNorm(outfile)
        norm.begin()
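
A pattern worth noting from this example: unicodecsv.reader accepts any iterable of lines, so wrapping a single line in a list parses just that line, here to recover the CSV fieldnames once the leading '#' comments have been skipped. In isolation:

import unicodecsv

# Parse one header line without a file; byte input is decoded (utf-8 by default).
line = b'id,label,description\n'
csvfieldnames = next(unicodecsv.reader([line]))
# -> [u'id', u'label', u'description']
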
github BarraQDA / nvivotools / editTagging.py
        if tagging:
            exec("\
def evaltagging(sourceRow, csvRow):\n\
    return " + tagging, globals())

        # Read and skip comments at start of CSV file.
        csvcomments = ''
        if infile:
            csvFile = file(infile, 'r')

            while True:
                line = csvFile.readline()
                if line[:1] == '#':
                    csvcomments += line
                else:
                    csvfieldnames = next(unicodecsv.reader([line]))
                    break

        if not no_comments:
            logfilename = outfile.rsplit('.',1)[0] + '.log'
            if os.path.isfile(logfilename):
                incomments = open(logfilename, 'r').read()
            else:
                incomments = ''
            logfile = open(logfilename, 'w')
            logfile.write(comments.encode('utf8'))
            logfile.write(csvcomments)
            logfile.write(incomments)
            logfile.close()

        norm = NVivoNorm(outfile)
        norm.begin()

github neulab / cmu-ner / utils / segnerfts_2.py
import copy
import unicodecsv as csv

def load_gaz(gaz_fn):
    """Build a per-language gazetteer of GPE/LOC/ORG/PER name variants."""
    template = {'GPE': [], 'LOC': [], 'ORG': [], 'PER': []}
    gaz = {
        # deepcopy, so the six languages don't share the template's lists
        'amh': copy.deepcopy(template),
        'eng': copy.deepcopy(template),
        'deu': copy.deepcopy(template),
        'orm': copy.deepcopy(template),
        'som': copy.deepcopy(template),
        'tir': copy.deepcopy(template),
    }
    with open(gaz_fn, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        next(reader)  # skip the header row
        for fields in reader:
            eng, lab, tir, tir_ipa, orm, orm_ipa, wik, id_, _ = fields
            # Unlabelled single-word English entries default to GPE.
            if not lab:
                if len(eng.split()) == 1:
                    lab = 'GPE'
            if tir and lab:
                for v in get_variants(tir):
                    gaz['tir'][lab].append(v)
            if orm and lab:
                for v in get_variants(orm):
                    gaz['orm'][lab].append(v)
    return gaz
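
This is the canonical unicodecsv read pattern: open in binary mode, pass encoding='utf-8', and every field comes back as a unicode string, so the tuple unpacking above works regardless of script. Reduced to its core (hypothetical file):

import unicodecsv as csv

with open('gazetteer.csv', 'rb') as f:
    reader = csv.reader(f, encoding='utf-8')
    next(reader)  # skip the header row
    for fields in reader:
        print(fields[0])  # already unicode, no manual .decode() needed
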
github Sefaria / Sefaria-Project / scripts / bios / parse_bios_tsv.py
    rowmap = {
        # ... (earlier entries elided in this excerpt) ...
        17: 'grandchild',
        18: 'childinlaw',
        19: 'student',
        20: 'member',
        21: 'correspondent',
        22: 'opposed',
        23: 'cousin',
    }

    tsv.seek(0)
    next(tsv)
    next(tsv)
    next(tsv)
    next(tsv)
    print "Adding relationships"
    for l in csv.reader(tsv, dialect="excel-tab"):
        key = l[0].encode('ascii', errors='ignore')
        p = Person().load({"key": key})
        for i, type in rowmap.items():
            if l[i]:
                for pkey in l[i].split(","):
                    pkey = pkey.strip().encode('ascii', errors='ignore')
                    print "{} - {}".format(key, pkey)
                    if Person().load({"key": pkey}):
                        pr = PersonRelationship({
                            "type": type,
                            "from_key": key,
                            "to_key": pkey
                        })
                        pr.save()
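
For tab-separated input the script leans on the standard 'excel-tab' dialect rather than setting the delimiter by hand; unicodecsv accepts the same dialect names as the stdlib csv module. Reduced to a sketch (hypothetical file):

import unicodecsv as csv

with open('bios.tsv', 'rb') as tsv:
    for l in csv.reader(tsv, dialect='excel-tab', encoding='utf-8'):
        print(l[0])
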
github SeldonIO / seldon-server / python / seldon / shell / import_items_utils.py
from collections import defaultdict
import unicodecsv

def doAttrInserts(csv_file, db):
    inserts = defaultdict(list)
    insertNum = 0
    with open(csv_file) as csvFile:
        reader = unicodecsv.DictReader(csvFile, encoding='utf-8')
        for line in reader:
            for field_name in line:
                if field_name == 'id' or field_name == 'name':
                    continue
                attr_type = available_attrs[str(field_name)][1]
                inserts[attr_type].append({'attr_name': field_name,
                                           'value': line[field_name],
                                           'id': line['id']})
                # Flush a batch once it exceeds the configured size.
                if len(inserts[attr_type]) > DB_BATCH_SIZE:
                    insertNum += 1
                    reallyDoInserts(inserts[attr_type], attr_insert_map[attr_type], insertNum, db)
                    del inserts[attr_type]
    # Flush any remaining partial batches.
    for index, insert_label in enumerate(inserts, start=1):
        insertNum += 1
        reallyDoInserts(inserts[insert_label], attr_insert_map[insert_label], insertNum, db)
    db.commit()
    print 'finished attribute inserts'

github openaddresses / openaddresses / scripts / es / gml_to_csv.py
            lookup_key = self.strip_hash.sub('', attrs['xlink:href'])

            if self.lookup['thoroughfare'].get(lookup_key) is not None:
                self.object['street'] = self.lookup['thoroughfare'].get(lookup_key)
            elif self.lookup['admin'].get(lookup_key) is not None:
                self.object['admin'] = self.lookup['admin'].get(lookup_key)
            elif self.lookup['postal'].get(lookup_key) is not None:
                self.object['postcode'] = self.lookup['postal'].get(lookup_key)

        # detect SRS, create CSV writer if necessary
        if name == 'gml:Point':
            self.srs = attrs.get('srsName', None)
            if self.srs is not None:
                self.srs = self.srs.split(':')[-1]
                if self.srs not in self.writers:
                    self.writers[self.srs] = csv.DictWriter(
                        open(self.out_dir + 'es-%s.csv' % self.srs, 'a'),
                        ('lon', 'lat', 'number', 'street', 'postcode', 'admin'))
                    self.writers[self.srs].writeheader()
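
Because a given point may lack a street, postcode, or admin value, DictWriter fills any missing fieldnames with its restval (an empty string by default), so the per-SRS writers can take the partially populated self.object dicts as-is. The same writer setup in isolation (hypothetical path and row):

import unicodecsv as csv

fields = ('lon', 'lat', 'number', 'street', 'postcode', 'admin')
with open('es-4326.csv', 'wb') as f:
    w = csv.DictWriter(f, fields, encoding='utf-8')
    w.writeheader()
    # Missing keys ('number', 'postcode', 'admin') are written as ''.
    w.writerow({'lon': u'-3.70', 'lat': u'40.41', 'street': u'Gran Vía'})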