Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def make_empty(head):
    """Allocate the output dataframe and the arrays that values are put into.

    Parameters
    ----------
    head : dict
        Must provide ``head['schema']['fields']`` (an iterable of field
        dicts with at least ``'name'`` and ``'type'``), ``head['dtypes']``
        (a mapping; its keys are passed as column names and its values as
        column types) and ``head['nrows']``.

    Returns
    -------
    (df, arrs)
        The pair produced by ``empty``, with the ``arrs`` entry of every
        decimal field replaced by a temporary numpy array.
    """
    fields = head['schema']['fields']

    # Enum fields are handed to ``empty`` together with their symbol lists.
    cats = {field['name']: field['symbols']
            for field in fields if field['type'] == 'enum'}

    dtypes = head['dtypes']
    nrows = head['nrows']
    df, arrs = empty(dtypes.values(), nrows, cols=dtypes, cats=cats)

    for field in fields:
        if field.get('logicalType') != 'decimal':
            continue
        # Temporary array for decimal: fixed-width bytes when the underlying
        # type is 'fixed', otherwise a generic object array.
        if field['type'] == 'fixed':
            dtype = 'S%s' % field['size']
        else:
            dtype = "O"
        arrs[field['name']] = np.empty(nrows, dtype)

    return df, arrs
C avro reader can interpret them.
"""
with open_with(URL, 'rb') as f:
f.seek(start_byte)
if start_byte == 0:
header = read_header(f)
f.seek(header['header_size'])
data = header['head_bytes'] + f.read(length)
if nrows is None:
b = io.BytesIO(data)
header['blocks'] = []
scan_blocks(b, header, len(data))
nrows = sum(b['nrows'] for b in header['blocks'])
f = cyavro.AvroReader()
f.init_bytes(data)
df, arrs = empty(header['dtypes'].values(), nrows, cols=header['dtypes'])
f.init_reader()
f.init_buffers(10000)
for i in range(0, nrows, 10000):
d = f.read_chunk()
for c in d:
s = [f for f in header['schema']['fields'] if f['name'] == c][0]
if 'logicalType' in s:
df[c].values[i:i + 10000] = time_convert(d[c], s)
else:
df[c].values[i:i + 10000] = d[c]
return df