    if format == "7.3":
        savemat(
            str(name),
            mdict,
            long_field_names=True,
            format="7.3",
            delete_unused_variables=False,
            oned_as=oned_as,
        )
    else:
        savemat(str(name), mdict, long_field_names=True, oned_as=oned_as)
elif fmt == "parquet":
    name = name.with_suffix(".parquet")
    write_parquet(name, df)
else:
    message = (
        'Unsupported export type "{}". '
        'Please select "csv", "excel", "hdf5", "mat", "parquet" or "pandas"'
    )
    logger.warning(message.format(fmt))
----------------
file_name : str, optional
file name. If no name is defined, the original mdf file name and path are used, with a .parquet extension
"""
try:
    from fastparquet import write as write_parquet
except ImportError:
    warn('fastparquet not installed')
    return
if file_name is None:
    file_name = splitext(self.fileName)[0]
    file_name = file_name + '.parquet'
# write one DataFrame per master channel group to the parquet file
for master_channel_name in self.masterChannelList:
    frame = self.return_pandas_dataframe(master_channel_name)
    if frame is not None:
        write_parquet(file_name, frame, compression='GZIP')
# build an edge list of [id, source, target, protocol, weight] records
edges = []
for i, key in enumerate(traffic):
    edge = [i, nodes[key[1]], nodes[key[2]], key[0], traffic[key]]
    edges.append(edge)
# node table: one row per node id
nodes_df = pd.DataFrame(np.arange(len(nodes)), columns=['id'])
nodes_df = nodes_df.set_index('id')
# np.array on the mixed-type rows yields strings, so cast the numeric columns back
edges_df = pd.DataFrame(np.array(edges), columns=['id', 'source', 'target', 'protocol', 'weight'])
edges_df = edges_df.set_index('id')
edges_df['source'] = pd.to_numeric(edges_df['source'])
edges_df['target'] = pd.to_numeric(edges_df['target'])
edges_df['weight'] = pd.to_numeric(edges_df['weight'])
edges_df['protocol'] = edges_df['protocol'].astype('category')
# write both tables with fastparquet (imported as fp)
fp.write('{}_nodes.parq'.format(prefix), nodes_df)
fp.write('{}_edges.parq'.format(prefix), edges_df)
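For completeness, the two files written above can be read back with fastparquet's ParquetFile (the same class used in the S3 example further down); a minimal sketch, assuming a hypothetical prefix of 'capture':

from fastparquet import ParquetFile

nodes_df = ParquetFile('capture_nodes.parq').to_pandas()
edges_df = ParquetFile('capture_edges.parq').to_pandas()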
def save_parquet(self, filename, df, col_names=None):
    """Write pandas DataFrame to parquet file."""
    import os
    from fastparquet import write as fp_write
    filename += ".parquet"
    append = os.path.exists(filename)  # append if the file already exists
    if col_names is not None:
        df.columns = col_names
    fp_write(filename, df, row_group_offsets=len(df), compression="GZIP",
             file_scheme='hive', has_nulls=False, append=append)
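The append-if-exists idiom in save_parquet can be exercised on its own; a minimal sketch, assuming fastparquet is installed, with a hypothetical file name and toy frames:

import os
import pandas as pd
from fastparquet import write as fp_write

def write_chunk(filename, df):
    # first call creates the hive-style dataset, later calls append a new row group
    fp_write(filename, df, file_scheme='hive', append=os.path.exists(filename))

write_chunk('events.parquet', pd.DataFrame({'a': [1, 2]}))
write_chunk('events.parquet', pd.DataFrame({'a': [3, 4]}))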
def append(bucket, key1, key2, s3, output_filename):
    """Read two parquet files from S3, concatenate them and write the result back."""
    from fastparquet import ParquetFile
    from fastparquet import write as pwrite
    s3_open = s3.open
    path1 = '{}{}'.format(bucket, key1)
    pf1 = ParquetFile(path1, open_with=s3_open)
    df1 = pf1.to_pandas()
    path2 = '{}{}'.format(bucket, key2)
    pf2 = ParquetFile(path2, open_with=s3_open)
    df2 = pf2.to_pandas()
    data = df1.append(df2)  # on pandas >= 2.0 use pd.concat([df1, df2]) instead
    pwrite('{}{}'.format(bucket, output_filename), data, open_with=s3_open,
           compression='GZIP', append=False, has_nulls=True)
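A hedged usage sketch for the helper above, assuming s3fs provides the filesystem object; the bucket prefix and key names are hypothetical:

import s3fs

s3 = s3fs.S3FileSystem()
# the bucket prefix ends with '/' so that '{}{}'.format(bucket, key) forms a full path
append('my-bucket/', 'part1.parq', 'part2.parq', s3, 'combined.parq')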
def save_parquet(self, filename, df, col_names=None):
    """Write pandas DataFrame to parquet file."""
    import os
    from fastparquet import write as fp_write
    filename += ".parquet"
    append = os.path.exists(filename)  # append if the file already exists
    if col_names is not None:
        df.columns = col_names
    fp_write(filename, df, row_group_offsets=len(df),
             file_scheme='hive', has_nulls=False, append=append)
"""
print(_mywrap(msg))
if parquet_engine == 'pyarrow':
    # first chunk: derive the schema and create the ParquetWriter
    if i == 1:
        if manual_schema:
            schema = _create_parquet_schema(df.dtypes)
        else:
            schema = pa.Table.from_pandas(df, preserve_index=False).schema
        writer = pq.ParquetWriter(outfile, schema, flavor='spark')
    # every chunk (including the first) is written through the same writer
    writer.write_table(pa.Table.from_pandas(df, preserve_index=False))
elif parquet_engine == 'fastparquet':
    if i == 1:
        # first chunk creates the file
        fp.write(
            outfile,
            df,
            compression=compression_type,
            has_nulls=False,
            write_index=False,
            object_encoding='utf8')
    else:
        # subsequent chunks are appended to the existing file
        fp.write(
            outfile,
            df,
            compression=compression_type,
            has_nulls=False,
            write_index=False,
            object_encoding='utf8',
            append=True)
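The fastparquet branch above implements a create-then-append pattern over numbered chunks; a minimal standalone sketch of that pattern, assuming a hypothetical CSV source and chunk size:

import pandas as pd
import fastparquet as fp

outfile = 'data.parquet'
compression_type = 'GZIP'
# chunk numbering starts at 1 so the first chunk creates the file and later chunks append
for i, df in enumerate(pd.read_csv('data.csv', chunksize=100_000), start=1):
    if i == 1:
        fp.write(outfile, df, compression=compression_type, write_index=False)
    else:
        fp.write(outfile, df, compression=compression_type, write_index=False, append=True)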