Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is a fragment of a larger function. The `if` that this
# `else:` pairs with, the function header, and the original indentation are
# not visible here, so the nesting of the statements below must be confirmed
# against the full source.
else:
# Restrict df to pf's columns followed by the partition_on columns
# (presumably both are lists, so `+` concatenates them -- TODO confirm).
df = df[pf.columns + partition_on]
# Reuse the metadata object carried by pf rather than building a new one.
fmd = pf.fmd
# Offset for numbering new part files, derived from the row groups already
# listed in fmd (presumably the highest existing part number -- verify against
# fastparquet.writer.find_max_part).
i_offset = fastparquet.writer.find_max_part(fmd.row_groups)
if not ignore_divisions:
# Look up the last recorded "max" of the first index column and refuse to
# continue when the first new division is below it.
minmax = fastparquet.api.sorted_partitioned_columns(pf)
old_end = minmax[index_cols[0]]["max"][-1]
if divisions[0] < old_end:
raise ValueError(
"Appended divisions overlapping with the previous ones.\n"
"Previous: {} | New: {}".format(old_end, divisions[0])
)
# NOTE(review): with indentation lost it is unclear which `if` this `else:`
# belongs to; it builds fresh metadata and starts part numbering at 0.
else:
fmd = fastparquet.writer.make_metadata(
df._meta,
object_encoding=object_encoding,
index_cols=index_cols,
ignore_columns=partition_on,
**kwargs
)
i_offset = 0
# One file name per partition of df, numbered from i_offset upward.
filenames = ["part.%i.parquet" % (i + i_offset) for i in range(df.npartitions)]
# Wrap the partition writer with `delayed`, passing pure=False.
write = delayed(_write_partition_fastparquet, pure=False)
# Build one wrapped call per (filename, partition) pair; every call receives
# the same fs, path, fmd, compression and partition_on arguments.
writes = [
write(part, fs, path, filename, fmd, compression, partition_on)
for filename, part in zip(filenames, df.to_delayed())
]
# NOTE(review): this is the tail of a function whose header and enclosing
# `if` are not visible; indentation has been stripped, so the nesting of the
# statements below must be confirmed against the full source.
# Offset for numbering new part files, derived from the row groups already
# listed in fmd (presumably the highest existing part number -- verify against
# fastparquet.writer.find_max_part).
i_offset = fastparquet.writer.find_max_part(fmd.row_groups)
if not ignore_divisions:
# Turn the division check off when division_info["name"] is not one of
# index_cols.
if not set(index_cols).intersection([division_info["name"]]):
ignore_divisions = True
if not ignore_divisions:
# Look up the last recorded "max" of the first index column and refuse to
# continue when the first new division is below it.
minmax = fastparquet.api.sorted_partitioned_columns(pf)
old_end = minmax[index_cols[0]]["max"][-1]
divisions = division_info["divisions"]
if divisions[0] < old_end:
raise ValueError(
"Appended divisions overlapping with previous ones."
"\n"
"Previous: {} | New: {}".format(old_end, divisions[0])
)
# NOTE(review): with indentation lost it is unclear which `if` this `else:`
# belongs to; it builds fresh metadata and starts part numbering at 0.
else:
fmd = fastparquet.writer.make_metadata(
df._meta,
object_encoding=object_encoding,
index_cols=index_cols,
ignore_columns=partition_on,
**kwargs
)
i_offset = 0
# Hand back the metadata object together with the part-number offset.
return (fmd, i_offset)