# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"Previous: {} | New: {}".format(pf.columns, list(df.columns))
)
elif (pd.Series(pf.dtypes).loc[pf.columns] != df[pf.columns].dtypes).any():
raise ValueError(
"Appended dtypes differ.\n{}".format(
set(pf.dtypes.items()) ^ set(df.dtypes.iteritems())
)
)
else:
df = df[pf.columns + partition_on]
fmd = pf.fmd
i_offset = fastparquet.writer.find_max_part(fmd.row_groups)
if not ignore_divisions:
minmax = fastparquet.api.sorted_partitioned_columns(pf)
old_end = minmax[index_cols[0]]["max"][-1]
if divisions[0] < old_end:
raise ValueError(
"Appended divisions overlapping with the previous ones.\n"
"Previous: {} | New: {}".format(old_end, divisions[0])
)
else:
fmd = fastparquet.writer.make_metadata(
df._meta,
object_encoding=object_encoding,
index_cols=index_cols,
ignore_columns=partition_on,
**kwargs
)
i_offset = 0
elif (pd.Series(pf.dtypes).loc[pf.columns] != df[pf.columns].dtypes).any():
raise ValueError(
"Appended dtypes differ.\n{}".format(
set(pf.dtypes.items()) ^ set(df.dtypes.iteritems())
)
)
else:
df = df[pf.columns + partition_on]
fmd = pf.fmd
i_offset = fastparquet.writer.find_max_part(fmd.row_groups)
if not ignore_divisions:
if not set(index_cols).intersection([division_info["name"]]):
ignore_divisions = True
if not ignore_divisions:
minmax = fastparquet.api.sorted_partitioned_columns(pf)
old_end = minmax[index_cols[0]]["max"][-1]
divisions = division_info["divisions"]
if divisions[0] < old_end:
raise ValueError(
"Appended divisions overlapping with previous ones."
"\n"
"Previous: {} | New: {}".format(old_end, divisions[0])
)
else:
fmd = fastparquet.writer.make_metadata(
df._meta,
object_encoding=object_encoding,
index_cols=index_cols,
ignore_columns=partition_on,
**kwargs
)
# NOTE(review): the next four lines close a dict comprehension building the
# task graph `dsk`, one entry per row group -- its opening lies above this
# chunk; confirm against the full function.
getattr(pf, "tz", {}),
)
for i, rg in enumerate(rgs)
}
if not dsk:
# empty dataframe
dsk = {(name, 0): meta}
divisions = (None, None)
return out_type(dsk, name, meta, divisions)
if index_names and infer_divisions is not False:
index_name = meta.index.name
try:
# is https://github.com/dask/fastparquet/pull/371 available in
# current fastparquet installation?
minmax = fastparquet.api.sorted_partitioned_columns(pf, filters)
except TypeError:
# Older fastparquet signature: no `filters` argument.
minmax = fastparquet.api.sorted_partitioned_columns(pf)
if index_name in minmax:
# Index known sorted: divisions are the per-partition minima plus
# the final partition's maximum.
divisions = minmax[index_name]
divisions = divisions["min"] + [divisions["max"][-1]]
else:
if infer_divisions is True:
# Caller explicitly demanded divisions but they cannot be inferred.
raise ValueError(
(
"Unable to infer divisions for index of '{index_name}'"
" because it is not known to be "
"sorted across partitions"
).format(index_name=index_name)
)
# Fall back to unknown divisions: one boundary per row group, plus one.
divisions = (None,) * (len(rgs) + 1)
# NOTE(review): duplicate of the divisions-inference fragment above; the
# first two lines close a task-graph dict comprehension opened outside this
# chunk, and the fragment ends mid-else at the last line.
for i, rg in enumerate(rgs)
}
if not dsk:
# empty dataframe
dsk = {(name, 0): meta}
divisions = (None, None)
return out_type(dsk, name, meta, divisions)
if index_names and infer_divisions is not False:
index_name = meta.index.name
try:
# is https://github.com/dask/fastparquet/pull/371 available in
# current fastparquet installation?
minmax = fastparquet.api.sorted_partitioned_columns(pf, filters)
except TypeError:
# Older fastparquet signature: no `filters` argument.
minmax = fastparquet.api.sorted_partitioned_columns(pf)
if index_name in minmax:
# Index known sorted: divisions are the per-partition minima plus
# the final partition's maximum.
divisions = minmax[index_name]
divisions = divisions["min"] + [divisions["max"][-1]]
else:
if infer_divisions is True:
# Caller explicitly demanded divisions but they cannot be inferred.
raise ValueError(
(
"Unable to infer divisions for index of '{index_name}'"
" because it is not known to be "
"sorted across partitions"
).format(index_name=index_name)
)
# Fall back to unknown divisions: one boundary per row group, plus one.
divisions = (None,) * (len(rgs) + 1)
else:
if infer_divisions is True: