Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
(see `split_result_of_axis_func_pandas`).
len_of_left: The number of values in `partitions` that belong to the
left data set.
kwargs: A dictionary of keyword arguments.
partitions: All partitions that make up the full axis (row or column)
for both data sets.
Returns:
A list of Pandas DataFrames.
"""
lt_frame = pandas.concat(list(partitions[:len_of_left]), axis=axis, copy=False)
rt_frame = pandas.concat(list(partitions[len_of_left:]), axis=axis, copy=False)
result = func(lt_frame, rt_frame, **kwargs)
return [
df.copy() for df in split_result_of_axis_func_pandas(axis, num_splits, result)
]
lengths = list(lengths)
# We set lengths to None so we don't use the old lengths for the resulting partition
# layout. This is done if the number of splits is changing or we are told not to
# keep the old partitioning.
elif num_splits != len(partitions) or not maintain_partitioning:
lengths = None
else:
if axis == 0:
lengths = [len(part) for part in partitions]
if sum(lengths) != len(result):
lengths = None
else:
lengths = [len(part.columns) for part in partitions]
if sum(lengths) != len(result.columns):
lengths = None
return split_result_of_axis_func_pandas(axis, num_splits, result, lengths)
# if num_splits != len(partitions) or isinstance(result, pandas.Series):
# import pdb; pdb.set_trace()
# lengths = None
else:
if axis == 0:
lengths = [len(part) for part in partitions]
if sum(lengths) != len(result):
lengths = None
else:
lengths = [len(part.columns) for part in partitions]
if sum(lengths) != len(result.columns):
lengths = None
return [
df.copy()
for df in split_result_of_axis_func_pandas(axis, num_splits, result, lengths)
]
def _split_result_for_readers(axis, num_splits, df):  # pragma: no cover
    """Split a freshly read DataFrame and always hand back a list.

    The actual splitting is delegated to `split_result_of_axis_func_pandas`;
    this wrapper only normalizes the return value so callers can rely on
    receiving a list.

    Args:
        axis: Which axis to split over.
        num_splits: The number of splits to create.
        df: The DataFrame after it has been read.

    Returns:
        A list of pandas DataFrames.
    """
    pieces = split_result_of_axis_func_pandas(axis, num_splits, df)
    # A non-list result is wrapped so the return type is uniformly a list.
    return pieces if isinstance(pieces, list) else [pieces]
func: The function to perform.
num_splits: The number of splits to return
(see `split_result_of_axis_func_pandas`).
len_of_left: The number of values in `partitions` that belong to the
left data set.
kwargs: A dictionary of keyword arguments.
partitions: All partitions that make up the full axis (row or column)
for both data sets.
Returns:
A list of Pandas DataFrames.
"""
lt_frame = pandas.concat(partitions[:len_of_left], axis=axis, copy=False)
rt_frame = pandas.concat(partitions[len_of_left:], axis=axis, copy=False)
result = func(lt_frame, rt_frame, **kwargs)
return split_result_of_axis_func_pandas(axis, num_splits, result)