Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
raise ValueError(
"Item wrong length {} instead of {}.".format(
len(key), len(self.index)
)
)
key = check_bool_indexer(self.index, key)
# We convert to a RangeIndex because getitem_row_array is expecting a list
# of indices, and RangeIndex will give us the exact indices of each boolean
# requested.
key = pandas.RangeIndex(len(self.index))[key]
if len(key):
return DataFrame(
query_compiler=self._query_compiler.getitem_row_array(key)
)
else:
return DataFrame(columns=self.columns)
else:
if any(k not in self.columns for k in key):
raise KeyError(
"{} not index".format(
str([k for k in key if k not in self.columns]).replace(",", "")
)
)
return DataFrame(
query_compiler=self._query_compiler.getitem_column_array(key)
)
def read_parquet(path, engine="auto", columns=None, **kwargs):
    """Load a parquet object from the file path, returning a DataFrame.

    Args:
        path: The filepath of the parquet file.
            We only support local files for now.
        engine: This argument doesn't do anything for now.
        columns: If not None, only these columns will be read from the file.
        kwargs: Pass into parquet's read_pandas function.

    Returns:
        A new DataFrame built from the file's contents.
    """
    # NOTE(review): the original was missing the closing parenthesis of the
    # DataFrame(...) call, which is a syntax error; restored here.
    return DataFrame(
        query_compiler=BaseFactory.read_parquet(
            path=path, columns=columns, engine=engine, **kwargs
        )
    )
if self._is_multi_by:
return self._default_to_pandas(map_func, **kwargs)
if not isinstance(self._by, type(self._query_compiler)):
return self._apply_agg_function(map_func, drop=drop, **kwargs)
# For aggregations, pandas behavior does this for the result.
# For other operations it does not, so we wait until there is an aggregation to
# actually perform this operation.
if self._idx_name is not None and drop:
groupby_qc = self._query_compiler.drop(columns=[self._idx_name])
else:
groupby_qc = self._query_compiler
from .dataframe import DataFrame
return DataFrame(
query_compiler=groupby_qc.groupby_reduce(
self._by,
self._axis,
self._kwargs,
map_func,
kwargs,
reduce_func=reduce_func,
reduce_args=kwargs,
numeric_only=numeric_only,
)
)
)
)
elif all(isinstance(o, Series) for o in to_append):
self.name = None
for i in range(len(to_append)):
to_append[i].name = None
to_append[i] = to_append[i]._query_compiler
else:
# Matching pandas behavior of naming the Series columns 0
self.name = 0
for i in range(len(to_append)):
if isinstance(to_append[i], Series):
to_append[i].name = 0
to_append[i] = DataFrame(to_append[i])
return DataFrame(self.copy()).append(
to_append,
ignore_index=ignore_index,
verify_integrity=verify_integrity,
)
elif isinstance(to_append, Series):
self.name = None
to_append.name = None
to_append = [to_append._query_compiler]
elif isinstance(to_append, DataFrame):
self.name = 0
return DataFrame(self.copy()).append(
to_append, ignore_index=ignore_index, verify_integrity=verify_integrity
)
else:
raise TypeError(bad_type_msg.format(type(to_append)))
# If ignore_index is False, by definition the Index will be correct.
return (
(
k,
DataFrame(
query_compiler=self._query_compiler.getitem_row_array(
self._index.get_indexer_for(self._index_grouped[k].unique())
)
),
)
for k in (sorted(group_ids) if self._sort else group_ids)
)
else:
return (
(
k,
DataFrame(
query_compiler=self._query_compiler.getitem_column_array(
self._index_grouped[k].unique()
)
),
)
for k in (sorted(group_ids) if self._sort else group_ids)
)
def read_clipboard(sep=r"\s+", **kwargs):  # pragma: no cover
    """Read text from the system clipboard and build a DataFrame from it."""
    # Fold the named separator argument and the caller's extra keyword
    # arguments into a single dict so the backend receives the full call
    # signature in one **-expansion.
    params = {"sep": sep}
    params.update(kwargs)
    return DataFrame(query_compiler=BaseFactory.read_clipboard(**params))
def add_suffix(self, suffix):
    """Append ``suffix`` to each of the column names.

    Returns:
        A new DataFrame whose column labels carry the given suffix.
    """
    suffixed = self._query_compiler.add_suffix(suffix)
    return DataFrame(query_compiler=suffixed)
def transpose(self, *args, **kwargs):
    """Swap the rows and columns of this DataFrame.

    Returns:
        A new DataFrame that is the transpose of this one.
    """
    flipped = self._query_compiler.transpose(*args, **kwargs)
    return DataFrame(query_compiler=flipped)
PendingDeprecationWarning,
stacklevel=3,
)
elif len(key) != len(self.index):
raise ValueError(
"Item wrong length {} instead of {}.".format(
len(key), len(self.index)
)
)
key = check_bool_indexer(self.index, key)
# We convert to a RangeIndex because getitem_row_array is expecting a list
# of indices, and RangeIndex will give us the exact indices of each boolean
# requested.
key = pandas.RangeIndex(len(self.index))[key]
if len(key):
return DataFrame(
query_compiler=self._query_compiler.getitem_row_array(key)
)
else:
return DataFrame(columns=self.columns)
else:
if any(k not in self.columns for k in key):
raise KeyError(
"{} not index".format(
str([k for k in key if k not in self.columns]).replace(",", "")
)
)
return DataFrame(
query_compiler=self._query_compiler.getitem_column_array(key)
)
value = value.iloc[:, 0]
if len(self.index) == 0:
if isinstance(value, Series):
# TODO: Remove broadcast of Series
value = value._to_pandas()
try:
value = pandas.Series(value)
except (TypeError, ValueError, IndexError):
raise ValueError(
"Cannot insert into a DataFrame with no defined index "
"and a value that cannot be converted to a "
"Series"
)
new_index = value.index.copy()
new_columns = self.columns.insert(loc, column)
new_query_compiler = DataFrame(
value, index=new_index, columns=new_columns
)._query_compiler
elif len(self.columns) == 0 and loc == 0:
new_query_compiler = DataFrame(
data=value, columns=[column], index=self.index
)._query_compiler
else:
if not is_list_like(value):
value = np.full(len(self.index), value)
if not isinstance(value, pandas.Series) and len(value) != len(self.index):
raise ValueError("Length of values does not match length of index")
if not allow_duplicates and column in self.columns:
raise ValueError("cannot insert {0}, already exists".format(column))
if loc > len(self.columns):
raise IndexError(
"index {0} is out of bounds for axis 0 with size {1}".format(