Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"quotechar": quotechar,
"quoting": quoting,
"escapechar": escapechar,
"comment": comment,
"encoding": encoding,
"dialect": dialect,
"error_bad_lines": error_bad_lines,
"warn_bad_lines": warn_bad_lines,
"skipfooter": skipfooter,
"doublequote": doublequote,
"delim_whitespace": delim_whitespace,
"low_memory": low_memory,
"memory_map": memory_map,
"float_precision": float_precision,
}
ErrorMessage.default_to_pandas("`read_csv`")
return cls._read(**kwargs)
Note: This function will replace all of the arguments passed to
methods of HDFStore with the pandas equivalent. It will convert
Modin DataFrame to pandas DataFrame, etc. Currently, pytables
does not accept Modin DataFrame objects, so we must convert to
pandas.
Returns:
A Modin DataFrame in place of a pandas DataFrame, or the same
return type as pandas.HDFStore.
"""
from .utils import to_pandas
# We don't want to constantly be giving this error message for
# internal methods.
if item[0] != "_":
ErrorMessage.default_to_pandas("`{}`".format(item))
args = [
to_pandas(arg) if isinstance(arg, DataFrame) else arg
for arg in args
]
kwargs = {
k: to_pandas(v) if isinstance(v, DataFrame) else v
for k, v in kwargs.items()
}
obj = super(HDFStore, self).__getattribute__(item)(*args, **kwargs)
if isinstance(obj, pandas.DataFrame):
return DataFrame(obj)
return obj
def from_records(
    cls,
    data,
    index=None,
    exclude=None,
    columns=None,
    coerce_float=False,
    nrows=None,
):  # pragma: no cover
    """Build a frame from record-like data by delegating to pandas.

    Emits the "defaulting to pandas" notice, lets
    ``pandas.DataFrame.from_records`` do the construction, and hands the
    resulting pandas frame to ``from_pandas``.

    Args:
        cls: The class this is invoked on (unused here; presumably this is
            registered as a classmethod -- the decorator is not visible).
        data: Forwarded unchanged to ``pandas.DataFrame.from_records``.
        index: Forwarded unchanged as ``index``.
        exclude: Forwarded unchanged as ``exclude``.
        columns: Forwarded unchanged as ``columns``.
        coerce_float: Forwarded unchanged as ``coerce_float``.
        nrows: Forwarded unchanged as ``nrows``.

    Returns:
        Whatever ``from_pandas`` returns for the constructed pandas frame.
    """
    ErrorMessage.default_to_pandas("`from_records`")
    # Both the inner pandas call and the outer ``from_pandas`` call must be
    # closed; the outer parenthesis was previously missing.
    return from_pandas(
        pandas.DataFrame.from_records(
            data,
            index=index,
            exclude=exclude,
            columns=columns,
            coerce_float=coerce_float,
            nrows=nrows,
        )
    )
def from_items(cls, items, columns=None, orient="columns"):  # pragma: no cover
    """Build a frame from ``items`` by delegating to pandas.

    Emits the "defaulting to pandas" notice, calls
    ``pandas.DataFrame.from_items`` with the given arguments, and returns
    the result of passing that pandas frame to ``from_pandas``.

    Args:
        cls: The class this is invoked on (unused in the body).
        items: Forwarded unchanged to ``pandas.DataFrame.from_items``.
        columns: Forwarded unchanged as ``columns``.
        orient: Forwarded unchanged as ``orient``.

    Returns:
        Whatever ``from_pandas`` returns for the constructed pandas frame.
    """
    ErrorMessage.default_to_pandas("`from_items`")
    pandas_frame = pandas.DataFrame.from_items(
        items, columns=columns, orient=orient
    )
    return from_pandas(pandas_frame)
def ngroup(self, ascending=True):
    """Number each group, mirroring ``pandas`` ``GroupBy.ngroup``.

    Two paths are taken depending on the grouping key:

    * If ``self._is_multi_by`` is truthy or ``self._by`` is a
      ``pandas.Grouper``, the work is handed to
      ``self._df._default_to_pandas`` after emitting the
      "defaulting to pandas" notice.
    * Otherwise an empty ``pandas.Series`` carrying only the relevant axis
      labels (``self._index`` when ``self._axis`` is falsy, else
      ``self._columns``) is grouped by ``self._by`` with ``self._kwargs``.

    Args:
        ascending: Passed to pandas ``ngroup`` on both paths.

    Returns:
        On the fallback path, whatever ``self._df._default_to_pandas``
        returns; otherwise a ``pandas.Series`` of group numbers.
    """
    if self._is_multi_by or isinstance(self._by, pandas.Grouper):
        ErrorMessage.default_to_pandas(
            "Groupby with multiple columns or Grouper object"
        )
        # Forward ``ascending`` so this path honours the caller's choice
        # just like the native path below does.
        return self._df._default_to_pandas(
            lambda df: df.groupby(by=self._by).ngroup(ascending)
        )
    # Only the axis labels matter for numbering groups, so an empty Series
    # over those labels is enough to drive the pandas groupby.
    index = self._index if not self._axis else self._columns
    return (
        pandas.Series(index=index)
        .groupby(by=self._by, **self._kwargs)
        .ngroup(ascending)
    )
na_values=None,
keep_default_na=True,
verbose=False,
parse_dates=False,
date_parser=None,
thousands=None,
comment=None,
skip_footer=0,
skipfooter=0,
convert_float=True,
mangle_dupe_cols=True,
**kwds
):
if skip_footer != 0:
skipfooter = skip_footer
ErrorMessage.default_to_pandas("`read_excel`")
intermediate = pandas.read_excel(
io,
sheet_name=sheet_name,
header=header,
names=names,
index_col=index_col,
usecols=usecols,
squeeze=squeeze,
dtype=dtype,
engine=engine,
converters=converters,
true_values=true_values,
false_values=false_values,
skiprows=skiprows,
nrows=nrows,
na_values=na_values,
@property
def _index_grouped(self):
    """Return the mapping of group key to labels, computing it at most once.

    The result is stored in ``self._index_grouped_cache``; when that
    attribute is not ``None`` it is returned as-is.

    When ``self._is_multi_by`` is truthy or ``self._by`` is a
    ``pandas.Grouper``, the frame is materialised through
    ``self._df._query_compiler.to_pandas()`` and grouped by pandas, and the
    mapping is built from each group's ``.index``.  Otherwise the axis
    labels (``self._index`` for axis 0, ``self._columns`` otherwise) are
    grouped directly with their own ``groupby`` method.
    """
    if self._index_grouped_cache is not None:
        return self._index_grouped_cache

    needs_pandas = self._is_multi_by or isinstance(self._by, pandas.Grouper)
    if needs_pandas:
        # Collecting to pandas and grouping there means the pandas
        # implementation is used, so let the user know about it.
        ErrorMessage.catch_bugs_and_request_email(self._axis == 1)
        ErrorMessage.default_to_pandas(
            "Groupby with multiple columns or Grouper object"
        )
        pandas_groups = self._df._query_compiler.to_pandas().groupby(
            by=self._by
        )
        grouped = {key: frame.index for key, frame in pandas_groups}
    else:
        labels = self._index if self._axis == 0 else self._columns
        grouped = labels.groupby(self._by)

    self._index_grouped_cache = grouped
    return self._index_grouped_cache
Args:
path_or_buf: string, buffer or path object
Path to the file to open, or an open :class:`pandas.HDFStore` object.
kwargs: Pass into pandas.read_hdf function.
Returns:
DataFrame constructed from the h5 file.
"""
if cls.read_hdf_remote_task is None:
return super(RayIO, cls).read_hdf(path_or_buf, **kwargs)
format = cls._validate_hdf_format(path_or_buf=path_or_buf)
if format is None:
ErrorMessage.default_to_pandas(
"File format seems to be `fixed`. For better distribution consider saving the file in `table` format. "
"df.to_hdf(format=`table`)."
)
return cls.from_pandas(pandas.read_hdf(path_or_buf=path_or_buf, **kwargs))
columns = kwargs.get("columns", None)
if not columns:
start = kwargs.pop("start", None)
stop = kwargs.pop("stop", None)
empty_pd_df = pandas.read_hdf(path_or_buf, start=0, stop=0, **kwargs)
kwargs["start"] = start
kwargs["stop"] = stop
columns = empty_pd_df.columns
from modin.pandas import DEFAULT_NPARTITIONS
def read_clipboard(cls, sep=r"\s+", **kwargs):  # pragma: no cover
    """Read clipboard contents via pandas and convert with ``cls.from_pandas``.

    Emits the "defaulting to pandas" notice before reading.

    Args:
        sep: Forwarded to ``pandas.read_clipboard`` as ``sep``.
        **kwargs: Forwarded unchanged to ``pandas.read_clipboard``.

    Returns:
        Whatever ``cls.from_pandas`` returns for the frame pandas read.
    """
    ErrorMessage.default_to_pandas("`read_clipboard`")
    clipboard_frame = pandas.read_clipboard(sep=sep, **kwargs)
    return cls.from_pandas(clipboard_frame)