How to use the modin.data_management.factories.BaseFactory class in modin

To help you get started, we’ve selected a few modin examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github modin-project / modin / modin / pandas / io.py View on Github external
def read_feather(path, columns=None, use_threads=True):
    """Read a Feather file and wrap the result in a ``DataFrame``.

    Args:
        path: Forwarded unchanged as ``path``.
        columns: Forwarded unchanged as ``columns`` (default ``None``).
        use_threads: Forwarded unchanged as ``use_threads`` (default ``True``).

    Returns:
        A ``DataFrame`` built from the query compiler returned by
        ``BaseFactory.read_feather``.
    """
    # Pass the arguments by name instead of unpacking the current frame's
    # locals: ``inspect.currentframe()`` may return None on interpreters
    # without frame support, and on Python 3.13+ (PEP 667) ``f_locals`` is a
    # live view, so unpacking it can also forward temporaries bound later.
    return DataFrame(
        query_compiler=BaseFactory.read_feather(
            path=path, columns=columns, use_threads=use_threads
        )
    )
github modin-project / modin / modin / data_management / factories.py View on Github external
class ExperimentalPandasOnRayFactory(ExperimentalBaseFactory, PandasOnRayFactory):
    """Factory whose ``io_cls`` is ``ExperimentalPandasOnRayIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.experimental.engines.pandas_on_ray.io_exp import (
        ExperimentalPandasOnRayIO,
    )

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = ExperimentalPandasOnRayIO


class ExperimentalPandasOnPythonFactory(ExperimentalBaseFactory, PandasOnPythonFactory):
    """Combine ``ExperimentalBaseFactory`` with ``PandasOnPythonFactory``.

    Defines nothing of its own; everything is inherited from the two bases.
    """


class ExperimentalPyarrowOnRayFactory(BaseFactory):  # pragma: no cover
    """Factory whose ``io_cls`` is ``PyarrowOnRayIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.experimental.engines.pyarrow_on_ray.io import PyarrowOnRayIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PyarrowOnRayIO
github modin-project / modin / modin / data_management / factories.py View on Github external
def to_pickle(cls, *args, **kwargs):
        return cls._determine_engine()._to_pickle(*args, **kwargs)

    @classmethod
    def _to_pickle(cls, *args, **kwargs):
        """Forward every argument to ``cls.io_cls.to_pickle`` and return its result."""
        pickle_writer = cls.io_cls.to_pickle
        return pickle_writer(*args, **kwargs)


class PandasOnRayFactory(BaseFactory):
    """Factory whose ``io_cls`` is ``PandasOnRayIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.engines.ray.pandas_on_ray.io import PandasOnRayIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PandasOnRayIO


class PandasOnPythonFactory(BaseFactory):
    """Factory whose ``io_cls`` is ``PandasOnPythonIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.engines.python.pandas_on_python.io import PandasOnPythonIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PandasOnPythonIO


class PandasOnDaskFactory(BaseFactory):
    """Factory whose ``io_cls`` is ``PandasOnDaskIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.engines.dask.pandas_on_dask_futures.io import PandasOnDaskIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PandasOnDaskIO


class PyarrowOnRayFactory(BaseFactory):

    if partition_format == "Pyarrow" and not os.environ.get(
github modin-project / modin / modin / pandas / io.py View on Github external
def _read(**kwargs):
    """Read a CSV through ``BaseFactory.read_csv`` and wrap the result.

    Args:
        kwargs: Keyword arguments forwarded unchanged to
            ``BaseFactory.read_csv``.

    Returns:
        A ``DataFrame`` built from the returned query compiler. When
        ``read_csv`` returns a ``pandas.io.parsers.TextFileReader`` instead,
        that same reader is returned with its ``read`` attribute replaced by
        a wrapper that produces a ``DataFrame``.
    """
    result = BaseFactory.read_csv(**kwargs)
    if not isinstance(result, pandas.io.parsers.TextFileReader):
        return DataFrame(query_compiler=result)

    # `read_csv` returned a TextFileReader for iterating through the file.
    # Keep the reader, but make each `read` call wrap its output.
    original_read = result.read
    result.read = lambda *read_args, **read_kwargs: DataFrame(
        query_compiler=original_read(*read_args, **read_kwargs)
    )
    return result
github modin-project / modin / modin / pandas / io.py View on Github external
def read_parquet(path, engine="auto", columns=None, **kwargs):
    """Load a parquet object from the file path, returning a DataFrame.

    Args:
        path: The filepath of the parquet file.
              We only support local files for now.
        engine: This argument doesn't do anything for now.
        kwargs: Pass into parquet's read_pandas function.
    """
    return DataFrame(
        query_compiler=BaseFactory.read_parquet(
            path=path, columns=columns, engine=engine, **kwargs
        )
github modin-project / modin / modin / data_management / factories.py View on Github external
class PandasOnRayFactory(BaseFactory):
    """Factory whose ``io_cls`` is ``PandasOnRayIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.engines.ray.pandas_on_ray.io import PandasOnRayIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PandasOnRayIO


class PandasOnPythonFactory(BaseFactory):
    """Factory whose ``io_cls`` is ``PandasOnPythonIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.engines.python.pandas_on_python.io import PandasOnPythonIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PandasOnPythonIO


class PandasOnDaskFactory(BaseFactory):
    """Factory whose ``io_cls`` is ``PandasOnDaskIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.engines.dask.pandas_on_dask_futures.io import PandasOnDaskIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PandasOnDaskIO


class PyarrowOnRayFactory(BaseFactory):
    """Factory that cannot be defined for Pyarrow outside the experimental API.

    Raises:
        ImportError: While the class body executes, if ``partition_format``
            equals ``"Pyarrow"`` and the ``MODIN_EXPERIMENTAL`` environment
            variable is unset or empty.
    """

    if partition_format == "Pyarrow":
        # Any non-empty MODIN_EXPERIMENTAL value counts as opting in.
        if not os.environ.get("MODIN_EXPERIMENTAL", False):
            raise ImportError(
                "Pyarrow on Ray is only accessible through the experimental API.\nRun "
                "`import modin.experimental.pandas as pd` to use Pyarrow on Ray."
            )
github modin-project / modin / modin / pandas / utils.py View on Github external
def from_non_pandas(df, index, columns, dtype):
    """Build a ``DataFrame`` from a non-pandas object when possible.

    Args:
        df: Forwarded unchanged to ``BaseFactory.from_non_pandas``.
        index: Forwarded unchanged to ``BaseFactory.from_non_pandas``.
        columns: Forwarded unchanged to ``BaseFactory.from_non_pandas``.
        dtype: Forwarded unchanged to ``BaseFactory.from_non_pandas``.

    Returns:
        A ``DataFrame`` wrapping the resulting query compiler, or ``None``
        when ``BaseFactory.from_non_pandas`` returns ``None``.
    """
    query_compiler = BaseFactory.from_non_pandas(df, index, columns, dtype)
    if query_compiler is None:
        return None

    # Imported here rather than at module level, as in the original code.
    from .dataframe import DataFrame

    return DataFrame(query_compiler=query_compiler)
github modin-project / modin / modin / data_management / factories.py View on Github external
return cls._determine_engine()._to_sql(*args, **kwargs)

    @classmethod
    def _to_sql(cls, *args, **kwargs):
        """Forward every argument to ``cls.io_cls.to_sql`` and return its result."""
        sql_writer = cls.io_cls.to_sql
        return sql_writer(*args, **kwargs)

    @classmethod
    def to_pickle(cls, *args, **kwargs):
        """Call ``_to_pickle`` on the factory chosen by ``cls._determine_engine()``.

        All positional and keyword arguments are passed through unchanged,
        and the selected factory's result is returned.
        """
        engine_factory = cls._determine_engine()
        return engine_factory._to_pickle(*args, **kwargs)

    @classmethod
    def _to_pickle(cls, *args, **kwargs):
        """Forward every argument to ``cls.io_cls.to_pickle`` and return its result."""
        pickle_writer = cls.io_cls.to_pickle
        return pickle_writer(*args, **kwargs)


class PandasOnRayFactory(BaseFactory):
    """Factory whose ``io_cls`` is ``PandasOnRayIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.engines.ray.pandas_on_ray.io import PandasOnRayIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PandasOnRayIO


class PandasOnPythonFactory(BaseFactory):
    """Factory whose ``io_cls`` is ``PandasOnPythonIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.engines.python.pandas_on_python.io import PandasOnPythonIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PandasOnPythonIO


class PandasOnDaskFactory(BaseFactory):

    from modin.engines.dask.pandas_on_dask_futures.io import PandasOnDaskIO
github modin-project / modin / modin / pandas / utils.py View on Github external
def from_pandas(df):
    """Wrap a pandas DataFrame in this package's ``DataFrame``.

    Args:
        df (pandas.DataFrame): The pandas DataFrame to convert.

    Returns:
        A new ``DataFrame`` built from the query compiler that
        ``BaseFactory.from_pandas`` returns for ``df``.
    """
    # Imported here rather than at module level, as in the original code.
    from .dataframe import DataFrame

    query_compiler = BaseFactory.from_pandas(df)
    return DataFrame(query_compiler=query_compiler)
github modin-project / modin / modin / data_management / factories.py View on Github external
class PandasOnPythonFactory(BaseFactory):
    """Factory whose ``io_cls`` is ``PandasOnPythonIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.engines.python.pandas_on_python.io import PandasOnPythonIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PandasOnPythonIO


class PandasOnDaskFactory(BaseFactory):
    """Factory whose ``io_cls`` is ``PandasOnDaskIO``."""

    # Imported inside the class body: the import runs when this class
    # statement executes, and the imported name is also bound as a class
    # attribute.
    from modin.engines.dask.pandas_on_dask_futures.io import PandasOnDaskIO

    # I/O class exposed to the factory methods as ``cls.io_cls``.
    io_cls = PandasOnDaskIO


class PyarrowOnRayFactory(BaseFactory):
    """Factory that cannot be defined for Pyarrow outside the experimental API.

    Raises:
        ImportError: While the class body executes, if ``partition_format``
            equals ``"Pyarrow"`` and the ``MODIN_EXPERIMENTAL`` environment
            variable is unset or empty.
    """

    if partition_format == "Pyarrow":
        # Any non-empty MODIN_EXPERIMENTAL value counts as opting in.
        if not os.environ.get("MODIN_EXPERIMENTAL", False):
            raise ImportError(
                "Pyarrow on Ray is only accessible through the experimental API.\nRun "
                "`import modin.experimental.pandas as pd` to use Pyarrow on Ray."
            )


class ExperimentalBaseFactory(BaseFactory):
    @classmethod
    def _determine_engine(cls):
        factory_name = "Experimental{}On{}Factory".format(
            partition_format, execution_engine
        )