# Assumed imports for these snippets: os, pytest, numpy as np, and the
# Yellowbrick datasets helpers (DatasetsError, find_dataset_path,
# get_data_home, cleanup_dataset, urlopen, CHUNK).

def assert_valid_numpy(data):
    __tracebackhide__ = True
    X, y = data.to_numpy()
    assert isinstance(X, np.ndarray), "X is not a numpy array"
    assert isinstance(y, np.ndarray), "y is not a numpy array"
    assert X.ndim == 2 and y.ndim == 1, "X and y dimensions are incorrect"

    # With pandas patched out, to_data should fall back to numpy defaults
    X, y = data.to_data()
    assert isinstance(X, np.ndarray), "to_data does not return numpy"
    assert isinstance(y, np.ndarray), "to_data does not return numpy"

    # to_pandas should raise when pandas is unavailable
    with pytest.raises(DatasetsError):
        data.to_pandas()
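# Usage sketch in a test (the fixture, loader, and patch target below are
# hypothetical, not from this snippet):
#
#     def test_dataset_numpy(monkeypatch):
#         monkeypatch.setattr("yellowbrick.datasets.base.pd", None)
#         data = load_concrete(return_dataset=True)
#         assert_valid_numpy(data)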
def test_missing_find_dataset_path(tmpdir):
    """
    Test find_dataset_path when the dataset does not exist
    """
    data_home = tmpdir.mkdir("fixtures")

    # When the data directory doesn't exist
    with pytest.raises(DatasetsError):
        find_dataset_path("foo", data_home=str(data_home))

    # When the data directory exists but no file is in the directory
    foo = data_home.mkdir("foo")
    with pytest.raises(DatasetsError):
        find_dataset_path("foo", data_home=str(data_home))

    # When the specified file doesn't exist
    fpath = foo.join("foo.csv")
    fpath.write("1,2,3")
    with pytest.raises(DatasetsError):
        find_dataset_path("foo", data_home=str(data_home), ext=".npz")
def to_pandas(self):
    """
    Returns the dataset as two pandas objects: X and y.

    Returns
    -------
    X : DataFrame with shape (n_instances, n_features)
        A pandas DataFrame containing feature data and named columns.

    y : Series with shape (n_instances,)
        A pandas Series containing target data and an index that matches
        the feature DataFrame index.
    """
    # Ensure the metadata is valid before continuing
    if self.meta is None:
        raise DatasetsError(
            (
                "the downloaded dataset was improperly packaged without meta.json "
                "- please report this bug to the Yellowbrick maintainers!"
            )
        )

    if "features" not in self.meta or "target" not in self.meta:
        raise DatasetsError(
            (
                "the downloaded dataset was improperly packaged without features "
                "or target - please report this bug to the Yellowbrick maintainers!"
            )
        )

    # Load the data frame and return the features and target
    # TODO: Return y as None if there is no self.meta["target"]
    df = self.to_dataframe()
    return df[self.meta["features"]], df[self.meta["target"]]
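# Usage sketch (names are illustrative, not from this snippet): the bundled
# Yellowbrick loaders return a Dataset when return_dataset=True, whose
# to_pandas method is defined above.
#
#     from yellowbrick.datasets import load_concrete
#     dataset = load_concrete(return_dataset=True)
#     X, y = dataset.to_pandas()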
def to_dataframe(self):
    """
    Returns the entire dataset as a single pandas DataFrame.

    Returns
    -------
    df : DataFrame with shape (n_instances, n_columns)
        A pandas DataFrame containing the complete original data table
        including all targets (specified by the meta data) and all
        features (including those that might have been filtered out).
    """
    if pd is None:
        raise DatasetsError(
            "pandas is required to load DataFrame, it can be installed with pip"
        )

    path = find_dataset_path(self.name, ext=".csv.gz", data_home=self.data_home)
    return pd.read_csv(path, compression="gzip")
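# The `pd is None` guard above assumes pandas is imported optionally at
# module level; a minimal sketch of that idiom:
#
#     try:
#         import pandas as pd
#     except ImportError:
#         pd = None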
# Reconstructed opening for context (the original snippet begins
# mid-function); the signature is inferred from the call sites in the
# tests and methods above.
def find_dataset_path(dataset, data_home=None, fname=None, ext=None, raises=True):
    # Resolve the data directory that contains the datasets
    data_home = get_data_home(data_home)

    if fname is None:
        if ext is None:
            path = os.path.join(data_home, dataset)
        else:
            path = os.path.join(data_home, dataset, "{}{}".format(dataset, ext))
    else:
        path = os.path.join(data_home, dataset, fname)

    # Determine if the path exists
    if not os.path.exists(path):
        # Suppress exceptions if required
        if not raises:
            return None

        raise DatasetsError(
            ("could not find dataset at {} - does it need to be downloaded?").format(
                path
            )
        )

    return path
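# Path resolution sketch (hypothetical values): with no fname, the lookup
# targets <data_home>/<dataset>/<dataset><ext>.
#
#     find_dataset_path("concrete", data_home="/tmp/data", ext=".csv.gz")
#     # -> "/tmp/data/concrete/concrete.csv.gz" (raises DatasetsError if missing)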
def to_numpy(self):
    """
    Returns the dataset as two numpy arrays: X and y.

    Returns
    -------
    X : array-like with shape (n_instances, n_features)
        A numpy array describing the instance features.

    y : array-like with shape (n_instances,)
        A numpy array describing the target vector.
    """
    path = find_dataset_path(self.name, ext=".npz", data_home=self.data_home)
    with np.load(path, allow_pickle=False) as npf:
        if "X" not in npf or "y" not in npf:
            raise DatasetsError(
                (
                    "the downloaded dataset was improperly packaged without numpy "
                    "arrays - please report this bug to the Yellowbrick maintainers!"
                )
            )

        # TODO: How to handle the case where y is None?
        return npf["X"], npf["y"]
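# A compatible .npz archive can be produced with numpy itself (a sketch,
# not the official packaging script):
#
#     np.savez_compressed("foo/foo.npz", X=X, y=y)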
# Reconstructed opening for context (the original snippet begins inside the
# docstring); the name and parameters are inferred from the body below.
def download_data(url, data_home=None, replace=False, extract=True):
    """
    Extract the archive file after downloading it
    """
    data_home = get_data_home(data_home)

    # Get the name of the file from the URL
    basename = os.path.basename(url)
    name, _ = os.path.splitext(basename)

    # Get the archive and data directory paths
    archive = os.path.join(data_home, basename)
    datadir = os.path.join(data_home, name)

    # If the archive exists, clean up or raise an overwrite exception
    if os.path.exists(archive):
        if not replace:
            raise DatasetsError((
                "dataset already exists at {}, set replace=True to overwrite"
            ).format(archive))
        cleanup_dataset(name, data_home=data_home)

    # Create the output directory if it does not exist
    if not os.path.exists(datadir):
        os.mkdir(datadir)

    # Fetch the response in a streaming fashion and write it to disk.
    response = urlopen(url)

    with open(archive, 'wb') as f:
        while True:
            chunk = response.read(CHUNK)
            if not chunk:
                break
            f.write(chunk)
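# The snippet is truncated here; after writing the archive to disk, the
# function would extract it into data_home when extract=True. A minimal
# sketch of that step, assuming a zip archive (not the exact original code):
#
#     import zipfile
#     if extract:
#         with zipfile.ZipFile(archive, "r") as zf:
#             zf.extractall(data_home)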