How to use the dask.array.from_array function in dask

To help you get started, we’ve selected a few examples of dask.array.from_array, based on popular ways it is used in public projects.

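For orientation before the examples: da.from_array wraps an in-memory, NumPy-like object in a Dask array, splitting it into blocks according to the chunks argument. A minimal sketch (the array and chunk sizes here are arbitrary illustrations):

import numpy as np
import dask.array as da

x = np.arange(12).reshape(3, 4)
dx = da.from_array(x, chunks=(2, 2))  # blocks of at most 2 x 2
print(dx.chunks)                      # ((2, 1), (2, 2))
print(dx.compute())                   # evaluate the lazy graph back to NumPy

chunks can also be a single int applied to every axis or, in recent Dask versions, the string 'auto'. The examples below show popular real-world uses of this call.
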
GitHub: dask/dask-ml/tests/test_pca.py
def test_pca_inverse():
    # Test that the projection of data can be inverted
    rng = np.random.RandomState(0)
    n, p = 50, 3
    X = rng.randn(n, p)  # spherical data
    X[:, 1] *= 0.00001  # make middle component relatively small
    X += [5, 4, 3]  # make a large mean
    dX = da.from_array(X, chunks=(n // 2, p))

    # same check that we can find the original data from the transformed
    # signal (since the data is almost of rank n_components)
    pca = dd.PCA(n_components=2, svd_solver="full").fit(dX)
    Y = pca.transform(dX)
    Y_inverse = pca.inverse_transform(Y)
    assert_almost_equal(X, Y_inverse, decimal=3)

    # same as above with whitening (approximate reconstruction)
    for solver in solver_list:
        pca = dd.PCA(n_components=2, whiten=True, svd_solver=solver)
        pca.fit(dX)
        Y = pca.transform(dX)
        Y_inverse = pca.inverse_transform(Y)
        assert_eq(dX, Y_inverse, atol=1e-3)
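In this test, chunks=(n // 2, p) splits the 50 x 3 matrix into two row blocks while keeping all features of a sample in the same block, the usual layout for estimators that parallelize over samples. A quick, illustrative way to confirm the layout (not part of the original test):

import numpy as np
import dask.array as da

X = np.random.randn(50, 3)
dX = da.from_array(X, chunks=(25, 3))
print(dX.chunks)     # ((25, 25), (3,))
print(dX.numblocks)  # (2, 1)
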
GitHub: mpes-kit/mpes/mpes/fprocessing.py
            dfParts = []
            chunkSize = min(self.CHUNK_SIZE, self.nEvents / self.ncores)
            nPartitions = int(self.nEvents // chunkSize) + 1
            # Determine the column names
            gNames = kwds.pop('groupnames', self.getGroupNames(wexpr='Stream'))
            colNames = self.name2alias(gNames)

            for p in range(nPartitions): # Generate partitioned dataframe

                # Calculate the starting and ending index of every chunk of events
                eventIDStart = int(p * chunkSize)
                eventIDEnd = int(min(eventIDStart + chunkSize, self.nEvents))
                dfParts.append(d.delayed(self._assembleGroups)(gNames, amin=eventIDStart, amax=eventIDEnd, **kwds))

            # Construct eda (event dask array) and edf (event dask dataframe)
            eda = da.from_array(np.concatenate(d.compute(*dfParts), axis=1).T, chunks=self.CHUNK_SIZE)
            self.edf = ddf.from_dask_array(eda, columns=colNames)

            if ret:
                return self.edf

        # Delayed array for loading an HDF5 file of reasonable size (e.g. < 1GB)
        elif form == 'darray':

            gNames = kwds.pop('groupnames', self.getGroupNames(wexpr='Stream'))
            darray = d.delayed(self._assembleGroups)(gNames, amin=None, amax=None, timeStamps=timeStamps, ret='array', **kwds)


            if ret:
                return darray
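The first branch above materializes the delayed partitions into one NumPy array, rewraps it with da.from_array, and hands the result to dask.dataframe. The array-to-dataframe handoff looks like this in isolation (the column names, shape, and chunk size are made up for illustration):

import numpy as np
import dask.array as da
import dask.dataframe as ddf

events = np.random.rand(1000, 3)         # stand-in for the assembled event data
eda = da.from_array(events, chunks=250)  # event dask array
edf = ddf.from_dask_array(eda, columns=['X', 'Y', 't'])  # event dask dataframe
print(edf.npartitions)                   # 4, one partition per chunk
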
GitHub: dask/dask-ml/dask_ml/preprocessing/data.py
def _check_inputs(self, X, accept_sparse_negative=False, copy=False):
        kwargs = {}
        if SK_022:
            kwargs["copy"] = copy

        if isinstance(X, (pd.DataFrame, dd.DataFrame)):
            X = X.values
        if isinstance(X, np.ndarray):
            C = len(X) // min(multiprocessing.cpu_count(), 2)
            X = da.from_array(X, chunks=C)

        rng = check_random_state(self.random_state)
        # TODO: non-float dtypes?
        # TODO: sparse arrays?
        # TODO: mix of sparse, dense?
        sample = rng.uniform(size=(5, X.shape[1])).astype(X.dtype)
        super(QuantileTransformer, self)._check_inputs(
            sample, accept_sparse_negative=accept_sparse_negative, **kwargs
        )
        return X
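The helper above rechunks NumPy input so the rows are spread over a small number of blocks before fitting. The sizing logic on its own (a sketch, with a made-up input shape):

import multiprocessing
import numpy as np
import dask.array as da

X = np.random.rand(10_000, 4)
C = len(X) // min(multiprocessing.cpu_count(), 2)  # rows per block
dX = da.from_array(X, chunks=C)  # a single int chunks every axis
print(dX.chunks[0])              # e.g. (5000, 5000) on a multi-core machine
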
GitHub: dfitzgerald3/d2geo/attributes/NoiseReduction.py
Optimizes chunk size in different orientations to facilitate rapid
            screening of algorithm output
        
        Returns
        -------
        darray : Dask Array
        chunks_init : tuple (len 3), chunk size before ghosting. Used in select cases
        """
    
        # Compute chunk size and convert if not a Dask Array
        if not isinstance(darray, da.core.Array):  
            chunk_size = util.compute_chunk_size(darray.shape, 
                                               darray.dtype.itemsize, 
                                               kernel=kernel,
                                               preview=preview)
            darray = da.from_array(darray, chunks=chunk_size)
            chunks_init = darray.chunks            
                
        else:
            chunks_init = darray.chunks
        
        # Ghost Dask Array if operation specifies a kernel
        if kernel is not None:
            hw = tuple(np.array(kernel) // 2)
            darray = da.ghost.ghost(darray, depth=hw, boundary='reflect')

        return darray, chunks_init
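Note that da.ghost.ghost is the spelling from Dask releases before 0.18; in current Dask the same operation lives at da.overlap.overlap. A sketch of the modern equivalent, with the kernel size assumed for illustration:

import numpy as np
import dask.array as da

darray = da.from_array(np.random.rand(100, 100), chunks=(50, 50))
hw = tuple(np.array((3, 3)) // 2)  # half-widths of an assumed 3 x 3 kernel
ghosted = da.overlap.overlap(darray, depth=hw, boundary='reflect')
print(ghosted.chunks)  # each block grows by the depth on both sides of each axis
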
GitHub: dask/dask-ml/tests/test_partial.py
def test_fit_shuffle_blocks():
    N = 10
    X = da.from_array(1 + np.arange(N).reshape(-1, 1), chunks=1)
    y = da.from_array(np.ones(N), chunks=1)
    classes = [0, 1]

    sgd = SGDClassifier(
        max_iter=5, random_state=0, fit_intercept=False, shuffle=False, tol=1e-3
    )

    sgd1 = fit(clone(sgd), X, y, random_state=0, classes=classes)
    sgd2 = fit(clone(sgd), X, y, random_state=42, classes=classes)
    assert len(sgd1.coef_) == len(sgd2.coef_) == 1
    assert not np.allclose(sgd1.coef_, sgd2.coef_)

    X, y = make_classification(random_state=0, chunks=20)
    sgd_a = fit(clone(sgd), X, y, random_state=0, classes=classes, shuffle_blocks=False)
    sgd_b = fit(
        clone(sgd), X, y, random_state=42, classes=classes, shuffle_blocks=False
    )
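Here chunks=1 puts every sample in its own one-row block, so shuffling blocks is equivalent to shuffling individual samples. Checking the layout (illustrative, not part of the test):

import numpy as np
import dask.array as da

X = da.from_array(1 + np.arange(10).reshape(-1, 1), chunks=1)
print(X.numblocks)  # (10, 1)
print(X.chunks[0])  # (1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
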
GitHub: janpipek/physt/tests/test_dask.py
def huge_2d_array():
    array = np.vstack(1000 * [np.arange(0, 1000)])
    return da.from_array(array, chunks=(500, 500))
GitHub: pytroll/satpy/satpy/readers/maia.py
def read(self, filename):
        self.h5 = h5py.File(filename, 'r')
        missing = -9999.
        self.Lat = da.from_array(self.h5[u'DATA/Latitude'], chunks=CHUNK_SIZE) / 10000.
        self.Lon = da.from_array(self.h5[u'DATA/Longitude'], chunks=CHUNK_SIZE) / 10000.
        self.selected = (self.Lon > missing)
        self.file_content = {}
        for key in self.h5['DATA'].keys():
            self.file_content[key] = da.from_array(self.h5[u'DATA/' + key], chunks=CHUNK_SIZE)
        for key in self.h5[u'HEADER'].keys():
            self.file_content[key] = self.h5[u'HEADER/' + key][:]

        # Cloud Mask on pixel
        mask = 2**0 + 2**1 + 2**2
        lst = self.file_content[u'CloudMask'] & mask
        lst = lst / 2**0
        self.file_content[u"cma"] = lst

        # Cloud Mask confidence
        mask = 2**5 + 2**6
        lst = self.file_content[u'CloudMask'] & mask
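Passing an h5py dataset straight to da.from_array, as this reader does, defers the actual reads until compute time, so only the chunks a computation touches are pulled from disk. The core pattern in isolation (the file name and dataset path are placeholders, and CHUNK_SIZE is an assumed value; satpy derives it from its configuration):

import h5py
import dask.array as da

CHUNK_SIZE = 4096  # assumed value
h5 = h5py.File('some_file.h5', 'r')  # hypothetical file
lat = da.from_array(h5['DATA/Latitude'], chunks=CHUNK_SIZE) / 10000.0
print(lat.sum().compute())  # data is read lazily, chunk by chunk
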
GitHub: xgcm/xgcm/xgcm/mdsxray.py
            vardata = {}
            for k in varnames:
                vardata[k] = []
            for i in iters:
                for f in fnames:
                    try:
                        data = _read_mds(f, i, force_dict=True, endian=endian)
                        # this can screw up if the same variable appears in
                        # multiple diagnostic files
                        for k in data:
                            if k in varnames:
                                mwrap = MemmapArrayWrapper(data[k])
                                # for some reason, da.from_array does not
                                # necessarily give a unique name
                                # need to specify array name
                                myda = da.from_array(mwrap, mwrap.shape,
                                        name='%s_%010d' % (k, i))
                                vardata[k].append(myda)
                    except IOError:
                        # couldn't find the variable, remove it from the list
                        #print 'Removing %s from list (iter %g)' % (k, i)
                        varnames.remove(k)

            # final loop to create Variable objects
            for k in varnames:
                try:
                    dims, desc, units = _state_variables[k]
                except KeyError:
                    try:
                        dims, desc, units = _ptracers[k]
                    except KeyError:
                        dims, desc, units = diag_meta[k]
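The explicit name= argument works around the issue the inline comment describes: by default da.from_array derives the array's graph key by hashing the input, and wrapper objects that hash poorly can end up with colliding keys. Supplying a unique name, or passing name=False to get a random one, avoids the collision. A sketch with made-up values:

import numpy as np
import dask.array as da

arr = np.arange(6).reshape(2, 3)
a = da.from_array(arr, chunks=arr.shape, name='T_0000000010')  # explicit unique key
b = da.from_array(arr, chunks=arr.shape, name=False)           # random key, no hashing
print(a.name, b.name)
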
GitHub: dask/dask-image/dask_image/ndmeasure/_compat.py
"""
    Creates a Dask array based on ``a``.

    Parameters
    ----------
    a : array-like
        Object to convert to a Dask Array.

    Returns
    -------
    a : Dask Array
    """

    if not isinstance(a, dask.array.Array):
        a = numpy.asarray(a)
        a = dask.array.from_array(a, a.shape)

    return a
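Recent Dask ships this exact convenience as dask.array.asarray, which returns Dask arrays unchanged and wraps anything else. The helper above is roughly equivalent to:

import dask.array as da

a = da.asarray([[1, 2], [3, 4]])  # NumPy-like input becomes a dask array
print(a.chunks)                   # a single chunk for such a small input
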
GitHub: pytroll/satpy/satpy/readers/aapp_l1b.py
def create_xarray(arr):
    res = da.from_array(arr, chunks=(CHUNK_SIZE, CHUNK_SIZE))
    res = xr.DataArray(res, dims=['y', 'x'])
    return res
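
For completeness, a self-contained version of this wrapper, with CHUNK_SIZE filled in as an assumed constant (in satpy it comes from the library's configuration):

import numpy as np
import xarray as xr
import dask.array as da

CHUNK_SIZE = 1024  # assumed value

def create_xarray(arr):
    res = da.from_array(arr, chunks=(CHUNK_SIZE, CHUNK_SIZE))
    return xr.DataArray(res, dims=['y', 'x'])

data = create_xarray(np.zeros((2048, 2048)))
print(data.chunks)  # ((1024, 1024), (1024, 1024))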