How to use the dask.compute function in dask

To help you get started, we’ve selected a few dask.compute examples, based on popular ways it is used in public projects.

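Before looking at the project excerpts below, here is a minimal, self-contained sketch of the pattern most of them share: wrap ordinary functions with dask.delayed, collect the lazy results in a list, and trigger execution with a single dask.compute call. The function and variable names here are illustrative only, not taken from any of the projects below.

import dask

@dask.delayed
def square(x):
    # Wrapped with dask.delayed, calling this builds a task-graph node
    # instead of executing immediately.
    return x * x

# Build a list of lazy results; nothing has run yet.
lazy_results = [square(i) for i in range(4)]

# dask.compute unpacks the list and executes the whole graph at once,
# returning a tuple of concrete values: (0, 1, 4, 9).
results = dask.compute(*lazy_results)
print(results)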

github JDASoftwareGroup / kartothek / tests / io / dask / delayed / test_delete.py
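In this kartothek test helper, delete_dataset__delayed builds a collection of delayed delete tasks; the test round-trips them through pickle to check that the task graph is serializable, then executes them with dask.compute.
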
def _delete(*args, **kwargs):
    # build the delayed delete tasks without executing them
    tasks = delete_dataset__delayed(*args, **kwargs)
    # round-trip the task graph through pickle to verify it is serializable
    s = pickle.dumps(tasks, pickle.HIGHEST_PROTOCOL)
    tasks = pickle.loads(s)
    # execute the (deserialized) tasks
    dask.compute(tasks)
github pySTEPS / pysteps / pysteps / nowcasts / steps.py
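In this pysteps nowcasting excerpt, each ensemble member is processed by a nested worker function. When dask is available and there is more than one member, the workers are wrapped in dask.delayed and a single dask.compute call runs them in parallel with a bounded number of workers; otherwise the code falls back to plain eager calls.
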
            # (inside the nested worker(j) function) extrapolate ensemble member j
            # for time step t
            extrap_kwargs.update({"D_prev": D[j], "return_displacement": True})
            R_f_, D_ = extrapolator_method(R_c_, V_, 1, **extrap_kwargs)
            D[j] = D_
            R_f_ = R_f_[0]

            return R_f_

        res = []
        for j in range(n_ens_members):
            if not DASK_IMPORTED or n_ens_members == 1:
                # run eagerly when dask is unavailable or there is only one member
                res.append(worker(j))
            else:
                # otherwise build one lazy task per ensemble member
                res.append(dask.delayed(worker)(j))

        # a single dask.compute call evaluates all ensemble members in parallel,
        # using at most num_ensemble_workers workers
        R_f_ = dask.compute(*res, num_workers=num_ensemble_workers) \
            if DASK_IMPORTED and n_ens_members > 1 else res
        res = None

        if measure_time:
            print("%.2f seconds." % (time.time() - starttime))
        else:
            print("done.")

        if callback is not None:
            callback(np.stack(R_f_))
            R_f_ = None

        if return_output:
            for j in range(n_ens_members):
                R_f[j].append(R_f_[j])
github HDI-Project / SDV / sdv / benchmark.py
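In this SDV benchmark excerpt, scoring can optionally be distributed: when distributed is set, score_dataset is wrapped with dask.delayed so the loop only builds lazy tasks, and dask.compute(*scores) then evaluates them all at once.
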
            datasets = get_available_demos().name
        else:
            datasets = os.listdir(datasets_path)

    if distributed:
        import dask

        # Wrap the scoring function so that each call builds a lazy task
        # instead of running immediately.
        global score_dataset
        score_dataset = dask.delayed(score_dataset)

    scores = list()
    for dataset in datasets:
        scores.append(score_dataset(dataset, datasets_path, timeout))

    if distributed:
        # Evaluate all scoring tasks at once; dask.compute returns a tuple.
        scores = dask.compute(*scores)

    return pd.DataFrame(scores)
github PyPSA / atlite / atlite / data.py
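In this atlite excerpt, the per-feature datasets returned by the data modules may be dask Delayed objects; if so, a single dask.compute(*datasets) materializes them before they are merged into one xarray Dataset.
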
    """
    Load the feature data for a given module.

    This gets the data for a set of features from a module. All modules in
    `atlite.datasets` are allowed.
    """
    parameters = cutout.data.attrs
    lock = SerializableLock()
    datasets = []
    get_data = datamodules[module].get_data

    for feature in features:
        feature_data = get_data(cutout, feature, tmpdir=tmpdir, lock=lock, **parameters)
        datasets.append(feature_data)

    # The data modules may return either concrete datasets or dask Delayed
    # objects; in the latter case, materialize them all with one compute call.
    if len(datasets) >= 1 and isinstance(datasets[0], Delayed):
        datasets = dask.compute(*datasets)

    ds = xr.merge(datasets, compat='equals')
    for v in ds:
        ds[v].attrs['module'] = module
    return ds
github bluesky / databroker / databroker / intake_xarray_core / xarray_container.py
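In this databroker excerpt, only the Zarr metadata of an xarray Dataset is wanted: to_zarr(..., compute=False) returns a delayed write, the data-writing tasks in its graph are replaced with no-ops, and dask.compute then runs the remaining graph on the threaded scheduler.
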
    Returns
    -------
    dictionary with .z* keys for the various elements of the original dataset.
    """
    import dask
    s = ZarrSerialiser()
    try:
        attrs = ds.attrs.copy()
        ds.attrs.pop('_ARRAY_DIMENSIONS', None)  # zarr implementation detail
        # compute=False makes to_zarr return a Delayed object that holds the
        # full write graph instead of writing immediately
        x = ds.to_zarr(s, compute=False)
        x.dask = dict(x.dask)
        for k, v in x.dask.items():
            # replace the data writing funcs with no-op, so as not to waste
            # time on serialization, when all we want is metadata
            if isinstance(k, tuple) and k[0].startswith('store-'):
                x.dask[k] = (noop, ) + x.dask[k][1:]
        # execute the modified graph; only the metadata ends up in the serialiser
        dask.compute(x, scheduler='threads')
    finally:
        ds.attrs = attrs
    return s
github dask / dask / dask / dataframe / io / json.py
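In dask's own JSON writer, one delayed write_json_partition task is created per output file, and dask.compute(parts) executes the writes immediately when compute=True; otherwise the lazy tasks are returned to the caller.
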
    outfiles = open_files(
        url_path,
        "wt",
        encoding=encoding,
        errors=errors,
        name_function=kwargs.pop("name_function", None),
        num=df.npartitions,
        compression=compression,
        **(storage_options or {})
    )
    # one delayed write task per dataframe partition / output file
    parts = [
        dask.delayed(write_json_partition)(d, outfile, kwargs)
        for outfile, d in zip(outfiles, df.to_delayed())
    ]
    if compute:
        # execute the writes now; otherwise the lazy tasks are returned below
        dask.compute(parts)
        return [f.path for f in outfiles]
    else:
        return parts
github jlevy44 / PathFlowAI / pathflowai / utils.py
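In this PathFlowAI preprocessing excerpt, per-patch area statistics are accumulated as lazy results (dask array histograms or dask.delayed calls) and then evaluated together with a single dask.compute(*area_info) call.
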
			patch_info=patch_info.loc[valid_patches]
			if not basic_preprocess:
				area_info=[]
				if segmentation:
					patch_info.loc[:,'annotation']='segment'
					for xs,ys in patch_info[['x','y']].values.tolist():
						xf=xs+patch_size
						yf=ys+patch_size
						#print(xs,ys)
						area_info.append(da.histogram(segmentation_mask[xs:xf,ys:yf],range=[0,target_class-1],bins=target_class)[0])
						#area_info.append(dask.delayed(seg_line)(xs,ys,patch_size,segmentation_mask,target_class))
				else:
					for xs,ys in patch_info[['x','y']].values.tolist():
						# one delayed membership test per annotation mask
						area_info.append([dask.delayed(is_coords_in_box)([xs,ys],patch_size,masks[annotation]) for annotation in annotations])
				#area_info=da.concatenate(area_info,axis=0).compute()
				# evaluate all lazy per-patch results in a single dask.compute call
				area_info=np.array(dask.compute(*area_info)).astype(float)#da.concatenate(area_info,axis=0).compute(dtype=np.float16,scheduler='threaded')).astype(np.float16)
				print('Area Info Complete')
				area_info = area_info/(patch_size**2)
				patch_info.iloc[:,5:]=area_info
				#print(patch_info.dtypes)
				annot=list(patch_info.iloc[:,5:])
				patch_info.loc[:,'annotation']=np.vectorize(lambda i: annot[patch_info.iloc[i,5:].values.argmax()])(np.arange(patch_info.shape[0]))#patch_info[np.arange(target_class).astype(str).tolist()].values.argmax(1).astype(str)
				#client.close()
	except Exception as e:
		print(e)
		kargs['tries']+=1
		if kargs['tries']==max_tries:
			raise Exception('Exceeded past maximum number of tries.')
		else:
			print('Restarting preprocessing again.')
			extract_patch_information(**kargs)
	# print(patch_info)
github haruiz / CvStudio / view / widgets / gallery / gallery.py
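In this CvStudio gallery excerpt, thumbnail generation is parallelized by wrapping create_thumbnail in dask.delayed for every item and evaluating all of the tasks with one dask.compute call.
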
                        thumbnail_array = imutils.resize(image, width=150)
                    else:
                        thumbnail_array = imutils.resize(image, height=150)
                    thumbnail_array = cv2.cvtColor(thumbnail_array, cv2.COLOR_BGR2RGB)
                    thumbnail = GUIUtilities.array_to_qimage(thumbnail_array)
                    thumbnail = QPixmap.fromImage(thumbnail)
                    del thumbnail_array
                    del image
                    return item, h, w, thumbnail, os.path.getsize(file_path), False
                thumbnail = GUIUtilities.get_image("placeholder.png")
                thumbnail = thumbnail.scaledToHeight(100)
                h, w = thumbnail.height(), thumbnail.width()
                return item, h, w, thumbnail, 0, True

            # build one lazy thumbnail task per gallery item and evaluate them
            # all concurrently with a single dask.compute call
            delayed_tasks = [dask.delayed(create_thumbnail)(item) for item in items]
            images = dask.compute(*delayed_tasks)
            return images
github glotaran / pyglotaran / glotaran / analysis / optimize.py
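In this pyglotaran optimization excerpt, several related lazy collections (groups, indices, matrices, residuals) are passed to a single dask.compute call, so any intermediate tasks they share are evaluated only once.
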
            clp_labels, matrices, constraint_labels_and_matrices = \
                calculate_index_independend_ungrouped_matrices(
                    scheme, parameter
                )
            reduced_clp_labels, reduced_clps, residuals, _ = \
                residual_calculation.create_index_independend_ungrouped_residual(
                    scheme, parameter, bag, constraint_labels_and_matrices, residual_function
                )

    indices = None

    if model.grouped():
        indices = bag.map(lambda group: [d.index for d in group.descriptor])
        if model.index_dependend():
            # compute all related lazy collections in one call so that any
            # intermediate tasks they share are evaluated only once
            groups, indices, clp_labels, matrices, reduced_clp_labels, reduced_clps, residuals = \
                    dask.compute(groups, indices, clp_labels, matrices,
                                 reduced_clp_labels, reduced_clps, residuals)
        else:
            groups, indices, reduced_clp_labels, reduced_clps, residuals = dask.compute(
                groups, indices, reduced_clp_labels, reduced_clps, residuals)

    for label, dataset in datasets.items():
        if model.grouped():
            if model.index_dependend():
                groups = bag.map(lambda group: [d.dataset for d in group.descriptor]).compute()
                for i, group in enumerate(groups):
                    if label in group:
                        group_index = group.index(label)
                        if 'matrix' not in dataset:
                            # we assume that the labels are the same, this might not be true in
                            # future models
                            dataset.coords['clp_label'] = clp_labels[i][group_index]