How to use the dask.config.set function in dask

To help you get started, we’ve selected a few dask.config.set examples based on popular ways it is used in public projects.
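
At its core, dask.config.set updates Dask's configuration, either globally for the rest of the session or, when used as a context manager, only for the enclosed block. A minimal sketch of both forms (the nested key in the last call is purely illustrative):

import dask

# Set values globally for the rest of the session
dask.config.set(scheduler="threads", num_workers=4)

# ...or only for a block of code, using the context manager form
with dask.config.set(scheduler="threads"):
    pass  # computations in this block use the threaded scheduler

# Nested keys can be passed as a mapping with dotted names
dask.config.set({"optimization.fuse.ave-width": 4})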


github dask / dask-gateway / tests / test_client.py View on Github
def test_http_client_proxy_true(monkeypatch):
    http_proxy = "http://alice:password@host:80/path"
    proxy_sol = yarl.URL("http://host:80/path")
    proxy_auth_sol = aiohttp.BasicAuth("alice", "password")

    with dask.config.set(gateway__http_client__proxy=True):
        with monkeypatch.context() as m:
            for k in ["http_proxy", "https_proxy"]:
                m.delenv(k, raising=False)
                m.delenv(k.upper(), raising=False)

            with m.context() as m2:
                m2.setenv("http_proxy", http_proxy)

                # Properly inferred from environment
                g = Gateway("http://myhost:80")
                assert g._request_kwargs["proxy"] == proxy_sol
                assert g._request_kwargs["proxy_auth"] == proxy_auth_sol

                # No HTTPS proxy set
                g = Gateway("https://myhost:80")
                assert g._request_kwargs == {"proxy": None, "proxy_auth": None}
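
In the test above, gateway__http_client__proxy=True is shorthand: double underscores in keyword arguments to dask.config.set are translated into dots, so the call targets the nested gateway.http-client.proxy setting (Dask treats hyphens and underscores in key names interchangeably). A minimal sketch of the equivalence, using that same dask-gateway key for illustration:

import dask

# These two calls address the same nested configuration key
dask.config.set(gateway__http_client__proxy=True)
dask.config.set({"gateway.http-client.proxy": True})
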
github Oslandia / osm-data-classification / osmdq / process-changesets-user-history.py View on Github
    return grp.compute()


if __name__ == '__main__':
    import os
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('analyze', help='type of analysis: changeset or editor', type=str, choices=("changeset", "editor"))
    parser.add_argument('-i', '--input', required=True, help='name of the input csv file (changesets history)')
    parser.add_argument('-o', '--output', help='name of the output file')
    parser.add_argument('--blocksize', type=int, default=500, help='Blocksize of chunk (Dask) in MB')
    parser.add_argument('--num-workers', type=int, default=8, help='Number of workers (Dask)')
    args = parser.parse_args()

    dask.config.set(num_workers=args.num_workers)

    analyze = args.analyze

    if args.output is None:
        outpath = os.path.join('./data', 'output-extracts', 'all-' + analyze  + 's-by-user.csv')

    if not os.path.isfile(args.input):
        print("The file '{}' does not exist.".format(args.input))
        parser.exit(1)

    print("dask read the CSV '{}'".format(args.input))
    blocksize = args.blocksize * 1e6  # chunks in MB
    data = dd.read_csv(args.input, blocksize=blocksize, dtype=DTYPE)

    print("data processing")
    if analyze == 'changeset':
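
In the script above, dask.config.set(num_workers=args.num_workers) is called once near the top; the local threaded and multiprocessing schedulers consult that key when sizing their worker pool, so later .compute() calls pick it up without extra arguments. A minimal sketch of the two equivalent spellings (ddf stands in for any dask dataframe):

import dask

dask.config.set(num_workers=8)      # read by the local schedulers

# equivalent one-off form, passed per computation:
# result = ddf.compute(num_workers=8)
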
github CATIA-Systems / FMPy / fmpy / examples / parameter_variation.py View on Github
def run_experiment(show_plot=True):

    if platform not in ['win32', 'win64']:
        raise Exception("Rectifier.fmu is only available for Windows")

    print("Parameter variation on %s:" % fmu_filename)
    print("  VAC", v_ac)
    print("  IDC", i_dc)

    if sync:
        dask.config.set(scheduler='synchronous')  # synchronized scheduler

    # download the FMU
    download_test_file('2.0', 'CoSimulation', 'Dymola', '2017', 'Rectifier', fmu_filename)

    # read the model description
    model_description = read_model_description(fmu_filename)

    # collect the value references for the variables to read / write
    vrs = {}
    for variable in model_description.modelVariables:
        vrs[variable.name] = variable.valueReference

    # extract the FMU
    unzipdir = fmpy.extract(fmu_filename)

    fmu_args = {'guid': model_description.guid,
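
The sync branch near the top of this example switches Dask to the synchronous scheduler, which runs every task in the calling thread; that gives up parallelism but makes debugging straightforward, since exceptions and breakpoints surface directly. A minimal sketch:

import dask
import dask.bag as db

# Run everything single-threaded, e.g. to step through with pdb
with dask.config.set(scheduler="synchronous"):
    db.from_sequence(range(4)).map(lambda x: x * 2).compute()
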
github ssec / polar2grid / polar2grid / glue.py View on Github
    if args.log_fn is None:
        rename_log = True
        args.log_fn = glue_name + "_fail.log"
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    setup_logging(console_level=levels[min(3, args.verbosity)], log_filename=args.log_fn)
    logging.getLogger('rasterio').setLevel(levels[min(2, args.verbosity)])
    sys.excepthook = create_exc_handler(LOG.name)
    if levels[min(3, args.verbosity)] > logging.DEBUG:
        import warnings
        warnings.filterwarnings("ignore")
    LOG.debug("Starting script with arguments: %s", " ".join(sys.argv))

    # Set up dask and the number of workers
    if args.num_workers:
        from multiprocessing.pool import ThreadPool
        dask.config.set(pool=ThreadPool(args.num_workers))

    # Parse provided files and search for files if provided directories
    scene_args['filenames'] = get_input_files(scene_args['filenames'])
    # Create a Scene, analyze the provided files
    LOG.info("Sorting and reading input files...")
    try:
        scn = Scene(**scene_args)
    except ValueError as e:
        LOG.error("{} | Enable debug message (-vvv) or see log file for details.".format(str(e)))
        LOG.debug("Further error information: ", exc_info=True)
        return -1
    except OSError:
        LOG.error("Could not open files. Enable debug message (-vvv) or see log file for details.")
        LOG.debug("Further error information: ", exc_info=True)
        return -1
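
Here dask.config.set(pool=ThreadPool(args.num_workers)) hands Dask's threaded scheduler an explicit pool sized from the command line instead of letting it create its own, so every later computation in the script shares those workers. A minimal sketch of the same idea:

from multiprocessing.pool import ThreadPool

import dask

# Share one four-thread pool across all local threaded computations
dask.config.set(pool=ThreadPool(4))
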
github MPAS-Dev / MPAS-Analysis / mpas_analysis / ocean / time_series_ocean_regions.py View on Github
                endDate=endDate)
            datasets.append(dsTimeSlice)

        chunk = {'Time': timeChunk, 'nCells': cellsChunk}

        if config.has_option(sectionName, 'zmin'):
            config_zmin = config.getfloat(sectionName, 'zmin')
        else:
            config_zmin = None

        if config.has_option(sectionName, 'zmax'):
            config_zmax = config.getfloat(sectionName, 'zmax')
        else:
            config_zmax = None

        with dask.config.set(scheduler='threads',
                             pool=ThreadPool(self.daskThreads)):
            # combine data sets into a single data set
            dsIn = xarray.concat(datasets, 'Time').chunk(chunk)

            chunk = {'nCells': cellsChunk}
            dsRestart = xarray.open_dataset(restartFileName)
            dsRestart = dsRestart.isel(Time=0).chunk(chunk)
            dsIn['areaCell'] = dsRestart.areaCell
            if 'landIceMask' in dsRestart:
                # only the region outside of ice-shelf cavities
                dsIn['openOceanMask'] = dsRestart.landIceMask == 0

            dsIn['zMid'] = compute_zmid(dsRestart.bottomDepth,
                                        dsRestart.maxLevelCell,
                                        dsRestart.layerThickness)
github romeric / florence / Florence / Solver / FEMSolver.py View on Github
    def LaunchDaskDistributedClient(self, scheduler_ip=None, scheduler_port=None):

        if self.parallel and self.parallel_model == "dask" and self.is_dask_scheduler_initialised is False:

            from multiprocessing.pool import ThreadPool
            try:
                import dask
                from dask.distributed import Client, LocalCluster
            except ImportError:
                raise ImportError("dask is not installed. Install it 'using pip install dask[complete]'")

            dask.config.set(pool=ThreadPool(self.no_of_cpu_cores))
            # INITIALISE CLUSTER
            if scheduler_ip is None:
                cluster = LocalCluster(n_workers=self.no_of_cpu_cores, processes=False, threads_per_worker=None)
                client = Client(cluster)
            else:
                client = Client(scheduler_ip)

            self.dask_client = client

            self.is_dask_scheduler_initialised = True
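
Note that the pool setting above only affects Dask's local threaded scheduler; once the distributed Client is created it registers itself as the default scheduler, so subsequent .compute() calls are routed to the cluster instead. A minimal sketch of that hand-off:

from dask.distributed import Client, LocalCluster

cluster = LocalCluster(n_workers=4, processes=False)
client = Client(cluster)   # becomes the default scheduler for later .compute() calls
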
github MPAS-Dev / MPAS-Analysis / mpas_analysis / ocean / time_series_antarctic_melt.py View on Github
                # The file already exists so load it
                dsOut = xarray.open_dataset(outFileName)
                if numpy.all(dsOut.Time.values == dsIn.Time.values):
                    return
                else:
                    self.logger.warning('File {} is incomplete. Deleting '
                                        'it.'.format(outFileName))
                    os.remove(outFileName)
        except OSError:
            # something is potentially wrong with the file, so let's delete
            # it and try again
            self.logger.warning('Problems reading file {}. Deleting '
                                'it.'.format(outFileName))
            os.remove(outFileName)

        with dask.config.set(scheduler='threads',
                             pool=ThreadPool(self.daskThreads)):
            # work on data from simulations
            freshwaterFlux = dsIn.timeMonthly_avg_landIceFreshwaterFlux.chunk(
                {'Time': 12})

            restartFileName = \
                mpasTimeSeriesTask.runStreams.readpath('restart')[0]

            dsRestart = xarray.open_dataset(restartFileName)
            areaCell = \
                dsRestart.landIceFraction.isel(Time=0) * dsRestart.areaCell

            regionMaskFileName = self.masksSubtask.maskFileName

            dsRegionMask = xarray.open_dataset(regionMaskFileName)
github pydata / xarray / asv_bench / benchmarks / dataset_io.py View on Github
    def time_load_dataset_scipy_with_block_chunks(self):
        with dask.config.set(scheduler="multiprocessing"):
            xr.open_dataset(
                self.filepath, engine="scipy", chunks=self.block_chunks
            ).load()
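
This benchmark scopes the setting with the context manager, so only the timed load runs on the multiprocessing scheduler, which executes tasks in separate processes and therefore sidesteps the GIL. A minimal sketch of the same scoping, independent of the benchmark class:

import dask
import dask.array as da

x = da.ones((1000, 1000), chunks=(250, 250))

with dask.config.set(scheduler="multiprocessing"):
    x.sum().compute()    # this call uses processes

x.sum().compute()        # back to the default scheduler here
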
github jMetal / jMetalPy / examples / multiobjective / distributed_nsgaii.py View on Github
        g = constant * g
        g = g + 1.0

        return g

    def __eval_h(self, f: float, g: float) -> float:
        return 1.0 - sqrt(f / g)

    def get_name(self):
        return 'ZDT11'


if __name__ == '__main__':
    problem = ZDT11()

    dask.config.set(scheduler='threads', pool=ThreadPool(8))
    client = Client()

    algorithm = DistributedNSGAII(
        problem=problem,
        population_size=10,
        max_evaluations=100,
        mutation=Polynomial(probability=1.0 / problem.number_of_variables, distribution_index=20),
        crossover=SBX(probability=1.0, distribution_index=20),
        selection=BinaryTournamentSelection(comparator=RankingAndCrowdingDistanceComparator()),
        number_of_cores=8,
        client=client
    )

    progress_bar = ProgressBarObserver(max=100)
    algorithm.observable.register(observer=progress_bar)