import aiohttp
import dask
import yarl
from dask_gateway import Gateway


def test_http_client_proxy_true(monkeypatch):
    http_proxy = "http://alice:password@host:80/path"
    proxy_sol = yarl.URL("http://host:80/path")
    proxy_auth_sol = aiohttp.BasicAuth("alice", "password")
    with dask.config.set(gateway__http_client__proxy=True):
        with monkeypatch.context() as m:
            # Clear any proxy settings inherited from the environment
            for k in ["http_proxy", "https_proxy"]:
                m.delenv(k, raising=False)
                m.delenv(k.upper(), raising=False)
            with m.context() as m2:
                m2.setenv("http_proxy", http_proxy)
                # Properly inferred from the environment
                g = Gateway("http://myhost:80")
                assert g._request_kwargs["proxy"] == proxy_sol
                assert g._request_kwargs["proxy_auth"] == proxy_auth_sol
                # No HTTPS proxy set
                g = Gateway("https://myhost:80")
                assert g._request_kwargs == {"proxy": None, "proxy_auth": None}
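The test above relies on dask's double-underscore convention for nested configuration keys. A minimal stand-alone sketch of that behaviour, independent of dask-gateway (only the key name is taken from the test):

import dask

# "gateway__http_client__proxy" expands to the nested key gateway.http-client.proxy;
# "_" and "-" are treated as interchangeable when the value is read back.
with dask.config.set(gateway__http_client__proxy=True):
    assert dask.config.get("gateway.http_client.proxy") is True
# Settings made through the context manager are reverted on exit.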
    return grp.compute()


if __name__ == '__main__':
    import os
    import argparse

    import dask
    import dask.dataframe as dd

    parser = argparse.ArgumentParser()
    parser.add_argument('analyze', help='type of analysis: changeset or editor',
                        type=str, choices=("changeset", "editor"))
    parser.add_argument('-i', '--input', required=True,
                        help='name of the input CSV file (changeset history)')
    parser.add_argument('-o', '--output', help='name of the output file')
    parser.add_argument('--blocksize', type=int, default=500,
                        help='blocksize of a chunk (Dask), in MB')
    parser.add_argument('--num-workers', type=int, default=8,
                        help='number of workers (Dask)')
    args = parser.parse_args()

    dask.config.set(num_workers=args.num_workers)
    analyze = args.analyze
    if args.output is None:
        outpath = os.path.join('./data', 'output-extracts',
                               'all-' + analyze + 's-by-user.csv')
    if not os.path.isfile(args.input):
        print("The file '{}' does not exist.".format(args.input))
        parser.exit(1)
    print("Reading the CSV '{}' with dask".format(args.input))
    blocksize = int(args.blocksize * 1e6)  # convert MB to bytes
    data = dd.read_csv(args.input, blocksize=blocksize, dtype=DTYPE)
    print("data processing")
    if analyze == 'changeset':
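The 'changeset' branch itself is omitted from the snippet above. As a purely hypothetical illustration of the same pattern (a configured worker count, a blocksize in bytes, and a per-user aggregation; the file and column names are invented):

import dask
import dask.dataframe as dd

dask.config.set(num_workers=4)  # cap the local scheduler's worker count

df = dd.read_csv('changesets.csv', blocksize=int(100e6))  # hypothetical file
per_user = df.groupby('uid').size()                       # hypothetical column
print(per_user.compute())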
import dask
import fmpy
from fmpy import platform, read_model_description
from fmpy.util import download_test_file


def run_experiment(show_plot=True):

    if platform not in ['win32', 'win64']:
        raise Exception("Rectifier.fmu is only available for Windows")

    print("Parameter variation on %s:" % fmu_filename)
    print("  VAC", v_ac)
    print("  IDC", i_dc)

    if sync:
        dask.config.set(scheduler='synchronous')  # single-threaded, synchronous scheduler

    # download the FMU
    download_test_file('2.0', 'CoSimulation', 'Dymola', '2017', 'Rectifier', fmu_filename)

    # read the model description
    model_description = read_model_description(fmu_filename)

    # collect the value references for the variables to read / write
    vrs = {}
    for variable in model_description.modelVariables:
        vrs[variable.name] = variable.valueReference

    # extract the FMU
    unzipdir = fmpy.extract(fmu_filename)

    fmu_args = {'guid': model_description.guid,
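When `sync` is set, the snippet forces dask's synchronous scheduler, which runs every task in the calling thread. A small stand-alone sketch of why that is useful for debugging (dask.bag and the toy computation are illustrative, not part of the FMU example):

import dask
import dask.bag as db

numbers = db.from_sequence(range(8), npartitions=4)

# With scheduler='synchronous' nothing runs in worker threads or processes,
# so breakpoints and stack traces point straight at the offending task.
with dask.config.set(scheduler='synchronous'):
    print(numbers.map(lambda x: x * x).sum().compute())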
    if args.log_fn is None:
        rename_log = True
        args.log_fn = glue_name + "_fail.log"
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    setup_logging(console_level=levels[min(3, args.verbosity)], log_filename=args.log_fn)
    logging.getLogger('rasterio').setLevel(levels[min(2, args.verbosity)])
    sys.excepthook = create_exc_handler(LOG.name)
    if levels[min(3, args.verbosity)] > logging.DEBUG:
        import warnings
        warnings.filterwarnings("ignore")
    LOG.debug("Starting script with arguments: %s", " ".join(sys.argv))

    # Set up dask and the number of workers
    if args.num_workers:
        from multiprocessing.pool import ThreadPool
        dask.config.set(pool=ThreadPool(args.num_workers))

    # Parse the provided files, and search any provided directories for files
    scene_args['filenames'] = get_input_files(scene_args['filenames'])

    # Create a Scene and analyze the provided files
    LOG.info("Sorting and reading input files...")
    try:
        scn = Scene(**scene_args)
    except ValueError as e:
        LOG.error("{} | Enable debug messages (-vvv) or see the log file for details.".format(str(e)))
        LOG.debug("Further error information: ", exc_info=True)
        return -1
    except OSError:
        LOG.error("Could not open files. Enable debug messages (-vvv) or see the log file for details.")
        LOG.debug("Further error information: ", exc_info=True)
        return -1
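`dask.config.set(pool=ThreadPool(n))` hands the default threaded scheduler a fixed pool, which is how the `--num-workers` flag above caps parallelism. A minimal stand-alone sketch (the array and its size are arbitrary):

import dask
import dask.array as da
from multiprocessing.pool import ThreadPool

# Cap the default threaded scheduler at four worker threads.
dask.config.set(pool=ThreadPool(4))

x = da.random.random((2000, 2000), chunks=(500, 500))
print(x.mean().compute())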
                endDate=endDate)
            datasets.append(dsTimeSlice)

    chunk = {'Time': timeChunk, 'nCells': cellsChunk}

    if config.has_option(sectionName, 'zmin'):
        config_zmin = config.getfloat(sectionName, 'zmin')
    else:
        config_zmin = None

    if config.has_option(sectionName, 'zmax'):
        config_zmax = config.getfloat(sectionName, 'zmax')
    else:
        config_zmax = None

    with dask.config.set(scheduler='threads',
                         pool=ThreadPool(self.daskThreads)):
        # combine the data sets into a single data set
        dsIn = xarray.concat(datasets, 'Time').chunk(chunk)

        chunk = {'nCells': cellsChunk}

        dsRestart = xarray.open_dataset(restartFileName)
        dsRestart = dsRestart.isel(Time=0).chunk(chunk)
        dsIn['areaCell'] = dsRestart.areaCell
        if 'landIceMask' in dsRestart:
            # only the region outside of ice-shelf cavities
            dsIn['openOceanMask'] = dsRestart.landIceMask == 0

        dsIn['zMid'] = compute_zmid(dsRestart.bottomDepth,
                                    dsRestart.maxLevelCell,
                                    dsRestart.layerThickness)
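The context-manager form used above keeps the scheduler and thread-pool settings scoped to the block, so the rest of the analysis task is unaffected. A reduced sketch of the same pattern (the NetCDF file, variable name, and thread count are placeholders):

import dask
import xarray
from multiprocessing.pool import ThreadPool

with dask.config.set(scheduler='threads', pool=ThreadPool(8)):
    ds = xarray.open_dataset('timeSeries.nc', chunks={'Time': 12})
    climatology = ds['temperature'].mean(dim='Time').compute()
# Outside the block dask reverts to its previous scheduler settings.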
def LaunchDaskDistributedClient(self, scheduler_ip=None, scheduler_port=None):

    if self.parallel and self.parallel_model == "dask" and self.is_dask_scheduler_initialised is False:

        from multiprocessing.pool import ThreadPool
        try:
            import dask
            from dask.distributed import Client, LocalCluster
        except ImportError:
            raise ImportError("dask is not installed. Install it using 'pip install dask[complete]'")

        dask.config.set(pool=ThreadPool(self.no_of_cpu_cores))

        # INITIALISE CLUSTER
        if scheduler_ip is None:
            cluster = LocalCluster(n_workers=self.no_of_cpu_cores, processes=False, threads_per_worker=None)
            client = Client(cluster)
        else:
            client = Client(scheduler_ip)

        self.dask_client = client
        self.is_dask_scheduler_initialised = True
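For completeness, a small stand-alone sketch of the same local-cluster setup with an explicit teardown, which the method above leaves to the caller (the worker count is arbitrary):

from dask.distributed import Client, LocalCluster

# processes=False keeps every worker in-process (threads only), matching the
# LocalCluster call above.
cluster = LocalCluster(n_workers=2, processes=False)
client = Client(cluster)
print(sorted(client.scheduler_info()["workers"]))

client.close()
cluster.close()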
            # The file already exists, so load it
            dsOut = xarray.open_dataset(outFileName)
            if numpy.all(dsOut.Time.values == dsIn.Time.values):
                return
            else:
                self.logger.warning('File {} is incomplete. Deleting '
                                    'it.'.format(outFileName))
                os.remove(outFileName)
        except OSError:
            # something is potentially wrong with the file, so let's delete
            # it and try again
            self.logger.warning('Problems reading file {}. Deleting '
                                'it.'.format(outFileName))
            os.remove(outFileName)

    with dask.config.set(scheduler='threads',
                         pool=ThreadPool(self.daskThreads)):

        # work on data from the simulations
        freshwaterFlux = dsIn.timeMonthly_avg_landIceFreshwaterFlux.chunk(
            {'Time': 12})

        restartFileName = \
            mpasTimeSeriesTask.runStreams.readpath('restart')[0]

        dsRestart = xarray.open_dataset(restartFileName)
        areaCell = \
            dsRestart.landIceFraction.isel(Time=0) * dsRestart.areaCell

        regionMaskFileName = self.masksSubtask.maskFileName

        dsRegionMask = xarray.open_dataset(regionMaskFileName)

def time_load_dataset_scipy_with_block_chunks(self):
    with dask.config.set(scheduler="multiprocessing"):
        xr.open_dataset(
            self.filepath, engine="scipy", chunks=self.block_chunks
        ).load()
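This benchmark scopes dask's multiprocessing scheduler to a single `.load()` call. A stand-alone sketch of the same scoping (the array is a stand-in for the benchmark's dataset; the `__main__` guard matters because this scheduler spawns worker processes):

import dask
import dask.array as da

if __name__ == "__main__":
    x = da.random.random((1000, 1000), chunks=(250, 250))

    # Only this computation uses process-based workers; afterwards dask
    # falls back to its previous (threaded) default for arrays.
    with dask.config.set(scheduler="multiprocessing"):
        print(x.sum().compute())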
        g = constant * g
        g = g + 1.0
        return g

    def __eval_h(self, f: float, g: float) -> float:
        return 1.0 - sqrt(f / g)

    def get_name(self):
        return 'ZDT11'


if __name__ == '__main__':
    problem = ZDT11()

    dask.config.set(scheduler='threads', pool=ThreadPool(8))
    client = Client()

    algorithm = DistributedNSGAII(
        problem=problem,
        population_size=10,
        max_evaluations=100,
        mutation=Polynomial(probability=1.0 / problem.number_of_variables, distribution_index=20),
        crossover=SBX(probability=1.0, distribution_index=20),
        selection=BinaryTournamentSelection(comparator=RankingAndCrowdingDistanceComparator()),
        number_of_cores=8,
        client=client
    )

    progress_bar = ProgressBarObserver(max=100)
    algorithm.observable.register(observer=progress_bar)
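DistributedNSGAII is handed a dask.distributed Client and farms solution evaluations out through it. A minimal sketch of that underlying mechanism, unrelated to jMetalPy itself (the worker counts and the squared-number task are arbitrary):

from dask.distributed import Client

if __name__ == '__main__':
    client = Client(n_workers=2, threads_per_worker=1)  # small local cluster

    # submit() schedules work on the cluster; gather() collects the results.
    futures = [client.submit(pow, x, 2) for x in range(8)]
    print(client.gather(futures))

    client.close()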