of nodes from the associated scheduler, and failing that, will count the local cpus.
If workdir is not given, will default to scheduler's workdir or $WORKDIR.
If scheduler is not given, will default to only run on the current node.
If pickle is not given, will attempt to minimally use TemporaryFiles.
For more details, see the docstrings for the "map" method, or the man page
for the associated launcher (e.g. mpirun, mpiexec).
"""
Mapper.__init__(self, *args, **kwds)
self.scatter = bool(kwds.get('scatter', False)) #XXX: hang w/ nodes=1 ?
#self.nodes = kwds.get('nodes', None)
if not len(args) and 'nodes' not in kwds:
if self.scheduler:
self.nodes = self.scheduler.nodes
else:
self.nodes = cpu_count()
return
if AbstractWorkerPool.__init__.__doc__: __init__.__doc__ = AbstractWorkerPool.__init__.__doc__ + __init__.__doc__
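A usage sketch of the node-resolution rule described above (an assumption: this fragment resembles pyina's MPI launchers; the import path below comes from pyina, not from the snippet itself):

from pyina.launchers import Mpi

pool = Mpi()       # nodes autodetected: scheduler.nodes, else local cpu_count()
pool4 = Mpi(4)     # explicit node count
print(pool4.map(pow, [1, 2, 3, 4], [5, 6, 7, 8]))   # [1, 64, 2187, 65536]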
Returns:
    An iterator which will apply the function to each element of the
    given arrays in parallel in order with a progress bar.
"""
# Convert tuple to list
arrays = list(arrays)
# Extract kwargs
num_cpus = kwargs.get('num_cpus', None)
num_iter = kwargs.get('num_iter', 1)
# Determine num_cpus
if num_cpus is None:
num_cpus = cpu_count()
elif type(num_cpus) == float:
num_cpus = int(round(num_cpus * cpu_count()))
# Determine num_iter when at least one list is present
if any([type(array) == list for array in arrays]):
num_iter = max([len(array) for array in arrays if type(array) == list])
# Convert single variables to lists
# and confirm lists are same length
for i, array in enumerate(arrays):
if type(array) != list:
arrays[i] = [array for _ in range(num_iter)]
else:
assert len(array) == num_iter
# Create parallel iterator
map_type = 'imap' if ordered else 'uimap'
iterator = tqdm(getattr(Pool(num_cpus), map_type)(function, *arrays),
                total=num_iter, **kwargs)
return iterator
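For context, a usage sketch of the public wrappers that sit on top of a helper like this (the names are from p_tqdm, which this code resembles; treat that as an assumption). Scalars are broadcast to the length of the longest list, exactly as the conversion loop above does:

from p_tqdm import p_map

def add(a, b):
    return a + b

print(p_map(add, [1, 2, 3], 10))   # [11, 12, 13]; the scalar 10 is broadcast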
Args:
    function(Callable): The function to apply to each element of the given Iterables.
    iterables(Tuple[Iterable]): One or more Iterables containing the data to be mapped.
Returns:
A generator which will apply the function to each element of the given Iterables
in parallel in order with a progress bar.
"""
# Extract num_cpus
num_cpus = kwargs.pop('num_cpus', None)
# Determine num_cpus
if num_cpus is None:
num_cpus = cpu_count()
elif type(num_cpus) == float:
num_cpus = int(round(num_cpus * cpu_count()))
# Determine length of tqdm (equal to length of shortest iterable)
lengths = [len(iterable) for iterable in iterables if isinstance(iterable, Sized)]
length = min(lengths) if lengths else None  # None lets tqdm run without a total
# Create parallel generator
map_type = 'imap' if ordered else 'uimap'
pool = Pool(num_cpus)
map_func = getattr(pool, map_type)
for item in tqdm(map_func(function, *iterables), total=length, **kwargs):
yield item
pool.clear()
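A consumption sketch (assuming a p_tqdm-style wrapper such as p_imap exposes this generator): results can be processed as they arrive instead of waiting for the whole map to finish.

from p_tqdm import p_imap

for word in p_imap(str.upper, ['a', 'b', 'c']):
    print(word)   # 'A', 'B', 'C' in input order; the unordered variant
                  # (uimap) yields results as workers finish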
def __init__(self, *args, **kwds):
"""\nNOTE: if number of nodes is not given, will autodetect processors
"""
hasnodes = 'nodes' in kwds; arglen = len(args)
if 'ncpus' in kwds and (hasnodes or arglen):
msg = "got multiple values for keyword argument 'ncpus'"
raise TypeError(msg)
elif hasnodes: #XXX: multiple try/except is faster?
if arglen:
msg = "got multiple values for keyword argument 'nodes'"
raise TypeError(msg)
kwds['ncpus'] = kwds.pop('nodes')
elif arglen:
kwds['ncpus'] = args[0]
self.__nodes = kwds.get('ncpus', cpu_count())
# Create an identifier for the pool
self._id = 'pool'
# Create a new server if one isn't already initialized
self._serve()
return
if AbstractWorkerPool.__init__.__doc__: __init__.__doc__ = AbstractWorkerPool.__init__.__doc__ + __init__.__doc__
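A usage sketch of the argument normalization above (the code matches pathos.pools.ProcessPool): 'nodes', 'ncpus', and the first positional argument all set the worker count, and combining them raises TypeError.

from pathos.pools import ProcessPool

p1 = ProcessPool(4)          # positional -> ncpus=4
p2 = ProcessPool(nodes=4)    # 'nodes' is an alias for 'ncpus'
try:
    ProcessPool(4, nodes=4)
except TypeError as e:
    print(e)                 # got multiple values for keyword argument 'nodes'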
def __init__( self ,
ncpus = 'autodetect' ,
ppservers = () ,
silent = False , **kwargs ) :
if isinstance ( ncpus , int ) and 0 <= ncpus : self.ncpus = ncpus
else :
from pathos.helpers import cpu_count
self.ncpus = cpu_count ()
self.__ppservers = ()
self.__locals = ()
self.__pool = ()
import socket
local_host = socket.getfqdn ().lower()
from ostap.core.ostap_types import string_types
if isinstance ( ppservers , string_types ) and ppservers.lower() in ( 'config' , 'auto' ) :
from ostap.parallel.utils import get_ppservers
ppservers = get_ppservers ( local_host )
if ppservers :
    ## remove duplicates (if any), preserving the original order
    ppservers = tuple ( dict.fromkeys ( ppservers ) )
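A hypothetical driver for the fragment above (assumptions: the class is ostap's WorkManager-style pool manager, and the keyword names are taken from the signature shown; neither is verified here):

wm_local = WorkManager(ncpus=4)           # local workers only, no ppservers
wm_conf  = WorkManager(ppservers='auto')  # resolve servers for this host via
                                          # ostap.parallel.utils.get_ppservers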
def __init__(self, *args, **kwds):
"""\nNOTE: if number of nodes is not given, will autodetect processors
"""
hasnodes = 'nodes' in kwds; arglen = len(args)
if 'nthreads' in kwds and (hasnodes or arglen):
msg = "got multiple values for keyword argument 'nthreads'"
raise TypeError(msg)
elif hasnodes: #XXX: multiple try/except is faster?
if arglen:
msg = "got multiple values for keyword argument 'nodes'"
raise TypeError(msg)
kwds['nthreads'] = kwds.pop('nodes')
elif arglen:
kwds['nthreads'] = args[0]
self.__nodes = kwds.get('nthreads', cpu_count())
# Create an identifier for the pool
self._id = 'threads'
# Create a new server if one isn't already initialized
self._serve()
return
if AbstractWorkerPool.__init__.__doc__: __init__.__doc__ = AbstractWorkerPool.__init__.__doc__ + __init__.__doc__
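The thread variant applies the same normalization, with 'nodes' aliased to 'nthreads' (a minimal sketch using pathos.pools.ThreadPool):

from pathos.pools import ThreadPool

tp = ThreadPool(nodes=8)                 # stored internally as nthreads=8
print(tp.map(len, ['a', 'bb', 'ccc']))   # [1, 2, 3]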
try:
    # Parse html with Beautiful Soup
    soup = BeautifulSoup(r.content, "html.parser")
    text = soup.get_text("\n")
    # Process text
    text = self._process_text(text)
    text_path = os.path.join(self.txt_dir, fname + '.txt')
    # Write to file
    with codecs.open(text_path, 'w', encoding='utf-8') as fout:
        fout.write(text)
except Exception as e:  # avoid BaseException: let KeyboardInterrupt propagate
    print("{} parsing failed: {}".format(url, e))
ncpus = min(cpu_count(), 8)
pool = ProcessPool( ncpus )
pool.map( download_job,
iter_path_generator(index_path) )
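The helpers driving this map are not shown; below is a sketch of the shapes they would need (hypothetical bodies, not the original code): iter_path_generator yields one URL per line of the index file, and download_job fetches a page and feeds it to the parser above.

def iter_path_generator(index_path):
    with open(index_path, encoding='utf-8') as fin:
        for line in fin:
            yield line.strip()

def download_job(url):
    import requests
    r = requests.get(url, timeout=30)
    # ... hand r.content to the BeautifulSoup parsing step shown earlier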
if nodes in ['*']: nodes = 'autodetect'
if servers is None:
servers = tuple(sorted(self.__servers)) # no servers is ()
elif servers in ['*', 'autodetect']: servers = ('*',)
# if no server, create one
_pool = __STATE.get(self._id, None)
if not _pool:
_pool = pp.Server(ppservers=servers)
# convert to form returned by pp.Server, then compare
_auto = [('*',)] if _pool.auto_ppservers else []
_servers = sorted(_pool.ppservers + _auto)
_servers = tuple(':'.join((str(i) for i in tup)) for tup in _servers)
if servers != _servers: #XXX: assume servers specifies ports if desired
_pool = pp.Server(ppservers=servers)
# convert to form returned by pp.Server, then compare
_nodes = cpu_count() if nodes=='autodetect' else nodes
if _nodes != _pool.get_ncpus():
_pool.set_ncpus(nodes) # allows ncpus=0
# set (or 'repoint') the server
__STATE[self._id] = _pool
# set the 'self' internals
self.__nodes = None if nodes in ['autodetect'] else nodes
self.__servers = servers
return _pool
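A behavior sketch (assuming this _serve belongs to pathos' ParallelPool): because the server is cached in the module-level __STATE dict under self._id, repeated calls reuse one pp.Server until the nodes or servers spec changes.

from pathos.pools import ParallelPool

pool = ParallelPool(nodes=2)
s1 = pool._serve()
s2 = pool._serve()
assert s1 is s2   # same cached pp.Server; a changed spec would replace it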
def _clear(self): #XXX: should be STATE method; use id