# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_scheduled_component_ids(client, pipeline_id):
    """
    Return ids of component objects of the pipeline which are in "SCHEDULED" state.
    """
    # Query the COMPONENT sphere for objects whose @status field equals SCHEDULED.
    status_query = {
        "selectors": [
            {"fieldName": "@status", "operator": {"type": "EQ", "values": ["SCHEDULED"]}}
        ]
    }
    pages = client.get_paginator("query_objects").paginate(
        pipelineId=pipeline_id,
        query=status_query,
        sphere="COMPONENT",
    )
    # Flatten the per-page id lists into a single list.
    component_ids = []
    for page in pages:
        component_ids.extend(page["ids"])
    return component_ids
def to_signal(ts_mapping):
    """
    Combine a mapping of tag -> timed sequence into one signal.

    All sequences share a common start (the earliest timestamp across every
    value); each is converted to a tagged signal and the signals are OR-ed
    together into a single combined signal.
    """
    earliest = min(fn.pluck(0, fn.cat(ts_mapping.values())))
    assert earliest >= 0
    # Fold the per-tag signals together with `|` (same as reduce(op.or_, ...)).
    # The min() above guarantees ts_mapping is non-empty here.
    combined = None
    for tag, ts in ts_mapping.items():
        tagged = signal(ts, earliest, OO, tag=tag)
        combined = tagged if combined is None else combined | tagged
    return combined
@memoized_property
def individual_failures(self):
    """Concatenation of the nested per-entry failures of self.failures."""
    nested = (entry.failures for entry in self.failures)
    return cat(nested)
def extract_queries(lines):
    """
    Extract candidate gene-query tokens from raw input lines.

    Drops metadata/separator lines (IMAGE clone ids and "--..." markers),
    splits the remaining lines into word-like tokens, and filters out tokens
    that are known non-genes ("..._at" probe ids and "N-N" numeric ranges).
    """
    lines = remove(r'^(IMAGE:\d+|--[\w>-]+)$', lines)
    queries = cat(re_iter(r'[\w+.-]+', l) for l in lines)
    queries = remove(r'_at$|^\d+-\d+$', queries)  # No such thing
    # NOTE(review): removed unreachable dead code that followed this return —
    # a second `return` doing q.decode('unicode_escape').encode('ascii', ...),
    # a Python-2 leftover that would raise AttributeError on py3 str tokens.
    return queries
def newcols(df):
    """Return the df columns that are neither known scope columns nor trash."""
    scope_cols = set(cat(cols for _, cols in SCOPE_COLUMNS))
    recognized = scope_cols | TRASH_COLUMS
    return lremove(recognized, df.columns)
# Interior of a gene-lookup function (its `def` is outside this view).
# Expects in scope: queries (iterable of query strings), scopes, specie,
# SPECIE_PREFIXES, PREFIXES, CACHE_TIMEOUT, redis_client, mget, querymany,
# plus funcy helpers (take, chunks, cat) — all defined elsewhere in the file.
cprint('> Going to query %d genes in %s...' % (len(queries), scopes), 'cyan')
cprint('> sample queries: %s' % ', '.join(take(8, queries)), 'cyan')
# Read cache
# Cache keys are namespaced per species and query scope, e.g. "<specie>-<scope>:".
prefix = '%s-%s:' % (SPECIE_PREFIXES[specie], PREFIXES[scopes])
keys = [prefix + q for q in queries]
# v is None -> cache miss (skipped); v == '' -> cached negative result (known
# failed lookup, see the setex('') below); otherwise a pickled (entrez, symbol).
# NOTE(review): pickle.loads on bytes read from Redis is only safe if the
# Redis instance is trusted — pickle from an untrusted source is unsafe.
res = {k: pickle.loads(v) if v else ''
for k, v in zip(queries, mget(keys))
if v is not None}
if res:
queries = set(queries) - set(res)
print(('Got %d from cache, %d queries left' % (len(res), len(queries))))
if queries:
mg = mygene.MyGeneInfo()
# Looks like sorting groups bad queries
data = cat(querymany(qs) for qs in chunks(500, tqdm(sorted(queries), leave=False)))
# Keep only definite hits that carry both an Entrez id and a symbol.
new = {str(item['query']): (item['entrezgene'], item['symbol'])
for item in data
if not item.get('notfound') and 'entrezgene' in item and 'symbol' in item}
res.update(new)
# Cache results and fails
# Failed queries are cached as '' so repeated misses skip the web service.
pipe = redis_client.pipeline(transaction=False)
for k, v in new.items():
pipe.setex(prefix + k, CACHE_TIMEOUT, pickle.dumps(v, -1))
for k in queries - set(new):
pipe.setex(prefix + k, CACHE_TIMEOUT, '')
pipe.execute()
# Drop the negative-cache sentinels before handing matches back to the caller.
res = {k: v for k, v in res.items() if v != ''}
cprint('-> Got %d matches' % len(res), 'yellow')
return res