if not automated:
    print("Loading old results ..")
    prev_results = load_or_create("results_file", lambda: None)
    if no_resolution:
        results = prev_results
        embed()
        return

results = Results()
files = []
for dir in dirs:
    dir_files = sorted(glob('{}/*.ad?'.format(dir)))
    if pattern:
        dir_files = [f for f in dir_files if re.findall(pattern, f)]
    dir_files = chunks(
        chunk_size,
        map(os.path.basename, dir_files)
    )
    files += [(dir, fs) for fs in dir_files]
project = os.path.abspath(project)
# Tuple parameters in lambdas are Python 2 only; unpack the (dir, files) pair explicitly.
raw_results = pmap(
    lambda dir_and_files: FileResult.nameres_files(
        dir_and_files[0], dir_and_files[1],
        project=project, extra_args=extra_args
    ),
    files, nb_threads=j
)
total_nb_files = sum(len(fs[1]) for fs in files)
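Every snippet on this page leans on a chunks(n, seq) helper (funcy's chunks, or a local equivalent with the same shape). A minimal sketch of the behaviour these snippets assume: successive pieces of length n, with the last piece possibly shorter.

# Assumed behaviour of the chunking helper used throughout (funcy-style chunks(n, seq)).
from funcy import chunks

assert list(chunks(3, [1, 2, 3, 4, 5, 6, 7, 8])) == [[1, 2, 3], [4, 5, 6], [7, 8]]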
def upgrade():
    ### commands auto generated by Alembic - please adjust! ###
    table = op.create_table('free_email_providers',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('domain', sa.Unicode(length=255), nullable=False),
        sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_free_email_providers_domain'), 'free_email_providers', ['domain'], unique=False)
    ### end Alembic commands ###

    with open(os.path.join(PROJECT_ROOT, 'free.emails'), 'r') as f:
        FREE_EMAILS_SET = set(r.strip() for r in f.readlines())

    # Fill the data
    for domains in chunks(1000, FREE_EMAILS_SET):
        op.bulk_insert(table, [{'domain': d} for d in domains])
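The snippet only shows the upgrade path; the matching downgrade is not part of it. A plausible sketch, assuming the standard Alembic ops for the objects created above:

def downgrade():
    # Hypothetical reverse migration (not in the original snippet): drop the index,
    # then the table; the bulk-inserted rows disappear with the table.
    op.drop_index(op.f('ix_free_email_providers_domain'), table_name='free_email_providers')
    op.drop_table('free_email_providers')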
timeout=TIMEOUT)
# TODO: re-raise custom exception for 429 HTTP error
# for further handling (e.g. retry celery task)
response.raise_for_status()
result = response.json()
try:
    status_url = result['links']['self']
    logger.debug('Bulk update status: %s', status_url)
except KeyError:
    logger.error('Weird response from Intercom: %r', result)
return result

with self.get_executor() as executor:
    for _ in executor.map(request, chunks(CHUNK_SIZE, users_data)):
        pass
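The snippet above fans requests out over an executor, one call per chunk of users. A minimal standalone sketch of the same pattern, assuming a plain ThreadPoolExecutor in place of self.get_executor() and a stand-in request() instead of the real Intercom call:

from concurrent.futures import ThreadPoolExecutor

from funcy import chunks

CHUNK_SIZE = 50  # assumed value, not taken from the original code

def request(users_chunk):
    # Stand-in for the HTTP call in the snippet above.
    return len(users_chunk)

users_data = [{'user_id': i} for i in range(230)]
with ThreadPoolExecutor(max_workers=4) as executor:
    # One request() call per chunk; iterating the map drains it and surfaces exceptions.
    for _ in executor.map(request, chunks(CHUNK_SIZE, users_data)):
        pass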
def mget(keys):
    return lcat(redis_client.mget(chunk) for chunk in chunks(10000, keys))
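This helper splits a large key list into batches of 10,000 so no single MGET call gets too big, then flattens the per-batch results back into one list with funcy's lcat. A quick illustration of the two pieces (the redis client itself is not exercised here):

from funcy import chunks, lcat

batches = list(chunks(3, ['k1', 'k2', 'k3', 'k4', 'k5']))
# batches == [['k1', 'k2', 'k3'], ['k4', 'k5']]
flat = lcat([['v1', 'v2', 'v3'], ['v4', 'v5']])
# flat == ['v1', 'v2', 'v3', 'v4', 'v5']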
def word_tokenize(self, text):
    """Yield string tokens from an input string.

    Args:
        text: input string for tokenization

    Yields:
        token: str, non-whitespace tokens
    """
    for token in split_possessive_markers(split_contractions(_html_tokenize(text))):
        if self._max_characters_per_token is not None:
            # Break overly long tokens into fixed-size character chunks.
            for token_chunk in funcy.chunks(self._max_characters_per_token, token):
                yield token_chunk
        else:
            yield token
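For reference, the character-level chunking above behaves like this (funcy.chunks on a string yields string slices); the 4-character limit is just an illustrative value:

from funcy import chunks

list(chunks(4, "supercalifragilistic"))
# -> ['supe', 'rcal', 'ifra', 'gili', 'stic']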
def input_batch_generator(self, tokenized_sentences, batch_size):
    """Yield inputs to ContextualizedEmbedding in batches with minimal padding for prediction.

    Group sentences into batches in the order they're provided. Character-level padding is
    determined by the longest sentence in the batch. Yield one batch at a time, looping over
    the data indefinitely.

    Args:
        tokenized_sentences: list of lists of str, each str a token
        batch_size: int, number of sentences per generated batch

    Yields:
        model_inputs: inputs to ContextualizedEmbedding, one sentence batch at a time
    """
    while True:
        for chunk in chunks(batch_size, range(len(tokenized_sentences))):
            selected_sentences = [tokenized_sentences[index] for index in chunk]
            model_inputs = self.prepare_inputs_from_pretokenized(selected_sentences)
            yield model_inputs
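Because the generator loops forever, the caller has to bound the iteration itself. A hedged usage sketch, assuming `embedder` is the object exposing this method and that the yielded model_inputs are fed to a model elsewhere:

import math

tokenized_sentences = [["Hello", "world", "!"], ["Second", "sentence"]]  # toy data
batch_size = 2
steps_per_epoch = math.ceil(len(tokenized_sentences) / batch_size)

batch_gen = embedder.input_batch_generator(tokenized_sentences, batch_size)  # embedder is assumed
for _ in range(steps_per_epoch):
    model_inputs = next(batch_gen)
    # hand model_inputs to the prediction call here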
"""
Return dicts describing the current status of the pipelines.
"""
extract_fields = jmespath.compile(
    """
    pipelineDescriptionList[].{
        pipelineId: pipelineId,
        name: name,
        pipelineState: fields[?key == '@pipelineState'].stringValue|[0],
        healthStatus: fields[?key == '@healthStatus'].stringValue|[0],
        latestRunTime: fields[?key == '@latestRunTime'].stringValue|[0]
    }
    """
)
chunk_size = 25  # Per AWS documentation, need to go in pages of 25 pipelines
for ids_chunk in funcy.chunks(chunk_size, pipeline_ids):
    response = client.describe_pipelines(pipelineIds=ids_chunk)
    values = extract_fields.search(response)
    for value in values:
        yield value
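A hedged usage sketch for the generator above, assuming it is exposed as a function named, say, describe_pipeline_statuses(client, pipeline_ids) (the real name is cut off in the snippet) and that the caller owns the boto3 client:

import boto3

client = boto3.client('datapipeline')
pipeline_ids = ['df-EXAMPLE1', 'df-EXAMPLE2']  # placeholder ids, not real pipelines

for status in describe_pipeline_statuses(client, pipeline_ids):
    print(status['pipelineId'], status['pipelineState'], status['healthStatus'])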
"""
client = boto3.client('datapipeline')
paginator = client.get_paginator('list_pipelines')
response_iterator = paginator.paginate()
all_pipeline_ids = response_iterator.search("pipelineIdList[].id")
if selection:
    selected_pipeline_ids = [pipeline_id
                             for pipeline_id in all_pipeline_ids
                             for glob in selection
                             if fnmatch.fnmatch(pipeline_id, glob)]
else:
    selected_pipeline_ids = list(all_pipeline_ids)

dw_pipelines = []
chunk_size = 25  # Per AWS documentation, need to go in pages of 25 pipelines
for ids_chunk in funcy.chunks(chunk_size, selected_pipeline_ids):
    resp = client.describe_pipelines(pipelineIds=ids_chunk)
    for description in resp['pipelineDescriptionList']:
        for tag in description['tags']:
            if tag['key'] == 'user:project' and tag['value'] == 'data-warehouse':
                dw_pipelines.append(DataPipeline(description))

return sorted(dw_pipelines, key=attrgetter("name"))
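The glob-based selection above matches every requested pattern against every pipeline id. A small hedged illustration with made-up ids:

import fnmatch

all_pipeline_ids = ['df-FOO-validation', 'df-FOO-production', 'df-BAR-production']  # made-up ids
selection = ['*production*']

selected = [pipeline_id
            for pipeline_id in all_pipeline_ids
            for glob in selection
            if fnmatch.fnmatch(pipeline_id, glob)]
# selected == ['df-FOO-production', 'df-BAR-production']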