from collections import Counter

from nltk.tokenize import sent_tokenize, word_tokenize
from tqdm import tqdm

# Build word/char counters and the nested token structures for a slice of
# SQuAD-style articles. x, cx, p, source_data, start_ratio, stop_ratio and
# process_tokens are defined by the enclosing preprocessing code.
word_counter, char_counter, lower_word_counter = Counter(), Counter(), Counter()
start_ai = int(round(len(source_data['data']) * start_ratio))
stop_ai = int(round(len(source_data['data']) * stop_ratio))
for ai, article in enumerate(tqdm(source_data['data'][start_ai:stop_ai])):
    xp, cxp = [], []
    pp = []
    x.append(xp)
    cx.append(cxp)
    p.append(pp)
    for pi, para in enumerate(article['paragraphs']):
        # words
        context = para['context']
        context = context.replace("''", '" ')
        context = context.replace("``", '" ')
        xi = list(map(word_tokenize, sent_tokenize(context)))
        xi = [process_tokens(tokens) for tokens in xi]  # process tokens
        # given xi, add chars
        cxi = [[list(xijk) for xijk in xij] for xij in xi]
        xp.append(xi)
        cxp.append(cxi)
        pp.append(context)
        for xij in xi:
            for xijk in xij:
                word_counter[xijk] += len(para['qas'])
                lower_word_counter[xijk.lower()] += len(para['qas'])
                for xijkl in xijk:
                    char_counter[xijkl] += len(para['qas'])
        rxi = [ai, pi]
        assert len(x) - 1 == ai
        assert len(x[ai]) - 1 == pi
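The excerpt above relies on names created by its surrounding preprocessing code. A minimal, hypothetical driver for running it stand-alone might look like the following; the input path, the identity process_tokens, and the 0.0/1.0 ratios are assumptions for illustration, not part of the original script.

import json

def process_tokens(tokens):
    # Hypothetical stand-in: the real helper normalizes a few special
    # characters; passing tokens through unchanged is enough for a dry run.
    return tokens

with open("train-v1.1.json") as fh:   # illustrative SQuAD-style input path
    source_data = json.load(fh)

x, cx, p = [], [], []                 # accumulators the excerpt appends to
start_ratio, stop_ratio = 0.0, 1.0    # process every article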
# Variant of the same loop in which source_data is already the list of
# articles and answer occurrences are counted as well.
stop_ai = int(round(len(source_data) * stop_ratio))
answer_counter = Counter()
N = 0
for ai, article in enumerate(tqdm(source_data[start_ai:stop_ai])):
    xp, cxp = [], []
    pp = []
    x.append(xp)
    cx.append(cxp)
    p.append(pp)
    for pi, para in enumerate(article['paragraphs']):
        context = para['context']
        context = context.replace("''", '" ')
        context = context.replace("``", '" ')
        xi = list(map(word_tokenize, sent_tokenize(context)))
        xi = [process_tokens(tokens) for tokens in xi]  # process tokens
        cxi = [[list(xijk) for xijk in xij] for xij in xi]
        xp.append(xi)
        cxp.append(cxi)
        pp.append(context)
        for xij in xi:
            for xijk in xij:
                word_counter[xijk] += len(para['qas'])
                lower_word_counter[xijk.lower()] += len(para['qas'])
                for xijkl in xijk:
                    char_counter[xijkl] += len(para['qas'])
        rxi = [ai, pi]
        assert len(x) - 1 == ai
        assert len(x[ai]) - 1 == pi
        for qa in para['qas']:  # per-question body truncated in this excerpt; see the sketch below
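The loop over para['qas'] is cut off above. For SQuAD-style records, the per-question step typically tokenizes the question and tallies answer strings; the following is a sketch under that assumption (qa['question'] and answer['text'] are standard SQuAD fields), not the original body.

for qa in para['qas']:
    # sketch only: tokenize the question and count its words/characters
    qi = process_tokens(word_tokenize(qa['question']))
    for qij in qi:
        word_counter[qij] += 1
        lower_word_counter[qij.lower()] += 1
        for qijk in qij:
            char_counter[qijk] += 1
    # sketch only: tally answer texts, mirroring answer_counter / N above
    for answer in qa['answers']:
        answer_counter[answer['text']] += 1
        N += 1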
# Demo-style variant of the per-article body: it additionally records the raw
# contexts, per-context questions and a display title. ai, article, pp and the
# contextss / context_questions / titles accumulators come from the enclosing
# loop and function.
p.append(pp)
xp, cxp, contexts, c_questions = [], [], [], []
x.append(xp)
cx.append(cxp)
contextss.append(contexts)
context_questions.append(c_questions)
title = "[" + str(ai).zfill(2) + "] " + article['title'].replace('_', ' ')
titles.append(title)
for pi, para in enumerate(article['paragraphs']):
    # words
    context = para['context']
    context = context.replace("''", '" ')
    context = context.replace("``", '" ')  # sentences of the original paragraph
    contexts.append(context)
    xi = list(map(word_tokenize, sent_tokenize(context)))
    xi = [process_tokens(tokens) for tokens in xi]  # process tokens
    # given xi, add chars
    cxi = [[list(xijk) for xijk in xij] for xij in xi]
    xp.append(xi)
    cxp.append(cxi)
    pp.append(context)
    for xij in xi:
        for xijk in xij:
            word_counter[xijk] += len(para['qas'])
            lower_word_counter[xijk.lower()] += len(para['qas'])
            for xijkl in xijk:
                char_counter[xijkl] += len(para['qas'])
    rxi = [ai, pi]
    assert len(x) - 1 == ai
    assert len(x[ai]) - 1 == pi
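A note on how the nested structures line up, inferred from the indices used above (this reading is an assumption, not documented in the excerpt): rxi = [ai, pi] addresses one paragraph of one article.

ai, pi = rxi
sentences = x[ai][pi]      # tokenized sentences of that paragraph
char_grid = cx[ai][pi]     # the same sentences as per-token character lists
raw_context = p[ai][pi]    # the untokenized context string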