Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Load the HTML template and the two JSON inputs, then build one display row
# per evaluated example.
template = env.get_template(args.template_name)
data_path = os.path.join(data_dir, "data_{}.json".format(data_type))
shared_path = os.path.join(data_dir, "shared_{}.json".format(data_type))
print("loading {}".format(data_path))
# Context managers close the files deterministically instead of leaving the
# handles open until garbage collection.
with open(data_path, 'r') as data_file:
    data = json.load(data_file)
print("loading {}".format(shared_path))
with open(shared_path, 'r') as shared_file:
    shared = json.load(shared_file)
rows = []
# Walk the parallel evaluation columns (example index, gold y, predicted
# start probs, predicted stop probs) in lockstep.
for i, (idx, yi, ypi, yp2i) in tqdm(
        enumerate(zip(*[eval_[key] for key in ('idxs', 'y', 'yp', 'yp2')])),
        total=len(eval_['idxs'])):
    id_, q, rx, answers = (data[key][idx] for key in ('ids', 'q', '*x', 'answerss'))
    # '*x' holds a two-part reference into shared['x'].
    x = shared['x'][rx[0]][rx[1]]
    ques = [" ".join(q)]
    # Shallow-copy every sentence so `para` does not alias the shared data.
    para = [list(sent) for sent in x]
    # NOTE(review): other call sites in this file unpack
    # `span, score = get_best_span(...)`. If that is the function in use
    # here, `span` is a (span, score) pair and the indexing below is wrong
    # -- confirm which get_best_span is imported.
    span = get_best_span(ypi, yp2i)
    ap = get_segment(para, span)
    # Start probability at the span start times stop probability at the
    # position just before the span stop (the stop is treated as exclusive).
    score = "{:.3f}".format(ypi[span[0][0]][span[0][1]] * yp2i[span[1][0]][span[1][1]-1])
    row = {
        'id': id_,
        'title': "Hello world!",
        'ques': ques,
        'para': para,
        # yi[0] is the first entry for this example; presumably its
        # (start, stop) pair -- confirm against the eval dump.
        'y': yi[0][0],
        'y2': yi[0][1],
        'yp': ypi,
        'yp2': yp2i,
        'a': answers,
        'ap': ap,
        'score': score,
    }
def ensemble1(context, wordss, y1_list, y2_list):
    """
    Ensemble by combining the per-model probabilities before picking a span.

    :param context: original context string
    :param wordss: tokenized words (nested 2D list)
    :param y1_list: start-index probabilities, one element per model
    :param y2_list: stop-index probabilities, one element per model
    :return: whatever ``get_phrase`` yields for the best span of the combined
        probabilities
    """
    combined_start = combine_y_list(y1_list)
    combined_stop = combine_y_list(y2_list)
    # Only the span is needed; the accompanying score is discarded.
    best_span, _ = get_best_span(combined_start, combined_stop)
    return get_phrase(context, wordss, best_span)
new_yi.append((new_start, new_stop))
new_y.append(new_yi)
y = new_y
if self.config.single:
    # Rewrite every (start, stop) pair so that the first component of both
    # ends is 0, keeping only the second component of each.
    y = [
        [((0, start[1]), (0, stop[1])) for start, stop in yi]
        for yi in y
    ]
# Keep only the first `num_examples` entries of each output array
# (presumably dropping batch padding -- confirm with the caller).
yp = yp[:data_set.num_examples]
yp2 = yp2[:data_set.num_examples]
p = p[:data_set.num_examples]
# get_best_span yields one (span, score) pair per example; transpose the
# pairs into two parallel tuples.
spans, scores = zip(*map(get_best_span, yp, yp2))
def _get(xi, span):
    """
    Return the words of ``xi`` covered by ``span``.

    :param xi: list of sentences, each a list of words
    :param span: ``((sent_idx, start_word), (_, stop_word))``; only the
        start's sentence index is used and ``stop_word`` is exclusive
    :return: the selected words, or ``[""]`` when the span does not fit
        inside ``xi``
    """
    sent_idx = span[0][0]
    if len(xi) <= sent_idx:
        return [""]
    sentence = xi[sent_idx]
    # The stop index is an exclusive slice bound, so a span may legitimately
    # end at len(sentence); only a stop beyond that is out of range.
    if len(sentence) < span[1][1]:
        return [""]
    return sentence[span[0][1]:span[1][1]]
def _get2(context, xi, span):
    """
    Return the phrase for ``span`` via ``get_phrase``, or ``""`` if the span
    does not fit inside ``xi``.

    :param context: passed through to ``get_phrase`` unchanged
    :param xi: list of sentences, each a list of words
    :param span: ``((sent_idx, start_word), (_, stop_word))``
    :return: ``get_phrase(context, xi, span)``, or ``""`` when out of range
    """
    sent_idx = span[0][0]
    # Short-circuit keeps the sentence lookup from running when the sentence
    # index itself is out of range.
    # NOTE(review): a stop index equal to the sentence length counts as out
    # of range here; if stop indices are exclusive this drops answers that
    # end on the last word of a sentence -- confirm against get_phrase.
    fits = len(xi) > sent_idx and len(xi[sent_idx]) > span[1][1]
    if not fits:
        return ""
    return get_phrase(context, xi, span)
# Map each example id to its predicted answer text, pairing ids, tokenized
# paragraphs, best spans and raw paragraphs position by position.
id2answer_dict = {
    example_id: _get2(passage, words, best_span)
    for example_id, words, best_span, passage in zip(
        data_set.data['ids'], data_set.data['x'], spans, data_set.data['p'])
}
def ensemble3(context, wordss, y1_list, y2_list):
    """
    Ensemble by score-weighted voting over each model's best phrase.

    :param context: original context string
    :param wordss: tokenized words (nested 2D list)
    :param y1_list: start-index probabilities, one element per model
    :param y2_list: stop-index probabilities, one element per model
    :return: the phrase whose accumulated span score is highest
    :raises ValueError: if no model outputs are given (nothing to vote on)
    """
    votes = defaultdict(float)
    for start_probs, stop_probs in zip(y1_list, y2_list):
        best_span, span_score = get_best_span(start_probs, stop_probs)
        # Models that agree on the same phrase pool their scores.
        votes[get_phrase(context, wordss, best_span)] += span_score
    return max(votes, key=votes.get)
def ensemble3(context, wordss, y1_list, y2_list):
    """
    Ensemble by score-weighted voting over each model's best phrase.

    :param context: original context string
    :param wordss: tokenized words (nested 2D list)
    :param y1_list: start-index probabilities, one element per model
    :param y2_list: stop-index probabilities, one element per model
    :return: the phrase whose accumulated span score is highest
    :raises ValueError: if no model outputs are given (nothing to vote on)
    """
    votes = defaultdict(float)
    for start_probs, stop_probs in zip(y1_list, y2_list):
        best_span, span_score = get_best_span(start_probs, stop_probs)
        # Models that agree on the same phrase pool their scores.
        votes[get_phrase(context, wordss, best_span)] += span_score
    return max(votes, key=votes.get)
# Load the HTML template and the two JSON inputs, then build one display row
# per evaluated example.
template = env.get_template(args.template_name)
data_path = os.path.join(data_dir, "data_{}.json".format(data_type))
shared_path = os.path.join(data_dir, "shared_{}.json".format(data_type))
print("loading {}".format(data_path))
# Context managers close the files deterministically instead of leaving the
# handles open until garbage collection.
with open(data_path, 'r') as data_file:
    data = json.load(data_file)
print("loading {}".format(shared_path))
with open(shared_path, 'r') as shared_file:
    shared = json.load(shared_file)
rows = []
# Walk the parallel evaluation columns (example index, gold y, predicted
# start probs, predicted stop probs) in lockstep.
for i, (idx, yi, ypi, yp2i) in tqdm(
        enumerate(zip(*[eval_[key] for key in ('idxs', 'y', 'yp', 'yp2')])),
        total=len(eval_['idxs'])):
    id_, q, rx, answers = (data[key][idx] for key in ('ids', 'q', '*x', 'answerss'))
    # '*x' holds a two-part reference into shared['x'].
    x = shared['x'][rx[0]][rx[1]]
    ques = [" ".join(q)]
    # Shallow-copy every sentence so `para` does not alias the shared data.
    para = [list(sent) for sent in x]
    # NOTE(review): other call sites in this file unpack
    # `span, score = get_best_span(...)`. If that is the function in use
    # here, `span` is a (span, score) pair and the indexing below is wrong
    # -- confirm which get_best_span is imported.
    span = get_best_span(ypi, yp2i)
    ap = get_segment(para, span)
    # Start probability at the span start times stop probability at the
    # position just before the span stop (the stop is treated as exclusive).
    score = "{:.3f}".format(ypi[span[0][0]][span[0][1]] * yp2i[span[1][0]][span[1][1]-1])
    row = {
        'id': id_,
        'title': "Hello world!",
        'ques': ques,
        'para': para,
        # yi[0] is the first entry for this example; presumably its
        # (start, stop) pair -- confirm against the eval dump.
        'y': yi[0][0],
        'y2': yi[0][1],
        'yp': ypi,
        'yp2': yp2i,
        'a': answers,
        'ap': ap,
        'score': score,
    }
def ensemble2(context, wordss, y1_list, y2_list):
    """
    Ensemble by voting on the start and stop positions independently.

    Each model contributes the probability it assigns to the endpoints of
    its own best span; the start and the stop with the largest accumulated
    probability are then paired into the final span.

    :param context: original context string
    :param wordss: tokenized words (nested 2D list)
    :param y1_list: start-index probabilities, one element per model
    :param y2_list: stop-index probabilities, one element per model
    :return: whatever ``get_phrase`` yields for the winning (start, stop) pair
    :raises ValueError: if no model outputs are given (nothing to vote on)
    """
    start_votes = defaultdict(float)
    stop_votes = defaultdict(float)
    for start_probs, stop_probs in zip(y1_list, y2_list):
        span, _ = get_best_span(start_probs, stop_probs)
        start, stop = span[0], span[1]
        start_votes[start] += start_probs[start[0]][start[1]]
        # NOTE(review): the stop probability is read at stop[1] itself; if
        # get_best_span returns an exclusive stop index this is one position
        # past the chosen stop token -- confirm against get_best_span.
        stop_votes[stop] += stop_probs[stop[0]][stop[1]]
    # The two endpoints are chosen independently of each other.
    best_start = max(start_votes, key=start_votes.get)
    best_stop = max(stop_votes, key=stop_votes.get)
    return get_phrase(context, wordss, (best_start, best_stop))