Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Fragment of a pytest test for `rollout`. The enclosing test function header and
# the original indentation are not visible here.
step_count = 15
# obs_list and render_img_list are MagicMock stand-ins passed straight to rollout.
obs_list = MagicMock()
render_img_list = MagicMock()
os.makedirs(os.path.join(tmpdir, 'images'))
# 'Unsupported' case: rollout is expected to raise, stop_episode is asserted to
# have been called exactly once, and the test returns.
if test_case == 'Unsupported':
with pytest.raises(Exception):
rollout(agent, gymlike_env, rollout_dir, step_count, obs_list, render_img_list)
agent.stop_episode.assert_called_once()
return
# Supported cases: run the rollout and check stop_episode was called once.
rollout(agent, gymlike_env, rollout_dir, step_count, obs_list, render_img_list)
agent.stop_episode.assert_called_once()
# Open the log file at tmpdir/ROLLOUT_LOG_FILE_NAME (presumably written by
# rollout - confirm) and check its entries, read as dicts.
with jsonlines.open('{}/{}'.format(tmpdir, ROLLOUT_LOG_FILE_NAME)) as reader:
lines_num = 0
for log_line in reader.iter(type=dict):
lines_num += 1
# 15 is the same value as step_count above.
assert lines_num == 15
# Common log entries for each test case
assert 'step' in log_line
assert 'reward' in log_line
assert 'image_path' in log_line
assert 'action' in log_line
# Extra keys checked per agent type.
if test_case == 'A3C':
assert 'state_value' in log_line
assert len(log_line['action_probs']) == 4
elif test_case == 'PPO':
assert 'state_value' in log_line
# predict_test: loads a classifier from 'label_classifier.pkl', then iterates the
# records of `predictions_test` with a jsonlines writer open on
# `new_predictions_file`. For each record it reads an entailment results file
# named "<entailment_predictions_test>/claim_<i>.json".
# NOTE(review): only the start of the function is visible; the rest of the loop
# body (including any use of `writer`, `clf` and any increment of `i`) is cut off.
# NOTE(review): `previous_predictions` and the codecs.open() handle are opened
# without `with`, and no close is visible in this fragment - confirm they are
# closed further down.
def predict_test(predictions_test, entailment_predictions_test, new_predictions_file):
clf = joblib.load('label_classifier.pkl')
# i is the claim number used to build the entailment file name.
i = 1
previous_predictions = jsonlines.open(predictions_test)
with jsonlines.open(new_predictions_file, mode='w') as writer:
for pred in previous_predictions:
new_pred = {'id': pred['id'], 'predicted_evidence': []}
entailment_results_file = entailment_predictions_test + "/claim_" + str(i) + ".json"
# The name is rebound from the path to the list of lines read from that file.
entailment_results_file = codecs.open(entailment_results_file, "r", "utf-8").readlines()
# Per-record accumulators, one set per label (support / refute / nei).
support_evidence = []
refute_evidence = []
nei_evidence = []
support_count = 0
refute_count = 0
nei_count = 0
support_confidence = 0
refute_confidence = 0
nei_confidence = 0
support_scores = []
refute_scores = []
nei_scores = []
# Fragment: tail of an if/else chain that selects input file paths (the earlier
# branches are not visible), followed by opening and normalising the data.
train_predictions_file = "predictions/predictions_train.jsonl"
else: # type_file == 'dev':
train_file = "data/dev.jsonl"
train_relevant_file = "data/dev_relevant_docs.jsonl"
train_concatenate_file = "data/dev_sentence_selection.jsonl"
train_predictions_file = "predictions/new_dev_bert_test.jsonl"
else:
# No usable argument: print the accepted choices and exit with status 0.
print("Needs to have one argument. Choose:")
print("train")
print("dev")
print("test")
exit(0)
# Each path variable is rebound to a jsonlines reader opened on that path.
train_file = jsonlines.open(train_file)
train_relevant_file = jsonlines.open(train_relevant_file)
train_concatenate_file = jsonlines.open(train_concatenate_file)
train_predictions_file = jsonlines.open(train_predictions_file)
train_set = []
train_relevant = []
train_concatenate = []
train_prediction = []
# Replace the "-LRB-" / "-RRB-" tokens in every claim with spaced parentheses
# and collect the records in memory.
for lines in train_file:
lines['claim'] = lines['claim'].replace("-LRB-", " ( ")
lines['claim'] = lines['claim'].replace("-RRB-", " ) ")
train_set.append(lines)
# Same normalisation for the relevant-documents records.
for lines in train_relevant_file:
lines['claim'] = lines['claim'].replace("-LRB-", " ( ")
lines['claim'] = lines['claim'].replace("-RRB-", " ) ")
train_relevant.append(lines)
print("preprocessing finished")
# Fragment: when local_rank is -1 or 0, recreate the feature cache directory and
# write qa_examples_json / features_json into it as JSON Lines files. Then start
# a fit call inside a Timer context (the call's remaining arguments are cut off).
if local_rank in [-1, 0]:
feature_cache_dir = "./cached_qa_features"
CACHED_EXAMPLES_TEST_FILE = "cached_examples_test.jsonl"
CACHED_FEATURES_TEST_FILE = "cached_features_test.jsonl"
examples_file = os.path.join(feature_cache_dir, CACHED_EXAMPLES_TEST_FILE)
features_file = os.path.join(feature_cache_dir, CACHED_FEATURES_TEST_FILE)
# Remove any existing cache directory (errors ignored), then create it.
if os.path.isdir(feature_cache_dir):
shutil.rmtree(feature_cache_dir, ignore_errors=True)
os.mkdir(feature_cache_dir)
# Both writers are opened in one `with` so they are closed together.
with jsonlines.open(examples_file, "w") as examples_writer, jsonlines.open(
features_file, "w"
) as features_writer:
examples_writer.write_all(qa_examples_json)
features_writer.write_all(features_json)
print("features cahed")
# Fit the extractor on train_features using the module-level hyperparameters.
with Timer() as t:
qa_extractor.fit(
train_dataset=train_features,
num_epochs=NUM_EPOCHS,
learning_rate=LEARNING_RATE,
per_gpu_batch_size=PER_GPU_BATCH_SIZE,
gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
seed=RANDOM_SEED,
# Fragment: tail of a text pre-processing helper (its header is not visible);
# it returns the result of remove_stopwords(doc).
text = remove_stopwords(doc)
return text
# TODO:Remove all STOP-WORDS and Lemmatize every token!!!!!
# full text and processed in ['text'] tag
# List the wiki page files and visit them in shuffled order.
wiki_folder = "data/wiki-pages-split"
files = os.listdir(wiki_folder)
shuffle(files)
counter = 0
train_text = []
tokens = []
for file in files:
# read() returns the next object from the JSON Lines file; its 'text' field
# is pre-processed.
file_content = jsonlines.open(wiki_folder + "/" + file)
doc = file_content.read()['text']
text = pre_process(doc)
# Once counter exceeds max_counter, two specific pages are loaded by name.
# NOTE(review): no update of `counter` is visible in this fragment, and the
# jsonlines readers opened here are not closed in view.
if counter > max_counter:
# adding required docs by fever with the claim given
file_content = jsonlines.open(wiki_folder + "/" + "Telemundo.json")
doc = file_content.read()['text']
text = pre_process(doc)
tokens = gensim.utils.simple_preprocess(text)
print(tokens)
# The file name is used as the document tag.
train_text.append(gensim.models.doc2vec.TaggedDocument(tokens, ["Telemundo.json"]))
file_content = jsonlines.open(wiki_folder + "/" + "Hispanic_and_Latino_Americans.json")
doc = file_content.read()['text']
text = pre_process(doc)
tokens = gensim.utils.simple_preprocess(text)
def __init__(self, *args):
    """Load hypothesis/reason pairs and binary labels from ``train.jsonl``.

    Reads ``train.jsonl`` from ``self.data_dir`` and fills three parallel
    lists: ``self.hypothesis``, ``self.reason`` and ``self.label``.
    Instances labelled ``'n'`` are skipped; ``'e'`` is stored as 0 and
    every other label as 1.

    Args:
        *args: Forwarded unchanged to the parent initializer.
    """
    super().__init__(*args)
    self.hypothesis = []
    self.reason = []
    self.label = []
    with jsonlines.open(os.path.join(self.data_dir, 'train.jsonl')) as jsonl_reader:
        for instance in jsonl_reader:
            label = instance['label']
            # Compare by value: ``is`` tests object identity, which only
            # matches equal strings when the interpreter happens to share them.
            if label == 'n':
                continue
            self.hypothesis.append(instance['hypothesis'])
            self.reason.append(instance['reason'])
            self.label.append(0 if label == 'e' else 1)
# Build a DataFrame whose rows are zipped from the three parallel lists.
# NOTE(review): the call is cut off in this view - the remaining arguments and
# closing brackets are not visible.
dataset = pd.DataFrame(
list(
zip(
self.hypothesis,
self.reason,
self.label,
"log": [],
"command": "",
"args": {},
"environ": {},
"agents": [],
"envs": [],
"action_meanings": [],
}
# Fragment of a method (header not visible): fill the `experiment` dict from
# files under self.path. Each file is optional and only read when it exists.
log_file_name = os.path.join(self.path, "log.jsonl")
command_file_name = os.path.join(self.path, "command.txt")
args_file_name = os.path.join(self.path, "args.jsonl")
environ_file_name = os.path.join(self.path, "environ.jsonl")
# log.jsonl: every object is appended to experiment["log"].
if os.path.isfile(log_file_name):
with jsonlines.open(log_file_name) as reader:
for obj in reader:
experiment["log"].append(obj)
# command.txt: only the first line is kept (trailing newline included, since
# readline() does not strip it).
if os.path.isfile(command_file_name):
with open(command_file_name) as f:
experiment["command"] = f.readline()
# args.jsonl / environ.jsonl: each object overwrites the previous one, so the
# last object in the file is the one that is kept.
if os.path.isfile(args_file_name):
with jsonlines.open(args_file_name) as reader:
for obj in reader:
experiment["args"] = obj
if os.path.isfile(environ_file_name):
with jsonlines.open(environ_file_name) as reader:
for obj in reader:
experiment["environ"] = obj
def download_drawing_dataset():
    """Download any drawing files that are missing from the dataset folder.

    Reads the first object of the label-map JSON Lines file at
    ``label_map_path`` and uses its values, plus the fixed categories
    ``'face'``, ``'t-shirt'`` and ``'pants'``, as the list of drawing
    categories. A category counts as missing when ``<category>.bin`` does not
    exist under ``download_path / 'drawing_dataset'``. Missing categories are
    printed and handed to ``download_recurse``.

    An ``IOError`` raised anywhere in this process is reported as a missing
    label map and swallowed.
    """
    try:
        path = download_path / 'drawing_dataset'
        with jsonlines.open(str(label_map_path), mode='r') as reader:
            category_mapping = reader.read()
        print('checking whether drawing files already exist...')
        # dict.values() is a view on Python 3 and cannot be concatenated to a
        # list directly, so materialise it first.
        drawing_categories = ['face', 't-shirt', 'pants'] + list(category_mapping.values())
        missing_files = [
            category
            for category in drawing_categories
            if not Path(path / Path(category).with_suffix('.bin')).exists()
        ]
        if missing_files:
            print('{} drawing files missing, downloading the following files: '.format(len(missing_files)))
            for missing in missing_files:
                print(missing)
            download_recurse(quickdraw_dataset_url, path, missing_files)
    except IOError:
        print('label_mapping.jsonl not found')
# export_defacto_models: collects (id, claim, label, first evidence) tuples from
# TRAIN_FILE for records that have no exported 'defacto_<id>.pkl' file yet and
# whose label is not 'NOT ENOUGH INFO', then runs save_defacto_model over them
# in a pool of 4 processes and prints error / success counts.
# NOTE(review): the `try:` has no visible handler - the function is cut off in
# this view.
def export_defacto_models():
try:
job_args = []
print('searching .pkl files in: ', ROOT_PATH + DEFACTO_OUTPUT_FOLDER)
i=0
with jsonlines.open(TRAIN_FILE, mode='r') as reader:
for obj in reader:
# NOTE(review): with `>` the loop handles MAX_TRAINING_DATA + 1 records before
# breaking - confirm that is intended.
if i > MAX_TRAINING_DATA:
break
i+=1
f = Path(ROOT_PATH + DEFACTO_OUTPUT_FOLDER + 'defacto_' + str(obj["id"]) + '.pkl')
# Only queue records that are not exported yet and carry a usable label.
if not f.exists() and obj["label"] != 'NOT ENOUGH INFO':
job_args.append((obj["id"], obj["claim"], obj["label"], obj["evidence"][0]))
print('export_defacto_models: job args created: ' + str(len(job_args)))
if len(job_args) > 0:
with Pool(processes=int(4)) as pool:
# starmap returns one result per job; non-zero results are reported as errors.
err_asyncres = pool.starmap(save_defacto_model, job_args)
print('done! tot errors:', np.count_nonzero(err_asyncres, 0))
print('done! tot OK:', len(err_asyncres) - np.count_nonzero(err_asyncres, 0))
# Fragment: the `if` that this `else` belongs to is not visible.
else:
train_file = jsonlines.open(os.path.join(dir_path, "train.jsonl"))
train_control_file = jsonlines.open(os.path.join(dir_path, "control_train.jsonl"))
train_pairs = read_pairs(train_file)
train_control_pairs = read_pairs(train_control_file)
# For each control fraction p, sample int(5000 * (1 - p)) regular pairs and
# int(5000 * p) control pairs, and write the mix to train_<p>.jsonl.
percents = [0.001, 0.003, 0.01]
for p in percents:
data = random.sample(train_pairs, int(5000.0 * (1 - p)))
data.extend(random.sample(train_control_pairs, int(5000 * p)))
output_file = open(os.path.join(dir_path, "train_%s.jsonl" % str(p)), "w")
w = jsonlines.Writer(output_file)
data = unzip_pairs(data)
w.write_all(data)
# NOTE(review): output_file was opened here with open() and handed to the
# Writer; no explicit output_file.close() is visible - confirm that w.close()
# also closes it. The input readers above are not closed in view either.
w.close()
# Concatenate the regular and control test sets into test_combined.jsonl.
test_file = jsonlines.open(os.path.join(dir_path, "test.jsonl"))
test_control_file = jsonlines.open(os.path.join(dir_path, "control_test.jsonl"))
test_data = [x for x in test_file]
test_data.extend([x for x in test_control_file])
output_file = open(os.path.join(dir_path, "test_combined.jsonl"), "w")
w = jsonlines.Writer(output_file)
w.write_all(test_data)
w.close()