Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Classify the new labels, train and store the results.
with open_state(state_file) as state:
reviewer.classify(query_idx, inclusions, state, method=label_method)
reviewer.train()
reviewer.log_probabilities(state)
new_query_idx = reviewer.query(reviewer.n_pool()).tolist()
reviewer.log_current_query(state)
proba = state.pred_proba.tolist()
with SQLiteLock(lock_file, blocking=True, lock_name="active") as lock:
current_pool = read_pool(project_id)
in_current_pool = np.zeros(len(as_data))
in_current_pool[current_pool] = 1
new_pool = [x for x in new_query_idx
if in_current_pool[x]]
write_pool(project_id, new_pool)
write_proba(project_id, proba)
# add path to dict (overwrite if already exists)
project_dict["dataset_path"] = file_name
with open(project_file_path, "w") as f_write:
json.dump(project_dict, f_write)
# fill the pool of the first iteration
as_data = read_data(project_id)
if as_data.labels is not None:
unlabeled = np.where(as_data.labels == LABEL_NA)[0]
pool_indices = as_data.record_ids[unlabeled]
else:
pool_indices = as_data.record_ids
np.random.shuffle(pool_indices)
write_pool(project_id, pool_indices.tolist())
# make an empty queue for the items to label
write_label_history(project_id, [])
labeled_list = read_label_history(project_id)
labeled_list_new = []
for item_id, item_label in labeled_list:
item_id = int(item_id)
item_label = int(item_label)
if paper_i == item_id:
pool_list.append(item_id)
else:
labeled_list_new.append([item_id, item_label])
# write the updated pool back to the project
write_pool(project_id, pool_list)
# write the remaining labeled papers back to the label history
write_label_history(project_id, labeled_list_new)
def move_label_from_pool_to_labeled(project_id, paper_i, label):
    """Move one paper out of the project's pool and into its label history.

    The pool is read with ``read_pool``, the paper is removed from it, and
    the ``[paper, label]`` pair is appended to the list returned by
    ``read_label_history``. Both lists are written back with their
    respective ``write_*`` helpers.

    Args:
        project_id: Identifier passed through to the pool / label-history
            read and write helpers.
        paper_i: Identifier of the paper to move; converted with ``int()``.
        label: Label to record for the paper; converted with ``int()``.

    Returns:
        None. If the paper cannot be removed from the pool, a message is
        printed and nothing is written.

    Raises:
        ValueError: If ``label`` cannot be converted to ``int``. This is
            raised before anything is written.
    """
    print(f"Move {paper_i} from pool to labeled")

    # Load the papers currently in the pool.
    pool_idx = read_pool(project_id)

    # Remove the paper from the pool; bail out untouched if it is not there
    # (or if paper_i is not a valid integer literal).
    try:
        paper_id = int(paper_i)
        pool_idx.remove(paper_id)
    except (IndexError, ValueError):
        print(f"Failed to remove {paper_i} from the pool.")
        return

    # Convert the label BEFORE persisting the shrunken pool: if the label is
    # invalid we must fail while the stored pool still contains the paper,
    # otherwise the paper would be in neither the pool nor the history.
    label_value = int(label)

    write_pool(project_id, pool_idx)

    # Add the paper to the reviewed papers.
    labeled = read_label_history(project_id)
    labeled.append([paper_id, label_value])
    write_label_history(project_id, labeled)