# Lock so that only one training run is running at the same time.
# It doesn't lock the Flask server/client.
with SQLiteLock(lock_file, blocking=False, lock_name="training") as lock:

    # If the lock is not acquired, another training instance is running.
    if not lock.locked():
        logging.info("Cannot acquire lock, other instance running.")
        return

    # Lock the current state. We want to have a consistent active state.
    # This does communicate with the Flask backend; it prevents writing to
    # and reading from the same files at the same time.
    with SQLiteLock(lock_file, blocking=True, lock_name="active"):
        # Get all the labels since the last run. If there are no new
        # labels, quit.
        new_label_history = read_label_history(project_id)

    data_fp = str(get_data_file_path(project_id))
    as_data = read_data(project_id)
    state_file = get_state_path(project_id)

    # Collect command line arguments and pass them to the reviewer.
    with open(asr_kwargs_file, "r") as fp:
        asr_kwargs = json.load(fp)
    asr_kwargs['state_file'] = str(state_file)

    reviewer = get_reviewer(dataset=data_fp,
                            mode="minimal",
                            **asr_kwargs)

    with open_state(state_file) as state:
        old_label_history = get_label_train_history(state)
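
# The two named locks above share one lock file: "training" guards the whole
# run, while "active" guards reads and writes of the project files. Below is
# a minimal sketch of how such a named, SQLite-backed lock can be
# implemented. It is an illustration only, not asreview's actual SQLiteLock;
# the class name and internals are assumptions, but the interface mirrors
# the usage above.
import sqlite3
import time


class SimpleSQLiteLock:

    def __init__(self, db_file, blocking=False, lock_name="lock"):
        self.db_file = db_file
        self.blocking = blocking
        self.lock_name = lock_name
        self._locked = False

    def acquire(self):
        while True:
            try:
                with sqlite3.connect(self.db_file) as con:
                    con.execute(
                        "CREATE TABLE IF NOT EXISTS locks "
                        "(name TEXT PRIMARY KEY)")
                    # The INSERT fails while another holder owns this name.
                    con.execute(
                        "INSERT INTO locks VALUES (?)", (self.lock_name,))
                self._locked = True
                return
            except sqlite3.IntegrityError:
                if not self.blocking:
                    return
                time.sleep(0.1)

    def release(self):
        if self._locked:
            with sqlite3.connect(self.db_file) as con:
                con.execute(
                    "DELETE FROM locks WHERE name = ?", (self.lock_name,))
            self._locked = False

    def locked(self):
        return self._locked

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, *exc_info):
        self.release()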
def move_label_from_labeled_to_pool(project_id, paper_i, label):
    print(f"Move {paper_i} from labeled to pool")

    # Load the papers from the pool.
    pool_list = read_pool(project_id)

    # Load the current label history (the reviewed papers).
    labeled_list = read_label_history(project_id)

    labeled_list_new = []
    for item_id, item_label in labeled_list:
        item_id = int(item_id)
        item_label = int(item_label)

        if int(paper_i) == item_id:
            pool_list.append(item_id)
        else:
            labeled_list_new.append([item_id, item_label])

    # Write the updated pool and label history back to the project.
    write_pool(project_id, pool_list)
    write_label_history(project_id, labeled_list_new)
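
# A self-contained sketch of the bookkeeping above, on plain lists; the ids
# and labels are made up, and read_pool/read_label_history would normally
# load these from the project files.
pool_list = [3, 7]
labeled_list = [[1, 1], [5, 0], [9, 1]]
# Moving record 5 back to the pool:
pool_list.append(5)
labeled_list = [[i, lab] for i, lab in labeled_list if i != 5]
assert pool_list == [3, 7, 5]
assert labeled_list == [[1, 1], [9, 1]]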
def get_statistics(project_id):
    fp_lock = get_lock_path(project_id)
    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        # Get the label history of the active iteration.
        label_history = read_label_history(project_id)
        current_labels = read_current_labels(
            project_id, label_history=label_history)

    # Count how many labels were given since the last inclusion.
    n_since_last_inclusion = 0
    for _, inclusion in reversed(label_history):
        if inclusion == 1:
            break
        n_since_last_inclusion += 1

    n_included = len(np.where(current_labels == 1)[0])
    n_excluded = len(np.where(current_labels == 0)[0])
    n_papers = len(current_labels)
    stats = {
        "n_included": n_included,
        "n_excluded": n_excluded,
        "n_since_last_inclusion": n_since_last_inclusion,
        "n_papers": n_papers,
    }
    return stats
print(f"Move {paper_i} from pool to labeled")
# load the papers from the pool
pool_idx = read_pool(project_id)
# Remove the paper from the pool.
try:
pool_idx.remove(int(paper_i))
except (IndexError, ValueError):
print(f"Failed to remove {paper_i} from the pool.")
return
write_pool(project_id, pool_idx)
# Add the paper to the reviewed papers.
labeled = read_label_history(project_id)
labeled.append([int(paper_i), int(label)])
write_label_history(project_id, labeled)
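
# The pool update above relies on list.remove, which raises ValueError when
# the id is absent (the IndexError in the handler is defensive). A minimal
# sketch of that pattern:
pool_idx = [3, 7, 9]
pool_idx.remove(7)
assert pool_idx == [3, 9]
try:
    pool_idx.remove(7)  # already removed
except ValueError:
    pass  # mirrors the early return in the function above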
def api_get_prior(project_id):  # noqa: F401
    """Get all papers classified as prior documents."""
    lock_fp = get_lock_path(project_id)
    with SQLiteLock(lock_fp, blocking=True, lock_name="active"):
        label_history = read_label_history(project_id)

    indices = [x[0] for x in label_history]
    records = read_data(project_id).record(indices)

    payload = {"result": []}
    for i, record in enumerate(records):
        payload["result"].append({
            "id": int(record.record_id),
            "title": record.title,
            "abstract": record.abstract,
            "authors": record.authors,
            "keywords": record.keywords,
            "included": int(label_history[i][1])
        })

    # Serialize the payload; jsonify is Flask's standard JSON response
    # helper (the original snippet is truncated here).
    return jsonify(payload)
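
# Illustrative JSON payload as returned by the endpoint above (all field
# values are made up):
# {"result": [{"id": 12, "title": "A title", "abstract": "An abstract",
#              "authors": "...", "keywords": "...", "included": 1}]}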