import json
import logging
import os

import numpy as np

# SQLiteLock, LABEL_NA, and the get_*_path / read_* helpers are provided by
# the surrounding project; they are assumed to be importable in this module.


def remove_dataset_to_project(project_id, file_name):
    """Remove dataset from project."""
    project_file_path = get_project_file_path(project_id)
    fp_lock = get_lock_path(project_id)

    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        # open the project file
        with open(project_file_path, "r") as f_read:
            project_dict = json.load(f_read)

        # remove the path from the project file
        data_fn = project_dict["dataset_path"]
        del project_dict["dataset_path"]

        with open(project_file_path, "w") as f_write:
            json.dump(project_dict, f_write)

        # files to remove: delete the dataset file itself
        data_path = get_data_file_path(project_id, data_fn)
        os.remove(data_path)


def export_to_string(project_id, export_type="csv"):
    """Export the project records as a string, ranked by relevance."""
    fp_lock = get_lock_path(project_id)
    as_data = read_data(project_id)

    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        proba = read_proba(project_id)
        if proba is None:
            # no model output yet: fall back to the reversed record order
            proba = np.flip(np.arange(len(as_data)))
        else:
            proba = np.array(proba)
        labels = read_current_labels(project_id, as_data=as_data)

    pool_idx = np.where(labels == LABEL_NA)[0]
    one_idx = np.where(labels == 1)[0]
    zero_idx = np.where(labels == 0)[0]

    # ranking: inclusions first, then the unlabeled pool sorted by
    # descending probability, then the exclusions
    proba_order = np.argsort(-proba[pool_idx])
    ranking = np.concatenate(
        (one_idx, pool_idx[proba_order], zero_idx), axis=None)


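# A minimal standalone sketch (not part of the original source) showing the
# ranking order used above: inclusions first, then the unlabeled pool by
# descending probability, then exclusions. All values here are made up.
def _demo_ranking_order():
    demo_labels = np.array([1, LABEL_NA, 0, LABEL_NA, 1])
    demo_proba = np.array([0.9, 0.2, 0.1, 0.8, 0.7])

    pool = np.where(demo_labels == LABEL_NA)[0]        # [1, 3]
    order = np.argsort(-demo_proba[pool])              # record 3 ranks first
    return np.concatenate(
        (np.where(demo_labels == 1)[0], pool[order],
         np.where(demo_labels == 0)[0]), axis=None)    # [0, 4, 3, 1, 2]

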
def get_instance(project_id):
    """Get a new instance to review.

    Arguments
    ---------
    project_id: str
        The id of the current project.
    """
    fp_lock = get_lock_path(project_id)

    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        pool_idx = read_pool(project_id)

    if len(pool_idx) > 0:
        logging.info(f"Requesting {pool_idx[0]} from project {project_id}")
        return pool_idx[0]
    else:
        # end of pool
        logging.info(f"No more records for project {project_id}")
        return None


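# A hedged usage sketch (not in the original source): a minimal review loop
# pairing get_instance with label_instance (defined below). The label value
# would normally come from user input rather than a placeholder.
def _demo_review_loop(project_id):
    while True:
        record_id = get_instance(project_id)
        if record_id is None:
            break  # pool exhausted
        label = 0  # placeholder: 1 = include, 0 = exclude
        label_instance(project_id, record_id, label, retrain_model=False)

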
def add_dataset_to_project(project_id, file_name):
    """Add a file path to the project file.

    Add the file to the data subfolder and fill the pool of iteration 0.
    """
    project_file_path = get_project_file_path(project_id)
    fp_lock = get_lock_path(project_id)

    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        # open the project file
        with open(project_file_path, "r") as f_read:
            project_dict = json.load(f_read)

        # add the path to the dict (overwrite if it already exists)
        project_dict["dataset_path"] = file_name

        with open(project_file_path, "w") as f_write:
            json.dump(project_dict, f_write)

        # fill the pool of the first iteration
        as_data = read_data(project_id)
        if as_data.labels is not None:
            unlabeled = np.where(as_data.labels == LABEL_NA)[0]


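# The snippet above is truncated before the pool is persisted. A hedged
# sketch of that remaining step: shuffle the unlabeled indices and write
# them out as the iteration-0 pool. write_pool is a hypothetical counterpart
# to the read_pool helper used elsewhere in this module.
def _demo_fill_pool(project_id, unlabeled):
    pool = unlabeled.tolist()
    np.random.shuffle(pool)
    write_pool(project_id, pool)  # hypothetical helper, assumed to exist

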
def api_get_prior(project_id):  # noqa: F401
    """Get all papers classified as prior documents."""
    lock_fp = get_lock_path(project_id)

    with SQLiteLock(lock_fp, blocking=True, lock_name="active"):
        label_history = read_label_history(project_id)

    indices = [x[0] for x in label_history]
    records = read_data(project_id).record(indices)

    payload = {"result": []}
    for i, record in enumerate(records):
        payload["result"].append({
            "id": int(record.record_id),
            "title": record.title,
            "abstract": record.abstract,
            "authors": record.authors,
            "keywords": record.keywords,
            # label_history stores (index, inclusion) pairs; attach the
            # inclusion decision to each record (assumed completion of the
            # truncated snippet)
            "included": int(label_history[i][1]),
        })


def get_statistics(project_id):
    """Compute label counts and progress statistics for a project."""
    fp_lock = get_lock_path(project_id)

    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        label_history = read_label_history(project_id)
        current_labels = read_current_labels(
            project_id, label_history=label_history)

    # number of papers reviewed since the most recent inclusion
    n_since_last_inclusion = 0
    for _, inclusion in reversed(label_history):
        if inclusion == 1:
            break
        n_since_last_inclusion += 1

    n_included = len(np.where(current_labels == 1)[0])
    n_excluded = len(np.where(current_labels == 0)[0])
    n_papers = len(current_labels)


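# A hedged sketch (not in the original source) of how these counts might be
# assembled into a response payload; the key names are illustrative only.
def _demo_statistics_payload(n_included, n_excluded, n_papers,
                             n_since_last_inclusion):
    return {
        "n_included": n_included,
        "n_excluded": n_excluded,
        "n_since_last_inclusion": n_since_last_inclusion,
        "n_pool": n_papers - n_included - n_excluded,
    }

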
def label_instance(project_id, paper_i, label, retrain_model=True):
    """Label a paper after reviewing the abstract."""
    paper_i = int(paper_i)
    label = int(label)
    fp_lock = get_lock_path(project_id)

    with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        # a label of 0/1 moves the paper from the pool to the labeled set;
        # any other value moves it back to the pool
        if label in [0, 1]:
            move_label_from_pool_to_labeled(
                project_id, paper_i, label)
        else:
            move_label_from_labeled_to_pool(
                project_id, paper_i, label)

    if retrain_model:
        # Update the model (if it isn't busy). The snippet ends here; see
        # the sketch below for one way to trigger retraining.
        pass


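# A hedged sketch (not in the original source) of a retraining trigger:
# launch the model update as a background process so labeling stays
# responsive. The module name passed to -m is hypothetical.
def _demo_retrain_in_background(project_id):
    import subprocess
    import sys

    subprocess.Popen(
        [sys.executable, "-m", "some_model_runner", project_id])

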
def api_random_prior_papers(project_id):  # noqa: F401
    """Get a selection of random papers to find exclusions.

    This set of papers is extracted from the pool, but without
    the already labeled items.
    """
    lock_fp = get_lock_path(project_id)

    with SQLiteLock(lock_fp, blocking=True, lock_name="active"):
        pool = read_pool(project_id)

    # with open(get_labeled_path(project_id, 0), "r") as f_label:
    #     prior_labeled = json.load(f_label)

    # exclude the already labeled items from the random selection
    # prior_labeled_index = [int(label) for label in prior_labeled.keys()]
    # pool = [i for i in pool if i not in prior_labeled_index]

    # sample from the pool (the pool is already shuffled when it is
    # initialized, but a second shuffle does no harm)
    try:
        pool_random = np.random.choice(pool, 5, replace=False)
    except ValueError:
        # assumed fallback: fewer than 5 records remain, so use them all
        pool_random = np.array(pool)
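

# A minimal standalone check (not in the original source) showing why the
# sampling above is guarded: without replacement, numpy refuses to draw
# more samples than the population contains.
def _demo_choice_raises():
    try:
        np.random.choice([1, 2, 3], 5, replace=False)
    except ValueError as err:
        return str(err)  # "Cannot take a larger sample than population ..."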