def review(self, *args, instant_save=True, **kwargs):
    with open_state(self.state_file) as state:
        self.main_menu(state, *args, instant_save=instant_save, **kwargs)

if len(as_data) == 0:
    raise ValueError("Supply at least one dataset"
                     " with at least one record.")

cli_settings = ASReviewSettings(
    model=model, n_instances=n_instances, n_queries=n_queries,
    n_papers=n_papers, n_prior_included=n_prior_included,
    n_prior_excluded=n_prior_excluded, query_strategy=query_strategy,
    balance_strategy=balance_strategy,
    feature_extraction=feature_extraction,
    mode=mode, data_fp=None,
    abstract_only=abstract_only)
cli_settings.from_file(config_file)

if state_file is not None:
    with open_state(state_file) as state:
        if state.is_empty():
            state.settings = cli_settings
        settings = state.settings
else:
    settings = cli_settings

if n_queries is not None:
    settings.n_queries = n_queries
if n_papers is not None:
    settings.n_papers = n_papers
if model_param is not None:
    settings.model_param = model_param
if query_param is not None:
    settings.query_param = query_param
if balance_param is not None:
    settings.balance_param = balance_param

# Get all the labels added since the last run. If there are no new labels, quit.
new_label_history = read_label_history(project_id)

data_fp = str(get_data_file_path(project_id))
as_data = read_data(project_id)
state_file = get_state_path(project_id)

# Collect the command line arguments and pass them to the reviewer.
with open(asr_kwargs_file, "r") as fp:
    asr_kwargs = json.load(fp)
asr_kwargs['state_file'] = str(state_file)

reviewer = get_reviewer(dataset=data_fp,
                        mode="minimal",
                        **asr_kwargs)

with open_state(state_file) as state:
    old_label_history = get_label_train_history(state)

diff_history = get_diff_history(new_label_history, old_label_history)

if len(diff_history) == 0:
    logging.info("No new labels since last run.")
    return

query_idx = np.array([x[0] for x in diff_history], dtype=int)
inclusions = np.array([x[1] for x in diff_history], dtype=int)

# Classify the new labels, train the model and store the results.
with open_state(state_file) as state:
    reviewer.classify(query_idx, inclusions, state, method=label_method)
    reviewer.train()
    reviewer.log_probabilities(state)
    new_query_idx = reviewer.query(reviewer.n_pool()).tolist()
    reviewer.log_current_query(state)
    proba = state.pred_proba.tolist()

# Update the pool under a lock: keep only queried records that are still in the pool.
with SQLiteLock(lock_file, blocking=True, lock_name="active") as lock:
    current_pool = read_pool(project_id)
    in_current_pool = np.zeros(len(as_data))
    in_current_pool[current_pool] = 1
    new_pool = [x for x in new_query_idx
                if in_current_pool[x]]
    write_pool(project_id, new_pool)
    write_proba(project_id, proba)

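The update above relies on get_diff_history to return only the labels added since the previous training run. A minimal sketch of that idea, assuming the history entries are (record_index, label) pairs; diff_label_history is an illustrative stand-in, not the asreview helper itself:

def diff_label_history(new_history, old_history):
    """Return the (record_index, label) pairs in new_history that are not in old_history."""
    seen = set(map(tuple, old_history))
    return [pair for pair in map(tuple, new_history) if pair not in seen]

# Example: diff_label_history([(0, 1), (3, 0)], [(0, 1)]) returns [(3, 0)].
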
self.start_idx = start_idx

if log_file is not None:
    warnings.warn("The log_file argument for BaseReview will be"
                  " replaced by state_file.", category=FutureWarning)
    self.state_file = log_file
else:
    self.state_file = state_file

self.query_i = 0
self.query_i_classified = 0
self.train_idx = np.array([], dtype=int)
self.model_trained = False

# Restore the state from a file or initialize said file.
with open_state(self.state_file) as state:
    # From file
    if not state.is_empty():
        startup = state.startup_vals()
        # If there are start indices not yet in the training set, add them.
        if not set(startup["train_idx"]) >= set(start_idx):
            new_idx = list(set(start_idx) - set(startup["train_idx"]))
            self.classify(new_idx, self.y[new_idx], state,
                          method="initial")
            startup = state.startup_vals()
        self.train_idx = startup["train_idx"]
        self.y = startup["labels"]
        self.shared["query_src"] = startup["query_src"]
        self.query_i = startup["query_i"]
        self.query_i_classified = startup["query_i_classified"]
    # From scratch
    else:

def open_logger(*args, **kwargs):
    warnings.warn("open_logger will be replaced by open_state.",
                  category=FutureWarning)
    return open_state(*args, **kwargs)
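
open_logger above is a thin backwards-compatibility alias. A self-contained sketch of the same deprecation pattern, with illustrative names that are not part of asreview:

import warnings

def open_state_v2(path):
    # Illustrative stand-in for the replacement API.
    return {"path": path}

def open_logger_v1(path):
    """Deprecated alias: warn, then delegate to the replacement."""
    warnings.warn("open_logger_v1 will be replaced by open_state_v2.",
                  category=FutureWarning)
    return open_state_v2(path)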