Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_no_seed():
    """Unseeded prior selection should eventually pick every record.

    Repeatedly builds an unseeded simulate-mode reviewer and tallies which
    records were chosen as priors; succeeds as soon as every record has been
    selected at least once, and fails after ``n_test_max`` attempts.
    """
    n_test_max = 100
    as_data = ASReviewData.from_file(data_fp)
    hit_counts = np.zeros(len(as_data), dtype=int)
    for _ in range(n_test_max):
        reviewer = get_reviewer(
            data_fp, mode="simulate", model="nb", state_file=None,
            init_seed=None, n_prior_excluded=1, n_prior_included=1)
        # One included + one excluded prior is always exactly two records.
        assert len(reviewer.start_idx) == 2
        hit_counts[reviewer.start_idx] += 1
        if np.all(hit_counts > 0):
            return
    raise ValueError(f"Error getting all priors in {n_test_max} iterations.")
# NOTE(review): this run of statements is a byte-for-byte duplicate of the
# body of create_as_data() (defined below) but has no enclosing `def` header
# in view — presumably a concatenation/merge artifact. The names `dataset`,
# `included_dataset`, `excluded_dataset`, `prior_dataset` and `new` are not
# defined at this scope, and the trailing `return` is illegal outside a
# function. Confirm against the original file and remove the duplicate.
if isinstance(dataset, (str, PurePath)):
    dataset = [dataset]
if isinstance(included_dataset, (str, PurePath)):
    included_dataset = [included_dataset]
if isinstance(excluded_dataset, (str, PurePath)):
    excluded_dataset = [excluded_dataset]
if isinstance(prior_dataset, (str, PurePath)):
    prior_dataset = [prior_dataset]
as_data = ASReviewData()
# Find the URL of the datasets if the dataset is an example dataset.
for data in dataset:
    as_data.append(ASReviewData.from_file(find_data(data)))
if new:
    as_data.labels = np.full((len(as_data),), LABEL_NA, dtype=int)
for data in included_dataset:
    as_data.append(ASReviewData.from_file(
        find_data(data), data_type="included"))
for data in excluded_dataset:
    as_data.append(ASReviewData.from_file(
        find_data(data), data_type="excluded"))
for data in prior_dataset:
    as_data.append(ASReviewData.from_file(
        find_data(data), data_type="prior"))
return as_data
def read_data(project_id):
    """Load the dataset attached to *project_id* as an ASReviewData object."""
    return ASReviewData.from_file(get_data_file_path(project_id))
def _as_dataset_list(value):
    """Normalize a dataset argument to a list.

    ``None`` becomes an empty list, a single ``str``/``PurePath`` is wrapped
    in a list, and any other iterable is returned unchanged.
    """
    if value is None:
        return []
    if isinstance(value, (str, PurePath)):
        return [value]
    return value


def create_as_data(dataset, included_dataset=None, excluded_dataset=None,
                   prior_dataset=None, new=False):
    """Create ASReviewData object from multiple datasets.

    Arguments
    ---------
    dataset: str, PurePath, list
        Dataset(s) appended without a forced label type.
    included_dataset: str, PurePath, list, optional
        Dataset(s) appended with data_type="included".
    excluded_dataset: str, PurePath, list, optional
        Dataset(s) appended with data_type="excluded".
    prior_dataset: str, PurePath, list, optional
        Dataset(s) appended with data_type="prior".
    new: bool
        If True, reset the labels of the base datasets to LABEL_NA.

    Returns
    -------
    ASReviewData:
        Combined data object.
    """
    # `None` sentinels instead of the previous mutable `[]` defaults
    # (shared-list pitfall); behavior for all existing callers is unchanged.
    dataset = _as_dataset_list(dataset)
    included_dataset = _as_dataset_list(included_dataset)
    excluded_dataset = _as_dataset_list(excluded_dataset)
    prior_dataset = _as_dataset_list(prior_dataset)

    as_data = ASReviewData()
    # Find the URL of the datasets if the dataset is an example dataset.
    for data in dataset:
        as_data.append(ASReviewData.from_file(find_data(data)))
    if new:
        as_data.labels = np.full((len(as_data),), LABEL_NA, dtype=int)
    for data in included_dataset:
        as_data.append(ASReviewData.from_file(
            find_data(data), data_type="included"))
    for data in excluded_dataset:
        as_data.append(ASReviewData.from_file(
            find_data(data), data_type="excluded"))
    for data in prior_dataset:
        as_data.append(ASReviewData.from_file(
            find_data(data), data_type="prior"))
    return as_data
        Useful if some parts should be kept/thrown away.

        Arguments
        ---------
        idx: list, np.ndarray
            Record ids that should be kept.

        Returns
        -------
        ASReviewData:
            Slice of itself.
        """
        # An object that was never loaded has no dataframe to index into.
        if self.df is None:
            raise ValueError("Cannot slice empty ASReviewData object.")
        return ASReviewData(self.df[idx], data_name="sliced")