Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Look for this term in aliases within any dataset. A group can
be specified by setting dataset_name to 'group_id:dataset_id'.
This can be helpful if the dataset_id is not unique.
The dataset_name can also be a non-string iterable, in which case
a list will be returned with all terms.
Dataset_ids should not contain colons (:).
Return None if the dataset could not be found.

Returns
-------
BaseDataSet, VersionedDataSet:
If the dataset with that name is found, return it
(or a list thereof).
"""
# NOTE(review): the enclosing `def` of this find-style method is not
# visible in this chunk; the body reads `self.all_datasets`, so it is
# presumably a method taking `dataset_name` — confirm in the full file.
# Indentation has also been stripped by whatever produced this chunk.
# If dataset_name is a non-string iterable, return a list.
if is_iterable(dataset_name):
return [self.find(x) for x in dataset_name]
# If dataset_name is a valid path, create a dataset from it.
if Path(dataset_name).is_file():
return BaseDataSet(dataset_name)
# Normalize to a string so the ':' split below is well-defined.
dataset_name = str(dataset_name)
# Split into group/dataset if possible.
split_dataset_id = dataset_name.split(":")
if len(split_dataset_id) == 2:
data_group = split_dataset_id[0]
split_dataset_name = split_dataset_id[1]
# Delegate to the group's own find() when the group id is known.
if data_group in self.all_datasets:
return self.all_datasets[data_group].find(split_dataset_name)
# NOTE(review): execution can fall through past this point (no ':'
# match, or unknown group); the chunk appears truncated, so the
# dataset-id fallback search implied by the docstring may live in
# lines not shown here — TODO confirm against the original source.
def get_dataset_metadata(exclude=None, include=None):
    """Collect metadata dictionaries for the known datasets.

    Arguments
    ---------
    exclude: str, iterable
        Group id(s) to drop from the result. A bare string is treated
        as a one-element list.
    include: str, iterable
        If given, keep only these group id(s). A bare string is treated
        as a one-element list.

    Returns
    -------
    list:
        One metadata dict per plain dataset, and one sub-list of
        metadata dicts (one per version) per versioned dataset. Every
        dataset_id is rewritten to the "group_id:dataset_id" form.
    """
    all_datasets = DatasetManager().list(latest_only=False)

    # Normalize the filters so a bare string works like a list.
    if exclude is not None:
        if not is_iterable(exclude):
            exclude = [exclude]
        for group_id in exclude:
            all_datasets.pop(group_id, None)

    if include is not None:
        if not is_iterable(include):
            include = [include]
        # Iterate a snapshot of the keys: popping while iterating the
        # dict itself would raise a RuntimeError.
        for group_id in list(all_datasets):
            if group_id not in include:
                all_datasets.pop(group_id, None)

    result_datasets = []
    for group_id, data_list in all_datasets.items():
        for dataset in data_list:
            if isinstance(dataset, BaseVersionedDataSet):
                # A versioned dataset contributes one entry per version,
                # grouped together in a sub-list.
                cur_data = []
                for vdata in dataset.datasets:
                    vdata.dataset_id = f"{group_id}:{vdata.dataset_id}"
                    cur_data.append(vdata.to_dict())
                result_datasets.append(cur_data)
            else:
                dataset.dataset_id = f"{group_id}:{dataset.dataset_id}"
                # BUG FIX: the original fragment rewrote dataset_id but
                # never appended the plain dataset's metadata and never
                # returned the accumulated list (implicit None).
                result_datasets.append(dataset.to_dict())

    return result_datasets
# NOTE(review): fragment of a record-lookup method; the enclosing `def`
# (which supplies `i`, `by_index`, and `self`) is not visible in this
# chunk, and indentation has been stripped — confirm in the full file.
# Normalize `i` to a list so scalar and iterable input share one path.
if not is_iterable(i):
index_list = [i]
else:
index_list = i
# by_index=False: `i` values are record ids -> label-based .loc lookup
# with the id reused as record_id.
if not by_index:
records = [PaperRecord(**self.df.loc[j, :], record_id=j,
column_spec=self.column_spec)
for j in index_list]
# by_index=True: `i` values are positions -> positional .iloc lookup,
# with the record_id read from the DataFrame index.
else:
records = [PaperRecord(**self.df.iloc[j],
column_spec=self.column_spec,
record_id=self.df.index.values[j])
for j in index_list]
# Iterable input -> list of records; scalar input -> single record.
if is_iterable(i):
return records
return records[0]
----------
group_name: str, iterable
List only datasets in the group(s) with that name. Lists all
groups if group_name is None.
latest_only: bool
Only include the latest version of the dataset.

Returns
-------
dict:
Dictionary with group names as keys and lists of datasets as
values.
"""
# NOTE(review): the enclosing `def` (supplying `group_name` and
# `latest_only`) is not visible in this chunk; the section header word
# above the '----------' underline was also lost — confirm in the
# full file.
# Normalize group_name: None -> every known group, a bare string ->
# a one-element list, any other iterable -> used as-is.
if group_name is None:
group_names = list(self.all_datasets)
elif not is_iterable(group_name):
group_names = [group_name]
else:
group_names = group_name
# Delegate the per-group listing; an unknown group name raises
# KeyError here rather than being silently skipped.
dataset_list = {gn: self.all_datasets[gn].list(latest_only=latest_only)
for gn in group_names}
return dataset_list
Arguments
---------
i: int, iterable
Index of the record, or list of indices.
by_index: bool
If True, take the i-th value as used internally by the review.
If False, take the record with record_id==i.

Returns
-------
PaperRecord:
The corresponding record if i was an integer, or a list of records
if i was an iterable.
"""
# NOTE(review): duplicate of the record-lookup fragment earlier in this
# chunk; the enclosing `def` is not visible and indentation is stripped.
# Normalize `i` to a list so scalar and iterable input share one path.
if not is_iterable(i):
index_list = [i]
else:
index_list = i
# Label-based lookup by record_id (.loc) vs positional lookup (.iloc).
if not by_index:
records = [PaperRecord(**self.df.loc[j, :], record_id=j,
column_spec=self.column_spec)
for j in index_list]
else:
records = [PaperRecord(**self.df.iloc[j],
column_spec=self.column_spec,
record_id=self.df.index.values[j])
for j in index_list]
if is_iterable(i):
return records
# NOTE(review): a trailing `return records[0]` for the scalar case
# appears to be missing here (compare the earlier copy of this code);
# as written, the scalar path would implicitly return None — confirm
# against the original source.
def get_dataset_metadata(exclude=None, include=None):
all_datasets = DatasetManager().list(latest_only=False)
if exclude is not None:
if not is_iterable(exclude):
exclude = [exclude]
for group_id in exclude:
all_datasets.pop(group_id, None)
if include is not None:
if not is_iterable(include):
include = [include]
for group_id in list(all_datasets):
if group_id not in include:
all_datasets.pop(group_id, None)
result_datasets = []
for group_id, data_list in all_datasets.items():
for dataset in data_list:
if isinstance(dataset, BaseVersionedDataSet):
cur_data = []