Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_task_variables():
    """Return the contents of the ``task_variables.json`` asset.

    The asset is looked up through ``get_asset`` under ``TCGA.folder`` and
    requested with ``dtype='json'``; the value returned by ``get_asset`` is
    passed back to the caller unchanged.
    """
    asset_name = 'task_variables.json'
    return get_asset(TCGA.folder, asset_name, dtype='json')
# Constructor: forwards the meta-split arguments to the parent class,
# resolves the dataset directory, stores the user options and resets the
# lazily-filled caches.
#
# Arguments, as far as this excerpt shows:
#   root                  -- base directory; '~' is expanded and `self.folder`
#                            is appended to form `self.root`.
#   meta_train/meta_val/meta_test/meta_split, target_transform,
#   dataset_transform     -- passed straight to the parent `__init__`.
#   min_samples_per_class -- stored on the instance (its use is not visible
#                            in this excerpt).
#   transform             -- stored on the instance.
#   download, chunksize   -- when `download` is truthy, `self.download` is
#                            called with `chunksize`.
#   preload               -- guards the final branch below.
#
# NOTE(review): this excerpt has lost its indentation, and the body of the
# trailing `if preload:` is not visible here -- confirm against the full file
# before changing anything in this method.
def __init__(self, root, meta_train=False, meta_val=False, meta_test=False, meta_split=None,
min_samples_per_class=5, transform=None, target_transform=None,
dataset_transform=None, download=False, chunksize=100, preload=True):
# Explicit two-argument super() call; `transform` is NOT forwarded to the
# parent, it is kept on this instance further down.
super(TCGA, self).__init__(meta_train, meta_val, meta_test, meta_split,
target_transform=target_transform, dataset_transform=dataset_transform)
# Dataset directory: <expanded root>/<self.folder>.
self.root = os.path.join(os.path.expanduser(root), self.folder)
self.min_samples_per_class = min_samples_per_class
self.transform = transform
# Caches start empty; presumably filled on first access elsewhere in the
# class -- TODO confirm.
self._all_sample_ids = None
self._gene_ids = None
self._tasks = None
# Optional download step, run before the preload state is set up.
if download:
self.download(chunksize)
# Preload bookkeeping starts in the "nothing loaded" state.
self.preloaded = False
self.gene_expression_data = None
self.gene_expression_file = None
# NOTE(review): the suite of this `if` is missing from the excerpt.
if preload:
def from_id(cls, root, task_id, transform=None, target_transform=None):
    """Build a task from its ``(label, cancer)`` identifier.

    The clinical matrix for ``cancer`` is read from
    ``<root>/<TCGA.folder>/clinicalMatrices/processed/``; the ``label``
    column is taken from it, missing values are dropped, and the remainder
    is converted to a categorical series. The matching rows of the
    ``expression_data`` dataset in the gene expression HDF5 file are then
    loaded using that series' index.

    NOTE(review): the first parameter is ``cls`` and the result is built with
    ``cls(...)``, so this is presumably decorated as a classmethod -- the
    decorator is not visible in this excerpt.

    Args:
        root: Base data directory; ``~`` is expanded and ``TCGA.folder``
            is appended.
        task_id: Two-element sequence unpacked as ``(label, cancer)``.
        transform: Forwarded unchanged to ``cls``.
        target_transform: Forwarded unchanged to ``cls``.

    Returns:
        ``cls(task_id, data, codes, categories, transform=...,
        target_transform=...)`` where ``codes`` and ``categories`` are the
        integer codes and category values of the label column, as lists.

    Raises:
        IOError: If the gene expression file or the processed clinical
            matrix for ``cancer`` does not exist.
    """
    # Imported lazily so pandas is only required when this path is used.
    import pandas as pd

    root = os.path.join(os.path.expanduser(root), TCGA.folder)
    gene_filepath = os.path.join(root, TCGA.gene_expression_filename)
    if not os.path.isfile(gene_filepath):
        # Name the missing path so the failure can be diagnosed.
        raise IOError('Gene expression file not found: {0}'.format(
            gene_filepath))

    label, cancer = task_id

    processed_folder = os.path.join(root, 'clinicalMatrices', 'processed')
    filename = '{0}.tsv'.format(TCGA.clinical_matrix_filename.format(cancer))
    filepath = os.path.join(processed_folder, filename)
    if not os.path.isfile(filepath):
        raise IOError('Processed clinical matrix not found: {0}'.format(
            filepath))

    dataframe = pd.read_csv(filepath, sep='\t', index_col=0, header=0)
    # Only samples that actually have a value for this label are kept.
    labels = dataframe[label].dropna().astype('category')

    with h5py.File(gene_filepath, 'r') as f:
        # The selection is read while the file is still open.
        data = f['expression_data'][labels.index]

    return cls(task_id, data, labels.cat.codes.tolist(),
               labels.cat.categories.tolist(), transform=transform,
               target_transform=target_transform)
def from_id(cls, root, task_id, transform=None, target_transform=None):
import pandas as pd
root = os.path.join(os.path.expanduser(root), TCGA.folder)
gene_filepath = os.path.join(root, TCGA.gene_expression_filename)
if not os.path.isfile(gene_filepath):
raise IOError()
label, cancer = task_id
processed_folder = os.path.join(root, 'clinicalMatrices', 'processed')
filename = '{0}.tsv'.format(TCGA.clinical_matrix_filename.format(cancer))
filepath = os.path.join(processed_folder, filename)
if not os.path.isfile(filepath):
raise IOError()
dataframe = pd.read_csv(filepath, sep='\t', index_col=0, header=0)
labels = dataframe[label].dropna().astype('category')
with h5py.File(gene_filepath, 'r') as f:
data = f['expression_data'][labels.index]