Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __next__(self):
    """Return the next exon as a dict with ``inputs`` and ``metadata``.

    Pulls one record from ``self.exonGenerator``, reads its upper-cased
    sequence from ``self.fasta`` and, when ``self.split_seq`` is truthy,
    keeps only the first ``'exon'`` entry returned by ``self.split``.

    Returns:
        dict: ``{'inputs': {'input_3': <sequence>},
        'metadata': {'ranges': <GenomicRanges>}}``.

    Raises:
        StopIteration: propagated from ``next(self.exonGenerator)`` once
            the generator is exhausted.
    """
    exon = next(self.exonGenerator)

    sequence = exon.get_seq(self.fasta).upper()
    if self.split_seq:
        # split() is handed the record's overhang; only the first element
        # of its 'exon' entry is kept.
        sequence = self.split(sequence, exon.overhang)['exon'][0]

    exon_ranges = GenomicRanges(
        exon.chrom,
        exon.Exon_Start,
        exon.Exon_End,
        exon.transcript_id,
        exon.strand,
    )
    return {
        'inputs': {'input_3': sequence},
        'metadata': {'ranges': exon_ranges},
    }
# Dnase
dnase = np.squeeze(self.dnase_extractor([interval], axis=0))[:, np.newaxis]
dnase[np.isnan(dnase)] = 0 # NA fill
dnase_rc = dnase[::-1]
bigwig_list = [seq]
bigwig_rc_list = [seq_rc]
mappability = np.squeeze(self.mappability_extractor([interval], axis=0))[:, np.newaxis]
mappability[np.isnan(mappability)] = 0 # NA fill
mappability_rc = mappability[::-1]
bigwig_list.append(mappability)
bigwig_rc_list.append(mappability_rc)
bigwig_list.append(dnase)
bigwig_rc_list.append(dnase_rc)
ranges = GenomicRanges.from_interval(interval)
ranges_rc = GenomicRanges.from_interval(interval)
ranges_rc.strand = "-"
return {
"inputs": [
np.concatenate(bigwig_list, axis=-1), # stack along the last axis
np.concatenate(bigwig_rc_list, axis=-1), # RC version
np.append(self.meta_feat, gencode_counts)
],
"targets": {}, # No Targets
"metadata": {
"ranges": ranges,
"ranges_rc": ranges_rc
}
raise ValueError("Expected the interval to be {0} wide. Recieved stop - start = {1}".
format(self.SEQ_WIDTH, interval.stop - interval.start))
if self.targets is not None:
y = self.targets.iloc[idx].values
else:
y = {}
# Run the fasta extractor
seq = np.squeeze(self.fasta_extractor([interval]), axis=0)
seq = np.expand_dims(np.swapaxes(seq, 1, 0), axis=1)
return {
"inputs": seq,
"targets": y,
"metadata": {
"ranges": GenomicRanges.from_interval(interval)
}
# automatically resize the interval to auto_resize_len, anchored at its center
interval = resize_interval(
interval, self.auto_resize_len, anchor='center')
# QUESTION: @kromme - why do we need max_seq_len?
# if self.max_seq_len is not None:
# assert interval.stop - interval.start <= self.max_seq_len
# Run the fasta extractor and transform if necessary
seq = self.fasta_extractors.extract(interval)
return {
"inputs": np.array(seq),
"targets": labels,
"metadata": {
"ranges": GenomicRanges(interval.chrom, interval.start, interval.stop, str(idx))
}
def __getitem__(self, idx):
    """Return the extracted sequence and genomic range for interval ``idx``.

    Args:
        idx: index into ``self.bt`` selecting the interval to load.

    Returns:
        dict: ``{"inputs": {"dna": seq}, "metadata": {"ranges": ...}}``
        where ``seq`` is the extractor output for the single interval with
        its leading axis squeezed away, and ``ranges`` is built with
        ``GenomicRanges.from_interval``.

    Raises:
        ValueError: if the interval is not exactly ``self.SEQ_WIDTH`` wide.
    """
    # The extractor is created on first use rather than up front.
    # NOTE(review): presumably so it is opened inside the worker that
    # actually reads from it - confirm against the class constructor.
    if self.fasta_extractor is None:
        self.fasta_extractor = FastaExtractor(self.fasta_file)

    interval = self.bt[idx]

    width = interval.stop - interval.start
    if width != self.SEQ_WIDTH:
        raise ValueError("Expected the interval to be {0} wide. Recieved stop - start = {1}".
                         format(self.SEQ_WIDTH, width))

    # Run the fasta extractor on a one-element batch and drop the batch axis.
    seq = np.squeeze(self.fasta_extractor([interval]), axis=0)

    return {
        "inputs": {"dna": seq},
        "metadata": {
            "ranges": GenomicRanges.from_interval(interval)
        }
    }
format(self.SEQ_WIDTH, interval.stop - interval.start))
if self.targets is not None:
y = self.targets.iloc[idx].values
else:
y = {}
# Run the fasta extractor
seq = np.squeeze(self.fasta_extractor([interval]), axis=0)
# Reformat so that it matches the Basset shape
seq = np.swapaxes(seq, 1, 0)[:, :, None]
return {
"inputs": seq,
"targets": y,
"metadata": {
"ranges": GenomicRanges.from_interval(interval)
}
self.fasta_extractor = FastaExtractor(self.fasta_file)
interval = self.bt[idx]
if self.targets is not None:
y = self.targets.iloc[idx].values
else:
y = {}
# Run the fasta extractor
seq = np.squeeze(self.fasta_extractor([interval]), axis=0)
return {
"inputs": seq,
"targets": y,
"metadata": {
"ranges": GenomicRanges.from_interval(interval)
}
dnase = np.squeeze(self.dnase_extractor([interval], axis=0))[:, np.newaxis]
dnase[np.isnan(dnase)] = 0 # NA fill
dnase_rc = dnase[::-1]
bigwig_list = [seq]
bigwig_rc_list = [seq_rc]
mappability = np.squeeze(self.mappability_extractor([interval], axis=0))[:, np.newaxis]
mappability[np.isnan(mappability)] = 0 # NA fill
mappability_rc = mappability[::-1]
bigwig_list.append(mappability)
bigwig_rc_list.append(mappability_rc)
bigwig_list.append(dnase)
bigwig_rc_list.append(dnase_rc)
ranges = GenomicRanges.from_interval(interval)
ranges_rc = GenomicRanges.from_interval(interval)
ranges_rc.strand = "-"
return {
"inputs": [
np.concatenate(bigwig_list, axis=-1), # stack along the last axis
np.concatenate(bigwig_rc_list, axis=-1), # RC version
np.append(self.meta_feat, gencode_counts)
],
"targets": {}, # No Targets
"metadata": {
"ranges": ranges,
"ranges_rc": ranges_rc
}
# Run the fasta extractor
seq = np.squeeze(self.fasta_extractor([interval]), axis=0)
seq_rc = seq[::-1, ::-1]
# Dnase
dnase = np.squeeze(self.dnase_extractor([interval], axis=0))[:, np.newaxis]
dnase[np.isnan(dnase)] = 0 # NA fill
dnase_rc = dnase[::-1]
bigwig_list = [seq]
bigwig_rc_list = [seq_rc]
bigwig_list.append(dnase)
bigwig_rc_list.append(dnase_rc)
ranges = GenomicRanges.from_interval(interval)
ranges_rc = GenomicRanges.from_interval(interval)
ranges_rc.strand = "-"
return {
"inputs": [
np.concatenate(bigwig_list, axis=-1), # stack along the last axis
np.concatenate(bigwig_rc_list, axis=-1), # RC version
],
"targets": {}, # No Targets
"metadata": {
"ranges": ranges,
"ranges_rc": ranges_rc
}
if interval.name is not None:
y = np.array([float(interval.name)])
else:
y = {}
# Run the fasta extractor
seq = np.squeeze(self.fasta_extractor([interval]))
# Reformat so that it matches the Basset shape
# seq = np.swapaxes(seq, 1, 0)[:,:,None]
return {
"inputs": {"data/genome_data_dir": seq},
"targets": y,
"metadata": {
"ranges": GenomicRanges.from_interval(interval)
}