Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
else:
BT = BedTool
self.bt = BT(intervals_file)
# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
# DNase
self.dnase_file = dnase_file
self.dnase_extractor = None
# mappability
if mappability_file is None:
# download the mappability file if not existing
common_dl_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
makedir_exist_ok(common_dl_dir)
rf = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
md5="1d15ddafe2c8df51cf08495db96679e7")
mappability_file = os.path.join(common_dl_dir, "wgEncodeDukeMapabilityUniqueness35bp.bigWig")
if not os.path.exists(mappability_file) or not rf.validate(mappability_file):
# download the path
rf.get_file(mappability_file)
self.mappability_file = mappability_file
self.mappability_extractor = None
# Gencode features
if GENCODE_dir is None:
gp = os.path.join(this_dir, "../../template/downloaded/dataloader_files/gencode_features/")
else:
gp = GENCODE_dir
download_gencode_dir(gp) # download files
self.gencode_beds = [
else:
BT = BedTool
self.bt = BT(intervals_file)
# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
# DNase
self.dnase_file = dnase_file
self.dnase_extractor = None
# mappability
if mappability_file is None:
# download the mappability file if not existing
common_dl_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
makedir_exist_ok(common_dl_dir)
rf = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
md5="1d15ddafe2c8df51cf08495db96679e7")
mappability_file = os.path.join(common_dl_dir, "wgEncodeDukeMapabilityUniqueness35bp.bigWig")
if not os.path.exists(mappability_file) or not rf.validate(mappability_file):
# download the path
rf.get_file(mappability_file)
self.mappability_file = mappability_file
self.mappability_extractor = None
# Get the metadata features
if cell_line is None:
if RNAseq_PC_file is None:
raise ValueError("RNAseq_PC_file has to be specified when cell_line=None")
assert os.path.exists(RNAseq_PC_file)
else:
# Using the pre-defined cell-line
output_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files/RNAseq_features/")
def download_gencode_dir(output_dir):
    """Make sure every gencode feature file is present in ``output_dir``.

    The directory is created with ``makedir_exist_ok`` before anything is
    fetched. For each (file name, md5) pair a ``RemoteFile`` is built from
    the S3 URL template; the file is fetched with ``get_file`` only when it
    is missing locally or when ``RemoteFile.validate`` does not accept the
    local copy.

    Args:
        output_dir: directory the ``*.bed.gz`` files are written into.
    """
    makedir_exist_ok(output_dir)

    base_url = ("https://s3.eu-central-1.amazonaws.com/kipoi-models/"
                "dataloader_files/FactorNet/dataloader_files/gencode_features/{}")
    # Alternative template kept from the original source for reference:
    # "https://github.com/uci-cbcl/FactorNet/blob/master/resources/{}?raw=true"

    # (file name, md5 handed to RemoteFile) pairs, processed in this order.
    wanted = (
        ('cpgisland.bed.gz', 'ac7dc007d7019c05adb7a331d1d6721d'),
        ('wgEncodeGencodeBasicV19.cds.merged.bed.gz', '4ec9883932932efe87e4adc6c84ced1c'),
        ('wgEncodeGencodeBasicV19.intron.merged.bed.gz', 'd2db7e3255323d2b5b04e1c0c59ecd2d'),
        ('wgEncodeGencodeBasicV19.promoter.merged.bed.gz', '48fe1ab3aa0e9f5d11f3e5dfedbd47b6'),
        ('wgEncodeGencodeBasicV19.utr5.merged.bed.gz', 'de87c14d4ff055226afeb01446aba6e6'),
        ('wgEncodeGencodeBasicV19.utr3.merged.bed.gz', '8bbe08f5fba86306dfbef56d756856f1'),
    )

    for name, checksum in wanted:
        target = os.path.join(output_dir, name)
        remote = RemoteFile(url=base_url.format(name), md5=checksum)
        # Existence is tested first so validate() is never called on a
        # file that is not there.
        if os.path.exists(target) and remote.validate(target):
            continue
        remote.get_file(target)
def download_gencode_dir(output_dir):
    """Make sure every gencode feature file is present in ``output_dir``.

    The directory is created with ``makedir_exist_ok`` before anything is
    fetched. For each (file name, md5) pair a ``RemoteFile`` is built from
    the S3 URL template; the file is fetched with ``get_file`` only when it
    is missing locally or when ``RemoteFile.validate`` does not accept the
    local copy.

    Args:
        output_dir: directory the ``*.bed.gz`` files are written into.
    """
    makedir_exist_ok(output_dir)

    base_url = ("https://s3.eu-central-1.amazonaws.com/kipoi-models/"
                "dataloader_files/FactorNet/dataloader_files/gencode_features/{}")
    # Alternative template kept from the original source for reference:
    # "https://github.com/uci-cbcl/FactorNet/blob/master/resources/{}?raw=true"

    # (file name, md5 handed to RemoteFile) pairs, processed in this order.
    wanted = (
        ('cpgisland.bed.gz', 'ac7dc007d7019c05adb7a331d1d6721d'),
        ('wgEncodeGencodeBasicV19.cds.merged.bed.gz', '4ec9883932932efe87e4adc6c84ced1c'),
        ('wgEncodeGencodeBasicV19.intron.merged.bed.gz', 'd2db7e3255323d2b5b04e1c0c59ecd2d'),
        ('wgEncodeGencodeBasicV19.promoter.merged.bed.gz', '48fe1ab3aa0e9f5d11f3e5dfedbd47b6'),
        ('wgEncodeGencodeBasicV19.utr5.merged.bed.gz', 'de87c14d4ff055226afeb01446aba6e6'),
        ('wgEncodeGencodeBasicV19.utr3.merged.bed.gz', '8bbe08f5fba86306dfbef56d756856f1'),
    )

    for name, checksum in wanted:
        target = os.path.join(output_dir, name)
        remote = RemoteFile(url=base_url.format(name), md5=checksum)
        # Existence is tested first so validate() is never called on a
        # file that is not there.
        if os.path.exists(target) and remote.validate(target):
            continue
        remote.get_file(target)
else:
BT = BedTool
self.bt = BT(intervals_file)
# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
# DNase
self.dnase_file = dnase_file
self.dnase_extractor = None
# mappability
if mappability_file is None:
# download the mappability file if not existing
common_dl_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
makedir_exist_ok(common_dl_dir)
rf = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
md5="1d15ddafe2c8df51cf08495db96679e7")
mappability_file = os.path.join(common_dl_dir, "wgEncodeDukeMapabilityUniqueness35bp.bigWig")
if not os.path.exists(mappability_file) or not rf.validate(mappability_file):
# download the path
rf.get_file(mappability_file)
self.mappability_file = mappability_file
self.mappability_extractor = None
def download_gencode_dir(output_dir):
    """Make sure every gencode feature file is present in ``output_dir``.

    The directory is created with ``makedir_exist_ok`` before anything is
    fetched. For each (file name, md5) pair a ``RemoteFile`` is built from
    the S3 URL template; the file is fetched with ``get_file`` only when it
    is missing locally or when ``RemoteFile.validate`` does not accept the
    local copy.

    Args:
        output_dir: directory the ``*.bed.gz`` files are written into.
    """
    makedir_exist_ok(output_dir)

    base_url = ("https://s3.eu-central-1.amazonaws.com/kipoi-models/"
                "dataloader_files/FactorNet/dataloader_files/gencode_features/{}")
    # Alternative template kept from the original source for reference:
    # "https://github.com/uci-cbcl/FactorNet/blob/master/resources/{}?raw=true"

    # (file name, md5 handed to RemoteFile) pairs, processed in this order.
    wanted = (
        ('cpgisland.bed.gz', 'ac7dc007d7019c05adb7a331d1d6721d'),
        ('wgEncodeGencodeBasicV19.cds.merged.bed.gz', '4ec9883932932efe87e4adc6c84ced1c'),
        ('wgEncodeGencodeBasicV19.intron.merged.bed.gz', 'd2db7e3255323d2b5b04e1c0c59ecd2d'),
        ('wgEncodeGencodeBasicV19.promoter.merged.bed.gz', '48fe1ab3aa0e9f5d11f3e5dfedbd47b6'),
        ('wgEncodeGencodeBasicV19.utr5.merged.bed.gz', 'de87c14d4ff055226afeb01446aba6e6'),
        ('wgEncodeGencodeBasicV19.utr3.merged.bed.gz', '8bbe08f5fba86306dfbef56d756856f1'),
    )

    for name, checksum in wanted:
        target = os.path.join(output_dir, name)
        remote = RemoteFile(url=base_url.format(name), md5=checksum)
        # Existence is tested first so validate() is never called on a
        # file that is not there.
        if os.path.exists(target) and remote.validate(target):
            continue
        remote.get_file(target)
else:
BT = BedTool
self.bt = BT(intervals_file)
# Fasta
self.fasta_file = fasta_file
self.fasta_extractor = None # initialize later
# DNase
self.dnase_file = dnase_file
self.dnase_extractor = None
# mappability
if mappability_file is None:
# download the mappability file if not existing
common_dl_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files")
makedir_exist_ok(common_dl_dir)
rf = RemoteFile(url="http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig",
md5="1d15ddafe2c8df51cf08495db96679e7")
mappability_file = os.path.join(common_dl_dir, "wgEncodeDukeMapabilityUniqueness35bp.bigWig")
if not os.path.exists(mappability_file) or not rf.validate(mappability_file):
# download the path
rf.get_file(mappability_file)
self.mappability_file = mappability_file
self.mappability_extractor = None
# Gencode features
if GENCODE_dir is None:
gp = os.path.join(this_dir, "../../template/downloaded/dataloader_files/gencode_features/")
else:
gp = GENCODE_dir
download_gencode_dir(gp) # download files
self.gencode_beds = [
md5="1d15ddafe2c8df51cf08495db96679e7")
mappability_file = os.path.join(common_dl_dir, "wgEncodeDukeMapabilityUniqueness35bp.bigWig")
if not os.path.exists(mappability_file) or not rf.validate(mappability_file):
# download the path
rf.get_file(mappability_file)
self.mappability_file = mappability_file
self.mappability_extractor = None
# Get the metadata features
if cell_line is None:
if RNAseq_PC_file is None:
raise ValueError("RNAseq_PC_file has to be specified when cell_line=None")
assert os.path.exists(RNAseq_PC_file)
else:
# Using the pre-defined cell-line
output_dir = os.path.join(this_dir, "../../template/downloaded/dataloader_files/RNAseq_features/")
makedir_exist_ok(output_dir)
RNAseq_PC_file = os.path.join(output_dir, cell_line, "meta.txt")
url_template = ('https://s3.eu-central-1.amazonaws.com/kipoi-models/dataloader_files/'
'FactorNet/dataloader_files/RNAseq_features/{}/meta.txt')
# rf = RemoteFile(url=url_template.format(cell_line))
if not os.path.exists(RNAseq_PC_file): # or not rf.validate(mappability_file):
# download the path
download_url(url_template.format(cell_line), os.path.join(output_dir, cell_line), "meta.txt")
# rf.get_file(RNAseq_PC_file)
self.meta_feat = pd.read_csv(RNAseq_PC_file,
sep="\t", header=None)[0].values