import json
import os
import tempfile


def parse_context(orig_dir):
    """Parse the context files describing a single model.

    Reads the bigwig track list, the ChIP task list, and the feature list
    from `orig_dir` via the `read_txt` helper (defined elsewhere in this
    module), together with an optional `meta.txt` holding exactly 8
    metadata features.
    """
    bigwig = read_txt(os.path.join(orig_dir, "bigwig.txt"))
    tasks = read_txt(os.path.join(orig_dir, "chip.txt"))
    features = read_txt(os.path.join(orig_dir, "feature.txt"))
    meta_fname = os.path.join(orig_dir, "meta.txt")
    if os.path.exists(meta_fname):
        meta = read_txt(meta_fname)
        n_meta_features = len(meta)
        assert n_meta_features == 8
    else:
        meta = None
        n_meta_features = 0
    # The gencode annotation contributes 6 extra metadata features.
    needs_gencode = "gencode" in features
    if needs_gencode:
        n_meta_features += 6
    # 4 channels for the one-hot DNA sequence plus one per bigwig track.
    seq_n_channels = 4 + len(bigwig)
    return {"bigwig": bigwig,
            "tasks": tasks,
            "features": features,
            "meta": meta,
            "needs_mappability": "Unique35" in bigwig,
            "needs_gencode": needs_gencode,
            "n_meta_features": n_meta_features,
            "seq_n_channels": seq_n_channels}

# NOTE: the opening of this function's signature was missing from the source;
# the name and the batch_size/num_workers defaults below are reconstructed
# assumptions, only the `tmpdir` default is original.
def run_splicing_models(vcf_file,
                        fasta_file,
                        gtf_file,
                        batch_size=32,
                        num_workers=0,
                        tmpdir='/tmp/KipoiSplice/'):
    """Run each splicing model on the variants of a VCF file.

    Args:
        vcf_file: path to the input VCF file
        fasta_file: path to the reference genome FASTA file
        gtf_file: path to the GTF file required by the models (Ensembl)
        batch_size: batch size to use with all the models
        num_workers: number of workers to use for each model
        tmpdir (optional): path to the temporary directory where to store the predictions
    """
    # contains_conservation: if True, include the conservation scores in the inputs.
    # NOTE: this requires `vcf_file` to be annotated using VEP.
    contains_conservation = False
    MODELS = ["MaxEntScan/3prime", "MaxEntScan/5prime", "HAL", "labranchor"]
    # `this_path` and `read_txt` are assumed to be defined at module level
    # (the script's directory and a line-per-entry text reader, respectively).
    features = read_txt(os.path.join(this_path, "features.txt"))
    # Could also be generated on the fly from `MODELS` (see the sketch below).
    with open(os.path.join(this_path, "model_table_cols.json"), "r") as ifh:
        model_output_col_names = json.load(ifh)
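    # A sketch of the "on the fly" alternative mentioned above, assuming the
    # table simply maps each model name to prefixed column labels (the exact
    # column scheme here is hypothetical):
    #
    #     model_output_col_names = {
    #         m: ["{}_{}".format(m, col) for col in ("ref", "alt", "diff")]
    #         for m in MODELS}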
    os.makedirs(tmpdir, exist_ok=True)
    # Create a unique run directory under `tmpdir` so concurrent runs don't collide.
    tmpdir = tempfile.mkdtemp(dir=tmpdir)
    # Generate a VCF file of predictions for each model.
    for model in MODELS:
        # One could also parallelize this loop, e.g. with joblib (sketch below).
        out_vcf_fpath = os.path.join(tmpdir, model + ".vcf")
        ensure_dirs(out_vcf_fpath)  # `ensure_dirs` is a helper defined elsewhere
        dataloader_arguments = {"gtf_file": os.path.abspath(gtf_file),
                                "fasta_file": os.path.abspath(fasta_file)}
        if "rbp_eclip" in model:
            # None of the entries in MODELS contain "rbp_eclip",
            # so this branch is a no-op here.
            pass
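
# Example invocation (file paths are hypothetical; the function name is the
# reconstructed one noted above):
#
#     run_splicing_models("variants.vcf", "hg19.fa", "gencode.v24.gtf",
#                         batch_size=32, num_workers=4)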