# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def rlebam(args):
"""Entry point for merging run length information for fast5s to bam."""
logger = medaka.common.get_named_logger('BAMDecor')
# Mapping of read_id -> fast5 filename, loaded from a key/value TSV
# given on the command line.
read_index = medaka.common.read_key_value_tsv(args.read_index)
logger.info("Found {} read in index\n".format(len(read_index)))
def _ingress():
# Generator over SAM text from stdin: header lines are passed
# through untouched, alignment records are annotated with the
# read id, strand, and source fast5 filename.
for line in sys.stdin:
if line[0] == '@':
# SAM header line: no per-read metadata to attach.
yield line.rstrip(), None, None, None
else:
# Only the first two tab-separated fields are needed here.
read_id, flag, _ = line.split('\t', 2)
# SAM flag bit 0x10 marks a reverse-strand alignment.
is_rev = bool(int(flag) & 16)
# KeyError here means the read is missing from the index
# file — presumably an input mismatch; verify upstream.
fname = read_index[read_id]
yield line.rstrip(), read_id, is_rev, fname
# Fan records out to worker processes for decoration.
with concurrent.futures.ProcessPoolExecutor(
max_workers=args.workers) as executor:
for results in executor.map(
"""Creation and loading of models."""
import os
import pathlib
import tempfile
import requests
import medaka.common
import medaka.datastore
import medaka.options
logger = medaka.common.get_named_logger('ModelLoad')
class DownloadError(ValueError):
    """Error raised when a model download fails to complete successfully."""
def resolve_model(model):
"""Resolve a model filepath, downloading known models if necessary.

:param model: str, model filepath or model ID.

:returns: str, filepath to model file.

:raises: presumably `DownloadError`/`ValueError` for unknown or
    undownloadable models — confirm in the full function body.
"""
# A path that exists on disk is used directly, no download needed.
if os.path.exists(model): # model is path to model file
return model
# Not a file on disk and not a known model name: handled below
# (body truncated in this view).
elif model not in medaka.options.allowed_models:
def run_prediction(
output, bam, regions, model, feature_encoder,
chunk_len, chunk_ovlp, batch_size=200,
save_features=False, enable_chunking=True):
"""Inference worker.

:param output: output datastore filepath (opened in append mode).
:param bam: input alignment file for feature generation.
:param regions: iterable of region objects with a ``.size`` attribute.
:param model: model used for inference.
:param feature_encoder: encoder producing model input features.
:param chunk_len: length of feature chunks.
:param chunk_ovlp: overlap between consecutive chunks.
:param batch_size: number of chunks per inference batch.
:param save_features: whether to also store input features.
:param enable_chunking: whether to split regions into chunks.
"""
logger = medaka.common.get_named_logger('PWorker')
# Regions left unprocessed here; presumably returned for the caller
# to retry — confirm in the remainder of the function body.
remainder_regions = list()
# Loader is given headroom of four batches' worth of chunks.
loader = DataLoader(
4 * batch_size, bam, regions, feature_encoder,
chunk_len=chunk_len, chunk_overlap=chunk_ovlp,
enable_chunking=enable_chunking)
# Group the stream of chunks into fixed-size batches for the model.
batches = medaka.common.grouper(loader, batch_size)
# Total work in megabases, for progress reporting only.
total_region_mbases = sum(r.size for r in regions) / 1e6
logger.info(
"Running inference for {:.1f}M draft bases.".format(
total_region_mbases))
# Append results into the output datastore as batches complete.
with medaka.datastore.DataStore(output, 'a') as ds:
mbases_done = 0
# NOTE(review): interval between cache-size log messages; units
# (batches vs seconds) not visible here — confirm further down.
cache_size_log_interval = 5
def _group_and_trim_by_haplotype(alignments):
"""Group alignments by haplotype tag and trim to common genomic window.

:param alignments: {haplotype: [`TruthAlignment`]}

:returns: list of tuples where each tuple contains `TruthAlignment`
    for each haplotype trimmed to common genomic window.

.. note:: We should avoid the situation of staggered alignments
    which could occur by independently chunking each haplotype
    by chunking the draft and aligning to both haplotypes, then
    chunking both haplotypes according to draft-chunks, then realigning
    haplotype chunks to back to the draft - this should minimize
    staggering of truth alignments and hence the number of labels
    discarded.
"""
logger = medaka.common.get_named_logger("Group_and_trim")
haplotypes = sorted(list(alignments.keys()))
if len(haplotypes) == 1: # haploid
# Single haplotype: every alignment forms its own singleton group.
grouped = [(a,) for a in alignments[haplotypes[0]]]
else:
# create interval trees for other haplotypes
# (keyed on alignment start/end so overlaps can be queried quickly)
trees = {}
for h in haplotypes[1:]:
trees[h] = intervaltree.IntervalTree(
[intervaltree.Interval(a.start, a.end, a)
for a in alignments[h]])
# loop over alignments in first haplotype and find overlapping
# alignments in other haplotypes. If there are multiple overlapping
# alignments, take the one with the longest overlap.
grouped = []
for a in alignments[haplotypes[0]]:
# Start each group with the first-haplotype alignment;
# matching alignments from the other haplotypes are appended
# below (continuation truncated in this view).
group = [a]