Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment of a read-processing coroutine loop — the enclosing
# definition lies outside this chunk and the original indentation has been
# lost, so structure below is flat. Comments only; code untouched.
logger.debug("Skipping because I've seen before.")
continue
else:
# Run MinKNOW-style event detection on the raw read block before basecalling.
logger.debug("Analysing {} samples".format(len(read_block)))
sample_rate = read_block.sample_rate
events = minknow_event_detect(
read_block, read_block.sample_rate, **{
'window_lengths':[3, 6], 'thresholds':[1.4, 1.1],
'peak_height':0.2
}
)
# Too few events to give a meaningful basecall; skip this read.
if len(events) < 100:
continue
#TODO: do this in a process pool
score, basecall = pyscrap.basecall_events(events)
#TODO: check sanity of basecall
# Skip very short basecalls as well.
if len(basecall) < 100:
continue
# Send the basecall to the RPC alignment service; returncode != 0 means failure.
alignment, returncode = yield from align_client.call.align(basecall)
hits = []
if returncode != 0:
logger.warning('Alignment failed for {}'.format(read_block.info))
else:
# Parse the SAM text: drop header lines (starting '@') and collect the
# reference name (field 3); '*' denotes an unmapped record.
recs = [x for x in alignment.split('\n') if len(x) > 0 and x[0] != '@']
for r in recs:
fields = r.split('\t')
if fields[2] != '*':
hits.append(fields[2])
logger.debug('{} aligns to {}'.format(read_block.info, hits))
def run_dealer(args):
    """Entry point handler for server.

    Installs the wakeup handler, then runs the dealer coroutine on the
    default event loop until it completes.
    """
    set_wakeup()
    loop = asyncio.get_event_loop()
    dealer = main_dealer(
        args.path, args.outpath,
        output=args.output, port=args.port
    )
    loop.run_until_complete(dealer)
def run_router(args):
    """Entry point handler for client.

    Installs the wakeup handler, then runs the router coroutine on the
    default event loop until it completes.
    """
    set_wakeup()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main_router(args.addr))
suffixes).
:param channels: list of channels to simulate.
:param start_port: port on which to run .fast5 replay server, bwa alignment
server will be run on the following numbered port.
:param targets: list of reference names. If `whitelist` is `False`, reads
aligning to these references will be ejected. If `whitelist` is `True`
any read aligning to a reference other than those contained in this
list will be ejected. Unidentified reads are never ejected.
:param whitelist: see `targets`.
"""
# NOTE(review): body fragment — the enclosing function's `def` is outside
# this chunk; indentation has been lost.
logger = logging.getLogger('ReadUntil App')
# Only reads classified as 'strand' are served to the client.
good_class = 'strand'
# Replay the .fast5 data at this multiple of real time.
time_warp=2
event_loop = asyncio.get_event_loop()
set_wakeup()
# Consecutive ports: replay server on `start_port`, alignment on the next.
port = start_port
# Setup replay service
replay_port = port
replay_server = event_loop.create_task(replayfast5.replay_server(
fast5, channels, replay_port, good_class, time_warp=time_warp
))
port += 1
# Setup alignment service
align_port = port
align_server = event_loop.create_task(bwa.align_server(
bwa_index, align_port
))
# Presumably maps read identifier -> alignment result; populated by code
# beyond this chunk — TODO confirm.
identified_reads = {}
def read_until_demo(fast5, channels, port=5555):
"""Simple demo of read until server and client application.
:param fast5: input .fast5 file.
:param channels: list of channels to simulate.
:param port: port on which to run server and client.
"""
logger = logging.getLogger('ReadUntil App')
# Only reads classified as 'strand' are served to the client.
good_class = 'strand'
# Replay the .fast5 data at this multiple of real time.
time_warp=2
event_loop = asyncio.get_event_loop()
set_wakeup()
# Setup replay service
event_loop.create_task(replay_server(
fast5, channels, port, good_class, time_warp=time_warp
))
@asyncio.coroutine
def read_until(port):
# Demo client: poll every channel for event data; `get_events` returns
# None when the channel is not currently in the `good_class` state.
# NOTE(review): bare `yield` (not `yield from`) below looks suspicious
# for obtaining the client from a coroutine — confirm intended.
client = yield replay_client(port)
counter = 0
while True:
#This can be mostly rewritten for any real app
for channel in channels:
read_block = yield from client.call.get_events(channel)
if read_block is None:
logger.debug("Channel not in '{}' classification".format(good_class))
else:
def _run():
    """Start the alignment server, then wait until it is shut down."""
    set_wakeup()
    srv = yield from align_server(args.index, args.port, args.aligner, opts=args.opts)
    logger.info('Alignment server running, awaiting requests...')
    yield from srv.wait_closed()
# filter orientation
if (r.is_reverse and args.orientation == 'fwd') or \
(not r.is_reverse and args.orientation == 'rev'):
return True
# filter quality
if args.quality is not None:
mean_q = np.mean(r.query_qualities)
if mean_q < args.quality:
logger.debug("Filtering {} by quality ({:.2f}).".format(r.query_name, mean_q))
return True
# filter accuracy or alignment coverage
if args.accuracy is not None or args.coverage is not None or args.length is not None:
stats = stats_from_aligned_read(r, bam.references, bam.lengths)
if args.accuracy is not None and stats['acc'] < args.accuracy:
logger.info("Filtering {} by accuracy ({:.2f}).".format(r.query_name, stats['acc']))
return True
if args.coverage is not None and stats['coverage'] < args.coverage:
logger.info("Filtering {} by coverage ({:.2f}).".format(r.query_name, stats['coverage']))
return True
if args.length is not None and stats['read_length'] < args.length:
logger.info("Filtering {} by length ({:.2f}).".format(r.query_name, stats['length']))
return True
# don't filter
return False
# reads should already be trimmed to a common alignment start and end point
reads = [r for r in bam]
ref_end, ref_start = reads[0].reference_end, reads[0].reference_start
ref_len = ref_end - ref_start
# Every alignment must span exactly the same reference window; otherwise
# per-position error comparison below is meaningless.
if not (all([r.reference_end == ref_end for r in reads]) and
all([r.reference_start == ref_start for r in reads])):
raise ValueError('Alignments have not been trimmed to a common overlap window, try trim_alignments')
# get errors in each read
data = {}
qscores = []
for aln in reads:
errors = get_errors(aln, ref_seq)
counts = count_errors(errors)
# check we got the same error counts as stats_from_aligned_read
stats = stats_from_aligned_read(aln, list(ref_lengths.keys()), list(ref_lengths.values()))
for k in counts.keys():
if stats[k] != counts[k]:
msg = "Error counts {} don't match those from the CIGAR str {}."
raise ValueError(msg.format(counts, {k: stats[k] for k in counts.keys()}))
qscores.append((aln.query_name, get_qscores(counts, ref_len)))
data[aln.query_name] = errors
# get intersection of errors
names = list(data.keys())
common_errors = set(data[names[0]].keys()) # set of reference positions
for name in names[1:]:
common_errors = common_errors.intersection(set(data[name].keys()))
remaining_errors = {}
# loop through common errors, checking ref is the same and retaining the
# error with the shortest edit distance
# NOTE(review): the loop body continues beyond this chunk.
for rp in common_errors:
lambda binary: msgpack.unpackb(binary, object_hook=Fast5Data.decode)
),
# NOTE(review): fragment of a method body — its `def` (and thus the exact
# semantics of `delay` and `seconds`, presumably durations in seconds) lies
# outside this chunk; indentation has been lost.
read = self.reads[self.current_read]
# Serve only reads of the desired classification.
if read['classification'] != self.good_class:
return None
else:
# Raw sample window: begin `delay` seconds into the read, span `seconds`
# more, clipped to the replay's current position (`current_sample`).
start = int(read['read_start'] + self.sample_rate * delay)
end = int(min(self.current_sample, start + self.sample_rate * seconds))
# Nothing replayed yet for the requested window.
if end <= start:
return None
self.logger.debug("Fetching raw [{}, {}] for read {} starting at {}. Current sample is {}.".format(
start, end,
self.current_read, int(self.sample_offset + self.reads[self.current_read]['read_start']),
self.current_sample
))
with BulkFast5(self.fast5) as fh:
raw = fh.get_raw(self.channel, raw_indices=[start, end])
# Wrap raw samples with read metadata; start/end are absolute sample
# positions (offset by `sample_offset`) over the whole read.
return Fast5Data(
raw, info=read['read_id'].decode('utf-8'),
start=int(self.sample_offset + read['read_start']),
end=int(self.sample_offset + read['read_start'] + read['read_length'])
)