Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Excerpt from a larger routine whose header is outside this view. It walks
# consecutive samples (s1, s2) and works out where s1 has to be cut.
# s1 is bound before this excerpt begins.
start_2 = None
# a trailing None is chained on so that the final real sample is still
# handled as s1 on the last iteration
for s2 in itertools.chain(sample_gen, (None,)):
# names are only used in messages; fall back to 'Unknown' for None
s1_name = 'Unknown' if s1 is None else s1.name
s2_name = 'Unknown' if s2 is None else s2.name
is_last_in_contig = False
# s1 is last chunk
if s2 is None:
# go to end of s1
end_1 = None
is_last_in_contig = True
else:
rel = medaka.common.Sample.relative_position(s1, s2)
# skip s2 if it is contained within s1
if rel is medaka.common.Relationship.s2_within_s1:
logger.info('{} is contained within {}, skipping.'.format(
s2_name, s1_name))
continue
elif rel is medaka.common.Relationship.forward_overlap:
# the third value returned by overlap_indices is discarded here
end_1, start_2, _ = medaka.common.Sample.overlap_indices(
s1, s2)
elif rel is medaka.common.Relationship.forward_gapped:
# s1 cannot be joined to s2: flag it as last and leave both ends open
is_last_in_contig = True
end_1, start_2 = (None, None)
msg = '{} and {} cannot be concatenated as there is ' + \
'no overlap and they do not abut.'
logger.info(msg.format(s1_name, s2_name))
else:
# NOTE(review): forward_abutted is not matched by any branch above and
# therefore also ends up here -- confirm that this is intended.
raise RuntimeError(
'Unexpected sample relationship {} '
'between {} and {}'.format(repr(rel), s1.name, s2.name))
# Excerpt: tail of an if/elif chain that classifies how two samples relate.
# s1_ord, s2_ord and is_ordered are computed before this excerpt begins.
# Each test returns the forward_* member when is_ordered is true and the
# mirrored member otherwise.
else:
return Relationship.s1_within_s2
# do samples abut?
elif ordered_abuts(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_abutted
else:
return Relationship.reverse_abutted
# do samples overlap?
elif ordered_overlaps(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_overlap
else:
return Relationship.reverse_overlap
# if we got this far there should be a gap between s1_ord and s2_ord
elif ordered_gapped(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_gapped
else:
return Relationship.reverse_gapped
else:
# none of the ordered_* predicates matched
raise RuntimeError(
'Could not calculate relative position of {} and {}'.format(
s1.name, s2.name))
:param s1: First `Sample` object.
:param s2: Second `Sample` object.
:returns: (end1, start2, heuristic)
:raises: `OverlapException` if samples do not overlap nor abut.
"""
# Excerpt: the body is cut inside the `try` below; its handler is not visible.
# heuristic is returned as the third element of the result
heuristic = False
rel = Sample.relative_position(s1, s2)
# trivial case
if rel is Relationship.forward_abutted:
# abutting samples need no trimming on either side
return None, None, heuristic
# anything other than a forward overlap cannot be handled here
if rel is not Relationship.forward_overlap:
# NOTE(review): 'relationhip' in the message below is a typo for
# 'relationship'; it is a runtime string and is left unchanged here.
msg = 'Cannot overlap samples {} and {} with relationhip {}'
raise OverlapException(msg.format(s1.name, s2.name, repr(rel)))
# find where the overlap starts (ends) in s1 (s2) indices
# default side: first index in s1.positions not ordered before s2's first position
ovl_start_ind1 = np.searchsorted(s1.positions, s2.positions[0])
# side='right': index just past the entries of s2.positions that are
# not ordered after s1's last position
ovl_end_ind2 = np.searchsorted(
s2.positions, s1.positions[-1], side='right')
end_1_ind, start_2_ind = None, None
# the stretches of each sample's positions that lie inside the overlap
pos1_ovl = s1.positions[ovl_start_ind1:]
pos2_ovl = s2.positions[0:ovl_end_ind2]
try:
# the nice case where everything lines up
# only the 'minor' field of the two overlap stretches is compared
if not np.array_equal(pos1_ovl['minor'], pos2_ovl['minor']):
raise OverlapException("Overlaps are not equal in structure")
overlap_len = len(pos1_ovl)
"""Check for gap between end of s1 and start of s2."""
# Excerpt: body of a gap test between two samples; the enclosing `def` is
# outside this view. last_pos / first_pos each unpack into a (major, minor) pair.
s1_end_maj, s1_end_min = s1.last_pos
s2_start_maj, s2_start_min = s2.first_pos
gapped = False
# the three tests are tried in order; any one of them marks a gap
if s2_start_maj > s1_end_maj + 1: # gap in major
gapped = True
# s2 starts on a later major but not at that major's minor 0
elif (s2_start_maj > s1_end_maj and
s2_start_min > 0): # missing minors
gapped = True
# same major, but s2's first minor is more than one past s1's last minor
elif (s2_start_maj == s1_end_maj and
s2_start_min > s1_end_min + 1): # missing minors
gapped = True
return gapped
# Excerpt: start of the classification of how s1 and s2 relate; the enclosing
# `def` is outside this view and the chain is cut after the last line below.
if s1.ref_name != s2.ref_name: # different ref_names
return Relationship.different_ref_name
# order the pair by first_pos; on a tie the sample with the larger size comes first
s1_ord, s2_ord = sorted((s1, s2), key=lambda x: (x.first_pos, -x.size))
# True when the caller's s1 is the one that sorted first.
# NOTE(review): this compares names, so it assumes s1 and s2 have distinct
# names -- confirm.
is_ordered = s1_ord.name == s1.name
# is one sample within the other?
if ordered_contained(s1_ord, s2_ord):
if is_ordered:
return Relationship.s2_within_s1
else:
return Relationship.s1_within_s2
# do samples abut?
elif ordered_abuts(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_abutted
else:
# Excerpt: the function is cut inside the nested concat_attr helper.
def from_samples(samples):
"""Create a sample by concatenating an iterable of `Sample` objects.
:param samples: iterable of `Sample` objects.
:returns: `Sample` obj
"""
# materialise the iterable: it is sliced and re-read several times below
samples = list(samples)
# every consecutive pair must be forward_abutted, otherwise ValueError
for s1, s2 in zip(samples[0:-1], samples[1:]):
rel = Sample.relative_position(s1, s2)
if rel is not Relationship.forward_abutted:
msg = (
'Refusing to concatenate unordered/non-abutting '
'samples {} and {} with relationship {}.')
raise ValueError(msg.format(s1.name, s2.name, repr(rel)))
# Relationship.forward_abutted guarantees all samples have the
# same ref_name
non_concat_fields = {'ref_name'}
def concat_attr(attr):
# collect the attribute from every sample, in order
vals = [getattr(s, attr) for s in samples]
if attr not in non_concat_fields:
# an attribute that is None on every sample stays None
# NOTE(review): a mix of None and non-None values is handed to
# np.concatenate unchanged -- confirm that cannot occur.
all_none = all([v is None for v in vals])
c = np.concatenate(vals) if not all_none else None
else:
# fields that are not concatenated must be identical across samples
assert len(set(vals)) == 1
# Excerpt: tail of an if/elif chain that classifies how two samples relate,
# starting inside its "abut" branch. s1_ord, s2_ord and is_ordered are
# computed before this excerpt begins.
if is_ordered:
return Relationship.forward_abutted
else:
return Relationship.reverse_abutted
# do samples overlap?
elif ordered_overlaps(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_overlap
else:
return Relationship.reverse_overlap
# if we got this far there should be a gap between s1_ord and s2_ord
elif ordered_gapped(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_gapped
else:
return Relationship.reverse_gapped
else:
# none of the ordered_* predicates matched
raise RuntimeError(
'Could not calculate relative position of {} and {}'.format(
s1.name, s2.name))
# Excerpt: classification of how s1 and s2 relate; the enclosing `def` is
# outside this view and the final `else:` below has lost its body.
# order the pair by first_pos; on a tie the sample with the larger size comes first
s1_ord, s2_ord = sorted((s1, s2), key=lambda x: (x.first_pos, -x.size))
# True when the caller's s1 is the one that sorted first (compared by name)
is_ordered = s1_ord.name == s1.name
# is one sample within the other?
if ordered_contained(s1_ord, s2_ord):
if is_ordered:
return Relationship.s2_within_s1
else:
return Relationship.s1_within_s2
# do samples abut?
elif ordered_abuts(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_abutted
else:
return Relationship.reverse_abutted
# do samples overlap?
elif ordered_overlaps(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_overlap
else:
return Relationship.reverse_overlap
# if we got this far there should be a gap between s1_ord and s2_ord
elif ordered_gapped(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_gapped
else:
return Relationship.reverse_gapped
else:
# Excerpt: tail of an if/elif chain that classifies how two samples relate,
# starting at the `else` of its "abut" branch. s1_ord, s2_ord and is_ordered
# are computed before this excerpt begins.
else:
return Relationship.reverse_abutted
# do samples overlap?
elif ordered_overlaps(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_overlap
else:
return Relationship.reverse_overlap
# if we got this far there should be a gap between s1_ord and s2_ord
elif ordered_gapped(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_gapped
else:
return Relationship.reverse_gapped
else:
# none of the ordered_* predicates matched
raise RuntimeError(
'Could not calculate relative position of {} and {}'.format(
s1.name, s2.name))
# Excerpt from inside a loop over samples; the loop header and the enclosing
# function are outside this view. s1, s1_name and start_1 are bound before
# this excerpt begins.
# the name is only used in messages; fall back to 'Unknown' for None
s2_name = 'Unknown' if s2 is None else s2.name
is_last_in_contig = False
# s1 is last chunk
if s2 is None:
# go to end of s1
end_1 = None
is_last_in_contig = True
else:
rel = medaka.common.Sample.relative_position(s1, s2)
# skip s2 if it is contained within s1
if rel is medaka.common.Relationship.s2_within_s1:
logger.info('{} is contained within {}, skipping.'.format(
s2_name, s1_name))
continue
elif rel is medaka.common.Relationship.forward_overlap:
# the third value returned by overlap_indices is discarded here
end_1, start_2, _ = medaka.common.Sample.overlap_indices(
s1, s2)
elif rel is medaka.common.Relationship.forward_gapped:
# s1 cannot be joined to s2: flag it as last and leave both ends open
is_last_in_contig = True
end_1, start_2 = (None, None)
msg = '{} and {} cannot be concatenated as there is ' + \
'no overlap and they do not abut.'
logger.info(msg.format(s1_name, s2_name))
else:
# NOTE(review): forward_abutted is not matched by any branch above and
# therefore also ends up here -- confirm that this is intended.
raise RuntimeError(
'Unexpected sample relationship {} '
'between {} and {}'.format(repr(rel), s1.name, s2.name))
# emit s1 cut to [start_1:end_1] together with the last-in-contig flag
yield s1.slice(slice(start_1, end_1)), is_last_in_contig
# s2 becomes the left-hand sample for the next iteration
s1 = s2
# Excerpt: tail of a gap test between two samples; the earlier conditions and
# the variables used below are set before this excerpt begins.
gapped = True
# same major, but s2's first minor is more than one past s1's last minor
elif (s2_start_maj == s1_end_maj and
s2_start_min > s1_end_min + 1): # missing minors
gapped = True
return gapped
# Excerpt: classification of how s1 and s2 relate; the enclosing `def` is
# outside this view and the chain is cut after the overlap branch.
if s1.ref_name != s2.ref_name: # different ref_names
return Relationship.different_ref_name
# order the pair by first_pos; on a tie the sample with the larger size comes first
s1_ord, s2_ord = sorted((s1, s2), key=lambda x: (x.first_pos, -x.size))
# True when the caller's s1 is the one that sorted first (compared by name)
is_ordered = s1_ord.name == s1.name
# is one sample within the other?
if ordered_contained(s1_ord, s2_ord):
if is_ordered:
return Relationship.s2_within_s1
else:
return Relationship.s1_within_s2
# do samples abut?
elif ordered_abuts(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_abutted
else:
return Relationship.reverse_abutted
# do samples overlap?
elif ordered_overlaps(s1_ord, s2_ord):
if is_ordered:
return Relationship.forward_overlap
else:
return Relationship.reverse_overlap