Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if alts[0] == alts[1]: # homozygous 1/1
gt = gt_sep.join(len(haps) * '1')
alts = alts[:1]
else: # heterozygous 1/2
gt = gt_sep.join(map(str, haps))
else: # heterozygous 0/1
assert len(haps) == 1
gts = [0, 1] # appropriate if hap with variant is 2
if not discard_phase:
if int(haps[0]) == 1:
gts = [1, 0]
gt = gt_sep.join(map(str, gts))
genotype_data = {'GT': gt, 'GQ': qual}
return Variant(
v.chrom, interval.begin, ref, alt=alts,
filt='PASS', info=info, qual=qual,
genotype_data=genotype_data).trim()
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=qual, genotype_data=genotype))
# heterozygous, one deletion
elif all((is_het,
contains_nonref,
contains_deletion)):
qual = self._pfmt(self._phred(1 - primary_prob))
alt = [s for s in call if s != '*']
gt = '1/1'
genotype = {'GT': gt, 'GQ': qual}
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=qual, genotype_data=genotype))
# no snp at this location
else:
if return_all:
qual = self._pfmt(ref_prob)
# return variant even though it is not a snp
genotype = {'GT': '0/0', 'GQ': qual}
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt='.', filt='PASS',
info=info, qual=qual, genotype_data=genotype))
return results
def _parse(self):
# just parsing the file to yield records
last_pos = [None, None]
with open(self.filename, encoding='utf-8') as handle:
for index, line in enumerate(handle):
line = line.replace('\n', '')
# Already read meta and header in self.__init__
if line.startswith('#'):
continue
try:
variant = Variant.from_text(line)
except Exception as e:
raise IOError(
'Exception while reading variant #{}.\n'
'Line: {}'.format(index, line)) from e
if variant.chrom != last_pos[0]:
last_pos = [variant.chrom, None]
elif last_pos[1] is not None and last_pos[1] > variant.pos:
raise IOError(
'.vcf is unsorted at index #{}.'.format(index))
if variant.chrom not in self.chroms:
self.chroms.append(variant.chrom)
yield variant
last_pos[1] = variant.pos
genotype = {'GT': '1/1', 'GQ': qual}
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=qual, genotype_data=genotype))
# heterozygous, no deletions
elif all((is_het,
not contains_deletion)):
err = 1 - 0.5 * (primary_prob + secondary_prob)
qual = self._pfmt(self._phred(err))
alt = [s for s in call if s != ref_symbol]
gt = '0/1' if len(alt) == 1 else '1/2'
genotype = {'GT': gt, 'GQ': qual}
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=qual, genotype_data=genotype))
# heterozygous, one deletion
elif all((is_het,
contains_nonref,
contains_deletion)):
qual = self._pfmt(self._phred(1 - primary_prob))
alt = [s for s in call if s != '*']
gt = '1/1'
genotype = {'GT': gt, 'GQ': qual}
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=qual, genotype_data=genotype))
genotype = {'GT': gt, 'GQ': qual}
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=qual, genotype_data=genotype))
# heterozygous, secondary deletion
elif all((not primary_is_reference,
not primary_is_deletion,
secondary_is_deletion,
secondary_exceeds_threshold)):
alt = primary_call
qual = self._pfmt(self._phred(1 - primary_prob))
genotype = {'GT': '1/1', 'GQ': qual}
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=qual, genotype_data=genotype))
# no snp at this location
else:
if return_all:
# return variant even though it is not a snp
qual = self._pfmt(self._phred(1 - primary_prob))
genotype = {'GT': '0/0', 'GQ': qual}
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt='.', filt='PASS',
info=info, qual=qual, genotype_data=genotype))
return results
}
else:
info = {}
# log likelihood ratio
qual = self._pfmt(sum(pred_quals) - sum(ref_quals))
genotype = {'GT': '1', 'GQ': qual}
var_pos = pos['major'][start]
if pad_del_at_start_of_chunk:
var_pos -= 1
var_ref = ref_seq[var_pos] + var_ref
var_pred = ref_seq[var_pos] + var_pred
variant = medaka.vcf.Variant(sample.ref_name, var_pos, var_ref,
alt=var_pred, filt='PASS', info=info,
qual=qual, genotype_data=genotype)
variant = variant.trim()
variants.append(variant)
return variants
results = list()
data = zip(outputs, argmax, probs, quals, positions, ref_symbols)
for network_output, amax, prob, qual, pos, ref_symbol in data:
call = self._decoding[amax]
# notes: we output only variants with one or more substitutions,
# and deletions are masked from the records. TODO: is this
# really the behaviour we want?
if call == (ref_symbol, ref_symbol): # is reference
if return_all:
# return variant even though it is not a snp
q = self._pfmt(qual)
genotype = {'GT': '0/0', 'GQ': q}
info = _make_info(ref_symbol, prob, call)
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt='.', filt='PASS',
info=info, qual=q, genotype_data=genotype))
else: # is variant
contains_deletion = '*' in call
if not self._singleton(call): # heterozygous
if not contains_deletion:
alt = [s for s in call if s != ref_symbol]
gt = '0/1' if len(alt) == 1 else '1/2'
q = self._pfmt(qual)
genotype = {'GT': gt, 'GQ': q}
info = _make_info(ref_symbol, prob, call)
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=q, genotype_data=genotype))
else: # with deletion
# disallow (ref, *), and record (alt, *) as (alt, alt)
qual = self._pfmt(self._phred(1 - primary_prob))
alt = [s for s in call if s != '*']
gt = '1/1'
genotype = {'GT': gt, 'GQ': qual}
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=qual, genotype_data=genotype))
# no snp at this location
else:
if return_all:
qual = self._pfmt(ref_prob)
# return variant even though it is not a snp
genotype = {'GT': '0/0', 'GQ': qual}
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt='.', filt='PASS',
info=info, qual=qual, genotype_data=genotype))
return results
q = self._pfmt(qual)
genotype = {'GT': '0/0', 'GQ': q}
info = _make_info(ref_symbol, prob, call)
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt='.', filt='PASS',
info=info, qual=q, genotype_data=genotype))
else: # is variant
contains_deletion = '*' in call
if not self._singleton(call): # heterozygous
if not contains_deletion:
alt = [s for s in call if s != ref_symbol]
gt = '0/1' if len(alt) == 1 else '1/2'
q = self._pfmt(qual)
genotype = {'GT': gt, 'GQ': q}
info = _make_info(ref_symbol, prob, call)
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=q, genotype_data=genotype))
else: # with deletion
# disallow (ref, *), and record (alt, *) as (alt, alt)
contains_nonref_nondel = len(
[s for s in call
if s != ref_symbol and s != '*']) > 0
if contains_nonref_nondel:
alt = [s for s in call if s != '*']
gt = '1/1'
q = self._pfmt(qual)
genotype = {'GT': gt, 'GQ': q}
info = _make_info(ref_symbol, prob, call)
results.append(medaka.vcf.Variant(
ref_name, pos, ref_symbol, alt, filt='PASS',
info=info, qual=q, genotype_data=genotype))