Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
'f2': 'feature',
'f3': 'start',
'f4': 'end',
'f5': 'score',
'f6': 'strand',
'f7': 'frame',
'f8': 'attribute'})
ht = ht.annotate(attribute=hl.dict(
hl.map(lambda x: (x.split(' ')[0],
x.split(' ')[1].replace('"', '').replace(';$', '')),
ht['attribute'].split('; '))))
attributes = ht.aggregate(hl.agg.explode(lambda x: hl.agg.collect_as_set(x), ht['attribute'].keys()))
ht = ht.transmute(**{x: hl.or_missing(ht['attribute'].contains(x),
ht['attribute'][x])
for x in attributes if x})
if reference_genome:
if reference_genome == 'GRCh37':
ht = ht.annotate(seqname=ht['seqname'].replace('^chr', ''))
else:
ht = ht.annotate(seqname=hl.case()
.when(ht['seqname'].startswith('HLA'), ht['seqname'])
.when(ht['seqname'].startswith('chrHLA'), ht['seqname'].replace('^chr', ''))
.when(ht['seqname'].startswith('chr'), ht['seqname'])
.default('chr' + ht['seqname']))
if skip_invalid_contigs:
valid_contigs = hl.literal(set(hl.get_reference(reference_genome).contigs))
ht = ht.filter(valid_contigs.contains(ht['seqname']))
ht = ht.transmute(interval=hl.locus_interval(ht['seqname'],
def split_position_start(position):
return hl.or_missing(
hl.is_defined(position),
hl.bind(lambda start: hl.cond(start == "?", hl.null(hl.tint), hl.int(start)), position.split("-")[0]),
)
for snp in ["snp1", "snp2", "snp3"]
]
)
ds = ds.annotate(constituent_snv_ids=[ds.snp1, ds.snp2, ds.snp3])
ds = ds.annotate(
mnv_in_exome=ds.constituent_snvs.all(lambda s: hl.is_defined(s.exome)),
mnv_in_genome=ds.constituent_snvs.all(lambda s: hl.is_defined(s.genome)),
)
ds = ds.transmute(
n_individuals=ds.n_indv_tnv,
ac=ds.AC_tnv,
ac_hom=ds.n_tnv_hom,
exome=hl.or_missing(
ds.mnv_in_exome,
hl.struct(
n_individuals=ds.n_indv_tnv_ex, ac=ds.AC_tnv_ex, ac_hom=ds.n_tnv_hom_ex
),
),
genome=hl.or_missing(
ds.mnv_in_genome,
hl.struct(
n_individuals=ds.n_indv_tnv_gen,
ac=ds.AC_tnv_gen,
ac_hom=ds.n_tnv_hom_gen,
),
),
)
ds = ds.drop("AC_snp1", "AC_snp2", "AC_snp3")
lambda entry: hl.scan._prev_nonnull(hl.or_missing(hl.is_defined(entry.END), entry)),
t.__entries),
Returns phased genotype calls in the case of a diploid proband
(autosomes, PAR regions of sex chromosomes or non-PAR regions of a female proband)
:param LocusExpression locus: Locus in the trio MatrixTable
:param ArrayExpression alleles: Alleles in the trio MatrixTable
:param CallExpression proband_call: Input proband genotype call
:param CallExpression father_call: Input father genotype call
:param CallExpression mother_call: Input mother genotype call
:return: Array containing: phased proband call, phased father call, phased mother call
:rtype: ArrayExpression
"""
proband_v = proband_call.one_hot_alleles(alleles)
father_v = hl.cond(
locus.in_x_nonpar() | locus.in_y_nonpar(),
hl.or_missing(father_call.is_haploid(), hl.array([father_call.one_hot_alleles(alleles)])),
call_to_one_hot_alleles_array(father_call, alleles)
)
mother_v = call_to_one_hot_alleles_array(mother_call, alleles)
combinations = hl.flatmap(
lambda f:
hl.zip_with_index(mother_v)
.filter(lambda m: m[1] + f[1] == proband_v)
.map(lambda m: hl.struct(m=m[0], f=f[0])),
hl.zip_with_index(father_v)
)
return (
hl.or_missing(
hl.is_defined(combinations) & (hl.len(combinations) == 1),
hl.array([
lambda x: hl.or_missing( # 2.1 get the start position of blocks that overlap the current locus
(x[1] >= ht.locus.position) & (x[0].contig == ht.locus.contig),
x[0].position,
)
"""
Returns phased genotype calls in the case of a haploid proband in the non-PAR region of X
:param CallExpression proband_call: Input proband genotype call
:param CallExpression father_call: Input father genotype call
:param CallExpression mother_call: Input mother genotype call
:return: Array containing: phased proband call, phased father call, phased mother call
:rtype: ArrayExpression
"""
transmitted_allele = hl.zip_with_index(hl.array([mother_call[0], mother_call[1]])).find(lambda m: m[1] == proband_call[0])
return hl.or_missing(
hl.is_defined(transmitted_allele),
hl.array([
hl.call(proband_call[0], phased=True),
hl.or_missing(father_call.is_haploid(), hl.call(father_call[0], phased=True)),
phase_parent_call(mother_call, transmitted_allele[0])
])
**{ref_contig.replace("chr", ""): ref_contig for ref_contig in ref.contigs if "chr" in ref_contig},
**{f"chr{ref_contig}": ref_contig for ref_contig in ref.contigs if "chr" not in ref_contig}}
mt = hl.import_vcf(
vcf_path,
reference_genome=f"GRCh{genome_version}",
contig_recoding=contig_recoding,
min_partitions=min_partitions,
force_bgz=force_bgz,
drop_samples=drop_samples,
skip_invalid_loci=skip_invalid_loci)
mt = mt.annotate_globals(sourceFilePath=vcf_path, genomeVersion=genome_version)
mt = mt.annotate_rows(
original_alt_alleles=hl.or_missing(hl.len(mt.alleles) > 2, get_expr_for_variant_ids(mt.locus, mt.alleles))
)
if split_multi_alleles:
mt = hl.split_multi_hts(mt)
mt = mt.key_rows_by(**hl.min_rep(mt.locus, mt.alleles))
return mt
def get_alt_count(locus, gt, is_female):
"""
Helper method to calculate alt allele count with sex info if present
"""
if is_female is None:
return hl.or_missing(locus.in_autosome(), gt.n_alt_alleles())
return (
hl.case()
.when(locus.in_autosome_or_par(), gt.n_alt_alleles())
.when(
~is_female & (locus.in_x_nonpar() | locus.in_y_nonpar()),
hl.min(1, gt.n_alt_alleles()),
)
.when(is_female & locus.in_y_nonpar(), 0)
.default(0)
)
lambda entry: hl.scan._prev_nonnull( # 1. Keep a scan of the entries using _prev_nonnull
hl.or_missing(
hl.is_defined(
entry.END
), # Update the scan whenever a new ref block is encountered
hl.tuple(
[ # 1.1 keep the start (ht.locus) and end (entry.END) of each ref block
ht.locus,
entry.END,
]