Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
lambda i:
hl.cond(hl.is_missing(row.data[i].__entries),
hl.range(0, hl.len(gbl.g[i].__cols))
.map(lambda _: hl.null(row.data[i].__entries.dtype.element_type)),
hl.bind(
lambda old_to_new: row.data[i].__entries.map(
lambda e: renumber_entry(e, old_to_new)),
hl.range(0, hl.len(alleles.local[i])).map(
lambda j: combined_allele_index[alleles.local[i][j]])))),
hl.dict(hl.range(0, hl.len(alleles.globl)).map(
def get_expr_for_end_pos(table):
    """Return an expression for the variant's end position.

    Computed as the locus start position plus the reference-allele length
    minus one, i.e. the 1-based inclusive end coordinate.
    """
    ref_allele_len = hl.len(get_expr_for_ref_allele(table))
    return table.locus.position + ref_allele_len - 1
newPL = hl.cond(
hl.is_defined(mt.PL),
(hl.range(0, hl.triangle(hl.len(mt.alleles)))
.map(lambda newi: hl.min(hl.range(0, hl.triangle(hl.len(mt.old_alleles)))
.filter(lambda oldi: hl.bind(
lambda oldc: hl.call(mt.__old_to_new_no_na[oldc[0]],
mt.__old_to_new_no_na[oldc[1]]) == hl.unphased_diploid_gt_index_call(newi),
hl.unphased_diploid_gt_index_call(oldi)))
.map(lambda oldi: mt.PL[oldi])))),
hl.null(tarray(tint32)))
return mt.annotate_entries(
GT=hl.call(mt.__old_to_new_no_na[mt.GT[0]],
mt.__old_to_new_no_na[mt.GT[1]]),
AD=hl.cond(
hl.is_defined(mt.AD),
(hl.range(0, hl.len(mt.alleles))
.map(lambda newi: hl.sum(hl.range(0, hl.len(mt.old_alleles))
.filter(lambda oldi: mt.__old_to_new_no_na[oldi] == newi)
.map(lambda oldi: mt.AD[oldi])))),
hl.null(tarray(tint32))),
# DP unchanged
GQ=hl.gq_from_pl(newPL),
PL=newPL).drop('__old_to_new_no_na')
def _summary_aggs(self):
    """Build the summary aggregators for this collection expression.

    Returns a tuple expression of (min length, max length, mean length,
    exploded per-element summary aggregations).
    """
    n = hl.len(self)
    element_summaries = hl.agg.explode(
        lambda item: item._all_summary_aggs(), self)
    return hl.tuple((
        hl.agg.min(n),
        hl.agg.max(n),
        hl.agg.mean(n),
        element_summaries,
    ))
find_worst_transcript_consequence
).values()
sorted_canonical_scores = hl.sorted(
worst_csq_gene_canonical, key=lambda tc: tc.csq_score
)
vep_data = mt[vep_root].annotate(
transcript_consequences=transcript_csqs,
worst_consequence_term=csqs.find(
lambda c: transcript_csqs.map(
lambda csq: csq.most_severe_consequence
).contains(c)
),
worst_csq_by_gene=sorted_scores,
worst_csq_for_variant=hl.or_missing(
hl.len(sorted_scores) > 0, sorted_scores[0]
),
worst_csq_by_gene_canonical=sorted_canonical_scores,
worst_csq_for_variant_canonical=hl.or_missing(
hl.len(sorted_canonical_scores) > 0, sorted_canonical_scores[0]
),
)
return (
mt.annotate_rows(**{vep_root: vep_data})
if isinstance(mt, hl.MatrixTable)
else mt.annotate(**{vep_root: vep_data})
)
lambda combined_allele_index:
hl.range(0, hl.len(row.data)).flatmap(
lambda i:
hl.cond(hl.is_missing(row.data[i].__entries),
hl.range(0, hl.len(gbl.g[i].__cols))
.map(lambda _: hl.null(row.data[i].__entries.dtype.element_type)),
hl.bind(
lambda old_to_new: row.data[i].__entries.map(
lambda e: renumber_entry(e, old_to_new)),
hl.range(0, hl.len(alleles.local[i])).map(
lambda j: combined_allele_index[alleles.local[i][j]])))),
hl.dict(hl.range(0, hl.len(alleles.globl)).map(
)
# If a sample is in sib_ht more than one time, keep only one of the sibling pairs
# First filter to only samples found in mt to keep as many pairs as possible
s_to_keep = mt.aggregate_cols(hl.agg.collect_as_set(mt.s), _localize=False)
sib_ht = sib_ht.filter(
s_to_keep.contains(sib_ht[i_col].s) & s_to_keep.contains(sib_ht[j_col].s)
)
sib_ht = sib_ht.add_index("sib_idx")
sib_ht = sib_ht.annotate(sibs=[sib_ht[i_col].s, sib_ht[j_col].s])
sib_ht = sib_ht.explode("sibs")
sib_ht = sib_ht.group_by("sibs").aggregate(
sib_idx=(hl.agg.take(sib_ht.sib_idx, 1, ordering=sib_ht.sib_idx)[0])
)
sib_ht = sib_ht.group_by(sib_ht.sib_idx).aggregate(sibs=hl.agg.collect(sib_ht.sibs))
sib_ht = sib_ht.filter(hl.len(sib_ht.sibs) == 2).persist()
logger.info(
f"Generating sibling variant sharing counts using {sib_ht.count()} pairs."
)
sib_ht = sib_ht.explode("sibs").key_by("sibs")[mt.s]
# Create sibling sharing counters
sib_stats = hl.struct(
**{
f"n_sib_shared_variants_{name}": hl.sum(
hl.agg.filter(
expr,
hl.agg.group_by(
sib_ht.sib_idx,
hl.or_missing(
hl.agg.sum(hl.is_defined(mt.GT)) == 2,
nr: A dict of sample_num -> genotype index in concordance table for all non-ref samples
:param mt:
:param common_samples:
:return:
"""
ht = mt.select_rows(
nr=hl.dict(
hl.agg.collect(
hl.agg.filter(common_samples.contains(mt.s) & hl.or_else(mt.GT.is_non_ref(), True), hl.tuple([common_samples[mt.s], hl.or_else(mt.GT.n_alt_alleles() + 2, 1)]))
)
)
).rows()
if filter_monomorphic:
ht = ht.filter(hl.len(ht.nr) > 0)
return ht
lambda row: hl.rbind(
hl.len(row.alleles), '' == row.alleles[-1],
lambda alleles_len, has_non_ref: hl.struct(
locus=row.locus,
alleles=hl.cond(has_non_ref, row.alleles[:-1], row.alleles),
rsid=row.rsid,
__entries=row.__entries.map(
lambda e:
hl.struct(
DP=e.DP,
END=row.info.END,
GQ=e.GQ,
LA=hl.range(0, alleles_len - hl.cond(has_non_ref, 1, 0)),
LAD=hl.cond(has_non_ref, e.AD[:-1], e.AD),
LGT=e.GT,
LPGT=e.PGT,
LPL=hl.cond(has_non_ref,
hl.cond(alleles_len > 2,