Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
:return: Callrate MT
"""
logger.info("Computing call rate MatrixTable")
if len(intervals_ht.key) != 1 or not isinstance(
intervals_ht.key[0], hl.expr.IntervalExpression
):
logger.warning(
f"Call rate matrix computation expects `intervals_ht` with a key of type Interval. Found: {intervals_ht.key}"
)
if autosomes_only:
callrate_mt = filter_to_autosomes(mt)
if bi_allelic_only:
callrate_mt = callrate_mt.filter_rows(bi_allelic_expr(callrate_mt))
intervals_ht = intervals_ht.annotate(_interval_key=intervals_ht.key)
callrate_mt = callrate_mt.annotate_rows(
_interval_key=intervals_ht.index(
callrate_mt.locus, all_matches=match
)._interval_key
)
if match:
callrate_mt = callrate_mt.explode_rows("_interval_key")
callrate_mt = callrate_mt.filter_rows(
hl.is_defined(callrate_mt._interval_key.interval)
)
callrate_mt = callrate_mt.select_entries(
GT=hl.or_missing(hl.is_defined(callrate_mt.GT), hl.struct())
if annotation_expr:
mt = mt.annotate_rows(**annotation_expr)
filter_expr = []
if min_af is not None:
filter_expr.append((mt.af > min_af))
if min_callrate is not None:
filter_expr.append((mt.site_callrate > min_callrate))
if min_inbreeding_coeff_threshold is not None:
filter_expr.append((mt.site_inbreeding_coeff > min_inbreeding_coeff_threshold))
if min_hardy_weinberg_threshold is not None:
filter_expr.append((mt.hwe.p_value > min_hardy_weinberg_threshold))
if snv_only:
filter_expr.append(hl.is_snp(mt.alleles[0], mt.alleles[1]))
if bi_allelic_only:
filter_expr.append(bi_allelic_expr(mt))
if apply_hard_filters:
if "info" in mt.row_value:
if "QD" in mt.info:
filter_expr.append((mt.info.QD >= 2))
else:
logger.warning(
"Could not apply QD hard filter, as `info.QD` not found in schema."
)
if "FS" in mt.info:
filter_expr.append((mt.info.FS <= 60))
else:
logger.warning(
"Could not apply FS hard filter, as `info.FS` not found in schema."
)
if "MQ" in mt.info:
.. note::
Expects that `mt` is a trio matrix table that was annotated with adj. If dealing with
a sparse MT, `hl.experimental.densify` must be run first.
By default this pipeline function will filter `mt` to only autosomes and bi-allelic sites.
:param mt: A Trio Matrix Table returned from `hl.trio_matrix`. Must be dense
:param autosomes_only: If set, only autosomal intervals are used.
:param bi_allelic_only: If set, only bi-allelic sites are used for the computation
:return: Table with trio stats
"""
if autosomes_only:
mt = filter_to_autosomes(mt)
if bi_allelic_only:
mt = mt.filter_rows(bi_allelic_expr(mt))
logger.info(f"Generating trio stats using {mt.count_cols()} trios.")
trio_adj = mt.proband_entry.adj & mt.father_entry.adj & mt.mother_entry.adj
ht = mt.select_rows(
**generate_trio_stats_expr(
mt,
transmitted_strata={"raw": True, "adj": trio_adj},
de_novo_strata={"raw": True, "adj": trio_adj},
ac_strata={"raw": True, "adj": trio_adj},
)
).rows()
return ht
By default this pipeline function will filter `mt` to only autosomes and bi-allelic sites.
:param mt: Input Matrix table
:param relatedness_ht: Input relationship table
:param i_col: Column containing the 1st sample of the pair in the relationship table
:param j_col: Column containing the 2nd sample of the pair in the relationship table
:param relationship_col: Column containing the relationship for the sample pair as defined in this module constants.
:param autosomes_only: If set, only autosomal intervals are used.
:param bi_allelic_only: If set, only bi-allelic sites are used for the computation
:return: A Table with the sibling shared variant counts
"""
if autosomes_only:
mt = filter_to_autosomes(mt)
if bi_allelic_only:
mt = mt.filter_rows(bi_allelic_expr(mt))
sib_ht = relatedness_ht.filter(relatedness_ht[relationship_col] == SIBLINGS)
s_to_keep = sib_ht.aggregate(
hl.agg.explode(
lambda s: hl.agg.collect_as_set(s), [sib_ht[i_col].s, sib_ht[j_col].s]
),
_localize=False,
)
mt = mt.filter_cols(s_to_keep.contains(mt.s))
if "adj" not in mt.entry:
mt = annotate_adj(mt)
sib_stats_ht = mt.select_rows(
**generate_sib_stats_expr(
mt, sib_ht, i_col=i_col, j_col=j_col, strata={"raw": True, "adj": mt.adj},
)