Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from hail.expr.expression import ExpressionException
mt = self.get_groupable_matrix()

# Every call in this section is expected to raise ExpressionException.

# Grouping keys passed positionally.
for group_by, key_expr in (
    (mt.group_rows_by, mt['group1'] + 1),
    (mt.group_cols_by, mt['group1']),
    (mt.group_cols_by, mt['group3'] + 1),
    (mt.group_rows_by, mt['group3']),
):
    self.assertRaises(ExpressionException, group_by, key_expr)

# Grouping keys passed by keyword.
for group_by, named_key in (
    (mt.group_rows_by, {'group3': mt['group1']}),
    (mt.group_cols_by, {'group1': mt['group3']}),
    (mt.group_rows_by, {'foo': mt['group1']}),
    (mt.group_cols_by, {'foo': mt['group3']}),
):
    self.assertRaises(ExpressionException, group_by, **named_key)

# Aggregations on a row-grouped matrix table, one per output field name.
a = mt.group_rows_by(group5=(mt['group2']['a'] + 1))
for field_name in ('group3', 'group5', 'foo'):
    self.assertRaises(
        ExpressionException, a.aggregate, **{field_name: hl.agg.sum(mt['c'])}
    )

# Aggregations on a column-grouped matrix table, one per output field name.
b = mt.group_cols_by(group5=(mt['group4']['a'] + 1))
for field_name in ('group1', 'group5', 'foo'):
    self.assertRaises(
        ExpressionException, b.aggregate, **{field_name: hl.agg.sum(mt['c'])}
    )
def _ac_an_parent_child_count(
    proband_gt: hl.expr.CallExpression,
    father_gt: hl.expr.CallExpression,
    mother_gt: hl.expr.CallExpression,
) -> Dict[str, hl.expr.Int64Expression]:
    """
    Build aggregators for allele count (AC) and allele number (AN) in parents and children.

    AC is the sum of alternate alleles over genotypes; AN adds 2 for every
    defined genotype. A missing genotype contributes to neither AC nor AN.

    :param proband_gt: Genotype call of the proband.
    :param father_gt: Genotype call of the father.
    :param mother_gt: Genotype call of the mother.
    :return: Dict of aggregation expressions keyed by `ac_parents`, `an_parents`,
        `ac_children` and `an_children`.
    """
    # Aggregate each parent on its own. Adding the two per-row counts before
    # aggregating yields a missing value as soon as one parent's genotype is
    # missing, and `hl.agg.sum` skips missing values, so the defined parent's
    # alleles would be dropped from AC while still being counted in AN.
    ac_parent_expr = hl.agg.sum(father_gt.n_alt_alleles()) + hl.agg.sum(
        mother_gt.n_alt_alleles()
    )
    # Boolean expressions add up as 0/1, so this is 2 alleles per defined parent.
    an_parent_expr = hl.agg.sum(
        (hl.is_defined(father_gt) + hl.is_defined(mother_gt)) * 2
    )
    ac_child_expr = hl.agg.sum(proband_gt.n_alt_alleles())
    an_child_expr = hl.agg.sum(hl.is_defined(proband_gt) * 2)

    return {
        "ac_parents": ac_parent_expr,
        "an_parents": an_parent_expr,
        "ac_children": ac_child_expr,
        "an_children": an_child_expr,
    }
def _ac_an_parent_child_count(
    proband_gt: hl.expr.CallExpression,
    father_gt: hl.expr.CallExpression,
    mother_gt: hl.expr.CallExpression,
) -> Dict[str, hl.expr.Int64Expression]:
    """
    Build aggregators for allele count (AC) and allele number (AN) in parents and children.

    AC is the sum of alternate alleles over genotypes; AN adds 2 for every
    defined genotype. A missing genotype contributes to neither AC nor AN.

    :param proband_gt: Genotype call of the proband.
    :param father_gt: Genotype call of the father.
    :param mother_gt: Genotype call of the mother.
    :return: Dict of aggregation expressions keyed by `ac_parents`, `an_parents`,
        `ac_children` and `an_children`.
    """
    # Aggregate each parent on its own. Adding the two per-row counts before
    # aggregating yields a missing value as soon as one parent's genotype is
    # missing, and `hl.agg.sum` skips missing values, so the defined parent's
    # alleles would be dropped from AC while still being counted in AN.
    ac_parent_expr = hl.agg.sum(father_gt.n_alt_alleles()) + hl.agg.sum(
        mother_gt.n_alt_alleles()
    )
    # Boolean expressions add up as 0/1, so this is 2 alleles per defined parent.
    an_parent_expr = hl.agg.sum(
        (hl.is_defined(father_gt) + hl.is_defined(mother_gt)) * 2
    )
    ac_child_expr = hl.agg.sum(proband_gt.n_alt_alleles())
    an_child_expr = hl.agg.sum(hl.is_defined(proband_gt) * 2)

    return {
        "ac_parents": ac_parent_expr,
        "an_parents": an_parent_expr,
        "ac_children": ac_child_expr,
        "an_children": an_child_expr,
    }
def _ac_an_parent_child_count(
    proband_gt: hl.expr.CallExpression,
    father_gt: hl.expr.CallExpression,
    mother_gt: hl.expr.CallExpression,
) -> Dict[str, hl.expr.Int64Expression]:
    """
    Build aggregators for allele count (AC) and allele number (AN) in parents and children.

    AC is the sum of alternate alleles over genotypes; AN adds 2 for every
    defined genotype. A missing genotype contributes to neither AC nor AN.

    :param proband_gt: Genotype call of the proband.
    :param father_gt: Genotype call of the father.
    :param mother_gt: Genotype call of the mother.
    :return: Dict of aggregation expressions keyed by `ac_parents`, `an_parents`,
        `ac_children` and `an_children`.
    """
    # Aggregate each parent on its own. Adding the two per-row counts before
    # aggregating yields a missing value as soon as one parent's genotype is
    # missing, and `hl.agg.sum` skips missing values, so the defined parent's
    # alleles would be dropped from AC while still being counted in AN.
    ac_parent_expr = hl.agg.sum(father_gt.n_alt_alleles()) + hl.agg.sum(
        mother_gt.n_alt_alleles()
    )
    # Boolean expressions add up as 0/1, so this is 2 alleles per defined parent.
    an_parent_expr = hl.agg.sum(
        (hl.is_defined(father_gt) + hl.is_defined(mother_gt)) * 2
    )
    ac_child_expr = hl.agg.sum(proband_gt.n_alt_alleles())
    an_child_expr = hl.agg.sum(hl.is_defined(proband_gt) * 2)

    return {
        "ac_parents": ac_parent_expr,
        "an_parents": an_parent_expr,
        "ac_children": ac_child_expr,
        "an_children": an_child_expr,
    }
# Correlation of x and y expressed through aggregated sums:
#   (n * sum(xy) - sum(x) * sum(y))
#   / sqrt((n * sum(x^2) - sum(x)^2) * (n * sum(y^2) - sum(y)^2))
# NOTE(review): this is a fragment; the call that receives this lambda together
# with the trailing `x, y, _ctx=...` arguments opens outside this excerpt.
lambda x, y: hl.bind(
# `a` is the struct of aggregated sums built by the hl.agg.filter call below.
lambda a:
(a.n * a.xy - a.x * a.y) /
hl.sqrt((a.n * a.xsq - a.x ** 2) *
(a.n * a.ysq - a.y ** 2)),
# Only records where both x and y are defined feed the sums and the count.
hl.agg.filter(hl.is_defined(x) & hl.is_defined(y),
hl.struct(x=hl.agg.sum(x),
y=hl.agg.sum(y),
xsq=hl.agg.sum(x ** 2),
ysq=hl.agg.sum(y ** 2),
xy=hl.agg.sum(x * y),
n=hl.agg.count()))),
x, y, _ctx=_agg_func.context)
def _ac_an_parent_child_count(
    proband_gt: hl.expr.CallExpression,
    father_gt: hl.expr.CallExpression,
    mother_gt: hl.expr.CallExpression,
) -> Dict[str, hl.expr.Int64Expression]:
    """
    Build aggregators for allele count (AC) and allele number (AN) in parents and children.

    AC is the sum of alternate alleles over genotypes; AN adds 2 for every
    defined genotype. A missing genotype contributes to neither AC nor AN.

    :param proband_gt: Genotype call of the proband.
    :param father_gt: Genotype call of the father.
    :param mother_gt: Genotype call of the mother.
    :return: Dict of aggregation expressions keyed by `ac_parents`, `an_parents`,
        `ac_children` and `an_children`.
    """
    # Aggregate each parent on its own. Adding the two per-row counts before
    # aggregating yields a missing value as soon as one parent's genotype is
    # missing, and `hl.agg.sum` skips missing values, so the defined parent's
    # alleles would be dropped from AC while still being counted in AN.
    ac_parent_expr = hl.agg.sum(father_gt.n_alt_alleles()) + hl.agg.sum(
        mother_gt.n_alt_alleles()
    )
    # Boolean expressions add up as 0/1, so this is 2 alleles per defined parent.
    an_parent_expr = hl.agg.sum(
        (hl.is_defined(father_gt) + hl.is_defined(mother_gt)) * 2
    )
    ac_child_expr = hl.agg.sum(proband_gt.n_alt_alleles())
    an_child_expr = hl.agg.sum(hl.is_defined(proband_gt) * 2)

    return {
        "ac_parents": ac_parent_expr,
        "an_parents": an_parent_expr,
        "ac_children": ac_child_expr,
        "an_children": an_child_expr,
    }
# Assemble every requested aggregator under its prefixed field name.
# The groups are merged in order (median, sum, int32 sum, array sum), so when
# two groups share a field name the later group's aggregator is the one kept.
agg_expr = {
    # Median, taken as the approximate 0.5 quantile.
    **{
        f"{prefix}{field}": hl.agg.approx_quantiles(field_expr, 0.5)
        for field, field_expr in median_agg_fields.items()
    },
    # Plain sums.
    **{
        f"{prefix}{field}": hl.agg.sum(field_expr)
        for field, field_expr in sum_agg_fields.items()
    },
    # Sums cast to int32.
    **{
        f"{prefix}{field}": hl.int32(hl.agg.sum(field_expr))
        for field, field_expr in int32_sum_agg_fields.items()
    },
    # Sums applied through array_agg to array-valued fields.
    **{
        f"{prefix}{field}": hl.agg.array_agg(
            lambda element: hl.agg.sum(element), field_expr
        )
        for field, field_expr in array_sum_agg_fields.items()
    },
}
# Handle annotations combinations and casting for specific annotations
# If RAW_MQandDP is in agg_expr or if both MQ_DP and RAW_MQ are, compute MQ instead
mq_tuple = None
if f"{prefix}RAW_MQandDP" in agg_expr:
logger.info(
# Build, for one contig, a column annotation named `<chrom>_mean_dp`: DP summed
# over the contig's rows, divided by the contig size.
# NOTE(review): this excerpt is truncated; the `select_cols(` call opened below
# is never closed here, so whatever follows it is not visible.
# NOTE(review): the return annotation says hl.Table but the visible expression
# is a select_cols() call on a matrix table -- confirm against the full source.
def get_chr_dp_ann(chrom: str) -> hl.Table:
contig_size = get_contig_size(chrom)
# Keep only the rows that fall on this contig.
chr_mt = hl.filter_intervals(mt, [hl.parse_locus_interval(chrom)])
# For X and Y contigs, keep only loci for which in_x_nonpar() / in_y_nonpar() holds.
if chrom in ref.x_contigs:
chr_mt = chr_mt.filter_rows(chr_mt.locus.in_x_nonpar())
if chrom in ref.y_contigs:
chr_mt = chr_mt.filter_rows(chr_mt.locus.in_y_nonpar())
return chr_mt.select_cols(
**{
f"{chrom}_mean_dp": hl.agg.sum(
# Hom-ref entries contribute DP scaled by (END - position); every other
# entry contributes its DP once.
# NOTE(review): if END is an inclusive block end, the block spans
# END - position + 1 bases and this undercounts by one per block -- confirm.
# NOTE(review): hl.cond is deprecated in newer Hail releases in favour of
# hl.if_else -- confirm against the Hail version this file targets.
hl.cond(
chr_mt.LGT.is_hom_ref(),
chr_mt.DP * (chr_mt.END - chr_mt.locus.position),
chr_mt.DP,
)
)
/ contig_size
}
# NOTE(review): orphaned tail of a function; its header and the definitions of
# ac_parent_expr / an_parent_expr are not part of this excerpt.
# Child allele count: alternate alleles summed over proband genotypes.
ac_child_expr = hl.agg.sum(proband_gt.n_alt_alleles())
# Child allele number: 2 for every defined proband genotype.
an_child_expr = hl.agg.sum(hl.is_defined(proband_gt) * 2)
# Hand the four aggregation expressions back keyed by name.
return {
f"ac_parents": ac_parent_expr,
f"an_parents": an_parent_expr,
f"ac_children": ac_child_expr,
f"an_children": an_child_expr,
}
# Transmission counters: one field per (counter kind, stratum) pair, named
# "<counter>_<stratum>".
trio_stats = hl.struct(
    **{
        f"{counter}_{stratum}": hl.agg.filter(
            # Count only entries where the proband is non-ref and the stratum holds.
            trio_mt.proband_entry.GT.is_non_ref() & stratum_expr,
            hl.agg.sum(
                # Look up the pair of counts for this genotype/copy-state
                # combination; combinations absent from the map count as (0, 0).
                # Position 0 of the pair feeds n_transmitted, position 1
                # n_untransmitted.
                trans_count_map.get(
                    (
                        trio_mt.proband_entry.GT.n_alt_alleles(),
                        trio_mt.father_entry.GT.n_alt_alleles(),
                        trio_mt.mother_entry.GT.n_alt_alleles(),
                        _get_copy_state(trio_mt.locus),
                    ),
                    default=(0, 0),
                )[counter_idx]
            ),
        )
        for stratum, stratum_expr in transmitted_strata.items()
        for counter_idx, counter in enumerate(
            ("n_transmitted", "n_untransmitted")
        )
    }
)
median_agg_fields = _agg_list_to_dict(mt, median_agg_fields)
if isinstance(array_sum_agg_fields, list):
array_sum_agg_fields = _agg_list_to_dict(mt, array_sum_agg_fields)
# Assemble every requested aggregator under its prefixed field name.
# The groups are merged in order (median, sum, int32 sum, array sum), so when
# two groups share a field name the later group's aggregator is the one kept.
agg_expr = {
    # Median, taken as the approximate 0.5 quantile.
    **{
        f"{prefix}{field}": hl.agg.approx_quantiles(field_expr, 0.5)
        for field, field_expr in median_agg_fields.items()
    },
    # Plain sums.
    **{
        f"{prefix}{field}": hl.agg.sum(field_expr)
        for field, field_expr in sum_agg_fields.items()
    },
    # Sums cast to int32.
    **{
        f"{prefix}{field}": hl.int32(hl.agg.sum(field_expr))
        for field, field_expr in int32_sum_agg_fields.items()
    },
    # Sums applied through array_agg to array-valued fields.
    **{
        f"{prefix}{field}": hl.agg.array_agg(
            lambda element: hl.agg.sum(element), field_expr
        )
        for field, field_expr in array_sum_agg_fields.items()
    },
}
# Handle annotations combinations and casting for specific annotations