def test_floor_division(self):
    a_int32 = hl.array([2, 4, 8, 16, hl.null(tint32)])
    a_int64 = a_int32.map(lambda x: hl.int64(x))
    a_float32 = a_int32.map(lambda x: hl.float32(x))
    a_float64 = a_int32.map(lambda x: hl.float64(x))
    int32_4s = hl.array([4, 4, 4, 4, hl.null(tint32)])
    int32_3s = hl.array([3, 3, 3, 3, hl.null(tint32)])
    int64_3 = hl.int64(3)
    int64_3s = int32_3s.map(lambda x: hl.int64(x))
    float32_3 = hl.float32(3)
    float32_3s = int32_3s.map(lambda x: hl.float32(x))
    float64_3 = hl.float64(3)
    float64_3s = int32_3s.map(lambda x: hl.float64(x))

    expected = [0, 1, 2, 5, None]
    expected_inv = [1, 0, 0, 0, None]

    self.check_expr(a_int32 // 3, expected, tarray(tint32))
    self.check_expr(a_int64 // 3, expected, tarray(tint64))
    self.check_expr(a_float32 // 3, expected, tarray(tfloat32))
    self.check_expr(a_float64 // 3, expected, tarray(tfloat64))
    self.check_expr(3 // a_int32, expected_inv, tarray(tint32))
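
# Quick sanity check (plain Python, not from the original test) of the expected
# values used above: floor division truncates toward negative infinity, and the
# missing (hl.null) element propagates as None.
assert [x // 3 for x in [2, 4, 8, 16]] == [0, 1, 2, 5]
assert [3 // x for x in [2, 4, 8, 16]] == [1, 0, 0, 0]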
# Split the reference genome into intervals containing roughly n rows of ht each.
end = hl.Locus(reference_genome.contigs[-1],
               reference_genome.lengths[reference_genome.contigs[-1]])

ht = ht.select()
ht = ht.annotate(x=hl.scan.count())      # 0-based row index
ht = ht.annotate(y=ht.x + 1)
ht = ht.filter(ht.x // n != ht.y // n)   # keep the last row of each bucket of n rows
ht = ht.select()

# Each interval starts one base past the previously kept locus (or at global
# position 0 for the first bucket) and ends at the kept locus, inclusive.
ht = ht.annotate(start=hl.or_else(
    hl.scan._prev_nonnull(hl.locus_from_global_position(ht.locus.global_position() + 1,
                                                        reference_genome=reference_genome)),
    hl.locus_from_global_position(0, reference_genome=reference_genome)))
ht = ht.key_by()
ht = ht.select(interval=hl.interval(start=ht.start, end=ht.locus, includes_end=True))

intervals = ht.aggregate(hl.agg.collect(ht.interval))

# Cover the tail of the genome: from one base past the last collected end
# through the final locus of the last contig.
last_st = hl.eval(
    hl.locus_from_global_position(hl.literal(intervals[-1].end).global_position() + 1,
                                  reference_genome=reference_genome))
interval = hl.Interval(start=last_st, end=end, includes_end=True)
intervals.append(interval)
return intervals
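
# Illustrative check (standalone sketch, not part of the original source):
# global position 0 maps to the first base of the first contig, so the first
# interval above starts at the very beginning of the reference genome.
first_locus = hl.eval(hl.locus_from_global_position(0, reference_genome='GRCh37'))
assert (first_locus.contig, first_locus.position) == ('1', 1)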
def make_case(x):
    x = hl.literal(x)
    return (hl.case()
            .when(x == 6, 'A')
            .when(x % 3 == 0, 'B')
            .when(x == 5, 'C')
            .when(x < 2, 'D')
            .or_missing())
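
# Usage sketch (not from the original source): clauses are checked in order,
# the first match wins, and or_missing() yields a missing value (None under
# hl.eval) when nothing matches.
assert hl.eval(make_case(6)) == 'A'    # x == 6 matches before x % 3 == 0
assert hl.eval(make_case(12)) == 'B'
assert hl.eval(make_case(5)) == 'C'
assert hl.eval(make_case(1)) == 'D'
assert hl.eval(make_case(2)) is None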
import hail as hl

GOLD_STD = 'gs://hail-common/vep/vep/vep_examplars/vep_no_csq_4dc19bc1b.mt/'
GOLD_STD_CSQ = 'gs://hail-common/vep/vep/vep_examplars/vep_csq_4dc19bc1b.mt/'

for path, csq in [(GOLD_STD, False), (GOLD_STD_CSQ, True)]:
    print(f"Checking 'hl.vep' replicates on '{path}'")
    expected = hl.read_matrix_table(path)
    actual = hl.vep(expected.rows().select(),
                    'gs://hail-common/vep/vep/vep85-loftee-gcloud-testing.json',
                    csq=csq)
    actual._force_count()
    # vep_result_agrees = actual._same(expected)
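
    # To turn the replication run above into a hard check (assumed follow-up,
    # not executed in the original snippet), re-enable the comparison and
    # assert on its result:
    #     vep_result_agrees = actual._same(expected)
    #     assert vep_result_agrees, f"hl.vep output differs from gold standard at {path}"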
from hail.expr.expression import ExpressionException
mt = self.get_groupable_matrix()
self.assertRaises(ExpressionException, mt.group_rows_by, mt['group1'] + 1)
self.assertRaises(ExpressionException, mt.group_cols_by, mt['group1'])
self.assertRaises(ExpressionException, mt.group_cols_by, mt['group3'] + 1)
self.assertRaises(ExpressionException, mt.group_rows_by, mt['group3'])
self.assertRaises(ExpressionException, mt.group_rows_by, group3=mt['group1'])
self.assertRaises(ExpressionException, mt.group_cols_by, group1=mt['group3'])
self.assertRaises(ExpressionException, mt.group_rows_by, foo=mt['group1'])
self.assertRaises(ExpressionException, mt.group_cols_by, foo=mt['group3'])
a = mt.group_rows_by(group5=(mt['group2']['a'] + 1))
self.assertRaises(ExpressionException, a.aggregate, group3=hl.agg.sum(mt['c']))
self.assertRaises(ExpressionException, a.aggregate, group5=hl.agg.sum(mt['c']))
self.assertRaises(ExpressionException, a.aggregate, foo=hl.agg.sum(mt['c']))
b = mt.group_cols_by(group5=(mt['group4']['a'] + 1))
self.assertRaises(ExpressionException, b.aggregate, group1=hl.agg.sum(mt['c']))
self.assertRaises(ExpressionException, b.aggregate, group5=hl.agg.sum(mt['c']))
self.assertRaises(ExpressionException, b.aggregate, foo=hl.agg.sum(mt['c']))
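
# For contrast, a grouping the checks above allow (sketch reusing the same
# hypothetical groupable matrix): a new, non-conflicting group name over a row
# field, aggregated under a new result name.
grouped = mt.group_rows_by(group5=mt['group1'])
result_mt = grouped.aggregate(sum_c=hl.agg.sum(mt['c']))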
bound_exprs = {}
gq_dp_exprs = {}

def has_field_of_type(name, dtype):
    return name in mt.entry and mt[name].dtype == dtype

if has_field_of_type('DP', hl.tint32):
    gq_dp_exprs['dp_stats'] = hl.agg.stats(mt.DP).select('mean', 'stdev', 'min', 'max')
if has_field_of_type('GQ', hl.tint32):
    gq_dp_exprs['gq_stats'] = hl.agg.stats(mt.GQ).select('mean', 'stdev', 'min', 'max')

if not has_field_of_type('GT', hl.tcall):
    raise ValueError("'variant_qc': expect an entry field 'GT' of type 'call'")

bound_exprs['n_called'] = hl.agg.count_where(hl.is_defined(mt['GT']))
bound_exprs['n_not_called'] = hl.agg.count_where(hl.is_missing(mt['GT']))
bound_exprs['n_filtered'] = mt.count_cols(_localize=False) - hl.agg.count()
bound_exprs['call_stats'] = hl.agg.call_stats(mt.GT, mt.alleles)

result = hl.rbind(
    hl.struct(**bound_exprs),
    lambda e1: hl.rbind(
        hl.case()
        .when(hl.len(mt.alleles) == 2,
              hl.hardy_weinberg_test(e1.call_stats.homozygote_count[0],
                                     e1.call_stats.AC[1] - 2 * e1.call_stats.homozygote_count[1],
                                     e1.call_stats.homozygote_count[1]))
        .or_missing(),
        lambda hwe: hl.struct(**{
            **gq_dp_exprs,
            **e1.call_stats,
            'call_rate': hl.float(e1.n_called) / (e1.n_called + e1.n_not_called + e1.n_filtered),
            # per-variant Hardy-Weinberg summaries from the biallelic case above
            'het_freq_hwe': hwe.het_freq_hwe,
            'p_value_hwe': hwe.p_value})))
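
# Typical next step (assumed; the fragment ends before it): attach the bound
# result as a row annotation, e.g. under the name 'variant_qc'.
mt = mt.annotate_rows(variant_qc=result)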
# Fragment of a VEP transcript-consequence formatter: it maps raw VEP CSQ
# fields on `annotation` to cleaned output fields.  The enclosing call is not
# part of the snippet; hl.struct and the assigned name are assumed here only so
# the fragment parses.
transcript_consequence = hl.struct(
    exon=annotation.EXON,
    gene_id=annotation.Gene,
    gene_symbol=annotation.SYMBOL,
    gene_symbol_source=annotation.SYMBOL_SOURCE,
    hgnc_id=annotation.HGNC_ID,
    hgvsc=annotation.HGVSc,
    hgvsp=annotation.HGVSp,
    lof=annotation.LoF,
    lof_filter=annotation.LoF_filter,
    lof_flags=annotation.LoF_flags,
    lof_info=annotation.LoF_info,
    # PolyPhen field contains "polyphen_prediction(polyphen_score)"
    polyphen_prediction=hl.or_missing(
        hl.is_defined(annotation.PolyPhen), annotation.PolyPhen.split("\\(")[0]
    ),
    protein_id=annotation.ENSP,
    # Protein_position may contain either "start-end" or, when start == end, "start"
    protein_start=split_position_start(annotation.Protein_position),
    protein_end=split_position_end(annotation.Protein_position),
    # SIFT field contains "sift_prediction(sift_score)"
    sift_prediction=hl.or_missing(hl.is_defined(annotation.SIFT), annotation.SIFT.split("\\(")[0]),
    transcript_id=annotation.Feature,
)
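
# The split_position_* helpers used above are assumed to come from the
# enclosing module and to split VEP's Protein_position field ("start-end" or
# just "start") into integers.  A minimal sketch of one possible implementation
# (hypothetical, not taken from the original source):
def split_position_start(position):
    return hl.or_missing(hl.is_defined(position), hl.int(position.split("-")[0]))


def split_position_end(position):
    return hl.or_missing(hl.is_defined(position), hl.int(position.split("-")[-1]))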
import argparse

import hail as hl


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("input")
    parser.add_argument("output")
    args = parser.parse_args()

    hl.init(log="/tmp/hail.log")

    ds = hl.read_table(args.input)
    ds = format_variants_table(ds)  # defined elsewhere in the original script
    ds.describe()
    ds.write(args.output)
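
# Standard script entry point (not shown in the original fragment).
if __name__ == "__main__":
    main()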