Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
ht = ht.select()
ht = ht.annotate(x=hl.scan.count())
ht = ht.annotate(y=ht.x + 1)
ht = ht.filter(ht.x // n != ht.y // n)
ht = ht.select()
ht = ht.annotate(start=hl.or_else(
hl.scan._prev_nonnull(hl.locus_from_global_position(ht.locus.global_position() + 1,
reference_genome=reference_genome)),
hl.locus_from_global_position(0, reference_genome=reference_genome)))
ht = ht.key_by()
ht = ht.select(interval=hl.interval(start=ht.start, end=ht.locus, includes_end=True))
intervals = ht.aggregate(hl.agg.collect(ht.interval))
last_st = hl.eval(
hl.locus_from_global_position(hl.literal(intervals[-1].end).global_position() + 1,
reference_genome=reference_genome))
interval = hl.Interval(start=last_st, end=end, includes_end=True)
intervals.append(interval)
return intervals
:class:`.Table`
Interval-keyed table.
"""
# UCSC BED spec defined here: https://genome.ucsc.edu/FAQ/FAQformat.html#format1
t = import_table(path, no_header=True, delimiter=r"\s+", impute=False,
skip_blank_lines=True, types={'f0': tstr, 'f1': tint32,
'f2': tint32, 'f3': tstr,
'f4': tstr},
comment=["""^browser.*""", """^track.*""",
r"""^\w+=("[\w\d ]+"|\d+).*"""],
**kwargs)
if contig_recoding is not None:
contig_recoding = hl.literal(contig_recoding)
def recode_contig(x):
if contig_recoding is None:
return x
return contig_recoding.get(x, x)
if t.row.dtype == tstruct(f0=tstr, f1=tint32, f2=tint32):
t = t.select(interval=locus_interval_expr(recode_contig(t['f0']),
t['f1'] + 1,
t['f2'] + 1,
True,
False,
reference_genome,
skip_invalid_intervals))
elif len(t.row) >= 4 and tstruct(**dict([(n, typ) for n, typ in t.row.dtype._field_types.items()][:4])) == tstruct(f0=tstr, f1=tint32, f2=tint32, f3=tstr):
x_range = (ranges.x_stats.min, ranges.x_stats.max)
if y_range is None:
y_range = (ranges.y_stats.min, ranges.y_stats.max)
else:
warnings.warn('If x_range or y_range are specified in histogram_2d, and there are points '
'outside of these ranges, they will not be plotted')
x_range = list(map(float, x_range))
y_range = list(map(float, y_range))
x_spacing = (x_range[1] - x_range[0]) / x_bins
y_spacing = (y_range[1] - y_range[0]) / y_bins
def frange(start, stop, step):
from itertools import count, takewhile
return takewhile(lambda x: x <= stop, count(start, step))
x_levels = hail.literal(list(frange(x_range[0], x_range[1], x_spacing))[::-1])
y_levels = hail.literal(list(frange(y_range[0], y_range[1], y_spacing))[::-1])
grouped_ht = source.group_by(
x=hail.str(x_levels.find(lambda w: x >= w)),
y=hail.str(y_levels.find(lambda w: y >= w))
).aggregate(c=hail.agg.count())
data = grouped_ht.filter(hail.is_defined(grouped_ht.x) & (grouped_ht.x != str(x_range[1])) &
hail.is_defined(grouped_ht.y) & (grouped_ht.y != str(y_range[1]))).to_pandas()
# Use python prettier float -> str function
data['x'] = data['x'].apply(lambda e: str(float(e)))
data['y'] = data['y'].apply(lambda e: str(float(e)))
if log:
mapper = LogColorMapper(palette=colors, low=data.c.min(), high=data.c.max())
else:
>>> hl.eval(hl.is_strand_ambiguous('A', 'T'))
True
Parameters
----------
ref : :class:`.StringExpression`
Reference allele.
alt : :class:`.StringExpression`
Alternate allele.
Returns
-------
:class:`.BooleanExpression`
"""
alleles = hl.literal({('A', 'T'), ('T', 'A'), ('G', 'C'), ('C', 'G')})
return alleles.contains((ref, alt))
True
>>> interval2.contains(6)
False
Parameters
----------
value :
Object with type :meth:`.point_type`.
Returns
-------
:obj:`bool`
"""
return hl.eval(hl.literal(self, hl.tinterval(self._point_type)).contains(value))
if cov_matrix != None:
n_phens = cov_matrix.shape[0]
else:
n_phens = len(h2)
if rg == [None]:
print(f'Assuming rg=0 for all {n_phens} traits')
rg = [0]*int((n_phens**2-n_phens)/2)
assert (all(x >= -1 and x<= 1 for x in rg)), 'rg values must be between 0 and 1'
cov, rg = get_cov_matrix(h2, rg)
cov = (1/M)*cov
randstate = np.random.RandomState(int(seed)) #seed random state for replicability
betas = randstate.multivariate_normal(mean=np.zeros(n_phens),cov=cov,size=[M,])
df = pd.DataFrame([0]*M,columns=['beta'])
tb = hl.Table.from_pandas(df)
tb = tb.add_index().key_by('idx')
tb = tb.annotate(beta = hl.literal(betas.tolist())[hl.int32(tb.idx)])
mt = mt.add_row_index(name='row_idx'+tid)
mt = mt.annotate_rows(beta = tb[mt['row_idx'+tid]]['beta'])
mt = _clean_fields(mt, tid)
return mt, rg
cov_matrix = np.asarray([[1/(ptt+ptf), rg/ptt],[rg/ptt,1/(ptt+pft)]])
pff0, pff = pff, 1-sum([ptt, ptf, pft])
print(f'rg: {rg0} -> {rg}\nptt: {ptt0} -> {ptt}\npff: {pff0} -> {pff}')
pi = [ptt, ptf, pft, pff]
beta = randstate.multivariate_normal(mean=np.zeros(2),cov=cov_matrix,size=[int(M),])
zeros = np.zeros(shape=int(M)).T
beta_matrix = np.stack((beta, np.asarray([beta[:,0],zeros]).T,
np.asarray([zeros,zeros]).T,np.asarray([zeros,beta[:,1]]).T),axis=1)
idx = np.random.choice([0,1,2,3],p=pi,size=int(M))
betas = beta_matrix[range(int(M)),idx,:]
betas[:,0] *= (h2[0]/M)**(1/2)
betas[:,1] *= (h2[1]/M)**(1/2)
df = pd.DataFrame([0]*M,columns=['beta'])
tb = hl.Table.from_pandas(df)
tb = tb.add_index().key_by('idx')
tb = tb.annotate(beta = hl.literal(betas.tolist())[hl.int32(tb.idx)])
mt = mt.add_row_index()
mt = mt.annotate_rows(beta = tb[mt.row_idx]['beta'])
return mt, pi, rg
[0.9090909090082644, 0.09090909090082644, 9.090909090082645e-11]
Notes
-----
This function assumes a uniform prior on the possible genotypes.
Parameters
----------
pl : :class:`.ArrayNumericExpression` of type :py:data:`.tint32`
Array of Phred-scaled genotype likelihoods.
Returns
-------
:class:`.ArrayNumericExpression` of type :py:data:`.tfloat64`
"""
phred_table = hl.literal([10 ** (-x/10.0) for x in builtins.range(_cache_size)])
gp = hl.bind(lambda pls: pls.map(lambda x: hl.cond(x >= _cache_size, 10 ** (-x/10.0), phred_table[x])), pl)
return hl.bind(lambda gp: gp / hl.sum(gp), gp)
def filter_clinvar_to_gene_list(mt_kt, genes, gene_column_in_mt_kt):
gene_names = hl.literal(genes)
mt_kt = mt_kt.annotate(
in_gene_of_interest=gene_names.find(lambda x: mt_kt[gene_column_in_mt_kt] == x))
mt_kt = mt_kt.filter(mt_kt.in_gene_of_interest != "NA")
return mt_kt
Returns
-------
:class:`.MatrixTable`
:class:`.MatrixTable` with simulated phenotype as column field.
"""
h2 = h2.tolist() if type(h2) is np.ndarray else ([h2] if type(h2) is not list else h2)
assert popstrat_var is None or (popstrat_var >= 0), 'popstrat_var must be non-negative'
tid = ''.join(random.choices(string.ascii_uppercase + string.ascii_lowercase, k=5)) # "temporary id" -- random string to identify temporary intermediate fields generated by this method
mt = annotate_all(mt=mt,
row_exprs={'beta_'+tid:beta},
col_exprs={} if popstrat is None else {'popstrat_'+tid:popstrat},
entry_exprs={'gt_'+tid:genotype.n_alt_alleles() if genotype.dtype is dtype('call') else genotype})
mt = normalize_genotypes(mt['gt_'+tid])
if mt['beta_'+tid].dtype == dtype('array'): #if >1 traits
mt = mt.annotate_cols(y_no_noise = hl.agg.array_agg(lambda beta: hl.agg.sum(beta*mt['norm_gt']),mt['beta_'+tid]))
mt = mt.annotate_cols(y = mt.y_no_noise + hl.literal(h2).map(lambda x: hl.rand_norm(0,hl.sqrt(1-x))))
else:
mt = mt.annotate_cols(y_no_noise = hl.agg.sum(mt['beta_'+tid] * mt['norm_gt']))
mt = mt.annotate_cols(y = mt.y_no_noise + hl.rand_norm(0, hl.sqrt(1-h2[0])))
if popstrat is not None:
var_factor = 1 if popstrat_var is None else (popstrat_var**(1/2))/mt.aggregate_cols(hl.agg.stats(mt['popstrat_'+tid])).stdev
mt = mt.rename({'y':'y_no_popstrat'})
mt = mt.annotate_cols(y = mt.y_no_popstrat + mt['popstrat_'+tid]*var_factor)
mt = _clean_fields(mt, tid)
return mt