Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
- `n_trans_singletons` - count of transmitted singletons
- `n_untrans_singletons` - count of untransmitted singletons
- `n_omni` - count of omni truth variants
- `n_mills` - count of mills truth variants
- `n_hapmap` - count of hapmap truth variants
- `n_kgp_phase1_hc` - count of 1000 genomes phase 1 high confidence truth variants
:param ht: Table that aggregation will be performed on
:param fam_stats_ht: Family statistics HT, joined to `ht` by key
:return: a dictionary containing aggregations to perform on ht
"""
# Annotate binned table with the evaluation data
ht = ht._parent
indel_length = hl.abs(ht.alleles[0].length() - ht.alleles[1].length())
# Load external evaluation data
build = get_reference_genome(ht.locus).name
clinvar = (
grch37_resources.reference_data.clinvar
if build == "GRCh37"
else grch38_resources.reference_data.clinvar
).ht()[ht.key]
truth_data = (
grch37_resources.reference_data.get_truth_ht()
if build == "GRCh37"
else grch38_resources.reference_data.get_truth_ht()
)[ht.key]
fam = fam_stats_ht[ht.key]
return dict(
min_score=hl.agg.min(ht.score),
max_score=hl.agg.max(ht.score),
n=hl.agg.count(),
:param gt_expr: Name of entry field storing the genotype. Default: 'GT'
:param f_stat_cutoff: f-stat to roughly divide 'XX' from 'XY' samples. Assumes XX samples are below cutoff and XY are above cutoff.
:param float aaf_threshold: Minimum alternate allele frequency to be used in f-stat calculations.
:return: Table of samples and their imputed sex karyotypes.
"""
logger.info("Imputing sex chromosome ploidies...")
if is_sparse:
ploidy_ht = impute_sex_ploidy(
mt, excluded_intervals, included_intervals, normalization_contig
)
else:
raise NotImplementedError(
"Imputing sex ploidy does not exist yet for dense data."
)
x_contigs = get_reference_genome(mt.locus).x_contigs
logger.info(f"Filtering mt to biallelic SNPs in X contigs: {x_contigs}")
if "was_split" in list(mt.row):
mt = mt.filter_rows((~mt.was_split) & hl.is_snp(mt.alleles[0], mt.alleles[1]))
else:
mt = mt.filter_rows(
(hl.len(mt.alleles) == 2) & hl.is_snp(mt.alleles[0], mt.alleles[1])
)
mt = hl.filter_intervals(
mt, [hl.parse_locus_interval(contig) for contig in x_contigs]
)
if sites_ht is not None:
if aaf_expr == None:
logger.warning(
"sites_ht was provided, but aaf_expr is missing. Assuming name of field with alternate allele frequency is 'AF'."
)
def filter_to_autosomes(
    t: Union[hl.MatrixTable, hl.Table]
) -> Union[hl.MatrixTable, hl.Table]:
    """
    Restrict a Table or MatrixTable to its autosomal loci.

    The input is expected to carry a field named `locus` of type Locus.
    The autosomal region is taken to be the single interval running from the
    first contig through the 22nd contig listed by the reference genome
    obtained from `t.locus`.

    :param t: Input MT/HT
    :return: MT/HT filtered to the autosomal interval
    """
    genome = get_reference_genome(t.locus)

    # Contigs at positions 0 and 21 bound the autosomal span.
    contig_names = genome.contigs
    first_contig = contig_names[0]
    last_contig = contig_names[21]

    autosomal_interval = hl.parse_locus_interval(
        f"{first_contig}-{last_contig}", reference_genome=genome
    )
    return hl.filter_intervals(t, [autosomal_interval])
the coverage of an autosomal chromosome (by default chr20).
Coverage is computed using the median block coverage (summed over the block size) and the non-ref coverage at non-ref genotypes.
:param mt: Input sparse Matrix Table
:param excluded_calling_intervals: Optional table of intervals to exclude from the computation.
Used only when determining contig size (not used when computing chromosome depth).
:param included_calling_intervals: Optional table of intervals to use in the computation.
Used only when determining contig size (not used when computing chromosome depth).
:param normalization_contig: Which chromosome to normalize by
:param chr_x: Optional X Chromosome contig name (by default uses the X contig in the reference)
:param chr_y: Optional Y Chromosome contig name (by default uses the Y contig in the reference)
:return: Table with mean coverage over chromosomes 20, X and Y and sex chromosomes ploidy based on normalized coverage.
"""
ref = get_reference_genome(mt.locus, add_sequence=True)
if chr_x is None:
if len(ref.x_contigs) != 1:
raise NotImplementedError(
"Found {0} X chromosome contigs ({1}) in Genome reference. sparse_impute_sex_ploidy currently only supports a single X chromosome contig. Please use the `chr_x` argument to specify which X chromosome contig to use ".format(
len(ref.x_contigs), ",".join(ref.x_contigs)
)
)
chr_x = ref.x_contigs[0]
if chr_y is None:
if len(ref.y_contigs) != 1:
raise NotImplementedError(
"Found {0} Y chromosome contigs ({1}) in Genome reference. sparse_impute_sex_ploidy currently only supports a single Y chromosome contig. Please use the `chr_y` argument to specify which Y chromosome contig to use ".format(
len(ref.y_contigs), ",".join(ref.y_contigs)
)
)
chr_y = ref.y_contigs[0]
filter_segdup: bool = True,
filter_exome_low_coverage_regions: bool = False,
high_conf_regions: Optional[List[str]] = None,
) -> Union[hl.MatrixTable, hl.Table]:
"""
Filters low-confidence regions
:param mt: MatrixTable or Table to filter
:param filter_lcr: Whether to filter LCR regions
:param filter_decoy: Whether to filter decoy regions
:param filter_segdup: Whether to filter Segdup regions
:param filter_exome_low_coverage_regions: Whether to filter exome low confidence regions
:param high_conf_regions: Paths to set of high confidence regions to restrict to (union of regions)
:return: MatrixTable or Table with low confidence regions removed
"""
build = get_reference_genome(mt.locus).name
if build == "GRCh37":
import gnomad.resources.grch37.reference_data as resources
elif build == "GRCh38":
import gnomad.resources.grch38.reference_data as resources
criteria = []
if filter_lcr:
lcr = resources.lcr_intervals.ht()
criteria.append(hl.is_missing(lcr[mt.locus]))
if filter_decoy:
decoy = resources.decoy_intervals.ht()
criteria.append(hl.is_missing(decoy[mt.locus]))
if filter_segdup:
segdup = resources.seg_dup_intervals.ht()