Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def vep_config_path(ref: str = "GRCh37"):
    """
    Look up the "vep_config" entry of VEP_REFERENCE_DATA for a reference.

    :param ref: Key into VEP_REFERENCE_DATA. Default is "GRCh37"
    :return: Value stored under "vep_config" for the requested reference
    :raises DataException: If ref is not a key of VEP_REFERENCE_DATA
    """
    if ref in VEP_REFERENCE_DATA.keys():
        return VEP_REFERENCE_DATA[ref]["vep_config"]

    # Unknown reference: list every accepted key in the error message.
    valid_refs = ",".join(VEP_REFERENCE_DATA.keys())
    raise DataException("Select reference as one of: {}".format(valid_refs))
def vep_context_ht_path(ref: str = "GRCh37"):
    """
    Look up the "all_possible" entry of VEP_REFERENCE_DATA for a reference.

    :param ref: Key into VEP_REFERENCE_DATA. Default is "GRCh37"
    :return: Value stored under "all_possible" for the requested reference
    :raises DataException: If ref is not a key of VEP_REFERENCE_DATA
    """
    if ref in VEP_REFERENCE_DATA.keys():
        return VEP_REFERENCE_DATA[ref]["all_possible"]

    # Unknown reference: list every accepted key in the error message.
    valid_refs = ",".join(VEP_REFERENCE_DATA.keys())
    raise DataException("Select reference as one of: {}".format(valid_refs))
def liftover(data_type: str) -> VersionedTableResource:
    """
    Get the 38 liftover of gnomad v2.1.1

    For exomes, release "2.1" is left out of the returned versions.

    :param data_type: One of "exomes" or "genomes"
    :return: Release Table
    :raises DataException: If data_type is not in DATA_TYPES
    """
    if data_type not in DATA_TYPES:
        raise DataException(f"{data_type} not in {DATA_TYPES}")

    if data_type == "exomes":
        current_release = CURRENT_EXOME_RELEASE
        # Build a filtered copy instead of calling EXOME_RELEASES.remove("2.1"):
        # remove() edits the shared module-level list in place, so a second
        # call would raise ValueError and every other user of EXOME_RELEASES
        # would silently lose the "2.1" entry.
        releases = [release for release in EXOME_RELEASES if release != "2.1"]
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_liftover_data_path(data_type, release))
            for release in releases
        },
    )
def coverage(data_type: str) -> VersionedTableResource:
    """
    Retrieves gnomAD's coverage table by data_type

    :param data_type: One of "exomes" or "genomes"
    :return: Coverage Table
    :raises DataException: If data_type is not in DATA_TYPES
    """
    if data_type not in DATA_TYPES:
        raise DataException(
            f"{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}"
        )

    # Any valid data_type other than "exomes" uses the genome constants.
    if data_type == "exomes":
        current_release = CURRENT_EXOME_RELEASE
        releases = EXOME_RELEASES
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    # One TableResource per release, keyed by release name.
    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_public_coverage_ht_path(data_type, release))
            for release in releases
        },
    )
def coverage(data_type: str) -> VersionedTableResource:
    """
    Retrieves gnomAD's coverage table by data_type

    For exomes, the default version is "2.1" and release "2.1.1" is left out
    of the returned versions.

    :param data_type: One of "exomes" or "genomes"
    :return: Coverage Table
    :raises DataException: If data_type is not in DATA_TYPES
    """
    if data_type not in DATA_TYPES:
        raise DataException(f"{data_type} not in {DATA_TYPES}")

    if data_type == "exomes":
        current_release = "2.1"
        # Build a filtered copy instead of calling EXOME_RELEASES.remove("2.1.1"):
        # remove() edits the shared module-level list in place, so a second
        # call would raise ValueError and every other user of EXOME_RELEASES
        # would silently lose the "2.1.1" entry.
        releases = [release for release in EXOME_RELEASES if release != "2.1.1"]
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_public_coverage_ht_path(data_type, release))
            for release in releases
        },
    )
raise DataException(
f"{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}"
)
if data_type == "exomes":
if version is None:
version = CURRENT_EXOME_RELEASE
elif version not in EXOME_RELEASES:
raise DataException(
f"Version {version} of gnomAD exomes for GRCh38 does not exist"
)
else:
if version is None:
version = CURRENT_GENOME_RELEASE
elif version not in GENOME_RELEASES:
raise DataException(
f"Version {version} of gnomAD genomes for GRCh38 does not exist"
)
return f"gs://gnomad-public/release/{version}/coverage/{data_type}/gnomad.{data_type}.r{version}.coverage.summary.tsv.bgz"
def public_release(data_type: str) -> VersionedTableResource:
    """
    Retrieves publicly released versioned table resource

    :param data_type: One of "exomes" or "genomes"
    :return: Release Table
    :raises DataException: If data_type is not in DATA_TYPES
    """
    if data_type not in DATA_TYPES:
        raise DataException(
            f"{data_type} not in {DATA_TYPES}, please select a data type from {DATA_TYPES}"
        )

    # Any valid data_type other than "exomes" uses the genome constants.
    if data_type == "exomes":
        current_release = CURRENT_EXOME_RELEASE
        releases = EXOME_RELEASES
    else:
        current_release = CURRENT_GENOME_RELEASE
        releases = GENOME_RELEASES

    # One TableResource per release, keyed by release name.
    return VersionedTableResource(
        current_release,
        {
            release: TableResource(path=_public_release_ht_path(data_type, release))
            for release in releases
        },
    )
:param sample_path: Path to a file with list of samples
:param header: Whether file with samples has a header. Default is True
:param table_key: Key to sample Table. Default is "s"
:param sparse: Whether the MatrixTable is sparse. Default is False
:param gt_expr: Name of field in MatrixTable containing genotype expression. Default is "GT"
:return: MatrixTable subsetted to specified samples and their variants
"""
sample_ht = hl.import_table(sample_path, no_header=not header, key=table_key)
sample_count = sample_ht.count()
missing_ht = sample_ht.anti_join(mt.cols())
missing_ht_count = missing_ht.count()
full_count = mt.count_cols()
if missing_ht_count != 0:
missing_samples = missing_ht.s.collect()
raise DataException(
f"Only {sample_count - missing_ht_count} out of {sample_count} "
"subsetting-table IDs matched IDs in the MT.\n"
f"IDs that aren't in the MT: {missing_samples}\n"
)
mt = mt.semi_join_cols(sample_ht)
if sparse:
mt = mt.filter_rows(
hl.agg.any(mt[gt_expr].is_non_ref() | hl.is_defined(mt.END))
)
else:
mt = mt.filter_rows(hl.agg.any(mt[gt_expr].is_non_ref()))
logger.info(
f"Finished subsetting samples. Kept {mt.count_cols()} "
f"out of {full_count} samples in MT"
def public_release(data_type: str) -> VersionedTableResource:
"""
Retrieves publicly released versioned table resource
:param data_type: One of "exomes" or "genomes"
:return: Release Table
"""
if data_type not in DATA_TYPES:
raise DataException(f"{data_type} not in {DATA_TYPES}")
if data_type == "exomes":
current_release = CURRENT_EXOME_RELEASE
releases = EXOME_RELEASES
else:
current_release = CURRENT_GENOME_RELEASE
releases = GENOME_RELEASES
return VersionedTableResource(
current_release,
{
release: TableResource(path=_public_release_ht_path(data_type, release))
for release in releases
},