Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
assert normalize_chromosome("chrmt") == "chrMT"
with assert_raises(TypeError):
normalize_chromosome({"a": "b"})
with assert_raises(TypeError):
normalize_chromosome([])
with assert_raises(TypeError):
normalize_chromosome(None)
with assert_raises(ValueError):
normalize_chromosome("")
with assert_raises(ValueError):
normalize_chromosome(0)
def test_normalize_chromosome():
    """
    Exercise normalize_chromosome on sex, numeric and mitochondrial contig
    names, with and without a "chr" prefix, and check that dict and list
    arguments are rejected with a TypeError.
    """
    # (input, expected output) pairs: the name after any "chr" prefix is
    # upper-cased, the prefix itself is kept as given, and the integer 1
    # comes back as the string "1".
    expected_by_input = [
        ("X", "X"),
        ("chrX", "chrX"),
        ("x", "X"),
        ("chrx", "chrX"),
        (1, "1"),
        ("1", "1"),
        ("chr1", "chr1"),
        ("chrM", "chrM"),
        ("chrMT", "chrMT"),
        ("M", "M"),
        ("MT", "MT"),
        ("m", "M"),
        ("chrm", "chrM"),
        ("mt", "MT"),
        ("chrmt", "chrMT"),
    ]
    for raw_name, normalized_name in expected_by_input:
        assert normalize_chromosome(raw_name) == normalized_name

    # Container types are not acceptable chromosome names.
    for invalid_name in ({"a": "b"}, []):
        with assert_raises(TypeError):
            normalize_chromosome(invalid_name)
def dataframe(
self,
contig=None,
feature=None,
strand=None,
save_to_disk=False):
"""
Load genome entries as a DataFrame, optionally restricted to
particular contig or feature type.
"""
if contig:
contig = normalize_chromosome(contig)
if strand:
strand = normalize_strand(strand)
if feature is not None:
require_string(feature, "feature")
key = (contig, feature, strand)
if key not in self._dataframes:
def _construct_df():
full_df = self._load_full_dataframe_cached()
assert len(full_df) > 0, \
"Dataframe representation of genomic database empty!"
def on_contig(self, contig):
    """
    Return whether the given contig, after normalization with
    normalize_chromosome, equals this object's `contig` attribute.
    """
    normalized_contig = normalize_chromosome(contig)
    return normalized_contig == self.contig
contig,
position,
end=None,
strand=None,
distinct=False,
sorted=False):
"""
Get the non-null values of a column from the database
at a particular range of loci
"""
# TODO: combine with the query method, since they overlap
# significantly
require_string(column_name, "column_name", nonempty=True)
contig = normalize_chromosome(contig)
require_integer(position, "position")
if end is None:
end = position
require_integer(end, "end")
if not self.column_exists(feature, column_name):
raise ValueError("Table %s doesn't have column %s" % (
feature, column_name,))
if distinct:
distinct_string = "DISTINCT "
else:
distinct_string = ""
feature : str, optional
Path for subset of data restrict to given feature
column : str, optional
Restrict to single column
strand : str, optional
Positive ("+") or negative ("-") DNA strand. Default = either.
distinct : bool, optional
Only keep unique values (default=False)
"""
csv_filename = self.gtf_base_filename + ".expanded"
if contig:
contig = normalize_chromosome(contig)
csv_filename += ".contig.%s" % (contig,)
if feature:
csv_filename += ".feature.%s" % (feature,)
if column:
csv_filename += ".column.%s" % (column,)
if strand:
if strand == "+":
strand_string = "positive"
elif strand == "-":
strand_string = "negative"
else:
raise ValueError("Invalid strand value: %s" % strand)
csv_filename += ".strand.%s" % strand_string
if distinct:
csv_filename += ".distinct"
csv_filename += extension