Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_regexp_filter():
    """Check the number of sequences ``filter_fasta`` keeps for several regexes.

    Every pattern is applied twice to the same fixture: once keeping the
    sequences whose name matches (``v=False``) and once keeping the ones that
    do not (``v=True``). The number of keys in the returned object is compared
    with the expected count for each mode.
    """
    # Imported locally so this test does not rely on module-level imports
    # beyond the ones it already used.
    import os
    from tempfile import TemporaryDirectory

    fname = "tests/data/regexp/regexp.fa"
    # (pattern, expected keys with v=False, expected keys with v=True);
    # every pair of counts adds up to 17.
    regexps = [
        ("Chr.*", 2, 15),
        ("Scaffold.*", 1, 16),
        ("scaffold_.*", 3, 14),
        (r"^\d+$", 4, 13),
        ("chr.*", 4, 13),
    ]
    # Write the filtered FASTA into a scratch directory that is removed when
    # the block exits. The previous `NamedTemporaryFile(...).name` only kept a
    # path string: the temporary file object was discarded straight away and
    # the file later written at that path was never cleaned up.
    with TemporaryDirectory() as tmp_dir:
        tmpfa = os.path.join(tmp_dir, "filtered.fa")
        for regex, n_match, n_no_match in regexps:
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=False, force=True
            )
            assert len(fa.keys()) == n_match
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=True, force=True
            )
            assert len(fa.keys()) == n_no_match
def test_regexp_filter():
    """Check the number of sequences ``filter_fasta`` keeps for several regexes.

    Every pattern is applied twice to the same fixture: once keeping the
    sequences whose name matches (``v=False``) and once keeping the ones that
    do not (``v=True``). The number of keys in the returned object is compared
    with the expected count for each mode.
    """
    # Imported locally so this test does not rely on module-level imports
    # beyond the ones it already used.
    import os
    from tempfile import TemporaryDirectory

    fname = "tests/data/regexp/regexp.fa"
    # (pattern, expected keys with v=False, expected keys with v=True);
    # every pair of counts adds up to 17.
    regexps = [
        ("Chr.*", 2, 15),
        ("Scaffold.*", 1, 16),
        ("scaffold_.*", 3, 14),
        (r"^\d+$", 4, 13),
        ("chr.*", 4, 13),
    ]
    # Write the filtered FASTA into a scratch directory that is removed when
    # the block exits. The previous `NamedTemporaryFile(...).name` only kept a
    # path string: the temporary file object was discarded straight away and
    # the file later written at that path was never cleaned up.
    with TemporaryDirectory() as tmp_dir:
        tmpfa = os.path.join(tmp_dir, "filtered.fa")
        for regex, n_match, n_no_match in regexps:
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=False, force=True
            )
            assert len(fa.keys()) == n_match
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=True, force=True
            )
            assert len(fa.keys()) == n_no_match
def test_regexp_filter():
    """Check the number of sequences ``filter_fasta`` keeps for several regexes.

    Every pattern is applied twice to the same fixture: once keeping the
    sequences whose name matches (``v=False``) and once keeping the ones that
    do not (``v=True``). The number of keys in the returned object is compared
    with the expected count for each mode.
    """
    # Imported locally so this test does not rely on module-level imports
    # beyond the ones it already used.
    import os
    from tempfile import TemporaryDirectory

    fname = "tests/data/regexp/regexp.fa"
    # (pattern, expected keys with v=False, expected keys with v=True);
    # every pair of counts adds up to 17.
    regexps = [
        ("Chr.*", 2, 15),
        ("Scaffold.*", 1, 16),
        ("scaffold_.*", 3, 14),
        (r"^\d+$", 4, 13),
        ("chr.*", 4, 13),
    ]
    # Write the filtered FASTA into a scratch directory that is removed when
    # the block exits. The previous `NamedTemporaryFile(...).name` only kept a
    # path string: the temporary file object was discarded straight away and
    # the file later written at that path was never cleaned up.
    with TemporaryDirectory() as tmp_dir:
        tmpfa = os.path.join(tmp_dir, "filtered.fa")
        for regex, n_match, n_no_match in regexps:
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=False, force=True
            )
            assert len(fa.keys()) == n_match
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=True, force=True
            )
            assert len(fa.keys()) == n_no_match
# Decompress the download: the file is renamed to carry a ".gz" suffix and
# gunzip is then invoked with the original, un-suffixed name.
# NOTE(review): presumably gunzip resolves that name to the ".gz" file on its
# own — confirm.
os.rename(fname, fname + ".gz")
ret = sp.check_call(["gunzip", "-f", fname])
# NOTE(review): if `sp` is the stdlib subprocess module, check_call raises
# CalledProcessError on a non-zero exit status, so this branch would never
# run — confirm.
if ret != 0:
raise Exception(f"Error gunzipping genome {fname}")
# process genome (e.g. masking)
# The hook is optional: it is only called when this object defines it.
if hasattr(self, "_post_process_download"):
self._post_process_download(
name=name, localname=localname, out_dir=tmp_dir, mask=mask
)
# Optional sequence filtering: the FASTA is moved aside under a "_to_regex"
# suffix and the filtered result is written back under the original name.
if regex:
os.rename(fname, fname + "_to_regex")
infa = fname + "_to_regex"
outfa = fname
filter_fasta(infa, outfa, regex=regex, v=invert_match, force=True)
# Names present in the unfiltered input but absent from the filtered output.
# NOTE(review): Fasta(outfa) is constructed again for every key of the input;
# building its key collection once would avoid the repeated work.
not_included = [
k for k in Fasta(infa).keys() if k not in Fasta(outfa).keys()
]
# bgzip genome if requested
# Requested either through the `bgzip` value or the "bgzip" entry of `config`.
# NOTE(review): the same check_call caveat as for gunzip applies to the
# `ret != 0` test below.
if bgzip or config.get("bgzip"):
ret = sp.check_call(["bgzip", "-f", fname])
if ret != 0:
raise Exception(f"Error bgzipping {name}. Is tabix installed?")
# Track the compressed file name from here on.
fname += ".gz"
# transfer the genome from the tmpdir to the genome_dir
# Destination is <genomes_dir>/<localname>/<file name of fname>.
src = fname
dst = os.path.join(genomes_dir, localname, os.path.basename(fname))
shutil.move(src, dst)