Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_binnify():
    """Check how many bins ``bioframe.binnify`` yields at three bin sizes.

    The assertions cover a bin size equal to the largest chromosome, one
    equal to the smallest chromosome, and a bin size of 1.
    """
    chromsizes = bioframe.read_chromsizes(testdir+"/test_data/test.chrom.sizes")
    assert len(chromsizes) == 2

    # Bin size equal to the largest chromosome: as many bins as chromosomes.
    largest = int(np.max(chromsizes.values))
    bins_at_largest = bioframe.binnify(chromsizes, largest)
    assert len(bins_at_largest) == len(chromsizes)

    # Bin size equal to the smallest chromosome: one extra bin is expected.
    smallest = int(np.min(chromsizes.values))
    bins_at_smallest = bioframe.binnify(chromsizes, smallest)
    assert len(bins_at_smallest) == (len(chromsizes) + 1)

    # Bin size of 1: the bin count equals the sum of the chromosome sizes.
    single_bp_bins = bioframe.binnify(chromsizes, 1)
    assert len(single_bp_bins) == np.sum(chromsizes.values)
def test_frac_gc():
    """Check ``bioframe.frac_gc`` and ``bioframe.frac_mapped`` on the test FASTA.

    The test is skipped when ``pysam`` cannot be imported.
    """
    pytest.importorskip("pysam")
    chromsizes = bioframe.read_chromsizes(testdir+"/test_data/test.chrom.sizes")
    fasta_records = bioframe.load_fasta(testdir+"/test_data/test.fa")

    # Boolean mask selecting the 1 bp bins whose mapped fraction is zero.
    unmapped_bp = (
        0
        == bioframe.frac_mapped(
            bioframe.binnify(chromsizes, 1), fasta_records, return_input=False
        ).values
    )
    # With mapped_only=True, every one of those bins must come back as NaN.
    assert np.isnan(
        bioframe.frac_gc(
            bioframe.binnify(chromsizes, 1),
            fasta_records,
            return_input=False,
            mapped_only=True,
        ).values[unmapped_bp]
    ).all()

    ## mapped_only=True should ignore N or return np.nan if interval only contains N
    # np.testing.assert_equal is used here because the expected array holds NaN.
    np.testing.assert_equal(
        np.array([0.5, 0.5, np.nan]),
        bioframe.frac_gc(
            bioframe.binnify(chromsizes, 5),
            fasta_records,
            return_input=False,
            mapped_only=True,
        ).values,
    )

    ## mapped_only=False should count N as zero
    assert (
        np.array([0.4, 0.4, 0])
        == bioframe.frac_gc(
            bioframe.binnify(chromsizes, 5),
            fasta_records,
            return_input=False,
            mapped_only=False,
        ).values
    ).all()
    assert (
        np.array([0.4, 2 / 7])
        == bioframe.frac_gc(
            bioframe.binnify(chromsizes, 7),
            fasta_records,
            return_input=False,
            mapped_only=False,
        ).values
    ).all()

    # Expected mapped fraction per bin at bin sizes 5 and 7.
    expected_mapped = np.array([0.8, 0.8, 0])
    assert (
        expected_mapped
        == bioframe.frac_mapped(
            bioframe.binnify(chromsizes, 5), fasta_records, return_input=False
        ).values
    ).all()
    expected_mapped = np.array([0.8, 4 / 7])
    assert (
        expected_mapped
        == bioframe.frac_mapped(
            bioframe.binnify(chromsizes, 7), fasta_records, return_input=False
        ).values
    ).all()
def test_binnify():
    """Verify the bin counts produced by ``bioframe.binnify`` for several bin sizes."""
    chromsizes = bioframe.read_chromsizes(testdir+"/test_data/test.chrom.sizes")
    assert len(chromsizes) == 2

    # (bin size, expected number of bins) pairs, checked in order.
    cases = (
        (int(np.max(chromsizes.values)), len(chromsizes)),
        (int(np.min(chromsizes.values)), len(chromsizes) + 1),
        (1, np.sum(chromsizes.values)),
    )
    for binsize, expected_count in cases:
        assert len(bioframe.binnify(chromsizes, binsize)) == expected_count
def test_frac_gc():
    """Check ``bioframe.frac_gc`` and ``bioframe.frac_mapped`` on the test FASTA.

    The test is skipped when ``pysam`` cannot be imported.
    """
    pytest.importorskip("pysam")
    chromsizes = bioframe.read_chromsizes(testdir+"/test_data/test.chrom.sizes")
    fasta_records = bioframe.load_fasta(testdir+"/test_data/test.fa")

    # Boolean mask selecting the 1 bp bins whose mapped fraction is zero.
    unmapped_bp = (
        0
        == bioframe.frac_mapped(
            bioframe.binnify(chromsizes, 1), fasta_records, return_input=False
        ).values
    )
    # With mapped_only=True, every one of those bins must come back as NaN.
    assert np.isnan(
        bioframe.frac_gc(
            bioframe.binnify(chromsizes, 1),
            fasta_records,
            return_input=False,
            mapped_only=True,
        ).values[unmapped_bp]
    ).all()

    ## mapped_only=True should ignore N or return np.nan if interval only contains N
    # np.testing.assert_equal is used here because the expected array holds NaN.
    np.testing.assert_equal(
        np.array([0.5, 0.5, np.nan]),
        bioframe.frac_gc(
            bioframe.binnify(chromsizes, 5),
            fasta_records,
            return_input=False,
            mapped_only=True,
        ).values,
    )

    # Expected mapped fraction per bin at bin sizes 1, 5 and 7.
    expected_mapped = np.array(
        [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]
    )
    assert (
        expected_mapped
        == bioframe.frac_mapped(
            bioframe.binnify(chromsizes, 1), fasta_records, return_input=False
        ).values
    ).all()
    expected_mapped = np.array([0.8, 0.8, 0])
    assert (
        expected_mapped
        == bioframe.frac_mapped(
            bioframe.binnify(chromsizes, 5), fasta_records, return_input=False
        ).values
    ).all()
    expected_mapped = np.array([0.8, 4 / 7])
    assert (
        expected_mapped
        == bioframe.frac_mapped(
            bioframe.binnify(chromsizes, 7), fasta_records, return_input=False
        ).values
    ).all()

    assert (
        np.array([0.5, 0.5])
        == bioframe.frac_gc(
            bioframe.binnify(chromsizes, 7),
            fasta_records,
            return_input=False,
            mapped_only=True,
        ).values
    ).all()

    ## mapped_only=False should count N as zero
    assert (
        np.array([0.4, 0.4, 0])
        == bioframe.frac_gc(
            bioframe.binnify(chromsizes, 5),
            fasta_records,
            return_input=False,
            mapped_only=False,
        ).values
    ).all()
    assert (
        np.array([0.4, 2 / 7])
        == bioframe.frac_gc(
            bioframe.binnify(chromsizes, 7),
            fasta_records,
            return_input=False,
            mapped_only=False,
        ).values
    ).all()
def test_frac_mapped():
pytest.importorskip("pysam")
chromsizes = bioframe.read_chromsizes(testdir+"/test_data/test.chrom.sizes")
fasta_records = bioframe.load_fasta(testdir+"/test_data/test.fa")
unmapped = np.array([1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0])
assert (
unmapped
== bioframe.frac_mapped(
bioframe.binnify(chromsizes, 1), fasta_records, return_input=False
).values
).all()
unmapped = np.array([0.8, 0.8, 0])
assert (
unmapped
== bioframe.frac_mapped(
bioframe.binnify(chromsizes, 5), fasta_records, return_input=False
).values
).all()
unmapped = np.array([0.8, 4 / 7])
assert (
unmapped
== bioframe.frac_mapped(
bioframe.binnify(chromsizes, 7), fasta_records, return_input=False