Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
assert np.sum(pd.isna(b["index_2"].values)) == 2
b = bioframe.overlap(
df1,
df2,
on=None,
how="left",
cols1=("chrom1", "start", "end"),
cols2=("chrom2", "start2", "end2"),
return_index=True,
return_input=False,
)
assert np.sum(pd.isna(b["index_2"].values)) == 0
### test overlap 'left', 'outer', and 'right'
b = bioframe.overlap(
df1,
df2,
on=None,
how="outer",
cols1=("chrom1", "start", "end"),
cols2=("chrom2", "start2", "end2"),
)
assert len(b) == 3
# Outer overlap of df1 and df2 with on=["animal"]; the test expects five rows.
# cols1/cols2 are each passed exactly once: repeating a keyword argument in a
# call is a SyntaxError ("keyword argument repeated") and would stop the whole
# module from being compiled.
b = bioframe.overlap(
    df1,
    df2,
    on=["animal"],
    how="outer",
    cols1=("chrom1", "start", "end"),
    cols2=("chrom2", "start2", "end2"),
)
assert len(b) == 5
b = bioframe.overlap(
df1,
df2,
on=["animal"],
how="inner",
cols1=("chrom1", "start", "end"),
cols2=("chrom2", "start2", "end2"),
)
assert len(b) == 0
b = bioframe.overlap(
df1,
df2,
on=["animal"],
how="right",
cols1=("chrom1", "start", "end"),
cols2=("chrom2", "start2", "end2"),
)
assert len(b) == 2
b = bioframe.overlap(
df1,
df2,
on=["animal"],
how="left",
cols1=("chrom1", "start", "end"),
cols2=("chrom2", "start2", "end2"),
### test overlap on= [] ###
df1 = pd.DataFrame(
[
["chr1", 8, 12, "+", "cat"],
["chr1", 8, 12, "-", "cat"],
["chrX", 1, 8, "+", "cat"],
],
columns=["chrom1", "start", "end", "strand", "animal"],
)
df2 = pd.DataFrame(
[["chr1", 6, 10, "+", "dog"], ["chrX", 7, 10, "-", "dog"]],
columns=["chrom2", "start2", "end2", "strand", "animal"],
)
b = bioframe.overlap(
df1,
df2,
on=["animal"],
how="left",
cols1=("chrom1", "start", "end"),
cols2=("chrom2", "start2", "end2"),
return_index=True,
return_input=False,
)
assert np.sum(pd.isna(b["index_2"].values)) == 3
b = bioframe.overlap(
df1,
df2,
on=["strand"],
how="left",
def test_overlap():
### test consistency of overlap(how='inner') with pyranges.join ###
### note does not test overlap_start or overlap_end columns of bioframe.overlap
df1 = mock_bioframe()
df2 = mock_bioframe()
assert df1.equals(df2) == False
p1 = bioframe_to_pyranges(df1)
p2 = bioframe_to_pyranges(df2)
pp = pyranges_overlap_to_bioframe(p1.join(p2, how=None))[
["chrom_1", "start_1", "end_1", "chrom_2", "start_2", "end_2"]
]
bb = bioframe.overlap(df1, df2, how="inner")[
["chrom_1", "start_1", "end_1", "chrom_2", "start_2", "end_2"]
]
pp = pp.sort_values(
["chrom_1", "start_1", "end_1", "chrom_2", "start_2", "end_2"],
ignore_index=True)
bb = bb.sort_values(
["chrom_1", "start_1", "end_1", "chrom_2", "start_2", "end_2"],
ignore_index=True)
pd.testing.assert_frame_equal(bb, pp, check_dtype=False, check_exact=True)
print("overlap elements agree")
### test overlap on= [] ###
df1 = pd.DataFrame(
[
["chr1", 8, 12, "+", "cat"],
["chr1", 8, 12, "-", "cat"],