How to use the bioframe.merge function in bioframe

To help you get started, we’ve selected a few bioframe examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: mirnylab/bioframe, tests/test_ops.py (view on GitHub)
bioframe.merge(df1),
        check_dtype=False,
        check_exact=True,
    )

    # test on=['chrom',...] argument
    df1 = pd.DataFrame(
        [
            ["chr1", 3, 8, "+", "cat", 5.5],
            ["chr1", 3, 8, "-", "dog", 6.5],
            ["chr1", 6, 10, "-", "cat", 6.5],
            ["chrX", 6, 10, "-", "cat", 6.5],
        ],
        columns=["chrom", "start", "end", "strand", "animal", "location"],
    )
    assert len(bioframe.merge(df1, on=None)) == 2
    assert len(bioframe.merge(df1, on=["strand"])) == 3
    assert len(bioframe.merge(df1, on=["strand", "location"])) == 3
    assert len(bioframe.merge(df1, on=["strand", "location", "animal"])) == 4
    d = """ chrom   start   end animal  n_intervals
        0   chr1    3   10  cat 2
        1   chr1    3   8   dog 1
        2   chrX    6   10  cat 1"""
    df = pd.read_csv(StringIO(d), sep=r"\s+")
    pd.testing.assert_frame_equal(
        df, bioframe.merge(df1, on=["animal"]), check_dtype=False,
    )
Source: mirnylab/bioframe, tests/test_ops.py (view on GitHub)
df1 = pd.DataFrame(
        [["chr1", 1, 5], ["chr1", 3, 8], ["chr1", 8, 10], ["chr1", 12, 14],],
        columns=["chrom", "start", "end"],
    )

    # the last interval does not overlap the first three with default min_dist=0
    assert (bioframe.merge(df1)["n_intervals"].values == np.array([3, 1])).all()

    # adjacent intervals are not clustered with min_dist=None
    assert (
        bioframe.merge(df1, min_dist=None)["n_intervals"].values == np.array([2, 1, 1])
    ).all()

    # all intervals part of one cluster
    assert (
        bioframe.merge(df1, min_dist=2)["n_intervals"].values == np.array([4])
    ).all()

    df1.iloc[0, 0] = "chrX"
    assert (
        bioframe.merge(df1, min_dist=None)["n_intervals"].values
        == np.array([1, 1, 1, 1])
    ).all()
    assert (
        bioframe.merge(df1, min_dist=0)["n_intervals"].values == np.array([2, 1, 1])
    ).all()

    # total number of intervals should equal length of original dataframe
    mock_df = mock_bioframe()
    assert np.sum(bioframe.merge(mock_df, min_dist=0)["n_intervals"].values) == len(
        mock_df
    )
Source: mirnylab/bioframe, tests/test_ops.py (view on GitHub)
def test_merge():
    """Check the ``n_intervals`` counts produced by ``bioframe.merge``.

    Four intervals on ``chr1`` are merged under several ``min_dist`` settings;
    the first interval is then relabelled to ``chrX`` and merged again.
    """
    rows = [
        ["chr1", 1, 5],
        ["chr1", 3, 8],
        ["chr1", 8, 10],
        ["chr1", 12, 14],
    ]
    frame = pd.DataFrame(rows, columns=["chrom", "start", "end"])

    # Default min_dist=0: the last interval does not overlap the first three,
    # so the expected counts are one cluster of three plus a singleton.
    default_counts = bioframe.merge(frame)["n_intervals"].values
    assert (default_counts == np.array([3, 1])).all()

    # min_dist=None: adjacent (merely touching) intervals are not clustered.
    strict_counts = bioframe.merge(frame, min_dist=None)["n_intervals"].values
    assert (strict_counts == np.array([2, 1, 1])).all()

    # min_dist=2: all intervals are part of one cluster.
    loose_counts = bioframe.merge(frame, min_dist=2)["n_intervals"].values
    assert (loose_counts == np.array([4])).all()

    # Relabel the first interval's chromosome; with min_dist=None every
    # interval is then expected to come back as its own cluster.
    frame.iloc[0, 0] = "chrX"
    relabelled_counts = bioframe.merge(frame, min_dist=None)["n_intervals"].values
    assert (relabelled_counts == np.array([1, 1, 1, 1])).all()
Source: mirnylab/bioframe, tests/test_ops.py (view on GitHub)
assert (
        bioframe.merge(df1, min_dist=2)["n_intervals"].values == np.array([4])
    ).all()

    df1.iloc[0, 0] = "chrX"
    assert (
        bioframe.merge(df1, min_dist=None)["n_intervals"].values
        == np.array([1, 1, 1, 1])
    ).all()
    assert (
        bioframe.merge(df1, min_dist=0)["n_intervals"].values == np.array([2, 1, 1])
    ).all()

    # total number of intervals should equal length of original dataframe
    mock_df = mock_bioframe()
    assert np.sum(bioframe.merge(mock_df, min_dist=0)["n_intervals"].values) == len(
        mock_df
    )

    # test consistency with pyranges
    pd.testing.assert_frame_equal(
        pyranges_to_bioframe(bioframe_to_pyranges(df1).merge(count=True)),
        bioframe.merge(df1),
        check_dtype=False,
        check_exact=True,
    )

    # test on=['chrom',...] argument
    df1 = pd.DataFrame(
        [
            ["chr1", 3, 8, "+", "cat", 5.5],
            ["chr1", 3, 8, "-", "dog", 6.5],
Source: mirnylab/bioframe, tests/test_ops.py (view on GitHub)
== np.array([1, 1, 1, 1])
    ).all()
    assert (
        bioframe.merge(df1, min_dist=0)["n_intervals"].values == np.array([2, 1, 1])
    ).all()

    # total number of intervals should equal length of original dataframe
    mock_df = mock_bioframe()
    assert np.sum(bioframe.merge(mock_df, min_dist=0)["n_intervals"].values) == len(
        mock_df
    )

    # test consistency with pyranges
    pd.testing.assert_frame_equal(
        pyranges_to_bioframe(bioframe_to_pyranges(df1).merge(count=True)),
        bioframe.merge(df1),
        check_dtype=False,
        check_exact=True,
    )

    # test on=['chrom',...] argument
    df1 = pd.DataFrame(
        [
            ["chr1", 3, 8, "+", "cat", 5.5],
            ["chr1", 3, 8, "-", "dog", 6.5],
            ["chr1", 6, 10, "-", "cat", 6.5],
            ["chrX", 6, 10, "-", "cat", 6.5],
        ],
        columns=["chrom", "start", "end", "strand", "animal", "location"],
    )
    assert len(bioframe.merge(df1, on=None)) == 2
    assert len(bioframe.merge(df1, on=["strand"])) == 3
Source: mirnylab/bioframe, tests/test_ops.py (view on GitHub)
["chr1", 6, 10, "-", "cat", 6.5],
            ["chrX", 6, 10, "-", "cat", 6.5],
        ],
        columns=["chrom", "start", "end", "strand", "animal", "location"],
    )
    assert len(bioframe.merge(df1, on=None)) == 2
    assert len(bioframe.merge(df1, on=["strand"])) == 3
    assert len(bioframe.merge(df1, on=["strand", "location"])) == 3
    assert len(bioframe.merge(df1, on=["strand", "location", "animal"])) == 4
    d = """ chrom   start   end animal  n_intervals
        0   chr1    3   10  cat 2
        1   chr1    3   8   dog 1
        2   chrX    6   10  cat 1"""
    df = pd.read_csv(StringIO(d), sep=r"\s+")
    pd.testing.assert_frame_equal(
        df, bioframe.merge(df1, on=["animal"]), check_dtype=False,
    )