How to use the libratom.lib.pff.PffArchive class in libratom

To help you get started, we’ve selected a few libratom examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github libratom / libratom / tests / unit / test_libratom.py View on Github external
def test_get_message_by_id(sample_pst_file):
    """Check that a lookup by identifier returns a matching message.

    For every message in the archive, the message fetched through
    ``get_message_by_id`` must carry the same identifier and produce the
    same formatted output as the message it was looked up from.
    """
    with PffArchive(sample_pst_file) as pst:
        for expected in pst.messages():
            found = pst.get_message_by_id(expected.identifier)

            # Same identifier ...
            assert found.identifier == expected.identifier

            # ... and same formatted content
            found_text = pst.format_message(found)
            expected_text = pst.format_message(expected)
            assert found_text == expected_text
github libratom / libratom / tests / unit / test_libratom.py View on Github external
def test_extract_message_attachments(enron_dataset_part002):
    """Check 3 known attachments, to validate the attachment extraction process.

    Every attachment of one known message is read into memory, written to a
    temporary directory, and its MD5 digest is compared with the expected one.
    """
    import hashlib  # local import so the checksum check is self-contained

    # Expected MD5 hex digests.
    # NOTE(review): keys are presumably attachment identifiers -- confirm
    # against the dataset if the lookup below raises KeyError.
    digests = {
        47685: "d48232614b01e56014293854abbb5db3",
        47717: "cf8be7cd3e6e14307972246e2942c9d1",
        47749: "081e6b66dc89671ff6460adac94dbab1",
    }

    with PffArchive(
        next(enron_dataset_part002.glob("*.pst"))
    ) as archive, TemporaryDirectory() as tmp_dir:

        # Get message by ID
        node = archive.tree.get_node(2128676)
        message = node.data

        for att in message.attachments:
            # Read attachment as bytes
            rbuf = att.read_buffer(att.size)

            # Save attachment
            filepath = (
                Path(tmp_dir) / f"attachment_{message.identifier}_{att.identifier}"
            )
            filepath.write_bytes(rbuf)

            # Confirm checksum: without this comparison the test asserts
            # nothing and the expected digests above are never used
            assert hashlib.md5(rbuf).hexdigest() == digests[att.identifier]
github libratom / libratom / tests / unit / test_libratom.py View on Github external
def test_pffarchive_iterate_over_messages(sample_pst_file, bfs):
    """Iterate over the archive's messages and check each has a plain text body.

    ``bfs`` is passed straight through to ``PffArchive.messages``.
    """
    with PffArchive(sample_pst_file) as pst:
        for msg in pst.messages(bfs=bfs):
            assert msg.plain_text_body
github libratom / libratom / tests / unit / test_libratom.py View on Github external
def test_pffarchive_load_from_invalid_type():
    """Constructing a ``PffArchive`` from an integer must raise ``TypeError``."""
    with pytest.raises(TypeError):
        PffArchive(1)
github libratom / libratom / tests / unit / test_libratom.py View on Github external
def test_pffarchive_load_from_file_object(sample_pst_file):
    """Load an archive from an open binary file object and check the message count."""
    with sample_pst_file.open(mode="rb") as pst_stream:
        with PffArchive(pst_stream) as pst:
            all_messages = list(pst.messages())
            assert len(all_messages) == 2668
github libratom / libratom / tests / load / test_pst.py View on Github external
def test_extract_enron_messages(enron_dataset):
    """Format every message of every PST file found under the dataset directory.

    An exception raised while processing a file is logged and the loop moves
    on to the next file. A summary with the number of formatted messages and
    the cumulated size of the fully processed files is logged at the end.
    """
    message_count = 0
    bytes_processed = 0

    for pst_path in enron_dataset.glob("**/*.pst"):
        try:
            with PffArchive(pst_path) as pst:
                for msg in pst.messages():
                    # The formatted string itself is not needed
                    pst.format_message(msg)

                    # One more message handled
                    message_count += 1

            # Only reached when the whole file went through without error
            bytes_processed += pst_path.stat().st_size

        except Exception as err:  # pylint: disable=broad-except
            logger.info(f"Inspecting {pst_path}")
            logger.exception(err)

    readable_size = humanfriendly.format_size(bytes_processed)
    logger.info(
        f"Extracted {message_count} messages from a total of {readable_size}"
    )
github libratom / libratom / tests / unit / test_libratom.py View on Github external
def test_get_message_body(message, body_type):
    """The second item returned by ``get_message_body`` must be ``body_type``."""
    body_info = PffArchive().get_message_body(message)
    assert body_info[1] is body_type
github libratom / libratom / tests / unit / test_libratom.py View on Github external
def test_get_attachment_metadata(mock_cls):
    """Metadata for an attachment built from ``mock_cls`` must have no MIME type."""
    attachment = mock_cls(name="foo", size="0")
    message = MagicMock(identifier=123, attachments=[attachment])

    metadata = PffArchive().get_attachment_metadata(message)
    assert metadata[0].mime_type is None
github libratom / libratom / tests / unit / test_libratom.py View on Github external
def test_pffarchive_format_message(enron_dataset_part004, empty_message):
    """Check that formatted messages can be parsed by python's email module.

    Every message of every PST file in the dataset directory is formatted and
    fed to ``email.message_from_string``. Formatting ``empty_message`` must
    yield an empty string.
    """
    for pst_file in enron_dataset_part004.glob("*.pst"):
        with PffArchive(pst_file) as archive:
            for message in archive.messages():
                # Format once and reuse the result, so the text that is parsed
                # and the text checked for emptiness are the same string
                formatted = archive.format_message(message)

                # The assertion here doesn't matter as much as
                # not getting an exception from python's email parsing module
                assert (
                    email.message_from_string(formatted, policy=policy.default)
                    or not formatted
                )

    assert PffArchive.format_message(empty_message) == ""