Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_attachments_mime_type_validation(enron_dataset, mock_progress_callback):
    """Check that every attachment yielded by ``get_messages`` has a known MIME type.

    Each result is inspected for an ``"attachments"`` entry; every attachment
    found must report a ``mime_type`` that is either listed in ``MIME_TYPES``
    or is one of the obsolete types tolerated below.

    Args:
        enron_dataset: Fixture passed to ``get_set_of_files`` to obtain the
            files to scan.
        mock_progress_callback: Fixture forwarded to ``get_messages`` as its
            ``progress_callback``.
    """
    # Some enron files have these obsolete attachment types
    obsolete_mime_types = (
        "application/msexcell",
        "application/mspowerpoint",
    )
    files = get_set_of_files(enron_dataset)
    for res in get_messages(files, progress_callback=mock_progress_callback):
        # A missing or empty "attachments" entry simply means nothing to check.
        for attachment in res.get("attachments") or ():
            mime_type = attachment.mime_type
            # Single assertion instead of catching AssertionError: a failure
            # now reports the offending value against both accepted sets.
            assert mime_type in MIME_TYPES or mime_type in obsolete_mime_types
def test_get_messages_with_bad_files(enron_dataset_part044, mock_progress_callback):
    """Count the results ``get_messages`` yields for the part044 ``*.pst`` files.

    Every yielded result must be truthy, and exactly 558 results are expected
    in total.
    NOTE(review): the test name suggests this fixture contains unreadable
    archives that ``get_messages`` is expected to survive -- confirm against
    the fixture definition.

    Args:
        enron_dataset_part044: Fixture exposing ``glob``; used to locate the
            ``*.pst`` files.
        mock_progress_callback: Fixture forwarded to ``get_messages`` as its
            ``progress_callback``.
    """
    pst_files = enron_dataset_part044.glob("*.pst")
    messages = get_messages(
        files=pst_files,
        progress_callback=mock_progress_callback,
    )
    yielded = 0
    for message in messages:
        yielded += 1
        assert message
    assert yielded == 558
# Load the file_report table for local lookup
_file_reports = session.query(FileReport).all() # noqa: F841
# Start of multiprocessing
with multiprocessing.Pool(processes=jobs, initializer=worker_init) as pool:
logger.debug(f"Starting pool with {pool._processes} processes")
new_entities = []
msg_count = 0
try:
for msg_count, worker_output in enumerate(
pool.imap_unordered(
process_message,
get_messages(
files,
spacy_model=spacy_model,
progress_callback=processing_update_progress,
include_message_contents=include_message_contents,
with_headers=include_message_contents,
**kwargs,
),
chunksize=RATOM_MSG_BATCH_SIZE,
),
start=1,
):
# Unpack worker job output
res, error = worker_output
if error:
Store full archive report in the DB
"""
# Confirm environment settings
for key, value in get_ratom_settings():
logger.debug(f"{key}: {value}")
# Default progress callback to no-op
update_progress = progress_callback or (lambda *_, **__: None)
# Load the file_report table for local lookup
_file_reports = session.query(FileReport).all() # noqa: F841
try:
for msg_info in get_messages(
files,
progress_callback=update_progress,
with_content=include_message_contents,
with_headers=include_message_contents,
):
# Extract results
message_id = msg_info.pop("message_id")
filepath = msg_info.pop("filepath")
attachments = msg_info.pop("attachments")
if include_message_contents:
msg_info["body"] = cleanup_message_body(
msg_info["body"], msg_info.pop("body_type")
)