Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
BodyType.RTF,
"foo",
),
("<table><tbody><tr><td>foo</td></tr></tbody></table>", BodyType.HTML, "foo"),
],
)
def test_cleanup_message_body(body, body_type, result):
    """Check that cleaning ``body`` as ``body_type`` yields ``result``."""
    cleaned = cleanup_message_body(body, body_type)
    assert cleaned == result
(Mock(), BodyType.PLAIN),
(Mock(plain_text_body=None), BodyType.RTF),
(Mock(plain_text_body=None, rtf_body=None), BodyType.HTML),
(Mock(plain_text_body=None, rtf_body=None, html_body=None), None),
],
)
def test_get_message_body(message, body_type):
    """Check that the body type reported for ``message`` is ``body_type``."""
    archive = PffArchive()
    extracted = archive.get_message_body(message)
    # The second element of the returned pair is the detected body type.
    assert extracted[1] is body_type
def cleanup_message_body(
    body: AnyStr, body_type: BodyType, size_threshold: int = 0
) -> str:
    """Reduce a raw message body to text, stripping markup and embedded payloads.

    Args:
        body: The raw message body; it is passed through ``decode`` before
            any other processing.
        body_type: The format of ``body``. RTF bodies go through
            ``rtf_to_text``, HTML bodies are reduced to their text content
            with BeautifulSoup, and any other value is left as decoded.
        size_threshold: Each stripping pass only runs while the body is
            longer than this many characters. With the default of 0, every
            pass runs on any non-empty body.

    Returns:
        The cleaned-up body.
    """
    # Decode first
    body = decode(body)

    if body_type is BodyType.RTF:
        # Strip formatting
        body = rtf_to_text(body)
    elif body_type is BodyType.HTML:
        # Strip markup
        body = BeautifulSoup(body, "html.parser").get_text()

    # Strip what might be lines of base64 encoded data: 76 or more base64
    # characters at the start of a line, optionally preceded by quote
    # markers (">") and whitespace.
    if len(body) > size_threshold:
        body = re.sub(r"^[>\s]*[A-Za-z0-9+/]{76,}\n?", "", body, flags=re.MULTILINE)

    # Strip uuencoded attachments ("begin <mode> ... end")
    if len(body) > size_threshold:
        body = re.sub(r"begin [0-7]{3}.*?end", "", body, flags=re.DOTALL)

    # Strip notes/calendar data. The closing tag is matched through a
    # backreference to the opening tag name; without it the lazy ``.*?``
    # matches nothing and only the opening tag would be removed.
    if len(body) > size_threshold:
        body = re.sub(
            r"<(OMNI|omni)([^>]*?)>.*?</\1>(\s)*", "", body, flags=re.DOTALL
        )

    return body
def get_message_body(message: pypff.message) -> Tuple[str, Optional[BodyType]]:
    """Pick the first non-empty body of a pypff message and report its type.

    Args:
        message: A pypff.message object

    Returns:
        The body and its body type; an empty string and None when the
        message has no plain text, RTF or HTML body.
    """
    # Attribute names paired with their body type, in order of preference:
    # plain text first, then RTF, then HTML.
    candidates = (
        ("plain_text_body", BodyType.PLAIN),
        ("rtf_body", BodyType.RTF),
        ("html_body", BodyType.HTML),
    )

    for attribute, kind in candidates:
        content = getattr(message, attribute)
        if content:
            return content, kind

    return "", None
def get_message_body(message: pypff.message) -> Tuple[str, Optional[BodyType]]:
    """Return a message's body together with the format it was found in.

    Args:
        message: A pypff.message object

    Returns:
        A (body, body type) pair. Plain text is preferred over RTF, and RTF
        over HTML; when none of them is set the pair is ("", None).
    """
    plain = message.plain_text_body
    if plain:
        return plain, BodyType.PLAIN

    # No plain text available, fall back to RTF
    rtf = message.rtf_body
    if rtf:
        return rtf, BodyType.RTF

    # Last resort: the HTML body
    html = message.html_body
    if html:
        return html, BodyType.HTML

    return "", None
def get_message_body(message: Message) -> Tuple[str, Optional[BodyType]]:
    """
    Formats the message with MboxArchive.format_message and returns the
    result along with the plain body type
    """
    formatted = MboxArchive.format_message(message)
    return formatted, BodyType.PLAIN
def cleanup_message_body(
body: AnyStr, body_type: BodyType, size_threshold: int = 0
) -> str:
# Decode first
body = decode(body)
if body_type is BodyType.RTF:
# Strip formatting
body = rtf_to_text(body)
elif body_type is BodyType.HTML:
# Strip markup
body = BeautifulSoup(body, "html.parser").get_text()
# Strip what might be lines of base64 encoded data
if len(body) > size_threshold:
body = re.sub(r"^[>\s]*[A-Za-z0-9+/]{76,}\n?", "", body, flags=re.MULTILINE)
# Strip uuencoded attachments
if len(body) > size_threshold:
body = re.sub(r"begin [0-7]{3}.*?end", "", body, flags=re.DOTALL)
# Strip notes/calendar data