Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_repr_object_to_unicode():
    """``py3k.to_unicode`` of an instance built around ``__repr__`` equals UNICODE_STRING."""
    # The value handed to the instance builder depends on the interpreter:
    # the unicode constant on Python 3, the bytes constant otherwise.
    if py3k.PY3:
        repr_value = UNICODE_STRING
    else:
        repr_value = BYTES_STRING

    subject = _build_test_instance("__repr__", repr_value)
    converted = py3k.to_unicode(subject)

    _assert_strings_equal(UNICODE_STRING, converted)
def load_resource(path):
    """Return the content of the resource at *path*, converted by ``to_unicode``.

    *path* is first passed through ``expand_resource_path``; the resulting
    file is read in binary mode.
    """
    resource_path = expand_resource_path(path)
    with open(resource_path, "rb") as handle:
        raw_bytes = handle.read()
    return to_unicode(raw_bytes)
def test_title_method_with_empty_document():
    """``title_method`` yields no sentences for a document built with no arguments."""
    edmundson = EdmundsonSummarizer()
    edmundson.null_words = ("ba", "bb", "bc",)

    selected = edmundson.title_method(build_document(), 10)

    assert [to_unicode(sentence) for sentence in selected] == []
def test_to_unicode():
    """``compat.to_unicode`` applied to a fresh ``O`` instance equals UNICODE_STRING."""
    subject = O()
    converted = compat.to_unicode(subject)
    _assert_strings_equal(UNICODE_STRING, converted)
def test_data_to_unicode():
    """``py3k.to_unicode`` applied to BYTES_STRING equals UNICODE_STRING."""
    converted = py3k.to_unicode(BYTES_STRING)
    _assert_strings_equal(UNICODE_STRING, converted)
def test_headings():
    """Only the ``#``-prefixed line is expected among the document's headings."""
    # The literal's lines stay flush-left so its content is unchanged.
    source_text = """
Nějaký muž šel kolem naší zahrady
Nějaký jiný muž šel kolem vaší zahrady
# Nová myšlenka
Už už abych taky šel
"""
    parsed = build_document_from_string(source_text)

    heading_texts = [to_unicode(heading) for heading in parsed.headings]

    assert heading_texts == ["Nová myšlenka"]
def test_sentences():
    """The three lines without a ``#`` prefix are expected as sentences, in order."""
    # The literal's lines stay flush-left so its content is unchanged.
    source_text = """
Nějaký muž šel kolem naší zahrady
Nějaký jiný muž šel kolem vaší zahrady
# Nová myšlenka
Už už abych taky šel
"""
    expected = [
        "Nějaký muž šel kolem naší zahrady",
        "Nějaký jiný muž šel kolem vaší zahrady",
        "Už už abych taky šel",
    ]
    parsed = build_document_from_string(source_text)

    sentence_texts = [to_unicode(sentence) for sentence in parsed.sentences]

    assert sentence_texts == expected
def test_sentences_in_right_order():
    """``RandomSummarizer`` asked for 4 sentences returns the 3 available, in document order."""
    # The literal's lines stay flush-left so its content is unchanged.
    source_text = """
# Heading one
First sentence.
Second sentence.
Third sentence.
"""
    expected_texts = ("First sentence.", "Second sentence.", "Third sentence.")
    parsed = build_document_from_string(source_text)

    summarize = RandomSummarizer()
    picked = summarize(parsed, 4)

    assert len(picked) == 3
    # Compare by index, one position at a time, against the expected order.
    for position, expected_text in enumerate(expected_texts):
        assert to_unicode(picked[position]) == expected_text
def __init__(self, text, tokenizer, is_heading=False):
    """Store the stripped unicode text, the tokenizer and the heading flag.

    *text* is converted with ``to_unicode`` and stripped of surrounding
    whitespace; *is_heading* is coerced to ``bool``.
    """
    unicode_text = to_unicode(text)
    self._text = unicode_text.strip()
    self._tokenizer = tokenizer
    heading_flag = bool(is_heading)
    self._is_heading = heading_flag
def __init__(self, text, tokenizer):
    """Initialise the base class with *tokenizer* and keep the stripped unicode text."""
    # Explicit two-argument super() form is kept as in the original.
    super(PlaintextParser, self).__init__(tokenizer)
    unicode_text = to_unicode(text)
    self._text = unicode_text.strip()