Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_to_tagged_text_nosents(self):
    """Check the shape of ``to_tagged_text()`` output when the parser is disabled.

    The result is expected to be a list holding exactly one inner list, whose
    first item is a tuple that starts with a unicode string.
    """
    nlp = cache.load_spacy_lang("en")
    # NOTE(review): the original indentation was lost; only the doc creation
    # needs the parser disabled, so only that statement is kept in the block.
    with nlp.disable_pipes("parser"):
        doc = nlp("This is sentence #1. This is sentence #2.")
    tagged = doc._.to_tagged_text()
    assert isinstance(tagged, list)
    assert len(tagged) == 1
    only_group = tagged[0]
    assert isinstance(only_group, list)
    first_item = only_group[0]
    assert isinstance(first_item, tuple)
    assert isinstance(first_item[0], compat.unicode_)
def spacy_doc():
    """Return a spaCy doc built from a four-sentence English sample text.

    NOTE(review): ``test_to_gensim`` takes a ``spacy_doc`` argument, so this is
    presumably a pytest fixture; no decorator is visible here -- confirm.
    """
    nlp = cache.load_spacy_lang("en")
    sample = """
The unit tests aren't going well.
I love Python, but I don't love backwards incompatibilities.
No programmers were permanently damaged for textacy's sake.
Thank God for Stack Overflow."""
    # strip() removes the newline that follows the opening triple quote.
    return nlp(sample.strip())
def test_bad_name(self):
    """Loading an unknown language name must fail.

    Plain loads of "unk" and "un" are expected to raise OSError/IOError, while
    loading "un" with ``allow_blank=True`` is expected to raise ImportError.
    """
    unknown_names = ("unk", "un")
    for unknown in unknown_names:
        with pytest.raises((OSError, IOError)):
            cache.load_spacy_lang(unknown)
    # NOTE(review): the original indentation was lost; this check does not use
    # the loop variable, so it is placed after the loop.
    with pytest.raises(ImportError):
        cache.load_spacy_lang("un", allow_blank=True)
def test_to_gensim(spacy_doc):
    """Check the types nested inside the result of ``export.docs_to_gensim``.

    The first element must be a unicode string; the second must be a list of
    lists of tuples whose first two members are integers.
    """
    nlp = cache.load_spacy_lang("en")
    filters = {"filter_stops": True, "filter_punct": True, "filter_nums": True}
    result = export.docs_to_gensim([spacy_doc], nlp.vocab, **filters)
    assert isinstance(result[0], compat.unicode_)
    outer = result[1]
    assert isinstance(outer, list)
    inner = outer[0]
    assert isinstance(inner, list)
    pair = inner[0]
    assert isinstance(pair, tuple)
    # Checked one at a time, in the same order as the original compound assert.
    assert isinstance(pair[0], compat.int_types)
    assert isinstance(pair[1], compat.int_types)
def test_bad_name(self):
    """Verify that invalid language names are rejected when loading.

    "unk" and "un" should each raise OSError/IOError on a plain load; "un"
    with ``allow_blank=True`` should raise ImportError instead.
    """
    for bad_name in ("unk", "un"):
        with pytest.raises((OSError, IOError)):
            cache.load_spacy_lang(bad_name)
    # NOTE(review): placed outside the loop because it is independent of the
    # loop variable; the original indentation was not preserved.
    with pytest.raises(ImportError):
        cache.load_spacy_lang("un", allow_blank=True)
def doc(text):
    """Build a spaCy doc from ``text`` using the cached "en" language pipeline."""
    english = cache.load_spacy_lang("en")
    return make_spacy_doc(text, lang=english)