# Shared imports for the spaCy test snippets below.
import pytest

from spacy.errors import ModelsWarning
from spacy.tokens import Doc
from spacy.vocab import Vocab


def test_issue1537():
    """Test that Span.as_doc() doesn't segfault."""
    string = "The sky is blue . The man is pink . The dog is purple ."
    doc = Doc(Vocab(), words=string.split())
    doc[0].sent_start = True
    for word in doc[1:]:
        if word.nbor(-1).text == ".":
            word.sent_start = True
        else:
            word.sent_start = False
    sents = list(doc.sents)
    sent0 = sents[0].as_doc()
    sent1 = sents[1].as_doc()
    assert isinstance(sent0, Doc)
    assert isinstance(sent1, Doc)

def test_create(self):
    # Constructing an empty Vocab should not raise.
    vocab = Vocab()

def test_issue3199():
    """Test that Span.noun_chunks works correctly if no noun chunks iterator
    is available. To make this test future-proof, we're constructing a Doc
    with a new Vocab here and setting is_parsed to make sure the noun chunks run.
    """
    doc = Doc(Vocab(), words=["This", "is", "a", "sentence"])
    doc.is_parsed = True
    assert list(doc[0:3].noun_chunks) == []

def test_get_lexeme(self):
    vocab = Vocab()
    lexeme = vocab[u'Hello']
    self.assertEqual(lexeme.orth_, u'Hello')

def test_issue743():
    doc = Doc(Vocab(), ["hello", "world"])
    token = doc[0]
    s = set([token])
    items = list(s)
    assert items[0] is token

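# The serialization tests below use make_tempdir, which these snippets do not
# define; in spaCy's suite it comes from the test utilities. A minimal
# compatible sketch, assuming it only needs to yield a pathlib.Path to a
# temporary directory and remove it afterwards:
from contextlib import contextmanager
from pathlib import Path
import shutil
import tempfile


@contextmanager
def make_tempdir():
    """Yield a temporary directory as a Path and delete it on exit."""
    tmp_dir = Path(tempfile.mkdtemp())
    try:
        yield tmp_dir
    finally:
        shutil.rmtree(str(tmp_dir))
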
def test_lookups_to_from_disk_via_vocab():
    table_name = "test"
    vocab = Vocab()
    vocab.lookups.add_table(table_name, {"foo": "bar", "hello": "world"})
    assert len(vocab.lookups) == 1
    assert table_name in vocab.lookups
    with make_tempdir() as tmpdir:
        vocab.to_disk(tmpdir)
        new_vocab = Vocab()
        new_vocab.from_disk(tmpdir)
    assert len(new_vocab.lookups) == 1
    assert table_name in new_vocab.lookups
    table = new_vocab.lookups.get_table(table_name)
    assert len(table) == 2
    assert table["hello"] == "world"

def test_doc_api_similarity_match():
    doc = Doc(Vocab(), words=["a"])
    assert doc.similarity(doc[0]) == 1.0
    assert doc.similarity(doc.vocab["a"]) == 1.0
    doc2 = Doc(doc.vocab, words=["a", "b", "c"])
    with pytest.warns(ModelsWarning):
        assert doc.similarity(doc2[:1]) == 1.0
        assert doc.similarity(doc2) == 0.0

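# test_serialize_vocab_roundtrip_disk below expects strings1 and strings2 from
# the test harness; in spaCy's suite they come from a pytest parametrization.
# A hypothetical stand-in using fixtures -- the word lists are illustrative
# assumptions, not the originals:
@pytest.fixture(params=[[], ["rats", "are", "cute"]])
def strings1(request):
    return request.param


@pytest.fixture(params=[[], ["i", "like", "rats"]])
def strings2(request):
    return request.param
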
def test_serialize_vocab_roundtrip_disk(strings1, strings2):
    vocab1 = Vocab(strings=strings1)
    vocab2 = Vocab(strings=strings2)
    with make_tempdir() as d:
        file_path1 = d / "vocab1"
        file_path2 = d / "vocab2"
        vocab1.to_disk(file_path1)
        vocab2.to_disk(file_path2)
        vocab1_d = Vocab().from_disk(file_path1)
        vocab2_d = Vocab().from_disk(file_path2)
        assert list(vocab1_d) == list(vocab1)
        assert list(vocab2_d) == list(vocab2)
        if strings1 == strings2:
            assert list(vocab1_d) == list(vocab2_d)
        else:
            assert list(vocab1_d) != list(vocab2_d)

# TransformersWordPiecer requires the optional spacy-transformers (v0.x) package.
from spacy_transformers import TransformersWordPiecer


def wp(name):
    return TransformersWordPiecer.from_pretrained(Vocab(), trf_name=name)

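# Usage sketch for the wp helper above. "bert-base-uncased" is an illustrative
# model name, and calling from_pretrained downloads pretrained weights, so this
# stays commented out rather than being part of the tests themselves:
# piecer = wp("bert-base-uncased")
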
def test_issue1868():
    """Test Vocab.__contains__ works with int keys."""
    vocab = Vocab()
    lex = vocab["hello"]
    assert lex.orth in vocab
    assert lex.orth_ in vocab
    assert "some string" not in vocab
    int_id = vocab.strings.add("some string")
    assert int_id not in vocab