How to use the spacy.vocab.Vocab class in spaCy

To help you get started, we've selected a few spaCy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github explosion / spaCy / tests / regression / test_issue1501-2000.py View on Github external
def test_issue1537():
    """Test that Span.as_doc() doesn't segfault.

    Builds a Doc from whitespace-split words, sets the sentence-start flag
    on every token by hand, then converts the first two sentence spans into
    standalone Docs.
    """
    text = "The sky is blue . The man is pink . The dog is purple ."
    doc = Doc(Vocab(), words=text.split())
    doc[0].sent_start = True
    # A token starts a sentence exactly when the token before it is ".".
    for token in doc[1:]:
        token.sent_start = token.nbor(-1).text == "."
    sentences = list(doc.sents)
    for converted in (sentences[0].as_doc(), sentences[1].as_doc()):
        assert isinstance(converted, Doc)
github explosion / spaCy / tests / regression / test_issue3001-3500.py View on Github external
def test_issue3199():
    """Test that Span.noun_chunks works correctly if no noun chunks iterator
    is available. To make this test future-proof, we're constructing a Doc
    with a new Vocab here and setting is_parsed to make sure the noun chunks run.
    """
    words = ["This", "is", "a", "sentence"]
    doc = Doc(Vocab(), words=words)
    doc.is_parsed = True
    span = doc[0:3]
    chunks = list(span.noun_chunks)
    # The span is expected to yield no noun chunks at all.
    assert chunks == []
github explosion / spaCy / tests / test_basic_create.py View on Github external
def test_get_lexeme(self):
    """Check that indexing a new Vocab by a string gives a lexeme whose
    ``orth_`` equals that string."""
    word = u'Hello'
    vocab = Vocab()
    entry = vocab[word]
    self.assertEqual(entry.orth_, word)
github explosion / spaCy / tests / regression / test_issue1-1000.py View on Github external
def test_issue743():
    """Check that a token stored in a set comes back out as the very same
    object."""
    doc = Doc(Vocab(), ["hello", "world"])
    first_token = doc[0]
    members = list({first_token})
    # Identity, not just equality: the set must hold the original object.
    assert members[0] is first_token
github explosion / spaCy / tests / vocab_vectors / test_lookups.py View on Github external
def test_lookups_to_from_disk_via_vocab():
    """Check that a lookups table added to a Vocab survives a round trip
    through ``Vocab.to_disk`` / ``Vocab.from_disk``."""
    name = "test"
    entries = {"foo": "bar", "hello": "world"}
    source = Vocab()
    source.lookups.add_table(name, entries)
    assert len(source.lookups) == 1
    assert name in source.lookups
    with make_tempdir() as out_dir:
        source.to_disk(out_dir)
        restored = Vocab()
        restored.from_disk(out_dir)
    # The reloaded vocab must expose the same single table with both entries.
    assert len(restored.lookups) == 1
    assert name in restored.lookups
    restored_table = restored.lookups.get_table(name)
    assert len(restored_table) == 2
    assert restored_table["hello"] == "world"
github explosion / spaCy / tests / doc / test_doc_api.py View on Github external
def test_doc_api_similarity_match():
    """Check the similarity scores a one-word Doc reports against its own
    token, the matching vocab entry, and spans/docs sharing its vocab."""
    single = Doc(Vocab(), words=["a"])
    own_token = single[0]
    assert single.similarity(own_token) == 1.0
    vocab_entry = single.vocab["a"]
    assert single.similarity(vocab_entry) == 1.0
    longer = Doc(single.vocab, words=["a", "b", "c"])
    # The comparisons below are expected to emit a ModelsWarning.
    with pytest.warns(ModelsWarning):
        assert single.similarity(longer[:1]) == 1.0
        assert single.similarity(longer) == 0.0
github explosion / spaCy / tests / serialize / test_serialize_vocab_strings.py View on Github external
def test_serialize_vocab_roundtrip_disk(strings1, strings2):
    """Check that two Vocabs written to disk and read back iterate the same
    as their originals, and match each other only when built from equal
    string inputs.

    strings1, strings2: values passed as ``strings=`` to each Vocab.
    """
    original_a = Vocab(strings=strings1)
    original_b = Vocab(strings=strings2)
    with make_tempdir() as tmp:
        path_a = tmp / "vocab1"
        path_b = tmp / "vocab2"
        original_a.to_disk(path_a)
        original_b.to_disk(path_b)
        restored_a = Vocab().from_disk(path_a)
        restored_b = Vocab().from_disk(path_b)
        # Each reloaded vocab must iterate identically to its source.
        assert list(restored_a) == list(original_a)
        assert list(restored_b) == list(original_b)
        same_input = strings1 == strings2
        if not same_input:
            assert list(restored_a) != list(restored_b)
        else:
            assert list(restored_a) == list(restored_b)
github explosion / spacy-transformers / tests / test_wordpiecer.py View on Github external
def wp(name):
    """Return ``TransformersWordPiecer.from_pretrained`` for the transformer
    called ``name``, passing it a newly constructed Vocab."""
    vocab = Vocab()
    return TransformersWordPiecer.from_pretrained(vocab, trf_name=name)
github explosion / spaCy / tests / regression / test_issue1501-2000.py View on Github external
def test_issue1868():
    """Test Vocab.__contains__ works with int keys."""
    vocab = Vocab()
    lexeme = vocab["hello"]
    # Once looked up, the lexeme is a member under both `orth` and `orth_`.
    assert lexeme.orth in vocab
    assert lexeme.orth_ in vocab
    unseen = "some string"
    assert unseen not in vocab
    # Adding the text to vocab.strings alone must not make its ID a member.
    unseen_id = vocab.strings.add(unseen)
    assert unseen_id not in vocab