Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_underscore_docstring(en_vocab):
    """Test that docstrings are available for extension methods, even though
    they're partials."""

    def documented_method(doc, arg1=1, arg2=2):
        """I am a docstring"""
        return (arg1, arg2)

    Doc.set_extension("test_docstrings", method=documented_method)
    sample_doc = Doc(en_vocab, words=["hello", "world"])
    # The raw function keeps its docstring ...
    assert documented_method.__doc__ == "I am a docstring"
    # ... and the bound extension exposes it after spaCy's generated prefix.
    assert sample_doc._.test_docstrings.__doc__.rsplit(". ")[-1] == "I am a docstring"
def test_doc_to_json_underscore_error_serialize(doc):
    """Test that Doc.to_json() raises an error if a custom attribute value
    isn't JSON-serializable."""

    # A method extension's value is a callable, which JSON cannot encode.
    def _text_method(doc):
        return doc.text

    Doc.set_extension("json_test4", method=_text_method)
    with pytest.raises(ValueError):
        doc.to_json(underscore=["json_test4"])
def spacy_model():
    """Return the shared English pipeline, registering the 'triples'
    extension on Doc first if it is not already present."""
    from spacy.tokens import Doc

    pipeline = spacy_nlp_en
    # Guard against double registration across repeated fixture calls.
    if not Doc.has_extension('triples'):
        Doc.set_extension('triples', getter=extract_triples)
    return pipeline
def test_underscore_mutable_defaults_list(en_vocab):
    """Test that mutable default arguments are handled correctly (see #2581)."""
    Doc.set_extension("mutable", default=[])
    first_doc = Doc(en_vocab, words=["one"])
    second_doc = Doc(en_vocab, words=["two"])
    # Mutating one doc's value must not leak into the shared default.
    first_doc._.mutable.append("foo")
    assert len(first_doc._.mutable) == 1
    assert first_doc._.mutable[0] == "foo"
    assert len(second_doc._.mutable) == 0
    # Explicit assignment replaces the copy; further mutation stays local.
    first_doc._.mutable = ["bar", "baz"]
    first_doc._.mutable.append("foo")
    assert len(first_doc._.mutable) == 3
    assert len(second_doc._.mutable) == 0
def doc_w_attrs(en_tokenizer):
    """Build a doc with three custom extensions: a default-valued attribute,
    a getter property, and a method."""

    def _text_len(doc):
        return len(doc.text)

    def _len_plus_arg(doc, arg):
        return "{}{}".format(len(doc.text), arg)

    Doc.set_extension("_test_attr", default=False)
    Doc.set_extension("_test_prop", getter=_text_len)
    Doc.set_extension("_test_method", method=_len_plus_arg)
    result = en_tokenizer("This is a test.")
    result._._test_attr = "test"
    return result
def __init__(self, nlp) -> None:
    """Register the 'compound_cases' Doc extension and set up the matchers
    used to find case-name/citation sequences."""
    Doc.set_extension("compound_cases", default=[], force=True)
    self.matcher = Matcher(nlp.vocab)
    patterns = [
        # CASENAME followed by one or more CITATION entities.
        [{"ent_type": "CASENAME"}, {"ent_type": "CITATION", "OP": "+"}],
        # CASENAME, the literal word "case", then a single CITATION.
        [{"ent_type": "CASENAME"}, {"lower": "case"}, {"ent_type": "CITATION"}],
    ]
    self.matcher.add("compound_case", None, *patterns)
    self.global_matcher = Matcher(nlp.vocab)
    # Merge entity spans into single tokens before matching downstream.
    nlp.add_pipe(nlp.create_pipe("merge_entities"))
def __init__(self, nlp) -> None:
    """Register abbreviation extensions on Doc/Span and build a matcher
    that finds parenthesised token runs."""
    Doc.set_extension("abbreviations", default=[], force=True)
    Span.set_extension("long_form", default=None, force=True)
    # Anything of the form "( ... )" with at least one token inside.
    paren_pattern = [{'ORTH': '('}, {'OP': '+'}, {'ORTH': ')'}]
    self.matcher = Matcher(nlp.vocab)
    self.matcher.add("parenthesis", None, paren_pattern)
    self.global_matcher = Matcher(nlp.vocab)
"""
self.label = nlp.vocab.strings[label] # get entity label ID
# Set up the PhraseMatcher โ it can now take Doc objects as patterns,
# so even if the list of companies is long, it's very efficient
patterns = [nlp(org) for org in companies]
self.matcher = PhraseMatcher(nlp.vocab)
self.matcher.add("TECH_ORGS", None, *patterns)
# Register attribute on the Token. We'll be overwriting this based on
# the matches, so we're only setting a default value, not a getter.
Token.set_extension("is_tech_org", default=False)
# Register attributes on Doc and Span via a getter that checks if one of
# the contained tokens is set to is_tech_org == True.
Doc.set_extension("has_tech_org", getter=self.has_tech_org)
Span.set_extension("has_tech_org", getter=self.has_tech_org)
spacy_download("en_core_web_sm")
import en_core_web_sm as spacy_en
spacy_nlp_en = spacy_en.load()
empty_token = spacy_nlp_en(u" ")[0]
# TODO: Make agent id configurable, rename nlp:contains-roboy to nlp:agent-mentioned
about_roboy = ('you', 'roboy', 'robot', 'roboboy', 'your')
def roboy_getter(doc) -> bool:
return any(roboy in doc.text.lower() for roboy in about_roboy)
from spacy.tokens import Doc
Doc.set_extension('about_roboy', getter=roboy_getter)
Doc.set_extension('empty_token', getter=lambda doc: empty_token)
Doc.set_extension('triples', getter=extract_triples)
Doc.set_extension('yesno', getter=yes_no)
return spacy_nlp_en
def __init__(self, attrs=('languages', 'language_scores')):
    """Register language and score getter extensions under the given
    attribute names on both Doc and Span."""
    self._languages, self._scores = attrs
    # Same registration on both container types, in the same order as before.
    for target in (Doc, Span):
        target.set_extension(self._languages, getter=get_languages)
        target.set_extension(self._scores, getter=get_scores)