Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, vocab=None, pretrained_cdb=None, tokenizer=None):
    """Set up the vocab, the CDB, the spacy pipeline and the tokenizer.

    Args:
        vocab: Stored unchanged on ``self.vocab``.
        pretrained_cdb: Existing CDB to reuse. When ``None`` a new
            ``CDB()`` is created instead.
        tokenizer: Tokenizer stored on ``self.tokenizer``. When ``None``
            the instance's own ``self._tok`` is used.
    """
    self.vocab = vocab
    self.cdb = CDB() if pretrained_cdb is None else pretrained_cdb

    # Spacy pipeline built with 'ner' and 'parser' passed as disabled.
    pipeline = SpacyPipe(spacy_split_all, disable=['ner', 'parser'])
    self.nlp = pipeline

    # Punctuation tagger with stopword skipping switched off.
    punct_tagger = partial(spacy_tag_punct, skip_stopwords=False)
    pipeline.add_punct_tagger(tagger=punct_tagger)

    # A caller-supplied tokenizer wins; otherwise fall back to self._tok.
    self.tokenizer = tokenizer if tokenizer is not None else self._tok
def __init__(self, vocab=None, pretrained_cdb=None, word_tokenizer=None):
    """Initialise the vocab, the CDB, the spacy pipeline and the tokenizer.

    Args:
        vocab: Stored unchanged on ``self.vocab``.
        pretrained_cdb: CDB to reuse. A new ``CDB()`` is created when
            this is ``None``.
        word_tokenizer: Tokenizer stored on ``self.tokenizer``.
            ``self._tok`` is used when this is ``None``.
    """
    self.vocab = vocab
    self.cdb = pretrained_cdb if pretrained_cdb is not None else CDB()

    # Spacy pipeline built with 'ner' and 'parser' passed as disabled.
    self.nlp = SpacyPipe(spacy_split_all, disable=['ner', 'parser'])

    # The stopword behaviour of the punctuation tagger comes from the
    # SKIP_STOPWORDS attribute looked up on the instance.
    tagger_fn = partial(spacy_tag_punct, skip_stopwords=self.SKIP_STOPWORDS)
    self.nlp.add_punct_tagger(tagger=tagger_fn)

    # Prefer the explicitly supplied tokenizer over the built-in one.
    self.tokenizer = self._tok if word_tokenizer is None else word_tokenizer
def __init__(self, cdb, vocab=None, skip_stopwords=True, meta_cats=[]):
    """Assemble the spacy pipeline around the given CDB and vocab.

    The pipeline receives, in this order: a punctuation tagger, a spell
    checker, the ``SpacyCat`` component and one meta-cat component per
    entry of ``meta_cats``.

    Args:
        cdb: Stored on ``self.cdb``; its ``vocab`` attribute is handed to
            the spell checker.
        vocab: Stored on ``self.vocab`` and passed to the spell checker
            and to ``SpacyCat``.
        skip_stopwords: Forwarded to ``spacy_tag_punct``.
        meta_cats: Iterable of objects exposing ``category_name``; each is
            registered on the pipeline. The default list is only iterated
            here, never stored or mutated.
    """
    self.cdb = cdb
    self.vocab = vocab

    pipeline = SpacyPipe(spacy_split_all)
    self.nlp = pipeline

    # Punctuation tagger; the kept punctuation comes from self.KEEP_PUNCT.
    punct_tagger = partial(
        spacy_tag_punct,
        skip_stopwords=skip_stopwords,
        keep_punct=self.KEEP_PUNCT,
    )
    pipeline.add_punct_tagger(tagger=punct_tagger)

    # Spell checker fed with both the CDB vocab and the data vocab.
    checker = CustomSpellChecker(cdb_vocab=self.cdb.vocab, data_vocab=self.vocab)
    self.spell_checker = checker
    pipeline.add_spell_checker(spell_checker=checker)

    # The cat component.
    cat_component = SpacyCat(cdb=self.cdb, vocab=self.vocab)
    self.spacy_cat = cat_component
    pipeline.add_cat(spacy_cat=cat_component)

    # Register every meta-cat under its own category name.
    for extra_cat in meta_cats:
        pipeline.add_meta_cat(extra_cat, extra_cat.category_name)
def __init__(self, cdb, vocab=None, skip_stopwords=True, meta_cats=[], config=None, tokenizer=None):
    """Assemble the spacy pipeline around the given CDB, vocab and config.

    The pipeline receives, in this order: a punctuation tagger, a spell
    checker, the ``SpacyCat`` component and one meta-cat component per
    entry of ``meta_cats``.

    Args:
        cdb: Stored on ``self.cdb``; its ``vocab`` attribute is handed to
            the spell checker.
        vocab: Stored on ``self.vocab`` and passed to the spell checker
            and to ``SpacyCat``.
        skip_stopwords: Forwarded to ``spacy_tag_punct``.
        meta_cats: Iterable of objects exposing ``category_name``; each is
            registered on the pipeline. The default list is only iterated
            here, never stored or mutated, so sharing it is harmless.
        config: Mapping stored on ``self.config``. Its ``"keep_punct"``
            entry (default ``[':', '.']``) is forwarded to
            ``spacy_tag_punct``. When omitted or ``None``, each instance
            gets its own new empty dict.
        tokenizer: Forwarded to ``SpacyCat``.
    """
    self.cdb = cdb
    self.vocab = vocab
    # The previous ``config={}`` default was stored directly on the
    # instance, so all instances created without a config aliased one
    # dict and a change made through one showed up in every other.
    # A caller-supplied mapping is still stored by reference, as before.
    self.config = {} if config is None else config

    # Build the spacy pipeline
    self.nlp = SpacyPipe(spacy_split_all)
    self.nlp.add_punct_tagger(tagger=partial(spacy_tag_punct,
                                             skip_stopwords=skip_stopwords,
                                             keep_punct=self.config.get("keep_punct", [':', '.'])))

    # Add spell checker
    self.spell_checker = CustomSpellChecker(cdb_vocab=self.cdb.vocab, data_vocab=self.vocab)
    self.nlp.add_spell_checker(spell_checker=self.spell_checker)

    # Add the cat component
    self.spacy_cat = SpacyCat(cdb=self.cdb, vocab=self.vocab, tokenizer=tokenizer)
    self.nlp.add_cat(spacy_cat=self.spacy_cat)

    # Add meta-annotation components if any were given; the flag records
    # whether at least one was registered.
    self._meta_annotations = False
    for meta_cat in meta_cats:
        self.nlp.add_meta_cat(meta_cat, meta_cat.category_name)
        self._meta_annotations = True