Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if not nlp_artifacts.tokens:
self.logger.info('Skipping context extraction due to '
'lack of NLP artifacts')
# if there are no NLP artifacts, this is ok: we cannot
# extract context, so we return a valid, yet empty,
# context
return ''
# Get the already prepared words in the given text, in their
# LEMMATIZED version
lemmatized_keywords = nlp_artifacts.keywords
# since the list of tokens is not necessarily aligned
# with the actual index of the match, we look for the
# token index which corresponds to the match
token_index = EntityRecognizer.find_index_of_match_token(
word,
start,
nlp_artifacts.tokens,
nlp_artifacts.tokens_indices)
# index i belongs to the PII entity, take the preceding n words
# and the succeeding m words into a context string
context_str = ''
context_str = \
self.__add_n_words_backward(token_index,
EntityRecognizer.CONTEXT_PREFIX_COUNT,
nlp_artifacts.lemmas,
lemmatized_keywords,
context_str)
context_str = \
self.__add_n_words_forward(token_index,