from itertools import chain
from textblob.compat import basestring  # Python 2/3 string-type alias
from textblob.tokenizers import word_tokenize

def tokenize(words):
    # Tokenize raw strings; pass pre-tokenized sequences through unchanged.
    if isinstance(words, basestring):
        return word_tokenize(words, include_punc=False)
    else:
        return words

all_words = chain.from_iterable(tokenize(words) for words, _ in dataset)
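The dataset name comes from the snippet's surrounding project; a minimal, hypothetical stand-in of (text, label) pairs to exercise the last line might look like:

dataset = [("I love this library", "pos"), ("This is terrible", "neg")]
all_words = chain.from_iterable(tokenize(words) for words, _ in dataset)
print(list(all_words))  # e.g. ['I', 'love', 'this', 'library', 'This', 'is', 'terrible']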
from textblob.compat import basestring
from textblob.tokenizers import word_tokenize
from textblob.utils import strip_punc

def _get_document_tokens(document):
    # Accept either a raw string or an already-tokenized sequence.
    if isinstance(document, basestring):
        tokens = set(strip_punc(w, all=False)
                     for w in word_tokenize(document, include_punc=False))
    else:
        tokens = set(strip_punc(w, all=False) for w in document)
    return tokens
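A usage sketch with a made-up sentence; both call forms should yield the same punctuation-stripped token set:

print(_get_document_tokens("Great food, great service!"))
# e.g. {'Great', 'food', 'great', 'service'}
print(_get_document_tokens(["Great", "food,", "great", "service!"]))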
import textblob.tokenizers

def tokenize(text):
    """Tokenize the given text with textblob.tokenizers.word_tokenize.

    Args:
        text (str): text to tokenize

    Returns:
        iterable: tokens
    """
    return textblob.tokenizers.word_tokenize(text)
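A quick call; note that word_tokenize keeps punctuation by default and returns a lazy iterable, so materialize it before printing:

tokens = tokenize("TextBlob splits text into words.")
print(list(tokens))  # e.g. ['TextBlob', 'splits', 'text', 'into', 'words', '.']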
# Defined as a property on TextBlob's blob classes; self.raw holds the original text.
def words(self):
    """Return a list of word tokens. This excludes punctuation characters.
    If you want to include punctuation characters, access the ``tokens``
    property.

    :returns: A :class:`WordList` of word tokens.
    """
    return WordList(word_tokenize(self.raw, include_punc=False))
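In end-user code this property is reached through a TextBlob instance; words drops punctuation while the tokens property keeps it:

from textblob import TextBlob

blob = TextBlob("Beautiful is better than ugly!")
print(blob.words)   # WordList(['Beautiful', 'is', 'better', 'than', 'ugly'])
print(blob.tokens)  # the trailing '!' survives as its own token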