Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def build_text_automatic(self, record):
text = record[Constants.TEXT_FIELD]
sentences = nlp_utils.get_sentences(text)
lemmatized_words = []
for sentence in sentences:
lemmatized_words.append(nlp_utils.lemmatize_sentence(
sentence, nltk.re.compile(''),
min_length=1, max_length=100))
doc_parts = []
itemize = Itemize()
for sentence in lemmatized_words:
new_words = []
itemize.add_item('')
for tagged_word in sentence:
tag = tagged_word[1]
word = tagged_word[0]
singular = pattern.text.en.singularize(word)
word_found = False
# if tag == 'VBD':
def lemmatize_sentences(records):
    """Split every record into per-sentence records with POS-tagged words.

    For each record, the text stored under ``Constants.TEXT_FIELD`` is split
    with ``nlp_utils.get_sentences``. Every kept sentence yields a shallow
    copy of the source record in which:

    * ``Constants.TEXT_FIELD`` is replaced by the sentence itself,
    * ``'sentence_index'`` holds the sentence's 0-based position within
      its source record,
    * ``Constants.POS_TAGS_FIELD`` holds whatever
      ``nlp_utils.lemmatize_sentence`` returns for the sentence.

    When ``Constants.DOCUMENT_LEVEL`` is numeric, only sentences whose index
    is below that value are kept for each record; any non-numeric value
    disables the cap and all sentences are kept.

    :param records: iterable of dict-like records containing
        ``Constants.TEXT_FIELD``
    :return: a flat list of the per-sentence records, in input order
    """
    print('%s: lemmatize sentences' % time.strftime("%Y/%m/%d-%H:%M:%S"))

    document_level = Constants.DOCUMENT_LEVEL
    # The setting cannot change while we iterate, so decide once whether a
    # per-record sentence cap applies instead of re-checking per sentence.
    has_sentence_cap = isinstance(document_level, (int, float))

    sentence_records = []
    for record in records:
        sentences = nlp_utils.get_sentences(record[Constants.TEXT_FIELD])
        for sentence_index, sentence in enumerate(sentences):
            if has_sentence_cap and sentence_index >= document_level:
                break

            tagged_words = nlp_utils.lemmatize_sentence(sentence)

            # Copy first so the caller's record is never mutated; the
            # sentence-specific fields then override/extend the copy.
            sentence_record = dict(record)
            sentence_record[Constants.TEXT_FIELD] = sentence
            sentence_record['sentence_index'] = sentence_index
            sentence_record[Constants.POS_TAGS_FIELD] = tagged_words
            sentence_records.append(sentence_record)

    return sentence_records
def build_text_manual(self, record):
text = record[Constants.TEXT_FIELD]
sentences = nlp_utils.get_sentences(text)
lemmatized_words = []
for sentence in sentences:
lemmatized_words.append(nlp_utils.lemmatize_sentence(
sentence, nltk.re.compile(''),
min_length=1, max_length=100))
doc_parts = []
itemize = Itemize()
for sentence in lemmatized_words:
new_words = []
itemize.add_item('')
for tagged_word in sentence:
tag = tagged_word[1]
word = tagged_word[0]
singular = pattern.text.en.singularize(word)
word_found = False
if tag == 'VBD':