Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _paragraph_to_sentences(chunks):
    """Join one paragraph's chunks and split the result into sentences.

    The chunks are joined with single spaces, embedded newlines are turned
    into spaces, and the text is handed to ``text_to_sentences`` (expected to
    be in scope at module level); its return value is split on ``'\\n'``.
    """
    paragraph = " ".join(chunks).strip().replace('\n', ' ')
    return text_to_sentences(paragraph).split('\n')


def convert_into_sentences(lines):
    """Split blank-line-separated paragraphs into a flat list of sentences.

    Consecutive non-blank chunks are accumulated as one paragraph. A blank
    (empty or whitespace-only) chunk ends the current paragraph, whose
    sentences are appended to the output followed by a ``'\\n'`` marker.
    A paragraph still pending when the input ends is emitted as well, but
    without a trailing marker.

    Args:
        lines: Iterable of strings (e.g. the lines of a text file).

    Returns:
        A ``(sentences, n_sent)`` tuple. ``sentences`` is the list of
        sentence strings interleaved with ``'\\n'`` paragraph markers;
        ``n_sent`` counts only the sentences, not the markers.
    """
    paragraph = []
    sentences = []
    n_sent = 0
    for chunk in lines:
        stripped = chunk.strip()
        if stripped:
            paragraph.append(stripped)
            continue
        # Blank chunk: close the pending paragraph, if there is one.
        # Runs of blank chunks therefore produce a single marker.
        if paragraph:
            sents = _paragraph_to_sentences(paragraph)
            sentences.extend(sents)
            n_sent += len(sents)
            sentences.append('\n')
            paragraph = []

    # Input may end without a blank chunk; flush what is left.
    if paragraph:
        sents = _paragraph_to_sentences(paragraph)
        sentences.extend(sents)
        n_sent += len(sents)
    return sentences, n_sent
def blingfire_tokenize(text):
    """Split ``text`` into sentences using BlingFire.

    Args:
        text: The string to segment.

    Returns:
        The list of strings obtained by splitting the result of
        ``blingfire.text_to_sentences(text)`` on ``'\\n'``.
    """
    # NOTE(review): relies on ``blingfire`` being imported at module level;
    # the import is not visible in this part of the file.
    return blingfire.text_to_sentences(text).split('\n')
def text2sentences(text: str) -> str:
    """Re-segment ``text`` so that each output line holds one sentence.

    Lines are stripped and grouped into paragraphs separated by blank
    lines. Each paragraph is joined with single spaces and passed to
    ``text_to_sentences`` (expected to be in scope at module level); the
    lines of its result are collected as sentences.

    Args:
        text: Input text, possibly containing several paragraphs.

    Returns:
        All sentences joined with ``'\\n'``. An empty string is returned
        when ``text`` contains no non-blank line.
    """
    lines = [line.strip() for line in text.splitlines()]
    # str.splitlines() never yields a trailing empty string, so without this
    # blank sentinel the last paragraph would only be flushed when the text
    # happened to end with an empty line; otherwise it was silently dropped.
    lines.append('')

    stack = []
    sentences = []
    for line in lines:
        if line:
            stack.append(line)
        elif stack:  # blank line closes a non-empty paragraph
            sentences += text_to_sentences(' '.join(stack)).splitlines()
            stack = []
    return '\n'.join(sentences)