Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Pad the text with one space on each side before the substitutions below
# (presumably so the patterns can rely on surrounding whitespace -- confirm).
text = " " + text + " "
# Ending quotes: rewrite every double-quote character as '' (two apostrophes)
# surrounded by spaces.
text = re.sub(r'"', " '' ", text)
# Any '' that directly follows a non-space character gets a space inserted
# before it and another one after it.
text = re.sub(r'(\S)(\'\')', r'\1 \2 ', text)
# Split on contractions and clitics.
# Both branches make the same two helper calls; the only difference is the
# value passed as use_cintil_format (True for the 'cintil' style, else False).
# `contr` and `clit` are defined outside this excerpt.
if self.tokenization_style == 'cintil':
text = contr.split_contractions(text, self.contractions,
use_cintil_format=True)
text = clit.split_clitics(text, self.clitics, self.suffixes,
use_cintil_format=True)
else:
text = contr.split_contractions(text, self.contractions,
use_cintil_format=False)
text = clit.split_clitics(text, self.clitics, self.suffixes,
use_cintil_format=False)
# Collapse every run of spaces into a single space, then trim both ends.
text = re.sub(" +", " ", text)
text = text.strip()
# Add a space at the end to match up with MacIntyre's output (for debugging).
# NOTE(review): split() with no arguments ignores leading/trailing whitespace,
# so this trailing space has no effect on the list returned below.
if text != "":
text += " "
# Return the whitespace-separated tokens as a list.
return text.split()