Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _doc2features(doc, i) -> dict:
word = doc[i][0]
postag = doc[i][1]
# Features from current word
features = {
"word.word": word,
"word.stopword": _is_stopword(word),
"word.isthai": isthai(word),
"word.isspace": word.isspace(),
"postag": postag,
"word.isdigit": word.isdigit(),
}
if word.isdigit() and len(word) == 5:
features["word.islen5"] = True
# Features from previous word
if i > 0:
prevword = doc[i - 1][0]
prevpostag = doc[i - 1][1]
prev_features = {
"word.prevword": prevword,
"word.previsspace": prevword.isspace(),
"word.previsthai": isthai(prevword),
"word.prevstopword": _is_stopword(prevword),
"word.isthai": isthai(word),
"word.isspace": word.isspace(),
"postag": postag,
"word.isdigit": word.isdigit(),
}
if word.isdigit() and len(word) == 5:
features["word.islen5"] = True
# Features from previous word
if i > 0:
prevword = doc[i - 1][0]
prevpostag = doc[i - 1][1]
prev_features = {
"word.prevword": prevword,
"word.previsspace": prevword.isspace(),
"word.previsthai": isthai(prevword),
"word.prevstopword": _is_stopword(prevword),
"word.prevpostag": prevpostag,
"word.prevwordisdigit": prevword.isdigit(),
}
features.update(prev_features)
else:
features["BOS"] = True # Special "Beginning of Sequence" tag
# Features from next word
if i < len(doc) - 1:
nextword = doc[i + 1][0]
nextpostag = doc[i + 1][1]
next_features = {
"word.nextword": nextword,
"word.nextisspace": nextword.isspace(),
"word.nextpostag": nextpostag,
"word.prevpostag": prevpostag,
"word.prevwordisdigit": prevword.isdigit(),
}
features.update(prev_features)
else:
features["BOS"] = True # Special "Beginning of Sequence" tag
# Features from next word
if i < len(doc) - 1:
nextword = doc[i + 1][0]
nextpostag = doc[i + 1][1]
next_features = {
"word.nextword": nextword,
"word.nextisspace": nextword.isspace(),
"word.nextpostag": nextpostag,
"word.nextisthai": isthai(nextword),
"word.nextstopword": _is_stopword(nextword),
"word.nextwordisdigit": nextword.isdigit(),
}
features.update(next_features)
else:
features["EOS"] = True # Special "End of Sequence" tag
return features