Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def main():
    """Stem the words given on the command line and print each result.

    With fewer than one argument the usage text is shown and nothing else
    happens. When more than one argument is supplied, the first one names
    the stemming algorithm and the rest are the input; with exactly one
    argument the 'english' algorithm is used.

    Every argument is split on whitespace, '.' and '-'; empty pieces are
    dropped, and each remaining piece is lower-cased before being stemmed
    and printed as ``word -> stem``.
    """
    cli = sys.argv
    if len(cli) < 2:
        usage()
        return

    # More than one argument: the first selects the algorithm.
    if len(cli) > 2:
        algorithm, inputs = cli[1], cli[2:]
    else:
        algorithm, inputs = 'english', cli[1:]

    stemmer = snowballstemmer.stemmer(algorithm)
    separator = re.compile(r"[\s\.-]")
    for text in inputs:
        # filter(None, ...) discards the empty strings produced by
        # adjacent separators.
        for token in filter(None, separator.split(text)):
            lowered = token.lower()
            print(lowered + " -> " + stemmer.stemWord(lowered))
# Run the command-line entry point as soon as this module is loaded.
main()
def init(self, options: Dict) -> None:
    """Create the Turkish Snowball stemmer and store it on the instance.

    The ``options`` mapping is accepted but not consulted here.
    """
    turkish_stemmer = snowballstemmer.stemmer('turkish')
    self.stemmer = turkish_stemmer
def init(self, options: Dict) -> None:
    """Create the Swedish Snowball stemmer and store it on the instance.

    The ``options`` mapping is accepted but not consulted here.
    """
    swedish_stemmer = snowballstemmer.stemmer('swedish')
    self.stemmer = swedish_stemmer
def init(self, options: Dict) -> None:
    """Create the Romanian Snowball stemmer and store it on the instance.

    The ``options`` mapping is accepted but not consulted here.
    """
    romanian_stemmer = snowballstemmer.stemmer('romanian')
    self.stemmer = romanian_stemmer
def init(self, options: Dict) -> None:
    """Create the Hungarian Snowball stemmer and store it on the instance.

    The ``options`` mapping is accepted but not consulted here.
    """
    hungarian_stemmer = snowballstemmer.stemmer('hungarian')
    self.stemmer = hungarian_stemmer
'twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,'
'will,with,would,yet,you,your').lower().split(',')
def is_stopword(str):
    '''Return whether the given string is a stop word.

    Upper- and lower-case letters are treated as the same.

    Returns:
        True if the string is a stop word, False otherwise.
    '''
    # Lower-case the input before the membership test against stop_words.
    lowered = str.lower()
    return lowered in stop_words
# Feature extraction
stemmer = snowballstemmer.stemmer('english')
word_counter = Counter()
with codecs.open(fname_sentiment, 'r', fencoding) as file_in:
for line in file_in:
for word in line[3:].split(' '): # line[3:] drops the polarity label
# Strip leading and trailing whitespace
word = word.strip()
# Skip stop words
if is_stopword(word):
continue
# Stemming
word = stemmer.stemWord(word)
'''
# Version tag for this table set (not read anywhere in the visible code).
serialVersionUID = 1
# Lookup tables built from Among(string, link, code) entries.
# In every entry shown, `link` is either -1 or the 0-based index of an
# earlier entry in the same list whose string is a suffix of this one
# (e.g. "es" -> index 5 "s", "est" -> index 2 "st").
# NOTE(review): `code` looks like a per-entry result/action number used by
# the matching code -- confirm against the Among definition.

# a_0: the empty string plus "U", "Y" and three umlaut vowels, all linked
# to the empty entry at index 0.
a_0 = [
Among(u"", -1, 6),
Among(u"U", 0, 2),
Among(u"Y", 0, 1),
Among(u"\u00E4", 0, 3),
Among(u"\u00F6", 0, 4),
Among(u"\u00FC", 0, 5)
]
# a_1: endings "e", "em", "en", "ern", "er", "s", "es".
a_1 = [
Among(u"e", -1, 2),
Among(u"em", -1, 1),
Among(u"en", -1, 2),
Among(u"ern", -1, 1),
Among(u"er", -1, 1),
Among(u"s", -1, 3),
Among(u"es", 5, 2)
]
# a_2: endings "en", "er", "st", "est".
a_2 = [
Among(u"en", -1, 1),
Among(u"er", -1, 1),
Among(u"st", -1, 2),
Among(u"est", 2, 1)
]
a_3 = [
Among(u"ig", -1, 1),
Among(u"lich", -1, 1)
Among(u"imento", -1, 6),
Among(u"ivo", -1, 9),
Among(u"it\u00E0", -1, 8),
Among(u"ist\u00E0", -1, 1),
Among(u"ist\u00E8", -1, 1),
Among(u"ist\u00EC", -1, 1)
]
a_7 = [
Among(u"isca", -1, 1),
Among(u"enda", -1, 1),
Among(u"ata", -1, 1),
Among(u"ita", -1, 1),
Among(u"uta", -1, 1),
Among(u"ava", -1, 1),
Among(u"eva", -1, 1),
Among(u"iva", -1, 1),
Among(u"erebbe", -1, 1),
Among(u"irebbe", -1, 1),
Among(u"isce", -1, 1),
Among(u"ende", -1, 1),
Among(u"are", -1, 1),
Among(u"ere", -1, 1),
Among(u"ire", -1, 1),
Among(u"asse", -1, 1),
Among(u"ate", -1, 1),
Among(u"avate", 16, 1),
Among(u"evate", 16, 1),
Among(u"ivate", 16, 1),
Among(u"ete", -1, 1),
Among(u"erete", 20, 1),
Among(u"irete", 20, 1),
# Version tag for this table set (not read anywhere in the visible code).
serialVersionUID = 1
# Lookup tables built from Among(string, link, code) entries.
# In every entry shown, `link` is either -1 or the 0-based index of an
# earlier entry in the same list whose string is a suffix of this one
# (e.g. "selas" -> index 7 "las", "selos" -> index 10 "los").
# NOTE(review): `code` looks like a per-entry result/action number used by
# the matching code -- confirm against the Among definition.

# a_0: the empty string plus five acute-accented vowels, all linked to the
# empty entry at index 0.
a_0 = [
Among(u"", -1, 6),
Among(u"\u00E1", 0, 1),
Among(u"\u00E9", 0, 2),
Among(u"\u00ED", 0, 3),
Among(u"\u00F3", 0, 4),
Among(u"\u00FA", 0, 5)
]
# a_1: short endings ("la", "le", "me", "se", "lo", ... and their "se-"
# compounds); every entry carries code -1.
a_1 = [
Among(u"la", -1, -1),
Among(u"sela", 0, -1),
Among(u"le", -1, -1),
Among(u"me", -1, -1),
Among(u"se", -1, -1),
Among(u"lo", -1, -1),
Among(u"selo", 5, -1),
Among(u"las", -1, -1),
Among(u"selas", 7, -1),
Among(u"les", -1, -1),
Among(u"los", -1, -1),
Among(u"selos", 10, -1),
Among(u"nos", -1, -1)
]
a_2 = [
Among(u"ando", -1, 6),
Among(u"iendo", -1, 6),
Among(u"yendo", -1, 7),
Among(u"\u00E1ndo", -1, 2),
# Lookup tables built from Among(string, link, code) entries.
# In every entry shown, `link` is either -1 or the 0-based index of an
# earlier entry in the same list whose string is a suffix of this one
# (here only the empty string at index 0 of a_1 is ever linked to).
# NOTE(review): `code` looks like a per-entry result/action number used by
# the matching code -- confirm against the Among definition.

# a_1: the empty string plus the two-character sequences "a~" and "o~".
a_1 = [
Among(u"", -1, 3),
Among(u"a~", 0, 1),
Among(u"o~", 0, 2)
]
# a_2: "ic", "ad", "os" carry code -1; "iv" carries code 1.
a_2 = [
Among(u"ic", -1, -1),
Among(u"ad", -1, -1),
Among(u"os", -1, -1),
Among(u"iv", -1, 1)
]
# a_3: endings "ante", "avel", "\u00EDvel", all with code 1.
a_3 = [
Among(u"ante", -1, 1),
Among(u"avel", -1, 1),
Among(u"\u00EDvel", -1, 1)
]
# a_4: endings "ic", "abil", "iv", all with code 1.
a_4 = [
Among(u"ic", -1, 1),
Among(u"abil", -1, 1),
Among(u"iv", -1, 1)
]
a_5 = [
Among(u"ica", -1, 1),
Among(u"\u00E2ncia", -1, 1),
Among(u"\u00EAncia", -1, 4),
Among(u"ira", -1, 9),
Among(u"adora", -1, 1),
Among(u"osa", -1, 1),