Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import opencc
import sys
import os
# Name the script was launched under; it selects the OpenCC configuration.
progname = os.path.basename(sys.argv[0])

# Launched as 'test-opencc.py' -> 's2j.json'; any other name -> '<name>.json'.
_config_file = 's2j.json' if progname == 'test-opencc.py' else progname + '.json'
cc = opencc.OpenCC(_config_file)

# Convert the first command-line argument, or the sample "亚" when none is given.
_text = "亚" if len(sys.argv) == 1 else sys.argv[1]
print(cc.convert(_text))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import opencc
import sys
import os
progname = os.path.basename(sys.argv[0])

# Every invocation name except 'test-opencc.py' maps to a same-named .json
# configuration; 'test-opencc.py' itself uses 's2j.json'.
cc = opencc.OpenCC(
    's2j.json' if progname == 'test-opencc.py' else progname + '.json'
)

# With no arguments convert the built-in sample; otherwise convert argv[1].
_cli_args = sys.argv[1:]
print(cc.convert(_cli_args[0] if _cli_args else "亚"))
def test_convert():
    """Check that ``convert`` turns '乾坤一擲' into '乾坤一掷'."""
    source, wanted = '乾坤一擲', '乾坤一掷'
    assert convert(source) == wanted
def init():
    """Create the module-level IME helper objects and announce completion.

    Populates the ``_g_ime_*`` globals, checks ``$HOME`` for the
    ``.sdim-single`` and ``.sdim-s2t`` marker files, and tries to build the
    two OpenCC converters.  OpenCC is treated as optional: if importing it or
    loading a configuration fails, whichever converter could not be created
    is simply not assigned.  Finally prints ``ime init complete`` to stdout
    and flushes it.

    Raises:
        KeyError: if the ``HOME`` environment variable is not set.
    """
    global _g_ime_reverse, _g_ime_single_mode, _g_ime_s2t_mode
    global _g_opencc_s2t, _g_opencc_s2j
    global _g_ime_trans, _g_ime_quail, _g_ime_history

    _g_ime_reverse = ime_reverse()

    # Each mode is switched on by the mere existence of a marker file in $HOME.
    home = os.environ["HOME"]
    _g_ime_single_mode = os.path.exists(os.path.join(home, ".sdim-single"))
    _g_ime_s2t_mode = os.path.exists(os.path.join(home, ".sdim-s2t"))

    try:
        import opencc
        _g_opencc_s2t = opencc.OpenCC("s2t.json")
        _g_opencc_s2j = opencc.OpenCC("s2j.json")
    except Exception:
        # Best effort: carry on without OpenCC.  Catching Exception rather
        # than using a bare ``except`` lets KeyboardInterrupt and SystemExit
        # propagate instead of being silently discarded.
        pass

    _g_ime_trans = ime_trans()
    _g_ime_quail = ime_quail()
    _g_ime_history = ime_history()

    print('ime init complete')
    # Flush so the completion message is written out immediately.
    sys.stdout.flush()
if __name__ == '__main__':
os.path.basename(output_file_path))
# KindleGen introduced redundant data, use kindlestrip to remove that.
data_file = file(original_output_path, 'rb').read()
strippedFile = kindlestrip.SectionStripper(data_file)
outf = file(output_file_path, 'wb')
outf.write(strippedFile.getResult())
outf.close()
print "Removing temporary directory %s" % input_path
shutil.rmtree(input_path)
# Driver: convert a package with the OpenCC "t2s.json" configuration and repack it.
# At least one command-line argument is required; otherwise print usage and exit 1.
# NOTE(review): the "%s" in the usage string is printed literally -- nothing is
# interpolated into it.
if len(sys.argv) < 2:
print "usage: %s "
sys.exit(1)
# The converter is used as a context manager around the conversion steps.
with opencc.OpenCC(config="t2s.json") as converter:
# find_paths() yields the input file, the extraction directory and the output file.
(input_file_path, extracted_path, output_file_path) = find_paths(converter)
# A falsy result from find_opf_path() leads to the abort message below.
opf_path = find_opf_path(extracted_path)
if opf_path:
files = find_files_to_convert(extracted_path, opf_path)
# Only run the in-place conversion when there is at least one file to convert.
if len(files):
convert_files_in_place(converter, files)
repack_files(extracted_path, output_file_path, opf_path)
else:
print "%s is not in Open Packaging Format, abort." % extracted_path
sys.exit(1)
"""
if self.matched == True:
if self.left is not None:
self.left.convert_tree(test_dict)
if self.right is not None:
self.right.convert_tree(test_dict)
else:
test_len = min (self.string_len, test_dict[0])
while test_len != 0:
# Loop through trying successively smaller substrings in the dictionary
for i in range(0, self.string_len - test_len + 1):
if self.string[i:i+test_len] in test_dict[1]:
# Match found.
if i > 0:
# Put everything to the left of the match into the left sub-tree and further process it
self.left = StringTree(self.string[:i])
self.left.convert_tree(test_dict)
if (i+test_len) < self.string_len:
# Put everything to the right of the match into the right sub-tree and further process it
self.right = StringTree(self.string[i+test_len:])
self.right.convert_tree(test_dict)
# Save the dictionary value in this tree
value = test_dict[1][self.string[i:i+test_len]]
if len(value.split(' ')) > 1:
# multiple mapping, use the first one for now
value = value.split(' ')[0]
self.string = value
self.string_len = len(self.string)
self.matched = True
return
test_len -= 1
if self.right is not None:
self.right.convert_tree(test_dict)
else:
test_len = min (self.string_len, test_dict[0])
while test_len != 0:
# Loop through trying successively smaller substrings in the dictionary
for i in range(0, self.string_len - test_len + 1):
if self.string[i:i+test_len] in test_dict[1]:
# Match found.
if i > 0:
# Put everything to the left of the match into the left sub-tree and further process it
self.left = StringTree(self.string[:i])
self.left.convert_tree(test_dict)
if (i+test_len) < self.string_len:
# Put everything to the right of the match into the right sub-tree and further process it
self.right = StringTree(self.string[i+test_len:])
self.right.convert_tree(test_dict)
# Save the dictionary value in this tree
value = test_dict[1][self.string[i:i+test_len]]
if len(value.split(' ')) > 1:
# multiple mapping, use the first one for now
value = value.split(' ')[0]
self.string = value
self.string_len = len(self.string)
self.matched = True
return
test_len -= 1
Given a Chinese language string, return a list of alfred items for each of the results
'''
index = 0
results = []
config_list = [
('t2s.json', u'繁體到簡體', 'SimplifiedChinese.png'),
('s2t.json', u'簡體到繁體', 'TraditionalChinese.png'),
('s2tw.json', u'簡體到臺灣正體', 'TW_taiwan.png'),
('tw2s.json', u'臺灣正體到簡體', 'CN_china.png'),
('s2hk.json', u'簡體到香港繁體', 'HK_hongKong.png'),
('hk2s.json', u'香港繁體(香港小學學習字詞表標準)到簡體', 'CN_china.png'),
('s2twp.json', u'簡體到繁體(臺灣正體標準)並轉換爲臺灣常用詞彙', 'TW_taiwan.png'),
]
for config_file, description, icon in config_list:
converter = opencc.OpenCC(config=config_file, opencc_path='/usr/local/bin/opencc')
item_value = converter.convert(query_str)
results.append(alfred.Item(
title=item_value,
subtitle=description,
attributes={
'uid': alfred.uid(index),
'arg': item_value,
},
icon=icon,
))
index += 1
return results
# -*- coding: utf-8 -*-
import json
import jieba
import pickle
import numpy as np
from tqdm import tqdm
from gensim.models.word2vec import Word2Vec
from opencc import OpenCC
# Point jieba at the dictionary file 'dict/dict.txt.big'.
jieba.set_dictionary('dict/dict.txt.big')
# Load the Word2Vec model stored at 'word2vec/zh.bin'.
w2v = Word2Vec.load('word2vec/zh.bin')
# Two OpenCC converters, built from the 's2twp' and 'tw2sp' configurations.
# toW2V() runs each token through both to get alternative forms to look up in w2v.
s2t, t2s = OpenCC('s2twp'), OpenCC('tw2sp')
def toW2V(s):
offset = 0
offsets = []
for i, w in enumerate(s):
ws = t2s.convert(w)
wt = s2t.convert(w)
if w in w2v.wv:
s[i] = w2v.wv[w]
elif ws in w2v.wv:
s[i] = w2v.wv[ws]
elif wt in w2v.wv:
s[i] = w2v.wv[wt]
else:
s[i] = np.zeros((300, ))
offsets.append(offset)
offset += len(w)
help='Read original text from .')
parser.add_argument('-o', '--output', metavar='',
help='Write converted text to .')
parser.add_argument('-c', '--config', metavar='',
help='Configuration file')
parser.add_argument('--in-enc', metavar='', default='UTF-8',
help='Encoding for input')
parser.add_argument('--out-enc', metavar='', default='UTF-8',
help='Encoding for output')
args = parser.parse_args()
if args.config is None:
print("Please specify a configuration file.", file=sys.stderr)
return 1
cc = OpenCC(args.config)
with io.open(args.input if args.input else 0, encoding=args.in_enc) as f:
input_str = f.read()
output_str = cc.convert(input_str)
with io.open(args.output if args.output else 1, 'w',
encoding=args.out_enc) as f:
f.write(output_str)
return 0