Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Frequency thresholds; per the help strings, items appearing fewer times
# than the given cut-off are not allowed. Both default to 5.
parser.add_argument(
    '--afix-freq-cut',
    type=int,
    default=5,
    help='only allow afixes which appear >= freq-cut',
)
parser.add_argument(
    '--char-freq-cut',
    type=int,
    default=5,
    help='only allow characters which appear >= freq-cut',
)
# Selects which dataset is built; 'train' unless stated otherwise.
parser.add_argument('--mode', choices=['train', 'test'], default='train')
args = parser.parse_args()

# Anything other than 'train' (only 'test', given the choices above) goes to
# the test-data builder.
build_dataset = (
    TrainingDataCreator.create_traindata
    if args.mode == 'train'
    else TrainingDataCreator.create_testdata
)
build_dataset(args)
def convert_keyaki_to_json(keyakipath):
    """Return the samples built from the file at ``keyakipath``.

    Module-level convenience wrapper: it forwards ``keyakipath`` to
    ``TrainingDataCreator.convert_json`` and hands back that call's result
    unchanged.
    """
    samples = TrainingDataCreator.convert_json(keyakipath)
    return samples
def create_testdata(args):
    """Create samples from the input trees and dump them to ``testdata.json``.

    Reads ``args.PATH``, ``args.word_freq_cut``, ``args.char_freq_cut`` and
    ``args.cat_freq_cut`` to construct a ``TrainingDataCreator``, loads every
    tree yielded by ``read_keyaki`` for that creator's ``filepath``, builds
    the samples and writes them as JSON to ``args.OUT / 'testdata.json'``.

    NOTE(review): ``args.OUT`` must support the ``/`` operator (presumably a
    ``pathlib.Path`` -- confirm against the argument parser). This function
    is invoked as ``TrainingDataCreator.create_testdata(args)`` elsewhere, so
    it takes no bound receiver; the creator instance is built locally.
    """
    creator = TrainingDataCreator(
        args.PATH,
        args.word_freq_cut,
        args.char_freq_cut,
        args.cat_freq_cut,
    )

    # TODO (carried over): a disabled alternative parsed the file line by
    # line with KeyakiParser and skipped lines that raised; it was never
    # enabled, so read_keyaki remains the only loader.
    trees = list(read_keyaki(creator.filepath))
    creator._create_samples(trees)

    out_path = args.OUT / 'testdata.json'
    with open(out_path, 'w') as f:
        logger.info(f'writing to {f.name}')
        json.dump(creator.samples, f)
default=5,
help='only allow afixes which appear >= freq-cut')
# Per the help string, characters appearing fewer times than this cut-off
# are not allowed. Defaults to 5.
parser.add_argument(
    '--char-freq-cut',
    type=int,
    default=5,
    help='only allow characters which appear >= freq-cut',
)
# Selects which dataset is built; 'train' unless stated otherwise.
parser.add_argument('--mode', choices=['train', 'test'], default='train')
args = parser.parse_args()

# Anything other than 'train' (only 'test', given the choices above) goes to
# the test-data builder.
build_dataset = (
    TrainingDataCreator.create_traindata
    if args.mode == 'train'
    else TrainingDataCreator.create_testdata
)
build_dataset(args)
def create_traindata(args):
    """Traverse the input trees and create training samples from them.

    Reads ``args.PATH``, ``args.word_freq_cut``, ``args.char_freq_cut`` and
    ``args.cat_freq_cut`` to construct a ``TrainingDataCreator``, loads every
    tree yielded by ``read_keyaki`` for that creator's ``filepath``, calls
    ``_traverse`` on each tree, and only then calls ``_create_samples`` on
    the full list.

    NOTE(review): the visible body neither returns nor writes anything after
    sample creation -- confirm whether further steps belong here. It is
    invoked as ``TrainingDataCreator.create_traindata(args)`` elsewhere, so
    it takes no bound receiver; the creator instance is built locally.
    """
    creator = TrainingDataCreator(
        args.PATH,
        args.word_freq_cut,
        args.char_freq_cut,
        args.cat_freq_cut,
    )

    # TODO (carried over): a disabled alternative parsed the file line by
    # line with KeyakiParser and skipped lines that raised; it was never
    # enabled, so read_keyaki remains the only loader.
    trees = list(read_keyaki(creator.filepath))

    # Every tree is traversed before any sample is created.
    for parsed_tree in trees:
        creator._traverse(parsed_tree)

    creator._create_samples(trees)
def convert_json(autopath):
    """Build and return the samples for the trees found at ``autopath``.

    Constructs a ``TrainingDataCreator`` with ``autopath`` and ``None`` for
    its three remaining constructor arguments, loads every tree yielded by
    ``read_keyaki`` for that creator's ``filepath``, logs how many trees were
    loaded, creates the samples and returns the creator's ``samples``.

    NOTE(review): ``create_testdata`` and ``create_traindata`` pass
    ``word_freq_cut``/``char_freq_cut``/``cat_freq_cut`` in those three
    positions, so the ``None`` values here presumably stand for the
    frequency cut-offs -- confirm against the constructor.
    """
    creator = TrainingDataCreator(autopath, None, None, None)
    trees = list(read_keyaki(creator.filepath))
    logger.info(f'loaded {len(trees)} trees')
    creator._create_samples(trees)
    return creator.samples