# Imports assume GluonNLP's data API, where these tokenizer types are registered.
from gluonnlp.data import Vocab, tokenizers


def create_tokenizer(tokenizer_type, model_path, vocab_path):
    """Build a tokenizer of the requested type from its model and vocabulary files."""
    if tokenizer_type == 'whitespace':
        # Whitespace tokenization only needs a vocabulary, loaded from disk.
        return tokenizers.create(tokenizer_type, vocab=Vocab.load(vocab_path))
    elif tokenizer_type == 'spm':
        return tokenizers.create(tokenizer_type, model_path=model_path, vocab=vocab_path)
    elif tokenizer_type == 'subword_nmt':
        return tokenizers.create(tokenizer_type, codec_path=model_path, vocab_path=vocab_path)
    elif tokenizer_type == 'yttm':
        # YouTokenToMe stores its vocabulary inside the model file itself.
        return tokenizers.create(tokenizer_type, model_path=model_path)
    elif tokenizer_type in ('hf_bytebpe', 'hf_bpe'):
        # Both HuggingFace BPE variants take a merges file plus a vocab file.
        return tokenizers.create(tokenizer_type, merges_file=model_path, vocab_file=vocab_path)
    elif tokenizer_type == 'hf_wordpiece':
        return tokenizers.create(tokenizer_type, vocab_file=vocab_path)
    else:
        raise NotImplementedError(f'Unsupported tokenizer type: {tokenizer_type}')
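For illustration, a minimal call sketch follows; the file paths are hypothetical, and the encode call assumes the tokenizer interface that GluonNLP's registered tokenizers expose.

# Hypothetical paths for a trained SentencePiece model and its vocabulary.
tokenizer = create_tokenizer('spm', 'spm.model', 'spm.vocab')
tokens = tokenizer.encode('Hello world!')  # assumes the tokenizer provides encode()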