Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def compile(args):
params = {key: value for key, value in args.compiler_params}
dict_type = args.dict_type
if dict_type == 'json':
dictionary = keyvi.JsonDictionaryCompiler(params)
elif dict_type == 'string':
dictionary = keyvi.StringDictionaryCompiler(params)
elif dict_type == 'int':
dictionary = keyvi.IntDictionaryCompiler(params)
elif dict_type == 'completion':
dictionary = keyvi.CompletionDictionaryCompiler(params)
elif dict_type == 'key-only':
dictionary = keyvi.KeyOnlyDictionaryCompiler(params)
else:
return 'Must never reach here'
with open(args.input_file) as file_in:
for line in file_in:
line = line.rstrip('\n')
try:
splits = line.split('\t')
if dict_type == 'key-only':
dictionary.Add(splits[0])
elif dict_type == 'int' or dict_type == 'completion':
dictionary.Add(splits[0], int(splits[1]))
else:
dictionary.Add(splits[0], splits[1])
if not PERMUTATION_LOOKUP_TABLE.has_key(length):
yield query
return
for permutation in PERMUTATION_LOOKUP_TABLE[len(query_tokens_bow)]:
if len(permutation) < 3:
first_token = query_tokens_bow[permutation[0]]
if first_token != query_tokens[permutation[0]] and len(first_token) == 1:
continue
yield " ".join([query_tokens_bow[i] for i in permutation]) + MULTIWORD_QUERY_SEPARATOR + query
if __name__ == '__main__':
    # Build a multi-word completion dictionary from "key<TAB>weight" lines
    # read on stdin and write it to "mw-completion.keyvi".
    #
    # Each key is passed through every stage of `pipeline` in order (the
    # output of one stage is the input of the next); every query produced by
    # the last stage is added to the compiler with the weight of its line.
    pipeline = [MultiWordPermutation()]
    c = keyvi.CompletionDictionaryCompiler()

    for line in sys.stdin:
        line = line.rstrip('\r\n')
        # A blank line carries no key/weight pair; skipping it avoids a
        # ValueError from the tuple unpacking below.
        if not line.strip():
            continue

        # Lines that do not consist of exactly two tab-separated fields
        # still raise ValueError here, as before.
        key, weight = line.split("\t")
        # Convert once per input line rather than once per generated query.
        weight = int(weight)

        # Explicit left fold over the stages; equivalent to
        # reduce(lambda x, y: y(x), pipeline, key) but does not depend on
        # the `reduce` builtin, which Python 3 no longer provides.
        queries = key
        for stage in pipeline:
            queries = stage(queries)

        for q in queries:
            c.Add(q, weight)

    c.Compile()
    c.WriteToFile("mw-completion.keyvi")