Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
similarity_algorithm=2, filtering_algorithm=1,
number_of_terms=1000, simplify_terms=False, model=None,
data_dump_path=None):
"""returns a pair similarity dictionary for the map and set of terms in the map. Heatmap can
be calculated separately and then overlaid. Will need to convert dictionary representation
to dot file format"""
flattened = flatten(structured_nps)
set_status('ranking terms', model=model)
if start_words is not None:
# start words should be a list like ["machine learning", "artificial intelligence"]
start_words = [tuple(s.split()) for s in start_words]
ranked_phrases, phrase_frequencies, scored_phrases = call_rank(ranking_algorithm, flattened, number_of_terms, start_words=start_words, model=model)
else:
ranked_phrases, phrase_frequencies, scored_phrases = call_rank(ranking_algorithm, flattened, number_of_terms, model=model)
if simplify_terms:
structured_nps = simplification.term_replacement(structured_nps, ranked_phrases)
set_status('calculating similarity', model=model)
sim_matrix, phrase_lookups = call_similarity(similarity_algorithm, structured_nps, ranked_phrases, model=model, status_callback=lambda s: set_status(s, model=model))
if data_dump_path:
import pickle
from os.path import join
def prefix_path(rel):
return join(data_dump_path, rel)
with open(prefix_path('sim_matrix.pickle'), 'w') as f:
pickle.dump(sim_matrix, f)
with open(prefix_path('phrase_lookups.pickle'), 'w') as f:
pickle.dump(phrase_lookups, f)
with open(prefix_path('phrase_frequencies.pickle'), 'w') as f:
pickle.dump(phrase_frequencies, f)
phrase_pairs = call_filter(filtering_algorithm, sim_matrix, phrase_lookups, model=model)
normed = similarity.similarity_dict_to_distance(phrase_pairs)
# build set of terms in graph