Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Run an n-gram analysis once per agent type and plot the collected results.
#
# Parameters:
#   transcripts -- passed unchanged to `grouping_fn`.
#   grouping_fn -- called as grouping_fn(transcripts, agent_type); its return
#                  value is handed to ngram.NgramAnalyzer.
#   output_dir  -- forwarded to ngram.plot_top_ngrams as `output_dir`. When
#                  None, the module-level `stats_output` is used. The
#                  directory is created if it does not exist yet.
#   n           -- forwarded to ngram.NgramAnalyzer as `n`
#                  (presumably the n-gram order -- TODO confirm).
#
# Also reads the module-level `args.agent_types`.
#
# NOTE(review): in this view the indentation has been stripped and the final
# plot_top_ngrams(...) call is cut off after `output_dir=output_dir,` -- the
# remaining arguments and the closing parenthesis are not visible here.
def analyze_ngrams(transcripts, grouping_fn=ngram.group_text_by_category, output_dir=None, n=5):
# Default to the shared stats directory when the caller gives no output_dir.
if output_dir is None:
output_dir = stats_output
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# One entry per agent type, in the same order as args.agent_types.
top_ngrams_by_agent = []
for agent_type in args.agent_types:
grouped_utterances = grouping_fn(transcripts, agent_type)
analyzer = ngram.NgramAnalyzer(grouped_utterances, n=n, agent_type=agent_type)
top_ngrams_by_cat = analyzer.analyze()
top_ngrams_by_agent.append(top_ngrams_by_cat)
# Plot the results for all agent types together.
ngram.plot_top_ngrams(top_ngrams_by_agent, agents=args.agent_types,
output_dir=output_dir,
# tf_idf_by_winner(transcripts)
# tf-idf analysis: results go under <stats_output>/tfidf/by_winner, once for
# n = 2 and once for n = 3.
tf_idf_dir = os.path.join(stats_output, 'tfidf')
if not os.path.exists(tf_idf_dir):
    os.makedirs(tf_idf_dir)
# `range` instead of the Python-2-only `xrange`: it yields the same values
# (2, 3) on Python 2 and does not raise NameError on Python 3.
n_range = range(2, 4)
for i in n_range:
    analyze_tf_idf(transcripts,
                   grouping_fn=tf_idf.group_by_category_role_winner,
                   n=i,
                   output_dir=os.path.join(tf_idf_dir, 'by_winner'))

# n-gram analysis, only when requested via args.ngram. The same transcripts
# are analyzed three times with different grouping functions, each writing to
# its own sub-directory of <stats_output>/ngram.
if args.ngram:
    ngram_dir = os.path.join(stats_output, 'ngram')
    if not os.path.exists(ngram_dir):
        os.makedirs(ngram_dir)
    analyze_ngrams(transcripts, grouping_fn=ngram.group_text_by_category,
                   output_dir=os.path.join(ngram_dir, 'by_category'))
    analyze_ngrams(transcripts, grouping_fn=ngram.group_text_by_role,
                   output_dir=os.path.join(ngram_dir, 'by_role'))
    analyze_ngrams(transcripts, grouping_fn=ngram.group_text_by_winner,
                   output_dir=os.path.join(ngram_dir, 'by_winner'))