def test_stats():
    # make_spacy_doc, nlp, and SPACY_MODEL come from the surrounding test module;
    # SPACY_MODEL names an installed spaCy pipeline such as "en_core_web_sm".
    text = "the quick fox and the cat. The turtle and the rabbit."
    doc = make_spacy_doc(text, lang=SPACY_MODEL)
    stats = nlp.compute_stats(doc)
    assert stats.counts.sentences == 2  # two sentences in the sample text
    assert stats.counts.words == 11
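Every snippet on this page centers on textacy.make_spacy_doc, which runs a spaCy pipeline over raw text and returns a processed Doc. A minimal standalone call, assuming textacy is installed and spaCy's small English model has been downloaded:

import textacy

# lang may be the name of any installed spaCy pipeline (or a loaded Language object)
doc = textacy.make_spacy_doc("The quick fox and the cat.", lang="en_core_web_sm")
print(len(list(doc.sents)))  # 1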
def empty_spacy_doc():
    return textacy.make_spacy_doc("", lang="en")
def test_terms():
    text = "the quick fox and the cat. The turtle and the rabbit."
    doc = make_spacy_doc(text, lang=SPACY_MODEL)
    terms = nlp.extract_key_terms(doc, num_terms=5)
    terms = [t[0] for t in terms]  # remove scores
    assert 'fox' in terms
    assert 'cat' in terms
    assert 'turtle' in terms
    assert 'rabbit' in terms
def spacy_doc():
    # grab a single long document from textacy's bundled CapitolWords dataset
    ds = datasets.CapitolWords()
    text = next(ds.texts(min_len=1500, limit=1))
    return textacy.make_spacy_doc(text, lang="en")
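The two functions above read like pytest fixtures with their decorators stripped. A sketch of how they would typically be registered, assuming pytest and a downloaded copy of the CapitolWords dataset:

import pytest
import textacy
from textacy import datasets

@pytest.fixture(scope="module")
def empty_spacy_doc():
    return textacy.make_spacy_doc("", lang="en")

@pytest.fixture(scope="module")
def spacy_doc():
    ds = datasets.CapitolWords()
    ds.download()  # no-op if the dataset is already cached locally
    text = next(ds.texts(min_len=1500, limit=1))
    return textacy.make_spacy_doc(text, lang="en")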
def filter_content(self, titles):
    if self._content is not None:
        ct = 0
        for document in self._content.documents:
            metadata = {}
            try:
                if any(self._content.titles[ct] in s for s in titles):
                    metadata['title'] = self._content.titles[ct]
                    # strip punctuation, mask numbers as 'NUM', then lowercase
                    doc_text = preprocessing.replace.replace_numbers(
                        preprocessing.remove.remove_punctuation(document), 'NUM').lower()
                    doc = textacy.make_spacy_doc((doc_text, metadata), lang=self._en)
                    self._corpus.add_doc(doc)
            except IndexError:
                metadata['title'] = 'Empty'
            ct += 1
        self.load_matrix()
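The nested remove/replace calls above do three steps in one expression: strip punctuation, mask numbers as 'NUM', and lowercase. In later textacy releases (0.11+) the same helpers are named preprocessing.remove.punctuation and preprocessing.replace.numbers, and a make_pipeline combinator composes them; a sketch under that assumption:

from textacy import preprocessing

# same steps, composed once and reused per document
preproc = preprocessing.make_pipeline(
    preprocessing.remove.punctuation,
    lambda text: preprocessing.replace.numbers(text, "NUM"),
)
doc_text = preproc(document).lower()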
def main(input_file):
    # data.content returns the entire text; data.documents returns the list of documents
    data = ReadData()
    data.read_file(input_file)
    q = Qrmine()
    all_interviews = Content(data.content)
    q.content = data

    ## Summary
    click.echo(" ".join(all_interviews.generate_summary(2)))
    click.echo("_________________________________________")

    doc = textacy.make_spacy_doc(all_interviews.doc)

    ## Sentiment
    s = Sentiment()
    x = []
    for sentence in doc.sents:
        if len(sentence) > 3:  # skip very short sentences
            x.append(sentence.text)
            sent = s.sentiment_analyzer_scores(sentence.text)
            click.echo("{:-<40} {}\n".format(sent["sentence"], str(sent["score"])))
            click.echo("{:-<40} {}\n".format(sentence.text, str(s.similarity(sentence.text, "Dummy sentence"))))

    ## Network
    n = Network()
    click.echo(n.sents_to_network(x))
    # n.draw_graph(True)
    click.echo(n.draw_graph(False))
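The block below references tags, num, data, and q, which do not appear in the main(input_file) signature shown above; in a click application they would arrive as command-line options. A hypothetical wiring, with option names invented for illustration:

@click.command()
@click.option('--tags', '-t', multiple=True, help='interview titles to filter on')
@click.option('--num', '-n', default=10, help='number of categories to print')
@click.argument('input_file', type=click.Path(exists=True))
def main(input_file, tags, num):
    ...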
if len(tags) > 0:
    ct = 0
    content = ''  # initialized so Content() still receives a string if no title matches
    for title in data.titles:
        for tag in tags:
            if title == tag:
                click.echo(tag)
                content = data.documents[ct]  # keep the matching document
        ct += 1
    interview = Content(content)
    doc = textacy.make_spacy_doc(interview.doc)
    return q.print_categories(doc, num)
else:
    all_interviews = Content(data.content)
    doc = textacy.make_spacy_doc(all_interviews.doc)
    return q.print_categories(doc, num)