How to use the sumy.summarizers.text_rank.TextRankSummarizer class in sumy

To help you get started, we’ve selected a few sumy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github miso-belica / sumy / tests / test_summarizers / test_text_rank.py View on Github external
def test_empty_document(self):
        # A document built without any sentences must produce an empty
        # summary, even though ten sentences are requested.
        summarizer = TextRankSummarizer(Stemmer("english"))
        empty_document = build_document()

        summary = summarizer(empty_document, 10)
        self.assertEqual(len(summary), 0)
github miso-belica / sumy / tests / test_summarizers / test_text_rank.py View on Github external
def test_three_sentences_but_second_winner(self):
        # Three sentences compete for a one-sentence summary; the second
        # sentence is the one expected to be selected.
        winner = "And I am 2. sentence - winning sentence"
        document = build_document([
            "I am that 1. sentence",
            winner,
            "And I am 3. sentence - winner is my 2nd name",
        ])

        summarizer = TextRankSummarizer()
        summarizer.stop_words = ["I", "am", "and", "that"]

        summary = summarizer(document, 1)
        self.assertEqual(len(summary), 1)
        self.assertEqual(to_unicode(summary[0]), winner)
github miso-belica / sumy / tests / test_summarizers / test_text_rank.py View on Github external
def test_single_sentence(self):
        # A one-sentence document yields exactly that one sentence, even
        # when ten sentences are requested.
        summarizer = TextRankSummarizer()
        summarizer.stop_words = ("I", "am")
        single_sentence_document = build_document(("I am one sentence",))

        summary = summarizer(single_sentence_document, 10)
        self.assertEqual(len(summary), 1)
github miso-belica / sumy / tests / test_summarizers / test_text_rank.py View on Github external
def test_sentences_rating(self):
        # The middle sentence shares words with both of the others, so it is
        # expected to be rated highest; the first sentence is expected to
        # out-rate the last one.
        document = build_document([
            "a c e g",
            "a b c d e f g",
            "b d f",
        ])

        summarizer = TextRankSummarizer()
        summarizer.stop_words = ["I", "am", "and", "that"]

        ratings = summarizer.rate_sentences(document)
        self.assertEqual(len(ratings), 3)

        sentences = document.sentences
        first, middle, last = sentences[0], sentences[1], sentences[2]
        self.assertTrue(ratings[middle] > ratings[first])
        self.assertTrue(ratings[first] > ratings[last])
github miso-belica / sumy / sumy / evaluation / __main__.py View on Github external
def build_text_rank(parser, language):
    """Create a TextRank summarizer set up for ``language``.

    The summarizer is given a stemmer and the stop-word list for the same
    language. ``parser`` is accepted but not used by this builder.
    """
    stemmer = Stemmer(language)
    text_rank = TextRankSummarizer(stemmer)
    text_rank.stop_words = get_stop_words(language)
    return text_rank
github megansquire / masteringDM / ch7 / sumySummarize.py View on Github external
from sumy.utils import get_stop_words

# Language used for tokenizing, stemming and stop-word lookup.
LANGUAGE = "english"
# Number of sentences requested from every summarizer below.
SENTENCES_COUNT = 4

# The input file is parsed once; all summarizers work on the same document
# and share one stemmer.
parser = PlaintextParser.from_file("sampleText.txt", Tokenizer(LANGUAGE))
stemmer = Stemmer(LANGUAGE)

print("\n====== Luhn ======")
summarizerLuhn = LuhnSummarizer(stemmer)
summarizerLuhn.stop_words = get_stop_words(LANGUAGE)
for sentenceLuhn in summarizerLuhn(parser.document, SENTENCES_COUNT):
    print(sentenceLuhn, "\n")

print("====== TextRank ======")
summarizerTR = TextRankSummarizer(stemmer)
summarizerTR.stop_words = get_stop_words(LANGUAGE)
for sentenceTR in summarizerTR(parser.document, SENTENCES_COUNT):
    print(sentenceTR, "\n")

print("====== LSA ======")
summarizerLSA = LsaSummarizer(stemmer)
summarizerLSA.stop_words = get_stop_words(LANGUAGE)
for sentenceLSA in summarizerLSA(parser.document, SENTENCES_COUNT):
    print(sentenceLSA, "\n")

print("====== Edmonson ======")
summarizerEd = EdmundsonSummarizer(stemmer)
# This summarizer is configured with three explicit word collections
# instead of a stop-word list.
summarizerEd.bonus_words = ('focus', 'proposed', 'method', 'describes')
# The trailing comma is required: ('example') is just the string 'example',
# which would be consumed character by character if iterated as a
# collection of words.
summarizerEd.stigma_words = ('example',)
summarizerEd.null_words = ('literature', 'however')
for sentenceEd in summarizerEd(parser.document, SENTENCES_COUNT):
github dataiku / dataiku-contrib / text-summarization / custom-recipes / text-summarization-compute / recipe.py View on Github external
def summarize(text):
    """Return a summary of ``text`` as one space-joined string.

    Returns an empty string when ``isvalid(text)`` is false. The number of
    sentences requested is taken from the module-level ``n_sentences``.
    """
    if not isvalid(text):
        return ''

    # All-capital input would otherwise give an empty summary, so it is
    # lower-cased for summarization and upper-cased again on the way out.
    was_all_capital = text.upper() == text
    if was_all_capital:
        text = text.lower()

    # On Python 2 the text is decoded as ASCII first, dropping anything that
    # cannot be decoded; on Python 3 it is used as-is.
    if sys.version_info > (3, 0):
        source = text
    else:
        source = text.decode('ascii', errors='ignore')
    parser = PlaintextParser.from_string(source, Tokenizer(LANGUAGE))

    summarizer = Summarizer(Stemmer(LANGUAGE))
    summarizer.stop_words = get_stop_words(LANGUAGE)

    summary = ' '.join(
        str(sentence)
        for sentence in summarizer(parser.document, sentences_count=n_sentences)
    )
    return summary.upper() if was_all_capital else summary
github miso-belica / sumy / sumy / __main__.py View on Github external
from .summarizers.text_rank import TextRankSummarizer
from .summarizers.lex_rank import LexRankSummarizer
from .summarizers.sum_basic import SumBasicSummarizer
from .summarizers.kl import KLSummarizer
from .nlp.stemmers import Stemmer

# Maps a document format name to the parser class that handles it.
PARSERS = {
    "html": HtmlParser,
    "plaintext": PlaintextParser,
}

# Maps a summarization method name to the summarizer class implementing it.
# NOTE(review): presumably both tables are consulted while handling the
# command-line arguments — confirm against handle_arguments.
AVAILABLE_METHODS = {
    "luhn": LuhnSummarizer,
    "edmundson": EdmundsonSummarizer,
    "lsa": LsaSummarizer,
    "text-rank": TextRankSummarizer,
    "lex-rank": LexRankSummarizer,
    "sum-basic": SumBasicSummarizer,
    "kl": KLSummarizer,
}


def main(args=None):
    """Parse the arguments and print each sentence of the resulting summary.

    ``args`` is handed to docopt together with the module docstring; the
    parsed options are turned into a summarizer, a parser and an item count
    by ``handle_arguments``.
    """
    options = docopt(to_string(__doc__), args, version=__version__)
    summarizer, parser, items_count = handle_arguments(options)

    # Sentences are printed as unicode on Python 3 and as bytes otherwise.
    render = to_unicode if PY3 else to_bytes
    for sentence in summarizer(parser.document, items_count):
        print(render(sentence))