How to use the sumy.nlp.stemmers.Stemmer function in sumy

To help you get started, we’ve selected a few sumy examples based on popular ways it is used in public projects.

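The pattern is the same across all of these projects: construct a Stemmer with a language name and hand it to a summarizer, which uses it to normalize words before scoring sentences. A minimal end-to-end sketch (the input text and sentence count are placeholders):

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.utils import get_stop_words

LANGUAGE = "english"
TEXT = ("Sumy extracts the most important sentences from a document. "
        "It implements several summarization algorithms. "
        "Each of them accepts a stemmer for word normalization.")

parser = PlaintextParser.from_string(TEXT, Tokenizer(LANGUAGE))
summarizer = LsaSummarizer(Stemmer(LANGUAGE))  # the stemmer reduces words to their stems before scoring
summarizer.stop_words = get_stop_words(LANGUAGE)

for sentence in summarizer(parser.document, 1):
    print(sentence)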

github miso-belica / sumy / tests / test_summarizers / test_reduction.py View on Github
def test_empty_document():
    document = build_document()
    summarizer = ReductionSummarizer(Stemmer("english"))

    returned = summarizer(document, 10)
    assert len(returned) == 0
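As the test shows, a summarizer only needs Stemmer("english") to handle word normalization, and an empty document yields an empty summary. A Stemmer instance is also callable on individual words; a quick sketch (the sample word is illustrative, and "english" maps to NLTK's Snowball stemmer in sumy's defaults):

from sumy.nlp.stemmers import Stemmer

stemmer = Stemmer("english")
print(stemmer("running"))  # "run" under the Snowball English stemmer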
github megansquire / masteringDM / ch7 / sumySummarize.py View on Github
"""
@author: megan squire
"""
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.edmundson import EdmundsonSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

LANGUAGE = "english"
SENTENCES_COUNT = 4

parser = PlaintextParser.from_file("sampleText.txt", Tokenizer(LANGUAGE))
stemmer = Stemmer(LANGUAGE)

print("\n====== Luhn ======")
summarizerLuhn = LuhnSummarizer(stemmer)
summarizerLuhn.stop_words = get_stop_words(LANGUAGE)
for sentenceLuhn in summarizerLuhn(parser.document, SENTENCES_COUNT):
    print(sentenceLuhn, "\n")

print("====== TextRank ======")
summarizerTR = TextRankSummarizer(stemmer)
summarizerTR.stop_words = get_stop_words(LANGUAGE)
for sentenceTR in summarizerTR(parser.document, SENTENCES_COUNT):
    print(sentenceTR, "\n")

print("====== LSA ======")
summarizerLSA = LsaSummarizer(stemmer)
summarizerLSA.stop_words = get_stop_words(LANGUAGE)
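The script also imports EdmundsonSummarizer, but the excerpt stops before reaching it. If you extend the script, note that the Edmundson method refuses to run until its word lists are set; a hedged sketch (the bonus and stigma tuples are illustrative, not from the original script):

print("====== Edmundson ======")
summarizerEd = EdmundsonSummarizer(stemmer)
summarizerEd.bonus_words = ("summary", "important")   # words that raise a sentence's score
summarizerEd.stigma_words = ("however", "although")   # words that lower it
summarizerEd.null_words = get_stop_words(LANGUAGE)    # words ignored entirely
for sentenceEd in summarizerEd(parser.document, SENTENCES_COUNT):
    print(sentenceEd, "\n")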
github jjangsangy / ExplainToMe / ExplainToMe / textrank.py View on Github
def run_summarizer(parser, sentences, language='english'):
    """
    :param parser: Parser for the selected document type.
    :param sentences: Maximum number of sentences in the summary.
    :param language: Language used for stemming and stop words.

    :returns summary: Summarized page.
    """
    summarizer = Summarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)
    return [str(sentence)
            for sentence in summarizer(parser.document, sentences)]
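Summarizer and get_stop_words come from imports outside this excerpt; given the module name, Summarizer is presumably an alias for sumy's TextRankSummarizer (an assumption, not confirmed by the snippet). A hypothetical call with an HTML source:

from sumy.parsers.html import HtmlParser
from sumy.nlp.tokenizers import Tokenizer

parser = HtmlParser.from_url("https://example.com/article", Tokenizer("english"))  # placeholder URL
print("\n".join(run_summarizer(parser, sentences=10)))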
github soprasteria / cybersecurity-dfm / dfm / feed.py View on Github
        except Exception as e:
            results.add_error({'url': url, 'lib': last_lib, 'message': str(e)})

        # detect the language of the text
        try:
            lang_detect = detect(text)
        except Exception as e:
            results.add_error({'url': url, 'lib': last_lib, 'message': str(e)})
            lang_detect = ""

        # generate the summary
        sumy_summary = ""
        sum_title = ""
        if lang_detect != "":
            parser = PlaintextParser.from_string(text, Tokenizer(self.LANGUAGES[lang_detect]))
            stemmer = Stemmer(self.LANGUAGES[lang_detect])

            summarizer = Summarizer(stemmer)
            summarizer.stop_words = get_stop_words(self.LANGUAGES[lang_detect])

            # build the title from the first summary sentence
            try:
                for sentence in summarizer(parser.document, 1):
                    sum_title += sentence.__unicode__()
                # build the summary itself
                for sentence in summarizer(parser.document, self.SENTENCES_COUNT):
                    sumy_summary += sentence.__unicode__() + u"\n"
            except Exception:
                sumy_summary = ""

        doc = {"link": url, "content": [{"base": url, "language": lang_detect}]}
github miso-belica / sumy / sumy / __main__.py View on Github
        parser = PARSERS[document_format or "plaintext"]
        document_content = args["--text"]
    else:
        parser = PARSERS[document_format or "plaintext"]
        document_content = default_input_stream.read()

    items_count = ItemsCount(args["--length"])

    language = args["--language"]
    if args['--stopwords']:
        stop_words = read_stop_words(args['--stopwords'])
    else:
        stop_words = get_stop_words(language)

    parser = parser(document_content, Tokenizer(language))
    stemmer = Stemmer(language)

    summarizer_class = next(cls for name, cls in AVAILABLE_METHODS.items() if args[name])
    summarizer = build_summarizer(summarizer_class, stop_words, stemmer, parser)

    return summarizer, parser, items_count
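One detail worth noting: the --length argument is wrapped in ItemsCount, which accepts either an absolute count or a percentage of the document. A quick sketch of that behavior (assuming ItemsCount's slicing semantics, with a placeholder list):

from sumy.utils import ItemsCount

sentences = ["s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10"]
print(ItemsCount("3")(sentences))    # first three items
print(ItemsCount("20%")(sentences))  # first 20 percent, i.e. two items here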
github jim-schwoebel / voicebook / chapter_5_generation / generate_summary.py View on Github
    summarizer.stop_words = get_stop_words(LANGUAGE)

    # now summarize: output as [txtfile]_summary.txt
    g=open(textfile[0:-4]+'_summary.txt','w')
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)
        g.write(str(sentence))
    g.close()
    os.system('open %s'%(textfile[0:-4]+'_summary.txt'))
elif ftype in ['w']:
    # for URLS
    url=input('what link would you like to summarize on Wikipedia? \n')
    parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))
    # for plaintext
    #parser = PlaintextParser.from_file("poetry.txt", Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    # now summarize: output as [txtfile]_summary.txt
    g=open('web_summary.txt','w')
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)
        g.write(str(sentence))
    g.close()
    os.system('open web_summary.txt')
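A portability note: os.system('open ...') only works on macOS. A sketch of a cross-platform alternative using only the standard library:

import os
import webbrowser

webbrowser.open("file://" + os.path.abspath("web_summary.txt"))  # opens with the OS default handler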
github hoaxanalyzer / hoax-search-vote / query_builder / backup / summarization.py View on Github
def main():
    filename = sys.argv[1]

    # read the raw bytes so non-ASCII characters can be replaced below
    fp = open(filename, "rb")
    content = fp.read()
    fp.close()

    # rewrite the file with non-ASCII characters replaced by spaces
    fp2 = open(filename, "w")
    result = content.decode("ascii", "replace").replace(u"\ufffd", " ")
    fp2.write(result)
    fp2.close()

    parser = PlaintextParser.from_file(filename, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)

    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)
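This excerpt relies on imports and constants defined earlier in the file. Based on sumy's documented usage, the missing preamble presumably looks something like the following (the Summarizer alias and the constant values are assumptions):

import sys

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer as Summarizer
from sumy.utils import get_stop_words

LANGUAGE = "english"
SENTENCES_COUNT = 10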