How to use the sumy.parsers.plaintext.PlaintextParser.from_string function in sumy

To help you get started, we've selected a few sumy examples based on popular ways it is used in public projects.
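
Before the project snippets, here is a minimal, self-contained sketch of the pattern they all share: build a document from a raw string with PlaintextParser.from_string, pick a summarizer, and ask for a number of sentences. The sample text, language, and sentence count below are placeholders, not taken from any of the projects.

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.utils import get_stop_words

# from_string takes the raw text and a Tokenizer configured for its language.
parser = PlaintextParser.from_string(
    "Some text to summarize. Another sentence. And one more for good measure.",
    Tokenizer("english")
)
summarizer = LsaSummarizer(Stemmer("english"))
summarizer.stop_words = get_stop_words("english")

# Request the two most significant sentences.
for sentence in summarizer(parser.document, 2):
    print(sentence)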

github miso-belica / sumy / tests / test_summarizers / test_lsa.py (View on GitHub)
def test_real_example():
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    parser = PlaintextParser.from_string(
        load_resource("snippets/prevko.txt"),
        Tokenizer("czech")
    )
    summarizer = LsaSummarizer(Stemmer("czech"))
    summarizer.stop_words = get_stop_words("czech")

    sentences = summarizer(parser.document, 2)
    assert len(sentences) == 2

github miso-belica / sumy / tests / test_parsers.py (View on GitHub)
def test_parse_plaintext_long(self):
    parser = PlaintextParser.from_string("""
        Ako sa máš? Ja dobre! A ty? No
        mohlo to byť aj lepšie!!! Ale pohodička.

        TOTO JE AKOŽE NADPIS
        A toto je text pod ním, ktorý je textový.
        A tak ďalej...

        VEĽKOLEPÉ PREKVAPENIE
        Tretí odstavec v tomto texte je úplne o ničom. Ale má
        vety a to je hlavné. Takže sa majte na pozore ;-)

        A tak ďalej...


        A tak este dalej!
    """, Tokenizer("czech"))

github miso-belica / sumy / tests / test_summarizers / test_lex_rank.py (View on GitHub)
def test_article_example():
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    parser = PlaintextParser.from_string(
        load_resource("articles/prevko_cz_1.txt"),
        Tokenizer("czech")
    )
    summarizer = LexRankSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("czech")

    sentences = summarizer(parser.document, 20)
    assert len(sentences) == 20

github sidhusmart / WACAO / webwhatsapi / __init__.py (View on GitHub)
        inputLine = ''
        for message in messages:
            if '\\/' not in message:
                inputLine = inputLine + message['message'] + '. '
        # blob = TextBlob(inputLine)
        # wordCounts = blob.word_counts
        # sortedWordCounts = sorted(wordCounts, key=wordCounts.get, reverse=True)
        # outputLine = " ".join(sortedWordCounts[:5])
        # outputLine = groupName.capitalize() + " summarized as " + outputLine
        # self.send_to_whatsapp_id("WACAO!",outputLine)

        LANGUAGE = "english"
        SENTENCES_COUNT = '20%'

        outputLine = groupName.capitalize() + " summarized as: \n"
        parser = PlaintextParser.from_string(inputLine, Tokenizer(LANGUAGE))
        stemmer = Stemmer(LANGUAGE)
        summarizer = LsaSummarizer(stemmer)
        summarizer.stop_words = get_stop_words(LANGUAGE)
        # Note: this project targets Python 2, hence the unicode() call below.
        for sentence in summarizer(parser.document, SENTENCES_COUNT):
            outputLine = outputLine + unicode(str(sentence), "utf-8") + "\n"
        self.send_to_whatsapp_id("WACAO!", outputLine)
        # print "sum_basic:"

github DeFacto / DeFacto / python / trustworthiness / features_core.py (View on GitHub)
def get_summary_lex_rank(self, num_sentence):
    from sumy.parsers.plaintext import PlaintextParser  # other parsers available for HTML etc.
    from sumy.parsers.html import HtmlParser
    from sumy.nlp.tokenizers import Tokenizer
    from sumy.summarizers.lex_rank import LexRankSummarizer  # choosing LexRank; other algorithms are also built in

    # Prefer parsing the live page; fall back to the stored body text.
    try:
        parser = HtmlParser.from_url(self.url, Tokenizer("english"))
    except Exception:
        try:
            parser = PlaintextParser.from_string(self.body, Tokenizer("english"))
        except Exception as e:
            raise e

    summarizer = LexRankSummarizer()
    summary = summarizer(parser.document, num_sentence)
    out = ''
    for sentence in summary:
        out += str(sentence)
    return out

github OpenGenus / vidsum / code / sum.py (View on GitHub)
def summarize(srt_file, n_sentences, language="english"):
    """ Generate segmented summary

    Args:
        srt_file : Parsed SRT subtitles (indexable by caption number, e.g. a pysrt SubRipFile)
        n_sentences(int): Number of sentences in the summary
        language(str) : Language of subtitles (defaults to English)

    Returns:
        list: segments of subtitles

    """
    parser = PlaintextParser.from_string(
        srt_to_txt(srt_file), Tokenizer(language))
    stemmer = Stemmer(language)
    summarizer = LsaSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(language)
    segment = []
    for sentence in summarizer(parser.document, n_sentences):
        # srt_to_txt tags each caption with its index, e.g. "(42) ...", so the
        # caption number can be recovered from the summarized sentence.
        index = int(re.findall(r"\(([0-9]+)\)", str(sentence))[0])
        item = srt_file[index]
        segment.append(srt_segment_to_range(item))
    return segment