How to use the flair.data.Sentence class in flair

To help you get started, we’ve selected a few flair examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github flairNLP / flair / tests / test_data.py View on Github external
assert text == sentence.to_original_text()

    # Irregular internal spacing must survive the round trip through the
    # segtok tokenizer: to_original_text() has to reproduce the input exactly.
    text = ":    nation on"
    sentence = Sentence(text, use_tokenizer=segtok_tokenizer)
    assert text == sentence.to_original_text()

    # Plain whitespace tokenization (no custom tokenizer) round-trips too.
    text = "I love Berlin."
    sentence = Sentence(text)
    assert text == sentence.to_original_text()

    # Pre-tokenized German text (quotes and punctuation already separated
    # by spaces) round-trips with the default tokenizer...
    text = 'Schartau sagte dem " Tagesspiegel " vom Freitag , Fischer sei " in einer Weise aufgetreten , die alles andere als überzeugend war " .'
    sentence = Sentence(text)
    assert text == sentence.to_original_text()

    # ...and with the segtok tokenizer as well.
    text = 'Schartau sagte dem " Tagesspiegel " vom Freitag , Fischer sei " in einer Weise aufgetreten , die alles andere als überzeugend war " .'
    sentence = Sentence(text, use_tokenizer=segtok_tokenizer)
    assert text == sentence.to_original_text()
github flairNLP / flair / tests / test_language_model_trainer.py View on Github external
# init forward LM with 128 hidden states and 1 layer
    language_model: LanguageModel = LanguageModel(dictionary, is_forward_lm=True, hidden_size=128, nlayers=1)

    # get the example corpus and process at character level in forward direction
    # (direction is taken from the model so corpus and model always agree)
    corpus: TextCorpus = TextCorpus(str(Path(__file__).parent / 'resources/corpora/lorem_ipsum'),
                                    dictionary,
                                    language_model.is_forward_lm,
                                    character_level=True)

    # train the language model; small sequence length / batch size / epoch
    # count keep this test fast
    trainer: LanguageModelTrainer = LanguageModelTrainer(language_model, corpus)
    trainer.train('./results', sequence_length=10, mini_batch_size=10, max_epochs=5)

    # use the character LM as embeddings to embed the example sentence 'I love Berlin'
    # (loads the best checkpoint written by the trainer above)
    char_lm_embeddings = CharLMEmbeddings('./results/best-lm.pt')
    sentence = Sentence('I love Berlin')
    char_lm_embeddings.embed(sentence)
    print(sentence[1].embedding.size())

    # clean up results directory so repeated runs start from a clean state
    shutil.rmtree('./results', ignore_errors=True)
github flairNLP / flair / tests / test_transformer_embeddings.py View on Github external
def embed_sentence(
        sentence: str,
        pooling_operation,
        layers: str = "1",
        use_scalar_mix: bool = False,
    ) -> Sentence:
        """Embed the given text with CamembertEmbeddings and return the Sentence."""
        flair_sentence = Sentence(sentence)
        encoder = CamembertEmbeddings(
            pretrained_model_name_or_path=camembert_model,
            layers=layers,
            pooling_operation=pooling_operation,
            use_scalar_mix=use_scalar_mix,
        )
        # embed() attaches the vectors to the tokens in place
        encoder.embed(flair_sentence)
        return flair_sentence
github flairNLP / flair / tests / test_transformer_embeddings.py View on Github external
def embed_sentence(
        sentence: str,
        pooling_operation,
        layers: str = "1",
        use_scalar_mix: bool = False,
    ) -> Sentence:
        """Embed the given text with OpenAIGPTEmbeddings and return the Sentence."""
        flair_sentence = Sentence(sentence)
        encoder = OpenAIGPTEmbeddings(
            pretrained_model_name_or_path=gpt_model,
            layers=layers,
            pooling_operation=pooling_operation,
            use_scalar_mix=use_scalar_mix,
        )
        # embed() attaches the vectors to the tokens in place
        encoder.embed(flair_sentence)
        return flair_sentence
github flairNLP / flair / tests / test_transformer_embeddings.py View on Github external
def embed_sentence(
        sentence: str,
        pooling_operation,
        layers: str = "1",
        use_scalar_mix: bool = False,
    ) -> Sentence:
        """Embed the given text with RoBERTaEmbeddings and return the Sentence."""
        flair_sentence = Sentence(sentence)
        encoder = RoBERTaEmbeddings(
            pretrained_model_name_or_path=roberta_model,
            layers=layers,
            pooling_operation=pooling_operation,
            use_scalar_mix=use_scalar_mix,
        )
        # embed() attaches the vectors to the tokens in place
        encoder.embed(flair_sentence)
        return flair_sentence
github flairNLP / flair / tests / test_data.py View on Github external
def test_sentence_to_real_string(tasks_base_path):
    """Check that to_plain_string() re-attaches punctuation/quotes while
    to_tokenized_string() keeps tokens space-separated."""
    sentence: Sentence = Sentence("I love Berlin.", use_tokenizer=segtok_tokenizer)
    assert "I love Berlin." == sentence.to_plain_string()

    corpus = flair.datasets.GERMEVAL(base_path=tasks_base_path)

    # First GermEval training sentence: tokenized form keeps a space before
    # every token, plain form restores natural spacing around quotes/commas.
    sentence = corpus.train[0]
    assert (
        'Schartau sagte dem " Tagesspiegel " vom Freitag , Fischer sei " in einer Weise aufgetreten , die alles andere als überzeugend war " .'
        == sentence.to_tokenized_string()
    )
    assert (
        'Schartau sagte dem "Tagesspiegel" vom Freitag, Fischer sei "in einer Weise aufgetreten, die alles andere als überzeugend war".'
        == sentence.to_plain_string()
    )

    # Second training sentence (assertion continues past this excerpt).
    sentence = corpus.train[1]
    assert (
github flairNLP / flair / tests / test_transformer_embeddings.py View on Github external
def embed_sentence(
        sentence: str,
        pooling_operation,
        layers: str = "1",
        use_scalar_mix: bool = False,
    ) -> Sentence:
        """Embed the given text with OpenAIGPT2Embeddings and return the Sentence."""
        flair_sentence = Sentence(sentence)
        encoder = OpenAIGPT2Embeddings(
            pretrained_model_name_or_path=gpt_model,
            layers=layers,
            pooling_operation=pooling_operation,
            use_scalar_mix=use_scalar_mix,
        )
        # embed() attaches the vectors to the tokens in place
        encoder.embed(flair_sentence)
        return flair_sentence
github dcavar / Flair-JSON-NLP / flairjsonnlp / __init__.py View on Github external
def get_sentences(text, lang, use_ontonotes, fast, use_embeddings, char_embeddings, bpe_size, expressions, pos, sentiment) -> List[Sentence]:
        """Process text using Flair and return the output from Flair"""

        supported_langs = ('en', 'multi', 'de', 'nl', 'fr')
        if lang not in supported_langs:
            raise TypeError(
                f'{lang} is not supported! Try multi. See https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_2_TAGGING.md')

        # Tokenize: one flair Sentence per segmented sentence, preserving
        # each token's offset and trailing-whitespace information.
        sentences = []
        for seg in segment(text):
            flair_sent = Sentence()
            for tok in seg:
                flair_sent.add_token(Token(tok.value, start_position=tok.offset, whitespace_after=tok.space_after))
            sentences.append(flair_sent)

        # Run every requested tagger over the sentences in place.
        for model in get_models(lang=lang, use_ontonotes=use_ontonotes, fast=fast, expressions=expressions, pos=pos, sentiment=sentiment):
            model.predict(sentences)

        # Attach embeddings when any embedding option was requested.
        if use_embeddings or char_embeddings or bpe_size > 0:
            embedding_names = [e.strip() for e in use_embeddings.split(',')]
            get_embeddings(embedding_names, char_embeddings, lang, bpe_size).embed(sentences)

        return sentences
github fishjh2 / merge_label / mg_lb / data_loading / fl_embeds.py View on Github external
def sent_to_flair(sent):
    """
    Convert a tokenized sentence (list of words) to a Flair Sentence object.

    Args:
        sent: iterable of word strings.

    Returns:
        A flair Sentence containing one Token per word, with whitespace
        information inferred from the token texts.
    """
    sentence = Sentence()

    for w in sent:
        sentence.add_token(Token(w))

    # infer_space_after() scans the whole token list; the original called it
    # inside the loop, redoing that O(n) pass for every token added. Calling
    # it once after all tokens are in produces the same final state.
    sentence.infer_space_after()

    return sentence
github flairNLP / flair / flair / datasets.py View on Github external
def __getitem__(self, index: int = 0) -> Sentence:
        # Build the Sentence lazily from the stored raw text at *index*,
        # using the tokenizer configured on this dataset.
        return Sentence(self.texts[index], use_tokenizer=self.use_tokenizer)