How to use the flair.models.SequenceTagger class in flair

To help you get started, we’ve selected a few flair examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github flairNLP / flair / tests / test_sequence_labeler_trainer.py View on Github external
def test_training(tasks_base_path):
    """Smoke-test tagger training end-to-end on the FASHION corpus.

    Fetches the corpus from *tasks_base_path*, trains a small non-CRF
    tagger for a few epochs in test mode, then removes the output folder.
    """
    data = NLPTaskDataFetcher.fetch_data(NLPTask.FASHION, tasks_base_path)
    label_dict = data.make_tag_dictionary('ner')

    glove = WordEmbeddings('glove')

    # Small model: single embedding source, CRF disabled to keep the run fast.
    model = SequenceTagger(
        hidden_size=256,
        embeddings=glove,
        tag_dictionary=label_dict,
        tag_type='ner',
        use_crf=False,
    )

    # test_mode=True keeps the trainer deterministic/lightweight for CI.
    trainer = SequenceTaggerTrainer(model, data, test_mode=True)
    trainer.train('./results', learning_rate=0.1, mini_batch_size=2, max_epochs=10)

    # Clean up artifacts the trainer wrote to disk.
    shutil.rmtree('./results')
github undertheseanlp / ner / egs / vlsp2016_flair / train_memory.py View on Github external
# WordEmbeddings('glove'),

    # comment in this line to use character embeddings
    # CharacterEmbeddings(),

    # comment in these lines to use contextual string embeddings
    # CharLMEmbeddings('news-forward'),
    # CharLMEmbeddings('news-backward'),
]

# embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)
# MemoryEmbeddings is used instead of the stacked embeddings above.
# NOTE(review): MemoryEmbeddings is project-local — its semantics are not
# visible in this excerpt; presumably it conditions on the tag dictionary.
embeddings = MemoryEmbeddings(tag_type=tag_type, tag_dictionary=tag_dictionary)
# 5. initialize sequence tagger
from flair.models import SequenceTagger

# CRF decoding enabled (use_crf=True) on top of the memory embeddings.
tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
                                        use_crf=True)

# 6. initialize trainer
from flair.trainers import SequenceTaggerTrainer

trainer: SequenceTaggerTrainer = SequenceTaggerTrainer(tagger, corpus)

# 7. start training
trainer.train('resources/taggers/example-ner',
              learning_rate=0.1,
              mini_batch_size=128,
              max_epochs=150)
github ELS-RD / anonymisation / flair_train.py View on Github external
corpus: Corpus = prepare_flair_train_dev_corpus(
        spacy_model=nlp, data_folder=data_folder, dev_size=dev_size, nb_segment=nb_segment, segment=segment
    )
    # Build the NER label dictionary from the corpus and print its entries.
    tag_dictionary = corpus.make_tag_dictionary(tag_type="ner")
    print(tag_dictionary.idx2item)

    # French word embeddings plus forward/backward Flair contextual embeddings.
    embedding_types: List[TokenEmbeddings] = [
        WordEmbeddings("fr"),
        FlairEmbeddings("fr-forward"),
        FlairEmbeddings("fr-backward"),
    ]

    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

    # Sequence tagger with CRF decoding (use_crf=True).
    tagger: SequenceTagger = SequenceTagger(
        hidden_size=256, embeddings=embeddings, use_crf=True, tag_dictionary=tag_dictionary, tag_type="ner"
    )

    trainer: ModelTrainer = ModelTrainer(model=tagger, corpus=corpus, use_tensorboard=False)

    # TODO optimize LR https://github.com/flairNLP/flair/blob/master/resources/docs/TUTORIAL_8_MODEL_OPTIMIZATION.md
    trainer.train(
        model_folder,
        max_epochs=nb_epochs,
        learning_rate=0.1,
        mini_batch_size=32,
        # Keep computed embeddings in CPU memory between epochs.
        embeddings_storage_mode="cpu",
        checkpoint=False,
    )
github RasaHQ / rasa / ner-evaluation / evaluation / evaluate_flair.py View on Github external
tag_type = "ner"

    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

    # GloVe word embeddings plus forward/backward Flair contextual embeddings.
    embedding_types: List[TokenEmbeddings] = [
        WordEmbeddings("glove"),
        FlairEmbeddings("news-forward"),
        FlairEmbeddings("news-backward"),
    ]

    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

    # 5. initialize sequence tagger
    from flair.models import SequenceTagger

    # CRF decoding enabled on top of the stacked embeddings.
    tagger: SequenceTagger = SequenceTagger(
        hidden_size=256,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type=tag_type,
        use_crf=True,
    )

    # 6. initialize trainer
    from flair.trainers import ModelTrainer

    trainer: ModelTrainer = ModelTrainer(tagger, corpus)

    # 7. start training
    trainer.train(model_path, learning_rate=0.1, mini_batch_size=16, max_epochs=10)
github ELS-RD / anonymisation / flair_generate_data.py View on Github external
#  under the License.
import os
import random
from typing import List

import spacy
from flair.data import Sentence, build_spacy_tokenizer
from flair.models import SequenceTagger

from ner.model_factory import get_tokenizer
from resources.config_provider import get_config_default
from xml_extractions.extract_node_values import Paragraph, get_paragraph_from_file

# Fixed seed so the file shuffle below is reproducible.
random.seed(5)

# Load a trained NER tagger from disk.
tagger: SequenceTagger = SequenceTagger.load('resources/flair_ner/ca/best-model.pt')

config_training = get_config_default()
# Blank French spaCy pipeline with the project tokenizer, wrapped for flair.
nlp = spacy.blank('fr')
nlp.tokenizer = get_tokenizer(nlp)
tokenizer = build_spacy_tokenizer(nlp)

xml_train_path = "../similar_legal_case/data/jurica_original_xml/arrets-juri"  # config_training["xml_train_path"]
# Collect every file under the XML root, then visit them in random order.
files = [os.path.join(path, name) for path, _, files in os.walk(xml_train_path) for name in files]
random.shuffle(files)

with open("./resources/training_data/generated_annotations.txt", mode='w') as generated_text:
    with open("./resources/training_data/generated_annotations.ent", mode='w') as generated_entities:
        for filename in files:
            if filename.endswith(".xml"):
                try:
                    # NOTE(review): the try-block continues beyond this excerpt.
                    print(f"--- (unknown) ---")
github RasaHQ / rasa / rasa / nlu / extractors / flair_entity_extractor.py View on Github external
tag_type = "ner"

        tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

        # Embedding stack assembled from the component configuration flags.
        embedding_types: List[TokenEmbeddings] = []

        if self.component_config["use_glove_embeddings"]:
            embedding_types.append(WordEmbeddings("glove"))

        if self.component_config["use_flair_embeddings"]:
            embedding_types.append(FlairEmbeddings("news-forward"))
            embedding_types.append(FlairEmbeddings("news-backward"))

        # NOTE(review): if both config flags are False this stack is empty —
        # presumably upstream config validation prevents that; confirm.
        embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

        # CRF tagger; hidden size comes from the component configuration.
        tagger: SequenceTagger = SequenceTagger(
            hidden_size=self.component_config["hidden_size"],
            embeddings=embeddings,
            tag_dictionary=tag_dictionary,
            tag_type=tag_type,
            use_crf=True,
        )

        trainer: ModelTrainer = ModelTrainer(tagger, corpus)

        # All training hyper-parameters are driven by the component config.
        trainer.train(
            self.model_path,
            learning_rate=self.component_config["learning_rate"],
            mini_batch_size=self.component_config["mini_batch_size"],
            max_epochs=self.component_config["max_epochs"],
        )
github ELS-RD / anonymisation / flair_generate_html_from_xml.py View on Github external
sentences: List[Sentence] = list()
    # Parse each XML file and keep up to top_n paragraphs per file as Sentences.
    with tqdm(total=len(filenames), unit=" XML", desc="Parsing XML") as progress_bar:
        for filename in filenames:
            paragraphs: List[Paragraph] = get_paragraph_from_file(
                path=os.path.join(data_folder, filename), keep_paragraph_without_annotation=True
            )
            # NOTE(review): files with <= top_n paragraphs are skipped entirely —
            # the strict `>` means short files contribute nothing; confirm intended.
            if len(paragraphs) > top_n:
                for paragraph in paragraphs[:top_n]:
                    if len(paragraph.text) > 0:
                        s = Sentence(text=paragraph.text, tokenizer=tokenizer)
                        sentences.append(s)
            progress_bar.update()
    if len(sentences) == 0:
        raise Exception("No example loaded, causes: no cases in provided path or sample size is to high")

    # Tag all collected sentences in-place with the best saved model.
    tagger: SequenceTagger = SequenceTagger.load(os.path.join(model_folder, "best-model.pt"))
    _ = tagger.predict(sentences=sentences, mini_batch_size=32, verbose=True)

    # Render the tagged sentences as an HTML visualization.
    print("prepare html")
    page_html = render_ner_html(sentences, colors=colors)
    print("write html")
    with open("sentence.html", "w") as writer:
        writer.write(page_html)