How to use the flair.embeddings.WordEmbeddings class in flair

To help you get started, we’ve selected a few flair examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github flairNLP / flair / tests / test_model_integration.py View on Github external
def test_train_load_use_tagger_multicorpus(results_base_path, tasks_base_path):
    corpus_1 = flair.datasets.ColumnCorpus(
        data_folder=tasks_base_path / "fashion", column_format={0: "text", 2: "ner"}
    )
    corpus_2 = flair.datasets.GERMEVAL(base_path=tasks_base_path)

    corpus = MultiCorpus([corpus_1, corpus_2])
    tag_dictionary = corpus.make_tag_dictionary("ner")

    embeddings = WordEmbeddings("turian")

    tagger: SequenceTagger = SequenceTagger(
        hidden_size=64,
        embeddings=embeddings,
        tag_dictionary=tag_dictionary,
        tag_type="ner",
        use_crf=False,
    )

    # initialize trainer
    trainer: ModelTrainer = ModelTrainer(tagger, corpus)

    trainer.train(
        results_base_path,
        learning_rate=0.1,
        mini_batch_size=2,
github flairNLP / flair / tests / test_text_classifier_trainer.py View on Github external
def test_text_classifier_single_label(tasks_base_path):
    """Train a single-label IMDB classifier for two epochs and sanity-check its predictions."""
    imdb_corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB, tasks_base_path)
    labels = imdb_corpus.make_label_dictionary()

    # 'en-glove' word vectors are wrapped in an LSTM document embedding.
    lstm_embeddings = DocumentLSTMEmbeddings(
        [WordEmbeddings('en-glove')], 128, 1, False, 64, False, False
    )

    classifier = TextClassifier(lstm_embeddings, labels, False)

    TextClassifierTrainer(classifier, imdb_corpus, labels, False).train(
        './results', max_epochs=2
    )

    example = Sentence("Berlin is a really nice city.")

    # Every predicted label needs a value and a float score within [0, 1].
    for predicted in classifier.predict(example):
        for label in predicted.labels:
            assert label.value is not None
            assert 0.0 <= label.score <= 1.0
            assert type(label.score) is float

    # clean up results directory
github flairNLP / flair / tests / test_model_integration.py View on Github external
def test_train_classifier_with_sampler(results_base_path, tasks_base_path):
    """Train a text classifier for two epochs with a dataset sampler supplied.

    NOTE(review): this excerpt stops right after the example sentence is
    created; whatever is done with it afterwards is not visible here.
    """
    # Classification corpus read from the "imdb" folder under tasks_base_path.
    corpus = flair.datasets.ClassificationCorpus(tasks_base_path / "imdb")
    label_dict = corpus.make_label_dictionary()

    # "turian" word embeddings wrapped in an RNN document embedding.
    word_embedding: WordEmbeddings = WordEmbeddings("turian")
    document_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [word_embedding], 32, 1, False, 64, False, False
    )

    model: TextClassifier = TextClassifier(document_embeddings, label_dict, False)

    # Shuffling is switched off and the sampler class itself (not an
    # instance) is handed to the trainer.
    trainer = ModelTrainer(model, corpus)
    trainer.train(
        results_base_path,
        max_epochs=2,
        shuffle=False,
        sampler=ImbalancedClassificationDatasetSampler,
    )

    sentence = Sentence("Berlin is a really nice city.")
github flairNLP / flair / tests / test_text_classifier_trainer.py View on Github external
def test_text_classifier_mulit_label(tasks_base_path):
    """Train a multi-label IMDB classifier for two epochs and sanity-check its predictions."""
    imdb_corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB, tasks_base_path)
    labels = imdb_corpus.make_label_dictionary()

    # 'en-glove' word vectors are wrapped in a mean document embedding.
    mean_embeddings = DocumentMeanEmbeddings([WordEmbeddings('en-glove')])

    # Third positional argument True selects the multi-label classifier.
    classifier = TextClassifier(mean_embeddings, labels, True)

    TextClassifierTrainer(classifier, imdb_corpus, labels, False).train(
        './results', max_epochs=2
    )

    example = Sentence("Berlin is a really nice city.")

    # Every predicted label needs a value and a float score within [0, 1].
    for predicted in classifier.predict(example):
        for label in predicted.labels:
            assert label.value is not None
            assert 0.0 <= label.score <= 1.0
            assert type(label.score) is float

    # clean up results directory
github ZihanWangKi / CrossWeigh / flair_scripts / flair_ner.py View on Github external
# Mark the output folder with a '_w' suffix when args.include_weight is truthy.
if args.include_weight:
    model_folder += '_w'
# print(column_format)
# Load the column-formatted corpus from data_folder.
# NOTE(review): tag_to_biloes="ner" presumably converts the "ner" tags to a
# BIOES-style scheme -- confirm against NLPTaskDataFetcher.load_column_corpus.
corpus: Corpus = NLPTaskDataFetcher.load_column_corpus(data_folder,
                                                       column_format=column_format,
                                                       tag_to_biloes="ner")

# The tag type to predict; reused for the dictionary and the tagger below.
tag_type = 'ner'

# Tag dictionary built from the corpus for the chosen tag type.
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

# Token-level embeddings that are stacked into one embedding further down.
embedding_types: List[TokenEmbeddings] = [

    # GloVe embeddings
    WordEmbeddings('glove'),

    # contextual string embeddings, forward
    FlairEmbeddings('news-forward'),
    # PooledFlairEmbeddings('news-forward', pooling='min'),

    # contextual string embeddings, backward
    FlairEmbeddings('news-backward'),
    # PooledFlairEmbeddings('news-backward', pooling='min'),
]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# The tagger is a WeightedSequenceTagger annotated as SequenceTagger.
# NOTE(review): presumably a subclass defined elsewhere in the project -- confirm.
tagger: SequenceTagger = WeightedSequenceTagger(hidden_size=256,
                                                embeddings=embeddings,
                                                tag_dictionary=tag_dictionary,
                                                tag_type=tag_type)
github alexandrainst / danlp / danlp / models / embeddings.py View on Github external
def load_context_embeddings_with_flair(direction='bi', word_embeddings=True,
                                       cache_dir=DEFAULT_CACHE_DIR,
                                       verbose=False):
    """
    Collect flair embeddings: optional 'da' word embeddings plus forward
    and/or backward FlairEmbeddings loaded from downloaded weights.

    :param direction: 'fwd' adds the forward model, 'bwd' adds the backward
        model and 'bi' (the default) adds both; any other value adds neither
    :param word_embeddings: if truthy, ``WordEmbeddings('da')`` is placed
        first in the collected list
    :param cache_dir: passed through to ``download_model``
        (presumably the directory the weights are stored in -- confirm)
    :param verbose: passed through to ``download_model``
    :return: the single embedding object when exactly one was collected.
        NOTE(review): this excerpt ends there; what is returned when several
        (or no) embeddings were collected is not visible here.
    """
    # flair is imported inside the function rather than at module level.
    from flair.embeddings import FlairEmbeddings
    from flair.embeddings import WordEmbeddings
    from flair.embeddings import StackedEmbeddings

    embeddings = []

    if word_embeddings:
        # NOTE(review): the variable name suggests 'da' resolves to fastText
        # vectors -- confirm against the flair WordEmbeddings documentation.
        fasttext_embedding = WordEmbeddings('da')
        embeddings.append(fasttext_embedding)

    # Forward model: used for both 'bi' and 'fwd'.
    if direction == 'bi' or direction == 'fwd':
        fwd_weight_path = download_model('flair.fwd', cache_dir,
                                         verbose=verbose,
                                         process_func=_unzip_process_func)
        embeddings.append(FlairEmbeddings(fwd_weight_path))

    # Backward model: used for both 'bi' and 'bwd'.
    if direction == 'bi' or direction == 'bwd':
        bwd_weight_path = download_model('flair.bwd', cache_dir,
                                         verbose=verbose,
                                         process_func=_unzip_process_func)
        embeddings.append(FlairEmbeddings(bwd_weight_path))

    # A lone embedding is returned as is, without any wrapper around it.
    if len(embeddings) == 1:
        return embeddings[0]
github flairNLP / flair / textc.py View on Github external
from flair.data import TaggedCorpus
from flair.data_fetcher import NLPTaskDataFetcher, NLPTask
from flair.embeddings import WordEmbeddings, FlairEmbeddings, DocumentLSTMEmbeddings
from flair.models import TextClassifier
from flair.trainers import ModelTrainer


# 1. get the corpus
# NOTE(review): downsample(0.1) presumably keeps 10% of the data -- confirm.
corpus: TaggedCorpus = NLPTaskDataFetcher.load_corpus(NLPTask.AG_NEWS, 'agnews/').downsample(0.1)

# 2. create the label dictionary
label_dict = corpus.make_label_dictionary()

# 3. make a list of word embeddings
word_embeddings = [WordEmbeddings('glove'),

                   # forward and backward flair embeddings, enabled here alongside GloVe
                    FlairEmbeddings('news-forward'),
                    FlairEmbeddings('news-backward'),
                   ]

# 4. init document embedding by passing list of word embeddings
document_embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings(word_embeddings,
                                                                     hidden_size=512,
                                                                     reproject_words=True,
                                                                     reproject_words_dimension=256,
                                                                     
                                                                     )

# 5. create the text classifier (single-label, with the attention option switched on)
classifier = TextClassifier(document_embeddings, label_dictionary=label_dict, multi_label=False, attention=True)
github prrao87 / fine-grained-sentiment / training / train_flair.py View on Github external
"""Train sentiment model using Flair NLP library:
    https://github.com/zalandoresearch/flair/blob/master/resources/docs/TUTORIAL_7_TRAINING_A_MODEL.md

    To help provide added context, we can stack Glove, Bert or ELMo embeddings along with Flair embeddings.
    """
    # pip install flair allennlp
    from flair.datasets import ClassificationCorpus
    from flair.embeddings import FlairEmbeddings, DocumentRNNEmbeddings
    from flair.models import TextClassifier
    from flair.trainers import ModelTrainer
    from flair.training_utils import EvaluationMetric
    from flair.visual.training_curves import Plotter

    if stack == "glove":
        from flair.embeddings import WordEmbeddings
        stacked_embedding = WordEmbeddings('glove')
    elif stack == "elmo":
        from flair.embeddings import ELMoEmbeddings
        stacked_embedding = ELMoEmbeddings('original')
    elif stack == "bert":
        from flair.embeddings import BertEmbeddings
        stacked_embedding = BertEmbeddings('bert-base-cased')
    else:
        stacked_embedding = None

    # Define and Load corpus from the provided dataset
    train, dev, test = filenames
    corpus = ClassificationCorpus(
        file_path,
        train_file=train,
        dev_file=dev,
        test_file=test,
github undertheseanlp / ner / egs / vlsp2016_flair / train_wv_character.py View on Github external
train_file="train.txt",
                                                              test_file="test.txt",
                                                              dev_file="dev.txt")
# Show the corpus that was loaded above.
print(corpus)

# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary.idx2item)

# 4. initialize embeddings
# Character embeddings are combined with word embeddings given as a file path.
# NOTE(review): "tmp/glove.1.8G.bin" looks like a locally stored vector file -- confirm it exists before running.
embedding_types: List[TokenEmbeddings] = [
    CharacterEmbeddings(),
    WordEmbeddings("tmp/glove.1.8G.bin")

]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# 5. initialize sequence tagger
from flair.models import SequenceTagger

# Sequence tagger over the stacked embeddings, with the CRF option enabled.
tagger: SequenceTagger = SequenceTagger(hidden_size=1024,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
                                        use_crf=True)

# 6. initialize trainer
from flair.trainers import SequenceTaggerTrainer
github dcavar / Flair-JSON-NLP / flairjsonnlp / __init__.py View on Github external
def get_embeddings(embeddings: List[str], character: bool, lang: str, bpe_size: int) -> StackedEmbeddings:
    """Build a stacked embedding from named embeddings plus optional extras.

    Empty names are skipped. A name containing 'forward' or 'backward' is
    loaded as FlairEmbeddings; every other name is loaded as WordEmbeddings.
    CharacterEmbeddings are appended when ``character`` is truthy, and
    BytePairEmbeddings for ``lang`` with dimension ``bpe_size`` are appended
    when ``bpe_size`` is positive.
    """
    def load(name):
        # Pick the embedding class from the direction markers in the name.
        is_flair = any(marker in name for marker in ('forward', 'backward'))
        return FlairEmbeddings(name) if is_flair else WordEmbeddings(name)

    members = [load(name) for name in embeddings if name != '']

    if character:
        members.append(CharacterEmbeddings())
    if bpe_size > 0:
        members.append(BytePairEmbeddings(language=lang, dim=bpe_size))

    return StackedEmbeddings(embeddings=members)