datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))
for dataset in datasets_for_vocab_creation:
    if dataset not in all_datasets:
        raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")
logger.info("From dataset instances, %s will be considered for vocabulary creation.",
            ", ".join(datasets_for_vocab_creation))
vocab = Vocabulary.from_params(
    params.pop("vocabulary", {}),
    (instance for key, dataset in all_datasets.items()
     for instance in dataset
     if key in datasets_for_vocab_creation)
)
model = Model.from_params(vocab=vocab, params=params.pop('model'))
model = transfer_prev_model_weights_to_new_model(prev_best_model, model)
# Initializing the model can have side effect of expanding the vocabulary
vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))
iterator = DataIterator.from_params(params.pop("iterator"))
iterator.index_with(vocab)
validation_iterator_params = params.pop("validation_iterator", None)
if validation_iterator_params:
    validation_iterator = DataIterator.from_params(validation_iterator_params)
    validation_iterator.index_with(vocab)
else:
    validation_iterator = None
train_data = all_datasets['train']
validation_data = all_datasets.get('validation')
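# A minimal sketch (not from the original repo) of the kind of ``params`` the snippet
# above consumes: it pops "datasets_for_vocab_creation", "vocabulary", "model",
# "iterator", and optionally "validation_iterator". The component choices below are
# illustrative placeholders, not the project's actual configuration.
example_params = {
    "datasets_for_vocab_creation": ["train"],            # only the training split builds the vocab
    "vocabulary": {"max_vocab_size": 30000},
    "model": {"type": "crf_tagger"},                      # any registered Model type
    "iterator": {"type": "basic", "batch_size": 32},
    "validation_iterator": {"type": "basic", "batch_size": 64},
}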
from allennlp.modules import TokenEmbedder
from allennlp.nn import InitializerApplicator, RegularizerApplicator
from allennlp.training.metrics import Average
from overrides import overrides
from scipy import sparse
from tabulate import tabulate
from vampire.common.util import (compute_background_log_frequency, load_sparse,
read_json)
from vampire.modules import VAE
logger = logging.getLogger(__name__)
@Model.register("vampire")
class VAMPIRE(Model):
"""
VAMPIRE is a variational document model for pretraining under low
resource environments.
Parameters
----------
vocab : ``Vocabulary``, required
A Vocabulary, required in order to compute sizes for input/output projections.
bow_embedder : ``TextFieldEmbedder``, required
Used to embed the ``tokens`` ``TextField`` we get as input to the model
into a bag-of-word-counts.
vae : ``VAE``, required
The variational autoencoder used to project the BoW into a latent space.
kl_weight_annealing : ``string``, required
Annealing weight on the KL divergence of ELBO.
Choice between `sigmoid`, `linear` and `constant` annealing.
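# Hedged usage sketch (not from the VAMPIRE repo): because the class is registered
# under "vampire", a config like the one below can be turned into a model with
# ``Model.from_params``. The nested component configs are illustrative placeholders;
# the real project defines its own keys for ``bow_embedder`` and ``vae``.
def build_vampire_from_config(vocab):
    from allennlp.common import Params
    vampire_params = Params({
        "type": "vampire",
        "bow_embedder": {"type": "bag_of_word_counts", "vocab_namespace": "tokens"},
        "vae": {"type": "logistic_normal"},        # placeholder VAE config
        "kl_weight_annealing": "sigmoid",
    })
    return Model.from_params(vocab=vocab, params=vampire_params)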
from allennlp.modules import (
    Embedding,
    Seq2SeqEncoder,
    Seq2VecEncoder,
    TextFieldEmbedder,
    TimeDistributed,
)
from allennlp.modules.seq2vec_encoders import BagOfEmbeddingsEncoder
from allennlp.nn import util
from allennlp.semparse import ParsingError
from allennlp.semparse.domain_languages.domain_language import ExecutionError
from allennlp.semparse.domain_languages import WikiTablesLanguage, START_SYMBOL
from allennlp.state_machines.states import GrammarBasedState, GrammarStatelet, RnnStatelet
from allennlp.training.metrics import Average
class WikiTablesSemanticParser(Model):
"""
A ``WikiTablesSemanticParser`` is a :class:`Model` which takes as input a table and a question,
and produces a logical form that answers the question when executed over the table. The
logical form is generated by a `type-constrained`, `transition-based` parser. This is an
abstract class that defines most of the functionality related to the transition-based parser. It
does not contain the implementation for actually training the parser. You may want to train it
using a learning-to-search algorithm, in which case you will want to use
``WikiTablesErmSemanticParser``, or if you have a set of approximate logical forms that give the
correct denotation, you will want to use ``WikiTablesMmlSemanticParser``.
Parameters
----------
vocab : ``Vocabulary``
question_embedder : ``TextFieldEmbedder``
Embedder for questions.
action_embedding_dim : ``int``
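# The class above is abstract: it wires up the transition-based decoding machinery and
# leaves training to subclasses. A hedged sketch of how a concrete variant is selected
# in a training config; the registered names follow allennlp-semparse conventions and
# are assumptions here, as are the placeholder values.
wikitables_model_fragment = {
    "type": "wikitables_mml_parser",    # MML over offline-searched logical forms
    # "type": "wikitables_erm_parser",  # alternative: learning-to-search training
    "question_embedder": {"tokens": {"type": "embedding", "embedding_dim": 100}},
    "action_embedding_dim": 100,
}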
import numpy
from allennlp.common import Params
from allennlp.data import Vocabulary
from allennlp.models.model import Model
from allennlp.modules import TextFieldEmbedder, Seq2SeqEncoder, Attention, TimeDistributed
from allennlp.nn import InitializerApplicator
from allennlp.nn.util import get_text_field_mask, weighted_sum
from allennlp.training.metrics import F1Measure, CategoricalAccuracy
from allennlp.modules.similarity_functions.bilinear import BilinearSimilarity
from allennlp.training.metrics import SpanBasedF1Measure
from allennlp.nn.util import sequence_cross_entropy_with_logits
@Model.register("ProLocalModel")
class ProLocalModel(Model):
"""
This ``Model`` takes as input a dataset read by stateChangeDatasetReader
Input: sentence, focus entity, focus verb
Output: state change types for the focus entity, state change tags (mainly before, after locations of focus entity)
The basic outline of this model is to
1. get an embedded representation for the sentence tokens,
2. concatenate each token embedding with verb and entity bits,
3. pass them through bidirectional LSTM Seq2VecEncoder
to create a contextual sentence embedding vector,
4. apply bilinear attention to compute attention weights over sentence tokens
5. apply dense layer to get most likely state_change_type among
{Create, Destroy, Move, None}
Parameters
----------
vocab : ``Vocabulary``
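# A hedged, plain-PyTorch sketch of the five steps outlined in the docstring above.
# Illustrative only (the module names, pooling choice, and shapes are assumptions),
# not the actual ProLocalModel implementation.
import torch
import torch.nn as nn

class ProLocalSketch(nn.Module):
    def __init__(self, embed_dim: int, hidden_dim: int, num_types: int = 4):
        super().__init__()
        # step 3: BiLSTM over [token embedding ; verb bit ; entity bit]
        self.encoder = nn.LSTM(embed_dim + 2, hidden_dim, bidirectional=True, batch_first=True)
        # step 4: bilinear attention between a sentence vector and each token
        self.attention = nn.Bilinear(2 * hidden_dim, 2 * hidden_dim, 1)
        # step 5: dense layer over {Create, Destroy, Move, None}
        self.type_projection = nn.Linear(2 * hidden_dim, num_types)

    def forward(self, embedded_tokens, verb_bits, entity_bits):
        # step 2: concatenate each token embedding with its verb / entity indicator bits
        token_inputs = torch.cat([embedded_tokens, verb_bits, entity_bits], dim=-1)
        encoded, _ = self.encoder(token_inputs)                     # step 3
        sentence_vector = encoded.mean(dim=1, keepdim=True)         # crude stand-in for a Seq2VecEncoder
        scores = self.attention(sentence_vector.expand_as(encoded).contiguous(), encoded)
        weights = torch.softmax(scores.squeeze(-1), dim=-1)         # step 4: attention over tokens
        attended = torch.bmm(weights.unsqueeze(1), encoded).squeeze(1)
        return self.type_projection(attended)                        # step 5: state-change-type logits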
from allennlp.modules import Attention, Seq2SeqEncoder, TextFieldEmbedder, Embedding
from allennlp.nn import util
from allennlp.nn.initializers import InitializerApplicator
from allennlp.nn.regularizers import RegularizerApplicator
from allennlp.semparse.contexts.sql_context_utils import action_sequence_to_sql
from allennlp.state_machines.states import GrammarBasedState
from allennlp.state_machines.transition_functions import BasicTransitionFunction
from allennlp.state_machines import BeamSearch
from allennlp.state_machines.trainers import MaximumMarginalLikelihood
from allennlp.state_machines.states import GrammarStatelet, RnnStatelet
from allennlp.training.metrics import Average
logger = logging.getLogger(__name__)
@Model.register("text2sql_parser")
class Text2SqlParser(Model):
"""
Parameters
----------
vocab : ``Vocabulary``
utterance_embedder : ``TextFieldEmbedder``
Embedder for utterances.
action_embedding_dim : ``int``
Dimension to use for action embeddings.
encoder : ``Seq2SeqEncoder``
The encoder to use for the input utterance.
decoder_beam_search : ``BeamSearch``
Beam search used to retrieve best sequences after training.
max_decoding_steps : ``int``
When we're decoding with a beam search, what's the maximum number of steps we should take?
This only applies at evaluation time, not during training.
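# A hedged configuration fragment for the parser above. The keys mirror the documented
# constructor arguments; the concrete values and sub-configs are illustrative
# placeholders rather than the repository's real training config.
text2sql_model_fragment = {
    "type": "text2sql_parser",
    "utterance_embedder": {"tokens": {"type": "embedding", "embedding_dim": 100}},
    "action_embedding_dim": 50,
    "encoder": {"type": "lstm", "input_size": 100, "hidden_size": 200, "bidirectional": True},
    "decoder_beam_search": {"beam_size": 5},
    "max_decoding_steps": 50,    # only limits decoding at evaluation time
}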
import torch
from torch.nn.modules.linear import Linear
from allennlp.common.checks import check_dimensions_match
from allennlp.data import Vocabulary
from allennlp.modules import Seq2SeqEncoder, TimeDistributed, TextFieldEmbedder
from allennlp.modules import ConditionalRandomField, FeedForward
from allennlp.modules.conditional_random_field import allowed_transitions
from allennlp.models.model import Model
from allennlp.nn import InitializerApplicator, RegularizerApplicator
import allennlp.nn.util as util
from allennlp.training.metrics import SpanBasedF1Measure
class CrfTagger(Model):
u"""
The ``CrfTagger`` encodes a sequence of text with a ``Seq2SeqEncoder``,
then uses a Conditional Random Field model to predict a tag for each token in the sequence.
Parameters
----------
vocab : ``Vocabulary``, required
A Vocabulary, required in order to compute sizes for input/output projections.
text_field_embedder : ``TextFieldEmbedder``, required
Used to embed the tokens ``TextField`` we get as input to the model.
encoder : ``Seq2SeqEncoder``
The encoder that we will use in between embedding tokens and predicting output tags.
label_namespace : ``str``, optional (default=``labels``)
This is needed to compute the SpanBasedF1Measure metric.
Unless you did something unusual, the default value should be what you want.
feedforward : ``FeedForward``, optional, (default = None).
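# A hedged sketch of the CRF layer the tagger places on top of the encoder: BIO (or BIOUL)
# constraints are built with ``allowed_transitions`` so the CRF cannot decode invalid tag
# sequences. The label index map below is illustrative.
example_labels = {0: "O", 1: "B-PER", 2: "I-PER"}
constraints = allowed_transitions("BIO", example_labels)   # e.g. forbids O -> I-PER
crf_layer = ConditionalRandomField(num_tags=len(example_labels), constraints=constraints)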
from allennlp.data import Vocabulary
from allennlp.models.model import Model
from allennlp.modules import Highway
from allennlp.modules import Seq2SeqEncoder, TextFieldEmbedder
from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
from allennlp.nn import util, InitializerApplicator, RegularizerApplicator
from allennlp.training.metrics import BooleanAccuracy, CategoricalAccuracy
from allennlp.nn.util import masked_softmax
from allennlp_rc.models.util import get_best_span
from allennlp_rc.eval import SquadEmAndF1
@Model.register("qanet")
class QaNet(Model):
"""
This class implements Adams Wei Yu's `QANet Model `_
for machine reading comprehension published at ICLR 2018.
The overall architecture of QANet is very similar to BiDAF. The main difference is that QANet
replaces the RNN encoder with CNN + self-attention. There are also some minor differences in the
modeling layer and output layer.
Parameters
----------
vocab : ``Vocabulary``
text_field_embedder : ``TextFieldEmbedder``
Used to embed the ``question`` and ``passage`` ``TextFields`` we get as input to the model.
num_highway_layers : ``int``
The number of highway layers to use in between embedding the input and passing it through
the phrase layer.
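# A hedged configuration fragment for the model above, limited to the parameters the
# docstring describes; the real QaNet config also needs its CNN + self-attention phrase
# layer, matrix attention, and modeling layer. Values are illustrative placeholders.
qanet_model_fragment = {
    "type": "qanet",
    "text_field_embedder": {"tokens": {"type": "embedding", "embedding_dim": 300}},
    "num_highway_layers": 2,
}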
from allennlp.models.model import Model
from allennlp.modules import Attention, TextFieldEmbedder, Seq2SeqEncoder
from allennlp.nn import Activation
from allennlp_semparse.domain_languages import NlvrLanguage
from allennlp_semparse.fields.production_rule_field import ProductionRule
from allennlp_semparse.models.nlvr.nlvr_semantic_parser import NlvrSemanticParser
from allennlp_semparse.state_machines import BeamSearch
from allennlp_semparse.state_machines.states import GrammarBasedState
from allennlp_semparse.state_machines.trainers import MaximumMarginalLikelihood
from allennlp_semparse.state_machines.transition_functions import BasicTransitionFunction
logger = logging.getLogger(__name__)
@Model.register("nlvr_direct_parser")
class NlvrDirectSemanticParser(NlvrSemanticParser):
"""
``NlvrDirectSemanticParser`` is an ``NlvrSemanticParser`` that gets around the problem of lack
of logical form annotations by maximizing the marginal likelihood of an approximate set of target
sequences that yield the correct denotation. The main difference between this parser and
``NlvrCoverageSemanticParser`` is that while this parser takes the output of an offline search
process as the set of target sequences for training, the latter performs search during training.
Parameters
----------
vocab : ``Vocabulary``
Passed to super-class.
sentence_embedder : ``TextFieldEmbedder``
Passed to super-class.
action_embedding_dim : ``int``
Passed to super-class.
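# A hedged sketch of the maximum marginal likelihood objective described above: given
# log-probabilities of several candidate action sequences that all yield the correct
# denotation, the loss marginalizes over them instead of committing to a single target.
# The function name and tensor layout are assumptions for illustration.
import torch

def mml_loss(candidate_logprobs: torch.Tensor) -> torch.Tensor:
    # candidate_logprobs: (batch, num_candidates) log P(sequence | sentence)
    return -torch.logsumexp(candidate_logprobs, dim=-1).mean()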
from typing import Optional

from allennlp.data import Vocabulary
from allennlp.modules import (TextFieldEmbedder,
                              Seq2SeqEncoder,
                              Seq2VecEncoder)
from allennlp.modules import ConditionalRandomField, FeedForward, Pruner, Highway
from allennlp.modules.conditional_random_field import allowed_transitions
import allennlp
from allennlp.modules.span_extractors import SelfAttentiveSpanExtractor, EndpointSpanExtractor
from allennlp.models.model import Model
from allennlp.nn import InitializerApplicator, RegularizerApplicator
import allennlp.nn.util as util
from allennlp.training.metrics import CategoricalAccuracy
from modules.span_based_chunker import SpanBasedChunker
from metrics.span_f1 import MySpanF1
@Model.register("soft_dictionary_span_classifier_HSCRF")
class soft_dictionary_span_classifier_HSCRF(Model):
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 feature_size: int,
                 max_span_width: int,
                 encoder: Seq2SeqEncoder,
                 span_label_namespace: str = "span_tags",
                 token_label_namespace: str = "token_tags",
                 calculate_span_f1: bool = None,
                 verbose_metrics: bool = True,
                 feedforward: Optional[FeedForward] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 class_weight=None) -> None:
        super().__init__(vocab, regularizer)
from torch.autograd import Variable
from torch.nn.functional import normalize
from allennlp.common import Params
from allennlp.common.checks import check_dimensions_match
from allennlp.data import Vocabulary
from allennlp.models.model import Model
from allennlp.modules import FeedForward
from allennlp.modules import Seq2SeqEncoder, SimilarityFunction, TimeDistributed, TextFieldEmbedder
from allennlp.nn import InitializerApplicator, RegularizerApplicator
from allennlp.nn.util import get_text_field_mask, last_dim_softmax, weighted_sum, replace_masked_values
from allennlp.training.metrics import CategoricalAccuracy
from endtasks import util
from endtasks.modules import VariationalDropout
@Model.register("esim-pair2vec")
class ESIMPair2Vec(Model):
"""
This ``Model`` implements the ESIM sequence model described in `"Enhanced LSTM for Natural Language Inference"
`_
by Chen et al., 2017.
Parameters
----------
vocab : ``Vocabulary``
text_field_embedder : ``TextFieldEmbedder``
Used to embed the ``premise`` and ``hypothesis`` ``TextFields`` we get as input to the
model.
attend_feedforward : ``FeedForward``
This feedforward network is applied to the encoded sentence representations before the
similarity matrix is computed between words in the premise and words in the hypothesis.
similarity_function : ``SimilarityFunction``