from abc import abstractmethod
from .component import Component
from .serializable import Serializable
class Estimator(Component, Serializable):
"""Abstract class for components that could be fitted on the data as a whole."""
@abstractmethod
def fit(self, *args, **kwargs):
pass
def save(self) -> None:
main_component = self.get_main_component()
if isinstance(main_component, Serializable):
main_component.save()
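# --- Hypothetical sketch (not part of DeepPavlov): a minimal Estimator subclass
# that fits a token vocabulary on the whole dataset and can be saved/loaded.
# The class name and pickle file format below are illustrative assumptions.
import pickle
from pathlib import Path

class MinimalVocabEstimator(Estimator):
    def __init__(self, save_path=None, load_path=None, **kwargs):
        super().__init__(save_path=save_path, load_path=load_path, **kwargs)
        self.vocab = {}
        self.load()

    def fit(self, tokens_batch, *args, **kwargs):
        # assign an integer id to every distinct token seen in the data
        for tokens in tokens_batch:
            for token in tokens:
                self.vocab.setdefault(token, len(self.vocab))

    def __call__(self, tokens_batch, **kwargs):
        # map tokens to ids, -1 for out-of-vocabulary tokens
        return [[self.vocab.get(token, -1) for token in tokens] for tokens in tokens_batch]

    def save(self):
        if self.save_path is not None:
            with open(self.save_path, 'wb') as f:
                pickle.dump(self.vocab, f)

    def load(self):
        if self.load_path is not None and Path(self.load_path).exists():
            with open(self.load_path, 'rb') as f:
                self.vocab = pickle.load(f)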
from abc import ABCMeta, abstractmethod
from logging import getLogger
from pathlib import Path
from typing import List, Union, Iterator
import numpy as np
from overrides import overrides
from deeppavlov.core.data.utils import zero_pad
from deeppavlov.core.models.component import Component
from deeppavlov.core.models.serializable import Serializable
log = getLogger(__name__)
class Embedder(Component, Serializable, metaclass=ABCMeta):
"""
Class implements fastText embedding model
Args:
load_path: path where to load pre-trained embedding model from
pad_zero: whether to pad samples or not
Attributes:
model: model instance
tok2emb: dictionary with already embedded tokens
dim: dimension of embeddings
pad_zero: whether to pad sequence of tokens with zeros or not
mean: whether to return one mean embedding vector per sample
load_path: path with pre-trained fastText binary model
"""
    def __init__(self, load_path: Union[str, Path], pad_zero: bool = False, mean: bool = False, **kwargs) -> None:
        # body reconstructed as a minimal sketch: store the options and load the model
        super().__init__(save_path=None, load_path=load_path)
        self.tok2emb = {}
        self.pad_zero = pad_zero
        self.mean = mean
        self.dim = None
        self.model = None
        self.load()
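# --- Illustration only (hypothetical helper, not part of DeepPavlov): the
# intended semantics of the `mean` and `pad_zero` options documented above,
# shown on plain lists of per-token numpy vectors.
def _pool_or_pad(batch_of_token_vectors, dim, mean=False, pad_zero=False):
    if mean:
        # one averaged vector per sample; zero vector for empty samples
        return [np.mean(vecs, axis=0) if len(vecs) else np.zeros(dim, dtype=np.float32)
                for vecs in batch_of_token_vectors]
    if pad_zero:
        # pad every sample with zero vectors up to the longest sample
        max_len = max((len(vecs) for vecs in batch_of_token_vectors), default=0)
        padded = np.zeros((len(batch_of_token_vectors), max_len, dim), dtype=np.float32)
        for i, vecs in enumerate(batch_of_token_vectors):
            for j, vec in enumerate(vecs):
                padded[i, j] = vec
        return padded
    return batch_of_token_vectors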
import sys
import csv
import pickle as pkl
from logging import getLogger
import numpy as np
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.serializable import Serializable
from bert_dp.preprocessing import InputFeatures
logger = getLogger(__name__)
@register('response_base_loader')
class ResponseBaseLoader(Serializable):
"""Class for loading a base with text responses (and contexts) and their vector representations."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.resps = None
self.resp_features = None
self.resp_vecs = None
self.conts = None
self.cont_features = None
self.cont_vecs = None
self.load()
def load(self):
if self.load_path is not None:
resp_file = self.load_path / "responses.csv"
            if resp_file.exists():
                # sketch of the omitted body: read the response base, assuming
                # the response text is the last column of each CSV row
                with open(resp_file, newline='', encoding='utf-8') as f:
                    self.resps = [row[-1] for row in csv.reader(f) if row]
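# --- Hypothetical usage (the path below is illustrative, not shipped with the library):
# loader = ResponseBaseLoader(save_path=None,
#                             load_path='~/.deeppavlov/downloads/response_base')
# first_responses = loader.resps[:3] if loader.resps else []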
from abc import abstractmethod
from .component import Component
from .serializable import Serializable
class NNModel(Component, Serializable):
"""Abstract class for deep learning components."""
@abstractmethod
def train_on_batch(self, x: list, y: list):
pass
    def process_event(self, event_name, data):
        """React to a training event (e.g. after an epoch or after validation); a no-op by default."""
        pass
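# --- Hypothetical sketch (not part of DeepPavlov): a minimal NNModel subclass,
# just to show which methods a batch-trainable component is expected to provide.
# The linear model and .npy storage below are illustrative assumptions.
from pathlib import Path

import numpy as np

class ToyLinearNNModel(NNModel):
    def __init__(self, n_features: int, lr: float = 0.1, save_path=None, load_path=None, **kwargs):
        super().__init__(save_path=save_path, load_path=load_path, **kwargs)
        self.lr = lr
        self.weights = np.zeros(n_features)
        self.load()

    def train_on_batch(self, x: list, y: list):
        # one gradient step of least-squares regression on the batch
        x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
        grad = x.T @ (x @ self.weights - y) / len(y)
        self.weights -= self.lr * grad

    def __call__(self, x: list):
        return (np.asarray(x, dtype=float) @ self.weights).tolist()

    def save(self):
        if self.save_path is not None:
            np.save(str(self.save_path), self.weights)  # numpy appends .npy if the path has no extension

    def load(self):
        if self.load_path is not None and Path(str(self.load_path)).exists():
            self.weights = np.load(str(self.load_path))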
import numpy as np
from pathlib import Path
from overrides import overrides
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.component import Component
from deeppavlov.core.common.log import get_logger
from deeppavlov.core.models.serializable import Serializable
from typing import List
log = get_logger(__name__)
@register('dict_emb')
class DictEmbedder(Component, Serializable):
def __init__(self, load_path, save_path=None, dim=100, **kwargs):
super().__init__(save_path=save_path, load_path=load_path)
self.tok2emb = {}
self.dim = dim
self.load()
def save(self, *args, **kwargs):
raise NotImplementedError
def load(self):
"""
Load dictionary of embeddings from file.
"""
        # the omitted remainder is reconstructed as a sketch: fail if the file is
        # missing, otherwise parse one "token v1 v2 ..." entry per line
        if not Path(self.load_path).exists():
            raise FileNotFoundError('No embeddings dictionary found at {}'.format(self.load_path))
        with open(self.load_path, encoding='utf-8') as f:
            for line in f:
                token, *values = line.split()
                self.tok2emb[token] = np.asarray(values, dtype=np.float32)
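# --- Hypothetical usage (path and file contents are illustrative): the embeddings
# file is assumed to hold one "token v1 v2 ..." entry per line.
# embedder = DictEmbedder(load_path='~/.deeppavlov/embeddings/toy_dict.emb', dim=100)
# hello_vector = embedder.tok2emb.get('hello')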
from typing import List
import numpy as np
import pickle
from deeppavlov.core.models.serializable import Serializable
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.component import Component
from pathlib import Path
@register('answer_generation_rus')
class AnswerGeneration(Component, Serializable):
"""
Class for generation of answer using triplets with the entity
in the question and relations predicted from the question by the
relation prediction model.
We search a triplet with the predicted relations
"""
def __init__(self, load_path: str, *args, **kwargs) -> None:
        super().__init__(save_path=None, load_path=load_path)
self.load()
def load(self) -> None:
load_path = Path(self.load_path).expanduser()
with open(load_path, 'rb') as fl:
self.q_to_name = pickle.load(fl)
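# --- Illustration only (hypothetical helper, not the library's code): the lookup
# step the docstring describes, written over a generic list of (relation, object)
# pairs; the real structure of `q_to_name` is not shown in this snippet.
def _find_answer(entity_triplets, predicted_relations):
    for relation in predicted_relations:
        for rel, obj in entity_triplets:
            if rel == relation:
                return obj
    return None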
from typing import List, Tuple
from pathlib import Path
import numpy as np
from deeppavlov.core.models.serializable import Serializable
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.component import Component
@register('kb_answer_parser')
class KBAnswerParser(Component, Serializable):
"""
Class for generation of answer using triplets with the entity
in the question and relations predicted from the question by the
relation prediction model.
We search a triplet with the predicted relations
"""
def __init__(self, load_path: str, top_k_classes: int, classes_vocab_keys: Tuple, *args, **kwargs) -> None:
super().__init__(save_path=None, load_path=load_path)
self.top_k_classes = top_k_classes
self.classes = list(classes_vocab_keys)
self.names_dict = None
self.load()
def __call__(self, relations_probs: List[List[str]],
                 entity_triplets: List[List[List[str]]],
                 *args, **kwargs):  # closing of the truncated signature is an assumption
        # body omitted in this snippet; the method is expected to rank relations
        # by probability and look the answer up among the entity triplets
        # (see the sketch below)
        ...
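# --- Illustration only (hypothetical helper, not the library's code): selecting
# the top-k most probable relations for one question, which is the first step
# described in the class docstring above.
def _top_k_relations(probs: List[float], classes: List[str], k: int) -> List[str]:
    order = np.argsort(probs)[::-1][:k]
    return [classes[i] for i in order]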
from scipy.sparse import csr_matrix
from sklearn.linear_model import LogisticRegression
from deeppavlov.core.common.registry import register
from deeppavlov.core.common.log import get_logger
from deeppavlov.core.models.estimator import Estimator
from deeppavlov.core.common.file import load_pickle, save_pickle
from deeppavlov.core.commands.utils import expand_path, make_all_dirs
from deeppavlov.core.models.serializable import Serializable
logger = get_logger(__name__)
@register("logreg_classifier")
class LogregClassifier(Estimator, Serializable):
"""
Logistic Regression Classifier
Parameters:
        top_n: how many of the top-scoring answers the classifier returns for an input vectorized question
        c: inverse regularization strength for the logistic regression model
penalty: regularization penalty type in logistic regression model
save_path: path to save the model
load_path: path to load the model
Returns:
None
"""
def __init__(self, top_n: int = 1, c: int = 1, penalty: str = 'l2', save_path: str = None, load_path: str = None, **kwargs) -> None:
        self.save_path = save_path
        self.load_path = load_path
        # remainder reconstructed as a sketch: keep the hyper-parameters and load
        # a previously fitted model unless the pipeline runs in training mode
        self.top_n, self.c, self.penalty = top_n, c, penalty
        if kwargs.get('mode', 'infer') != 'train':
            self.load()
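# --- Illustration only (hypothetical standalone sketch, not the library's
# methods): fitting a sklearn LogisticRegression and returning the top-n most
# probable classes per query, matching the `top_n`, `c` and `penalty` parameters.
def _fit_and_rank(x_train, y_train, x_query, top_n=1, c=1.0, penalty='l2'):
    logreg = LogisticRegression(C=c, penalty=penalty, solver='liblinear')
    logreg.fit(x_train, y_train)
    probs = logreg.predict_proba(x_query)
    best_ids = probs.argsort(axis=1)[:, ::-1][:, :top_n]
    return [[logreg.classes_[i] for i in row] for row in best_ids]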
from scipy.sparse import vstack
from scipy.sparse import csr_matrix
from deeppavlov.core.common.registry import register
from deeppavlov.core.common.log import get_logger
from deeppavlov.core.models.estimator import Estimator
from deeppavlov.core.common.file import load_pickle, save_pickle
from deeppavlov.core.commands.utils import expand_path, make_all_dirs
from deeppavlov.core.models.serializable import Serializable
logger = get_logger(__name__)
@register("cos_sim_classifier")
class CosineSimilarityClassifier(Estimator, Serializable):
"""
Classifier based on cosine similarity between vectorized sentences
    Parameters:
        top_n: how many of the top-scoring answers to return for an input vectorized question
        save_path: path to save the model
        load_path: path to load the model
Returns:
None
"""
def __init__(self, top_n: int = 1, save_path: str = None, load_path: str = None, **kwargs) -> None:
self.save_path = save_path
self.load_path = load_path
self.top_n = top_n
        if kwargs['mode'] != 'train':
            # omitted remainder reconstructed as a sketch: outside training mode,
            # load a previously fitted model from load_path
            self.load()
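# --- Illustration only (hypothetical helper, not the component's __call__):
# cosine similarity between one query vector and a matrix of fitted sentence
# vectors, from which the `top_n` best-matching classes would be picked.
import numpy as np

def _cosine_scores(query_vec, sentence_matrix):
    query = np.asarray(query_vec, dtype=float)
    matrix = np.asarray(sentence_matrix, dtype=float)
    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query)
    norms = np.where(norms == 0, 1.0, norms)
    return (matrix @ query) / norms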