from typing import List
import numpy as np
import pickle
from deeppavlov.core.models.serializable import Serializable
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.component import Component
from pathlib import Path
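# A minimal sketch (hypothetical file contents and keys) of the pickled lookup that
# the class below loads into self.q_to_name: some mapping from entity/relation
# pairs to answer strings, e.g.
#
#     with open('answers.pkl', 'wb') as f:
#         pickle.dump({('Q159', 'P36'): 'Moscow'}, f)
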
@register('answer_generation_rus')
class AnswerGeneration(Component, Serializable):
"""
    Class for generating an answer using triplets that contain the entity
    found in the question and the relations predicted from the question by
    the relation prediction model.
    The class searches for a triplet with the predicted relations.
"""
def __init__(self, load_path: str, *args, **kwargs) -> None:
        super().__init__(save_path=None, load_path=load_path)
self.load()
def load(self) -> None:
load_path = Path(self.load_path).expanduser()
with open(load_path, 'rb') as fl:
            self.q_to_name = pickle.load(fl)


    def prettify(self, tokens: List[str], tags: List[str]):
        """Prettifies the output of the morphological tagger for a single sentence.

        >>> self.prettify(sent, tags)
1 John _ PROPN _ Number=Sing _ _ _ _
2 really _ ADV _ _ _ _ _ _
3 likes _ VERB _ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin _ _ _ _
4 pizza _ NOUN _ Number=Sing _ _ _ _
5 . _ PUNCT _ _ _ _ _ _
"""
answer = []
for i, (word, tag) in enumerate(zip(tokens, tags)):
answer.append(self.format_string.format(i + 1, word, *make_pos_and_tag(tag)))
if self.return_string:
answer = self.begin + self.sep.join(answer) + self.end
return answer
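# A minimal sketch (hypothetical format strings) of the row formatting performed
# above, assuming make_pos_and_tag() splits a composite tag such as
# "NOUN,Number=Sing" into ("NOUN", "Number=Sing"):
_UD_FORMAT = "{}\t{}\t_\t{}\t_\t{}\t_\t_\t_\t_"    # 10-column CoNLL-U row
_BASIC_FORMAT = "{}\t{}\t{}\t{}"                   # 4-column (id, word, pos, feats) row
assert _BASIC_FORMAT.format(4, "pizza", "NOUN", "Number=Sing") == "4\tpizza\tNOUN\tNumber=Sing"
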
@register('lemmatized_output_prettifier')
class LemmatizedOutputPrettifier(Component):
"""Class which prettifies morphological tagger output to 4-column
or 10-column (Universal Dependencies) format.
Args:
format_mode: output format,
in `basic` mode output data contains 4 columns (id, word, pos, features),
in `conllu` or `ud` mode it contains 10 columns:
id, word, lemma, pos, xpos, feats, head, deprel, deps, misc
(see http://universaldependencies.org/format.html for details)
        Only id, word, tag and pos values are filled in the current version,
other columns are filled by `_` value.
return_string: whether to return a list of strings or a single string
begin: a string to append in the beginning
end: a string to append in the end
        sep: separator between word analyses
    """


from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from scipy.sparse import vstack
from scipy.sparse import csr_matrix
from deeppavlov.core.common.registry import register
from deeppavlov.core.common.log import get_logger
from deeppavlov.core.models.estimator import Estimator
from deeppavlov.core.common.errors import ConfigError
log = get_logger(__name__)
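# A minimal sketch (toy data; plain scikit-learn, not the DeepPavlov wrapper below)
# of the estimator that LogReg delegates to:
_demo_clf = LogisticRegression(penalty='l2', C=1.0, solver='liblinear')
_demo_clf.fit([[0.0], [0.2], [0.8], [1.0]], [0, 0, 1, 1])
_demo_pred = _demo_clf.predict([[0.9]])  # expected: array([1])
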
@register("logistic_regression")
class LogReg(Estimator):
"""
    The class implements the Logistic Regression classifier from the scikit-learn library.
Args:
save_path (str): save path
load_path (str): load path
mode: train/infer trigger
**kwargs: additional arguments
Attributes:
model: Logistic Regression Classifier class from sklearn
"""
    def __init__(self, penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1,
                 class_weight=None, random_state=None, solver='liblinear', max_iter=100, multi_class='ovr',
                 **kwargs) -> None:


from tqdm import tqdm
from pathlib import Path
from typing import Dict, List, Union, Tuple
from logging import getLogger
from overrides import overrides
from deeppavlov.core.common.registry import register
from deeppavlov.core.data.dataset_reader import DatasetReader
from deeppavlov.core.data.utils import download_decompress, mark_done
log = getLogger(__name__)
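# A minimal sketch (assumed helper, not part of the reader's API) of the
# download-if-missing pattern the class below follows for its DATA_FILES:
def _demo_ensure_data(data_path: Union[str, Path], url: str, required: List[str]) -> None:
    """Download and unpack the dataset archive unless all required files are present."""
    data_path = Path(data_path)
    if not all((data_path / name).exists() for name in required):
        download_decompress(url, data_path)
        mark_done(data_path)
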
@register('multiwoz_reader')
class MultiWOZDatasetReader(DatasetReader):
"""
# TODO: add docs
"""
url = 'http://files.deeppavlov.ai/datasets/multiwoz.tar.gz'
DATA_FILES = ['data.json', 'dialogue_acts.json', 'valListFile.json',
'testListFile.json',
'attraction_db.json', 'bus_db.json',
'hospital_db.json', 'hotel_db.json', 'police_db.json',
'restaurant_db.json', 'taxi_db.json', 'train_db.json']
PREPROCESSED = ['data_prep.json']
    @classmethod
    @overrides


import copy
from enum import Enum
import numpy as np
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.inferable import Inferable
ENTITIES = {
    '<cuisine>': None,
    '<location>': None,
    '<party_size>': None,
    '<rest_type>': None,
}
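# A minimal sketch (hypothetical helper, not EntityTracker's API) of the kind of
# slot filling the tracker performs with the constant value lists defined below:
def _demo_extract_entities(utterance, cuisines, locations):
    slots = dict(ENTITIES)
    for word in utterance.lower().split():
        if word in cuisines:
            slots['<cuisine>'] = word
        elif word in locations:
            slots['<location>'] = word
    return slots
# _demo_extract_entities('book a thai place in bangkok', ['thai'], ['bangkok'])
# -> {'<cuisine>': 'thai', '<location>': 'bangkok', '<party_size>': None, '<rest_type>': None}
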
@register('hcn_et')
class EntityTracker(Inferable):
def __init__(self, entities=copy.deepcopy(ENTITIES)):
self.entities = entities
self.num_features = 4 # tracking 4 entities
self.rating = None
# constants
self.party_sizes = ['1', '2', '3', '4', '5', '6', '7', '8', 'one', 'two', 'three',
'four', 'five', 'six', 'seven', 'eight']
self.locations = ['bangkok', 'beijing', 'bombay', 'hanoi', 'paris', 'rome', 'london',
'madrid', 'seoul', 'tokyo']
self.cuisines = ['british', 'cantonese', 'french', 'indian', 'italian', 'japanese',
'korean', 'spanish', 'thai', 'vietnamese']
self.rest_types = ['cheap', 'expensive', 'moderate']
        self.EntType = Enum('Entity Type',


from typing import Iterable

import spacy

from deeppavlov.core.common.registry import register
from deeppavlov.core.models.component import Component


def _try_load_spacy_model(model_name: str, disable: Iterable[str] = ()):
disable = set(disable)
try:
model = spacy.load(model_name, disable=disable)
except OSError as e:
try:
model = __import__(model_name).load(disable=disable)
if not isinstance(model, spacy.language.Language):
raise RuntimeError(f'{model_name} is not a spacy model module')
except Exception:
raise e
return model
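# Example usage (assumes the en_core_web_sm model or its package is installed):
#
#     nlp = _try_load_spacy_model('en_core_web_sm', disable=['parser', 'ner'])
#     [t.text for t in nlp('Hello world!')]   # -> ['Hello', 'world', '!']
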
@register('stream_spacy_tokenizer')
class StreamSpacyTokenizer(Component):
"""Tokenize or lemmatize a list of documents. Default spacy model is **en_core_web_sm**.
Return a list of tokens or lemmas for a whole document.
    If called on ``List[str]``, performs the detokenizing procedure.
Args:
        disable: spaCy pipeline components to disable, which speeds up processing;
            if nothing is passed, the full pipeline is used
stopwords: a list of stopwords that should be ignored during tokenizing/lemmatizing
and ngrams creation
batch_size: a batch size for spaCy buffering
ngram_range: size of ngrams to create; only unigrams are returned by default
lemmas: whether to perform lemmatizing or not
lowercase: whether to perform lowercasing or not; is performed by default by :meth:`_tokenize`
and :meth:`_lemmatize` methods
alphas_only: whether to filter out non-alpha tokens; is performed by default by
        :meth:`_filter` method
    """


from copy import deepcopy
from logging import getLogger
from pathlib import Path
from typing import List, Any
import numpy as np
from deeppavlov.core.common.params_search import ParamsSearch
from deeppavlov.core.common.registry import register
log = getLogger(__name__)
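# A minimal sketch (simplified, hypothetical parameters) of the mutation step
# described in the ParamsEvolution docstring below: each numeric parameter mutates
# with probability p_mutation, by a power drawn uniformly from [-sigma, sigma]:
def _demo_mutate(params: dict, p_mutation: float = 0.5, sigma: float = 0.1) -> dict:
    mutated = deepcopy(params)
    for key, value in mutated.items():
        if isinstance(value, float) and np.random.random() < p_mutation:
            mutated[key] = value + np.random.uniform(-sigma, sigma)
    return mutated
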
@register('params_evolution')
class ParamsEvolution(ParamsSearch):
"""
    Class that performs the full evolutionary process (task scores -> max):
    1. initializes a random population
    2. makes replacements to get the next generation:
        a. selection according to the obtained scores
        b. crossover (recombination) with a given probability p_crossover
        c. mutation with a given mutation rate p_mutation (probability to mutate)
           according to a given mutation power sigma
           (the current mutation power is drawn uniformly at random from -sigma to sigma)
Args:
        population_size: number of individuals per generation
        p_crossover: probability of crossover for the current replacement
        crossover_power: fraction of the EVOLVING parents' parameters to exchange when producing offspring
        p_mutation: probability of mutation for the current replacement
    """


import numpy as np
from pathlib import Path
from overrides import overrides
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.component import Component
from deeppavlov.core.common.log import get_logger
from deeppavlov.core.models.serializable import Serializable
from typing import List
log = get_logger(__name__)
@register('dict_emb')
class DictEmbedder(Component, Serializable):
def __init__(self, load_path, save_path=None, dim=100, **kwargs):
super().__init__(save_path=save_path, load_path=load_path)
self.tok2emb = {}
self.dim = dim
self.load()
    def save(self, *args, **kwargs):
        """Saving is not supported for this embedder."""
        raise NotImplementedError
def load(self):
"""
Load dictionary of embeddings from file.
"""
import csv
from pathlib import Path
from typing import List, Tuple, Union, Dict
from deeppavlov.core.common.registry import register
from deeppavlov.core.data.dataset_reader import DatasetReader
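# A minimal sketch (hypothetical helper, not the reader's API) of the "pre"/"post"
# context padding the class below applies to keep exactly num_context_turns turns:
def _demo_pad_context(turns: List[str], num_context_turns: int, padding: str = "post") -> List[str]:
    turns = turns[-num_context_turns:]
    pad = [''] * (num_context_turns - len(turns))
    return turns + pad if padding == "post" else pad + turns
# _demo_pad_context(['hi'], 3) -> ['hi', '', '']
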
@register('ubuntu_v2_mt_reader')
class UbuntuV2MTReader(DatasetReader):
"""The class to read the Ubuntu V2 dataset from csv files taking into account multi-turn dialogue ``context``.
Please, see https://github.com/rkadlec/ubuntu-ranking-dataset-creator.
Args:
data_path: A path to a folder with dataset csv files.
num_context_turns: A maximum number of dialogue ``context`` turns.
padding: "post" or "pre" context sentences padding
"""
def read(self, data_path: str,
num_context_turns: int = 1,
padding: str = "post",
*args, **kwargs) -> Dict[str, List[Tuple[List[str], int]]]:
"""Read the Ubuntu V2 dataset from csv files taking into account multi-turn dialogue ``context``.# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logging import getLogger
from overrides import overrides
from jsonschema import validate
from deeppavlov.core.common.registry import register
from deeppavlov.core.models.component import Component
log = getLogger(__name__)
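# A minimal sketch (hypothetical schema, not the adapter's actual one) of how the
# imported jsonschema.validate can check dialog payloads like the example below:
_DEMO_SCHEMA = {
    'type': 'object',
    'required': ['dialog'],
    'properties': {'dialog': {'type': 'array'}},
}
validate({'dialog': []}, _DEMO_SCHEMA)   # raises jsonschema.ValidationError on mismatch
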
@register("chitchat_bot_adapter")
class ChitChatBotAdapter(Component):
"""
    Example of input_data:
[
{
"dialog": [
{
"sender_id": "text",
"sender_class": "text",
"text": "text",
"system": False,
"time": "text",
}
],
"start_time": "text",
"users": [