Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from ludwig.models.modules.loss_modules import sampled_softmax_cross_entropy
from ludwig.models.modules.loss_modules import weighted_softmax_cross_entropy
from ludwig.models.modules.measure_modules import accuracy as get_accuracy
from ludwig.models.modules.measure_modules import hits_at_k as get_hits_at_k
from ludwig.utils.math_utils import int_type
from ludwig.utils.math_utils import softmax
from ludwig.utils.metrics_utils import ConfusionMatrix
from ludwig.utils.misc import set_default_value
from ludwig.utils.strings_utils import UNKNOWN_SYMBOL
from ludwig.utils.strings_utils import create_vocabulary
logger = logging.getLogger(__name__)
class CategoryBaseFeature(BaseFeature):
    """Base feature whose instances are tagged with the CATEGORY type.

    Also carries the default preprocessing options for this feature type.
    """

    # NOTE(review): the excerpt lost its indentation; this mapping is assumed
    # to be a class attribute rather than a local of __init__ -- confirm.
    preprocessing_defaults = {
        'most_common': 10000,
        'lowercase': False,
        'missing_value_strategy': FILL_WITH_CONST,
        'fill_value': UNKNOWN_SYMBOL,
    }

    def __init__(self, feature):
        """Pass ``feature`` to the parent initializer, then set the type tag."""
        super().__init__(feature)
        self.type = CATEGORY
@staticmethod
def get_feature_meta(column, preprocessing_parameters):
idx2str, str2idx, str2freq, _ = create_vocabulary(
column, 'stripped',
num_most_frequent=preprocessing_parameters['most_common'],
import numpy as np
import tensorflow as tf
from dateutil.parser import parse
from ludwig.constants import *
from ludwig.features.base_feature import BaseFeature
from ludwig.features.base_feature import InputFeature
from ludwig.models.modules.date_encoders import DateEmbed, DateWave
from ludwig.utils.misc import set_default_value, get_from_registry
logger = logging.getLogger(__name__)
DATE_VECTOR_LENGTH = 9
class DateBaseFeature(BaseFeature):
    """Base feature whose instances are tagged with the DATE type.

    Also carries the default preprocessing options for this feature type.
    """

    # NOTE(review): the excerpt lost its indentation; this mapping is assumed
    # to be a class attribute rather than a local of __init__ -- confirm.
    preprocessing_defaults = {
        'missing_value_strategy': FILL_WITH_CONST,
        'fill_value': '',
        'datetime_format': None,
    }

    def __init__(self, feature):
        """Pass ``feature`` to the parent initializer, then set the type tag."""
        super().__init__(feature)
        self.type = DATE

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        """Return metadata that wraps the given preprocessing parameters.

        ``column`` is accepted but not used. The result is a dict with the
        single key ``'preprocessing'`` mapped to ``preprocessing_parameters``.
        """
        return {'preprocessing': preprocessing_parameters}
from ludwig.constants import *
from ludwig.features.base_feature import BaseFeature
from ludwig.features.base_feature import InputFeature
from ludwig.models.modules.image_encoders import ResNetEncoder
from ludwig.models.modules.image_encoders import Stacked2DCNN
from ludwig.utils.data_utils import get_abs_path
from ludwig.utils.image_utils import greyscale
from ludwig.utils.image_utils import num_channels_in_image
from ludwig.utils.image_utils import resize_image
from ludwig.utils.misc import get_from_registry
from ludwig.utils.misc import set_default_value
logger = logging.getLogger(__name__)
class ImageBaseFeature(BaseFeature):
    """Base feature whose instances are tagged with the IMAGE type.

    Also carries the default preprocessing options for this feature type.
    """

    # NOTE(review): the excerpt lost its indentation; this mapping is assumed
    # to be a class attribute rather than a local of __init__ -- confirm.
    preprocessing_defaults = {
        'missing_value_strategy': BACKFILL,
        'in_memory': True,
        'resize_method': 'interpolate',
        'scaling': 'pixel_normalization',
        'num_processes': 1,
    }

    def __init__(self, feature):
        """Pass ``feature`` to the parent initializer, then set the type tag."""
        super().__init__(feature)
        self.type = IMAGE
@staticmethod
def get_feature_meta(column, preprocessing_parameters):
return {
'preprocessing': preprocessing_parameters
from ludwig.features.sequence_feature import SequenceOutputFeature
from ludwig.models.modules.measure_modules import absolute_error
from ludwig.models.modules.measure_modules import error
from ludwig.models.modules.measure_modules import r2
from ludwig.models.modules.measure_modules import squared_error
from ludwig.utils.misc import get_from_registry
from ludwig.utils.misc import set_default_value
from ludwig.utils.strings_utils import tokenizer_registry
logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
class TimeseriesBaseFeature(BaseFeature):
    """Base feature whose instances are tagged with the TIMESERIES type.

    Also carries the default preprocessing options for this feature type.
    """

    # NOTE(review): the excerpt lost its indentation; this mapping is assumed
    # to be a class attribute rather than a local of __init__ -- confirm.
    preprocessing_defaults = {
        'timeseries_length_limit': 256,
        'padding_value': 0,
        'padding': 'right',
        'tokenizer': 'space',
        'missing_value_strategy': FILL_WITH_CONST,
        'fill_value': '',
    }

    def __init__(self, feature):
        """Pass ``feature`` to the parent initializer, then set the type tag."""
        super().__init__(feature)
        self.type = TIMESERIES
@staticmethod
def get_feature_meta(column, preprocessing_parameters):
tokenizer = get_from_registry(
from ludwig.features.base_feature import OutputFeature
from ludwig.models.modules.fully_connected_modules import fc_layer
from ludwig.models.modules.initializer_modules import get_initializer
from ludwig.models.modules.measure_modules import \
absolute_error as get_absolute_error
from ludwig.models.modules.measure_modules import error as get_error
from ludwig.models.modules.measure_modules import r2 as get_r2
from ludwig.models.modules.measure_modules import \
squared_error as get_squared_error
from ludwig.utils.misc import set_default_value
from ludwig.utils.misc import set_default_values
logger = logging.getLogger(__name__)
class NumericalBaseFeature(BaseFeature):
    """Base feature whose instances are tagged with the NUMERICAL type.

    Also carries the default preprocessing options for this feature type.
    """

    # NOTE(review): the excerpt lost its indentation; this mapping is assumed
    # to be a class attribute rather than a local of __init__ -- confirm.
    preprocessing_defaults = {
        'missing_value_strategy': FILL_WITH_CONST,
        'fill_value': 0,
        'normalization': None,
    }

    def __init__(self, feature):
        """Pass ``feature`` to the parent initializer, then set the type tag."""
        super().__init__(feature)
        self.type = NUMERICAL
@staticmethod
def get_feature_meta(column, preprocessing_parameters):
if preprocessing_parameters['normalization'] is not None:
if preprocessing_parameters['normalization'] == 'zscore':
return {
'mean': column.astype(np.float32).mean(),
from ludwig.features.base_feature import OutputFeature
from ludwig.models.modules.dense_encoders import Dense
from ludwig.models.modules.loss_modules import weighted_softmax_cross_entropy
from ludwig.models.modules.measure_modules import \
absolute_error as get_absolute_error
from ludwig.models.modules.measure_modules import error as get_error
from ludwig.models.modules.measure_modules import r2 as get_r2
from ludwig.models.modules.measure_modules import \
squared_error as get_squared_error
from ludwig.utils.misc import get_from_registry
from ludwig.utils.misc import set_default_value
logger = logging.getLogger(__name__)
class VectorBaseFeature(BaseFeature):
    """Base feature whose instances are tagged with the VECTOR type.

    Also carries the default preprocessing options for this feature type.
    """

    # NOTE(review): the excerpt lost its indentation; this mapping is assumed
    # to be a class attribute rather than a local of __init__ -- confirm.
    preprocessing_defaults = {
        'missing_value_strategy': FILL_WITH_CONST,
        'fill_value': "",
    }

    def __init__(self, feature):
        """Pass ``feature`` to the parent initializer, then set the type tag."""
        super().__init__(feature)
        self.type = VECTOR

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        """Return metadata that wraps the given preprocessing parameters.

        ``column`` is accepted but not used. The result is a dict with the
        single key ``'preprocessing'`` mapped to ``preprocessing_parameters``.
        """
        return {'preprocessing': preprocessing_parameters}
@staticmethod
from ludwig.models.modules.sequence_encoders import RNN
from ludwig.models.modules.sequence_encoders import StackedCNN
from ludwig.models.modules.sequence_encoders import StackedParallelCNN
from ludwig.utils.math_utils import softmax
from ludwig.utils.metrics_utils import ConfusionMatrix
from ludwig.utils.misc import get_from_registry
from ludwig.utils.misc import set_default_value
from ludwig.utils.strings_utils import PADDING_SYMBOL
from ludwig.utils.strings_utils import UNKNOWN_SYMBOL
from ludwig.utils.strings_utils import build_sequence_matrix
from ludwig.utils.strings_utils import create_vocabulary
logger = logging.getLogger(__name__)
class SequenceBaseFeature(BaseFeature):
    """Base feature whose instances are tagged with the SEQUENCE type.

    Also carries the default preprocessing options for this feature type.
    """

    # NOTE(review): the excerpt lost its indentation; this mapping is assumed
    # to be a class attribute rather than a local of __init__ -- confirm.
    preprocessing_defaults = {
        'sequence_length_limit': 256,
        'most_common': 20000,
        'padding_symbol': PADDING_SYMBOL,
        'unknown_symbol': UNKNOWN_SYMBOL,
        'padding': 'right',
        'tokenizer': 'space',
        'lowercase': False,
        'vocab_file': None,
        'missing_value_strategy': FILL_WITH_CONST,
        'fill_value': '',
    }

    def __init__(self, feature):
        """Pass ``feature`` to the parent initializer, then set the type tag."""
        super().__init__(feature)
        self.type = SEQUENCE
from ludwig.features.base_feature import BaseFeature
from ludwig.features.sequence_feature import SequenceInputFeature
from ludwig.features.sequence_feature import SequenceOutputFeature
from ludwig.utils.math_utils import softmax
from ludwig.utils.metrics_utils import ConfusionMatrix
from ludwig.utils.misc import set_default_value
from ludwig.utils.misc import set_default_values
from ludwig.utils.strings_utils import PADDING_SYMBOL
from ludwig.utils.strings_utils import UNKNOWN_SYMBOL
from ludwig.utils.strings_utils import build_sequence_matrix
from ludwig.utils.strings_utils import create_vocabulary
logger = logging.getLogger(__name__)
class TextBaseFeature(BaseFeature):
    """Base feature whose instances are tagged with the TEXT type."""

    def __init__(self, feature):
        """Pass ``feature`` to the parent initializer, then set the type tag."""
        super().__init__(feature)
        self.type = TEXT
preprocessing_defaults = {
'char_tokenizer': 'characters',
'char_vocab_file': None,
'char_sequence_length_limit': 1024,
'char_most_common': 70,
'word_tokenizer': 'space_punct',
'word_vocab_file': None,
'word_sequence_length_limit': 256,
'word_most_common': 20000,
'padding_symbol': PADDING_SYMBOL,
'unknown_symbol': UNKNOWN_SYMBOL,
'padding': 'right',
import numpy as np
import tensorflow as tf
from ludwig.constants import *
from ludwig.features.base_feature import BaseFeature
from ludwig.features.base_feature import InputFeature
from ludwig.features.feature_utils import set_str_to_idx
from ludwig.models.modules.embedding_modules import EmbedWeighted
from ludwig.utils.misc import set_default_value
from ludwig.utils.strings_utils import create_vocabulary
logger = logging.getLogger(__name__)
class BagBaseFeature(BaseFeature):
    """Base feature whose instances are tagged with the BAG type.

    Also carries the default preprocessing options for this feature type.
    """

    # NOTE(review): the excerpt lost its indentation; this mapping is assumed
    # to be a class attribute rather than a local of __init__ -- confirm.
    preprocessing_defaults = {
        'tokenizer': 'space',
        'most_common': 10000,
        'lowercase': False,
        'missing_value_strategy': FILL_WITH_CONST,
        'fill_value': '',
    }

    def __init__(self, feature):
        """Pass ``feature`` to the parent initializer, then set the type tag."""
        super().__init__(feature)
        self.type = BAG
@staticmethod
def get_feature_meta(column, preprocessing_parameters):
idx2str, str2idx, str2freq, max_size = create_vocabulary(
column,
from ludwig.features.base_feature import OutputFeature
from ludwig.models.modules.initializer_modules import get_initializer
from ludwig.models.modules.loss_modules import mean_confidence_penalty
from ludwig.models.modules.measure_modules import accuracy as get_accuracy
from ludwig.utils.metrics_utils import ConfusionMatrix
from ludwig.utils.metrics_utils import average_precision_score
from ludwig.utils.metrics_utils import precision_recall_curve
from ludwig.utils.metrics_utils import roc_auc_score
from ludwig.utils.metrics_utils import roc_curve
from ludwig.utils.misc import set_default_value
from ludwig.utils.misc import set_default_values
logger = logging.getLogger(__name__)
class BinaryBaseFeature(BaseFeature):
    """Base feature whose instances are tagged with the BINARY type.

    Also carries the default preprocessing options for this feature type.
    """

    # NOTE(review): the excerpt lost its indentation; this mapping is assumed
    # to be a class attribute rather than a local of __init__ -- confirm.
    preprocessing_defaults = {
        'missing_value_strategy': FILL_WITH_CONST,
        'fill_value': 0,
    }

    def __init__(self, feature):
        """Pass ``feature`` to the parent initializer, then set the type tag."""
        super().__init__(feature)
        self.type = BINARY

    @staticmethod
    def get_feature_meta(column, preprocessing_parameters):
        """Return an empty metadata dict; neither argument is used."""
        return {}
@staticmethod
def add_feature_data(
feature,