from pyspark.ml.param import Param, Params

class HasInducedError(Params):
    def __init__(self):
        super(HasInducedError, self).__init__()
        self.inducedError = Param(self, "inducedError",
                                  "Uniformly-distributed error added to feature")
from pyspark import keyword_only
from pyspark.ml import Estimator

class CrossValidator(Estimator):
    @keyword_only
    def __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3):
        """
        __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3)
        """
        super(CrossValidator, self).__init__()
        #: param for estimator to be cross-validated
        self.estimator = Param(self, "estimator", "estimator to be cross-validated")
        #: param for estimator param maps
        self.estimatorParamMaps = Param(self, "estimatorParamMaps", "estimator param maps")
        #: param for the evaluator used to select hyper-parameters that
        #: maximize the cross-validated metric
        self.evaluator = Param(
            self, "evaluator",
            "evaluator used to select hyper-parameters that maximize the cross-validated metric")
        #: param for number of folds for cross validation
        self.numFolds = Param(self, "numFolds", "number of folds for cross validation")
        self._setDefault(numFolds=3)
        kwargs = self.__init__._input_kwargs  # pyspark 1.x style; newer versions use self._input_kwargs
        self._set(**kwargs)
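# A hedged usage sketch of the same constructor signature, using the stock
# pyspark.ml.tuning.CrossValidator (the estimator, grid, and training DataFrame
# are illustrative, not part of the excerpt):
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

lr = LogisticRegression()
grid = ParamGridBuilder().addGrid(lr.regParam, [0.01, 0.1]).build()
cv = CrossValidator(estimator=lr, estimatorParamMaps=grid,
                    evaluator=BinaryClassificationEvaluator(), numFolds=3)
# cvModel = cv.fit(train_df)  # train_df: a labeled DataFrame (assumed to exist)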
epochs = Param(Params._dummy(), "epochs",
"The number of passes done over the training data. Must be > 0. ",
typeConverter=TypeConverters.toInt)
predictor_type = Param(Params._dummy(), "predictor_type",
"Whether training is for binary classification or regression. "
"Supported options: 'binary_classifier', and 'regressor'. ",
typeConverter=TypeConverters.toString)
use_bias = Param(Params._dummy(), "use_bias",
"Whether model should include bias. ",
typeConverter=TypeConverters.toString)
num_models = Param(Params._dummy(), "num_models",
"Number of models to train in parallel. Must be > 0 or 'auto'. ",
typeConverter=TypeConverters.toString)
num_calibration_samples = Param(Params._dummy(), "num_calibration_samples",
"Number of samples to use from validation dataset for doing "
"model calibration (finding the best threshold). "
"Must be > 0.",
typeConverter=TypeConverters.toInt)
init_method = Param(Params._dummy(), "init_method",
"Initialization function for the model weights. "
"Supported options: 'uniform' and 'normal'. ",
typeConverter=TypeConverters.toString)
init_scale = Param(Params._dummy(), "init_scale",
"Scale for init method uniform. Must be > 0. ",
gamma = Param(
    Params._dummy(), "gamma",
    "Minimum loss reduction required to make an additional partition on a leaf node "
    "of the tree. The larger the value, the more conservative the algorithm will be. "
    "Must be >= 0.",
    typeConverter=TypeConverters.toFloat)
max_depth = Param(
    Params._dummy(), "max_depth",
    "Maximum depth of a tree. Increasing this value makes the model more complex and "
    "more likely to overfit. 0 indicates no limit. A limit is required when "
    "grow_policy=depthwise. Must be >= 0. Default value is 6.",
    typeConverter=TypeConverters.toInt)
min_child_weight = Param(
    Params._dummy(), "min_child_weight",
    "Minimum sum of instance weight (hessian) needed in a child. If the tree partition step "
    "results in a leaf node with the sum of instance weight less than min_child_weight, then "
    "the building process will give up further partitioning. In linear regression mode, "
    "this simply corresponds to the minimum number of instances needed in each node. "
    "The larger the value, the more conservative the algorithm will be. Must be >= 0.",
    typeConverter=TypeConverters.toFloat)
max_delta_step = Param(
    Params._dummy(), "max_delta_step",
    "Maximum delta step we allow each tree's weight estimation to be. "
    "If the value is set to 0, there is no constraint. If it is set to a positive "
    "value, it can help make the update step more conservative. Usually this parameter is "
    "not needed, but it might help in logistic regression when the classes are extremely "
    "imbalanced. Setting it to a value of 1-10 might help control the update. Must be >= 0.",
    typeConverter=TypeConverters.toFloat)
booster = Param(
    Params._dummy(), "booster",
    "Which booster to use. Can be 'gbtree', 'gblinear' or 'dart'. "
    "gbtree and dart use tree-based models while gblinear uses a linear function.",
    typeConverter=TypeConverters.toString)
silent = Param(
    Params._dummy(), "silent",
    "Whether in silent mode. "
    "0 means print running messages, 1 means silent mode.",
    typeConverter=TypeConverters.toInt)
nthread = Param(
    Params._dummy(), "nthread",
    "Number of parallel threads used to run XGBoost. Must be >= 1.",
    typeConverter=TypeConverters.toInt)
eta = Param(
    Params._dummy(), "eta",
    "Step size shrinkage used in updates to prevent overfitting. After each boosting step, "
    "we can directly get the weights of new features, and eta shrinks the feature weights "
    "to make the boosting process more conservative. Must be in [0, 1].",
    typeConverter=TypeConverters.toFloat)
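# The Params above use Params._dummy() as a placeholder parent; in recent pyspark
# versions Params.__init__ copies each class-level Param onto the instance and
# re-binds its parent. A minimal sketch (Booster is illustrative, not a library class):
from pyspark.ml.param import Param, Params, TypeConverters

class Booster(Params):
    eta = Param(Params._dummy(), "eta", "step size shrinkage, in [0, 1]",
                typeConverter=TypeConverters.toFloat)

b = Booster()
b._set(eta=0.3)                # the toFloat converter coerces the value
print(b.getOrDefault(b.eta))   # 0.3
print(b.eta.parent == b.uid)   # True: the dummy parent was re-bound to the instance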
tensorflowGraph = Param(Params._dummy(), "tensorflowGraph", "", typeConverter=TypeConverters.toString)
tfInput = Param(Params._dummy(), "tfInput", "", typeConverter=TypeConverters.toString)
tfOutput = Param(Params._dummy(), "tfOutput", "", typeConverter=TypeConverters.toString)
tfLabel = Param(Params._dummy(), "tfLabel", "", typeConverter=TypeConverters.toString)
tfOptimizer = Param(Params._dummy(), "tfOptimizer", "", typeConverter=TypeConverters.toString)
tfLearningRate = Param(Params._dummy(), "tfLearningRate", "", typeConverter=TypeConverters.toFloat)
iters = Param(Params._dummy(), "iters", "", typeConverter=TypeConverters.toInt)
partitions = Param(Params._dummy(), "partitions", "", typeConverter=TypeConverters.toInt)
miniBatchSize = Param(Params._dummy(), "miniBatchSize", "", typeConverter=TypeConverters.toInt)
miniStochasticIters = Param(Params._dummy(), "miniStochasticIters", "", typeConverter=TypeConverters.toInt)
verbose = Param(Params._dummy(), "verbose", "", typeConverter=TypeConverters.toInt)
acquireLock = Param(Params._dummy(), "acquireLock", "", typeConverter=TypeConverters.toBoolean)
shufflePerIter = Param(Params._dummy(), "shufflePerIter", "", typeConverter=TypeConverters.toBoolean)
tfDropout = Param(Params._dummy(), "tfDropout", "", typeConverter=TypeConverters.toString)
toKeepDropout = Param(Params._dummy(), "toKeepDropout", "", typeConverter=TypeConverters.toBoolean)
partitionShuffles = Param(Params._dummy(), "partitionShuffles", "", typeConverter=TypeConverters.toInt)
optimizerOptions = Param(Params._dummy(), "optimizerOptions", "", typeConverter=TypeConverters.toString)
port = Param(Params._dummy(), "port", "", typeConverter=TypeConverters.toInt)
@keyword_only
def __init__(self,
inputCol=None,
tensorflowGraph=None,
tfInput=None,
tfLabel=None,
tfOutput=None,
tfOptimizer=None,
tfLearningRate=None,
iters=None,
predictionCol=None,
partitions=None,
miniBatchSize=None):
    super(SparkAsyncDL, self).__init__()  # SparkAsyncDL: assumed name of the owning class
    kwargs = self._input_kwargs
    self._set(**kwargs)
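# Every Param declared this way appears in the instance's `params` property, even
# when its doc string is empty, as in the block above; e.g. with a stock estimator:
from pyspark.ml.classification import LogisticRegression

for p in LogisticRegression().params:
    print(p.name, '-', p.doc)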
torchObj = Param(Params._dummy(), "torchObj", "The serialized torch object", typeConverter=TypeConverters.toString)
mode = Param(Params._dummy(), "mode", "The training mode", typeConverter=TypeConverters.toString)
device = Param(Params._dummy(), "device", "", typeConverter=TypeConverters.toString)
iters = Param(Params._dummy(), "iters", "", typeConverter=TypeConverters.toInt)
partitions = Param(Params._dummy(), "partitions", "", typeConverter=TypeConverters.toInt)
verbose = Param(Params._dummy(), "verbose", "", typeConverter=TypeConverters.toInt)
acquireLock = Param(Params._dummy(), "acquireLock", "", typeConverter=TypeConverters.toBoolean)
partitionShuffles = Param(Params._dummy(), "partitionShuffles", "", typeConverter=TypeConverters.toInt)
port = Param(Params._dummy(), "port", "", typeConverter=TypeConverters.toInt)
useBarrier = Param(Params._dummy(), "useBarrier", "", typeConverter=TypeConverters.toBoolean)
useVectorOut = Param(Params._dummy(), "useVectorOut", "", typeConverter=TypeConverters.toBoolean)
earlyStopPatience = Param(Params._dummy(), "earlyStopPatience", "", typeConverter=TypeConverters.toInt)
miniBatch = Param(Params._dummy(), "miniBatch", "", typeConverter=TypeConverters.toInt)
validationPct = Param(Params._dummy(), "validationPct", "", typeConverter=TypeConverters.toFloat)
@keyword_only
def __init__(
self,
inputCol=None,
labelCol=None,
torchObj=None,
iters=None,
predictionCol=None,
partitions=None,
acquireLock=None,
verbose=None):
    super(SparkTorch, self).__init__()  # SparkTorch: assumed name of the owning class
    kwargs = self._input_kwargs
    self._set(**kwargs)
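# The @keyword_only decorator used above captures the keyword arguments passed to
# the constructor so they can be forwarded to _set. A minimal runnable sketch
# (Trainer is illustrative, not a library class):
from pyspark import keyword_only
from pyspark.ml.param import Param, Params, TypeConverters

class Trainer(Params):
    iters = Param(Params._dummy(), "iters", "number of training iterations",
                  typeConverter=TypeConverters.toInt)

    @keyword_only
    def __init__(self, iters=None):
        super(Trainer, self).__init__()
        kwargs = self._input_kwargs  # populated by @keyword_only in pyspark 2.1+
        self._set(**{k: v for k, v in kwargs.items() if v is not None})

t = Trainer(iters=10)
print(t.getOrDefault(t.iters))  # 10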
class HasRawPredictionCol(Params):
    def __init__(self):
        super(HasRawPredictionCol, self).__init__()
        #: param for raw prediction (a.k.a. confidence) column name
        self.rawPredictionCol = Param(self, "rawPredictionCol",
                                      "raw prediction (a.k.a. confidence) column name")
        self._setDefault(rawPredictionCol='rawPrediction')
def getRegParam(self):
    """
    Gets the value of regParam or its default value.
    """
    return self.getOrDefault(self.regParam)
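# getOrDefault, used above, resolves a user-set value first and only then falls
# back to the declared default, e.g. with a stock pyspark estimator:
from pyspark.ml.classification import LogisticRegression

lr2 = LogisticRegression()
print(lr2.getRegParam())  # 0.0, the declared default
lr2.setRegParam(0.1)
print(lr2.getRegParam())  # 0.1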
class HasFeaturesCol(Params):
    """
    Mixin for param featuresCol: features column name.
    """
    # a placeholder to make it appear in the generated doc
    featuresCol = Param(Params._dummy(), "featuresCol", "features column name")

    def __init__(self):
        super(HasFeaturesCol, self).__init__()
        #: param for features column name
        self.featuresCol = Param(self, "featuresCol", "features column name")
        self._setDefault(featuresCol='features')

    def setFeaturesCol(self, value):
        """
        Sets the value of :py:attr:`featuresCol`.
        """
        self._paramMap[self.featuresCol] = value
        return self

    def getFeaturesCol(self):
        """
        Gets the value of featuresCol or its default value.
        """
        return self.getOrDefault(self.featuresCol)
Params._dummy(), "max_depth",
"Maximum depth of a tree. Increasing this value makes the model more complex and "
"likely to be overfitted. 0 indicates no limit. A limit is required when"
"grow_policy=depth-wise. Must be >= 0. Default value is 6",
typeConverter=TypeConverters.toInt)
min_child_weight = Param(
Params._dummy(), "min_child_weight",
"Minimum sum of instance weight (hessian) needed in a child. If the tree partition step "
"results in a leaf node with the sum of instance weight less than min_child_weight, then "
"the building process will give up further partitioning. In linear regression mode, "
"this simply corresponds to minimum number of instances needed to be in each node. "
"The larger the value, the more conservative the algorithm will be. Must be >= 0.",
typeConverter=TypeConverters.toFloat)
max_delta_step = Param(
Params._dummy(), "max_delta_step",
"Maximum delta step we allow each tree's weight estimation to be. "
"If the value is set to 0, it means there is no constraint. If it is set to a positive "
"value, it can help make the update step more conservative. Usually this parameter is "
"not needed, but it might help in logistic regression when the classes are extremely"
" imbalanced. Setting it to value of 1-10 might help control the update. Must be >= 0.",
typeConverter=TypeConverters.toFloat)
subsample = Param(
    Params._dummy(), "subsample",
    "Subsample ratio of the training instances. Setting it to 0.5 means that XGBoost will "
    "randomly collect half of the data instances to grow trees, and this will "
    "prevent overfitting. Must be in (0, 1].",
    typeConverter=TypeConverters.toFloat)
colsample_bytree = Param(
    Params._dummy(), "colsample_bytree",
    "Subsample ratio of columns when constructing each tree. Must be in (0, 1].",
    typeConverter=TypeConverters.toFloat)