Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def trainModel():
    """Train the AdaBoost model on the training .csv file.

    Loads ``trainDatasetFileName`` into a feature table with ``nFeatures``
    columns and a one-column label table, feeds both to ``training.Batch()``
    and stores the computed result in the module-level ``trainingResult``.
    """
    global trainingResult

    # Data source bound to the training .csv file
    csvSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature and label tables; the merged table lets one load fill both
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    labels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    combined = MergedNumericTable(features, labels)
    csvSource.loadDataBlock(combined)

    # AdaBoost training algorithm with its two inputs
    trainer = training.Batch()
    trainer.input.set(classifier.training.data, features)
    trainer.input.set(classifier.training.labels, labels)

    # Run the training and publish the result
    trainingResult = trainer.compute()
def testModel(trainingResult):
    """Predict with a trained ridge regression model and print the predictions.

    Args:
        trainingResult: Training result; its ``training.model`` entry is
            handed to the prediction algorithm.
    """
    # Data source bound to the test .csv file
    csvSource = FileDataSource(
        testDatasetFileName,
        DataSource.doAllocateNumericTable,
        DataSource.doDictionaryFromContext,
    )

    # Feature and ground-truth tables, loaded together through a merged table
    features = HomogenNumericTable(nFeatures, 0, NumericTable.doNotAllocate)
    groundTruth = HomogenNumericTable(nDependentVariables, 0, NumericTable.doNotAllocate)
    combined = MergedNumericTable(features, groundTruth)
    csvSource.loadDataBlock(combined)

    # Ridge regression prediction algorithm: test features plus trained model
    predictor = prediction.Batch()
    predictor.input.setTable(prediction.data, features)
    predictor.input.setModel(prediction.model, trainingResult.get(training.model))

    # Compute the predictions and print them
    result = predictor.compute()
    printNumericTable(
        result.get(prediction.prediction),
        "Ridge Regression prediction results: (first 10 rows):",
        10,
    )
def testModel():
    """Predict with the trained multiple linear regression model and print the output.

    Reads the model from the module-level ``trainingResult`` (via
    ``training.model``), loads the test set from ``testDatasetFileName`` and
    prints the prediction table.
    """
    # Initialize FileDataSource to retrieve the input data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Create Numeric Tables for testing data and ground truth values
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Retrieve the data from an input file
    testDataSource.loadDataBlock(mergedData)

    # Create an algorithm object to predict values of multiple linear regression
    algorithm = prediction.Batch()

    # Pass a testing data set and the trained model to the algorithm
    algorithm.input.setTable(prediction.data, testData)
    algorithm.input.setModel(prediction.model, trainingResult.get(training.model))

    # Predict values of multiple linear regression and retrieve the algorithm results
    predictionResult = algorithm.compute()

    # The original call was left unterminated; the row limit of 10 matches
    # the "(first 10 rows)" label in the message.
    printNumericTable(
        predictionResult.get(prediction.prediction),
        "Linear Regression prediction results: (first 10 rows):",
        10,
    )
def testModel():
    """Run AdaBoost prediction on the test .csv file.

    Uses the model stored in the module-level ``trainingResult`` and writes
    two module-level names: ``testGroundTruth`` (the one-column label table
    loaded from the file) and ``predictionResult`` (the algorithm output).
    """
    global predictionResult, testGroundTruth

    # Data source bound to the test .csv file
    csvSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature and label tables; the merged table lets one load fill both
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    combined = MergedNumericTable(features, testGroundTruth)
    csvSource.loadDataBlock(combined)

    # AdaBoost prediction algorithm: test features plus trained model
    predictor = prediction.Batch()
    predictor.input.setTable(classifier.prediction.data, features)
    predictor.input.setModel(
        classifier.prediction.model,
        trainingResult.get(classifier.training.model),
    )

    # Compute and publish the prediction result
    # (Result class from classifier.prediction)
    predictionResult = predictor.compute()
def trainModel():
    """Train the SVM model on the training .csv file.

    Loads ``trainDatasetFileName`` into a feature table with ``nFeatures``
    columns and a one-column label table, configures ``svm.training.Batch()``
    with the module-level ``kernel`` and a cache size of 600000000, and
    stores the computed result in the module-level ``trainingResult``.
    """
    global trainingResult

    # Data source bound to the training .csv file
    csvSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature and label tables; the merged table lets one load fill both
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    labels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    combined = MergedNumericTable(features, labels)
    csvSource.loadDataBlock(combined)

    # SVM training algorithm and its parameters
    trainer = svm.training.Batch()
    trainer.parameter.kernel = kernel
    trainer.parameter.cacheSize = 600000000

    # Training inputs: features and their labels
    trainer.input.set(classifier.training.data, features)
    trainer.input.set(classifier.training.labels, labels)

    # Build the SVM model and publish the result
    trainingResult = trainer.compute()
# Problem dimensions and LBFGS settings used by the script below
nFeatures = 6
nClasses = 5
nIterations = 1000
stepLength = 1.0e-4

if __name__ == "__main__":
    # Data source bound to the input .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Tables for the input data and the dependent variables; the merged
    # table lets a single load fill both
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    dependentVariables = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(data, dependentVariables)
    dataSource.loadDataBlock(mergedData)

    # Cross-entropy loss objective built from the class count and the
    # number of loaded rows
    func = optimization_solver.cross_entropy_loss.Batch(
        nClasses,
        data.getNumberOfRows(),
    )
    func.input.set(optimization_solver.cross_entropy_loss.data, data)
    func.input.set(
        optimization_solver.cross_entropy_loss.dependentVariables,
        dependentVariables,
    )

    # LBFGS solver (default method) over that objective
    algorithm = optimization_solver.lbfgs.Batch(func)
    algorithm.parameter.nIterations = nIterations
    algorithm.parameter.stepLengthSequence = HomogenNumericTable(
        1, 1, NumericTableIface.doAllocate, stepLength
    )

    # Starting point for LBFGS: a column of nClasses * (nFeatures + 1)
    # values, all 0.001
    nParameters = nClasses * (nFeatures + 1)
    initialPoint = np.full(shape=(nParameters, 1), fill_value=0.001, dtype=np.float64)
def trainModel():
    """Train the KD-tree based kNN model on the training .csv file.

    Loads ``trainDatasetFileName`` into a feature table with ``nFeatures``
    columns and a one-column table of dependent values, feeds both to
    ``training.Batch()`` and stores the computed result in the module-level
    ``trainingResult``.
    """
    global trainingResult

    # Data source bound to the training .csv file
    csvSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature and dependent-variable tables, loaded through a merged table
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    labels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    combined = MergedNumericTable(features, labels)
    csvSource.loadDataBlock(combined)

    # KD-tree based kNN training algorithm with its two inputs
    trainer = training.Batch()
    trainer.input.set(classifier.training.data, features)
    trainer.input.set(classifier.training.labels, labels)

    # Train the model and publish the result
    trainingResult = trainer.compute()
def loadData(fileName):
    """Load a .csv data set and mark the configured features as categorical.

    Args:
        fileName: Path of the .csv file to read.

    Returns:
        A ``(data, dependentVar)`` tuple: the feature table with
        ``nFeatures`` columns and the one-column dependent-variable table,
        both filled from the file.
    """
    # Initialize FileDataSource to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        fileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Create Numeric Tables for training data and dependent variables
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    dependentVar = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(data, dependentVar)

    # Retrieve the data from input file
    trainDataSource.loadDataBlock(mergedData)

    # Flag every column listed in categoricalFeaturesIndices as categorical
    # in the feature table's dictionary
    dictionary = data.getDictionary()
    for featureIndex in categoricalFeaturesIndices:
        dictionary[featureIndex].featureType = features.DAAL_CATEGORICAL

    return data, dependentVar
def trainModel():
    """Run the local-node step of distributed multiple linear regression training.

    Loads this rank's file, ``trainDatasetFileNames[rankId]``, into a feature
    table and a dependent-variable table, runs
    ``training.Distributed(step1Local)`` on them and creates an
    ``InputDataArchive`` for the partial result.

    NOTE(review): the visible body stops right after the archive is created
    and never assigns ``trainingResult``; the serialization / step 2 code
    presumably follows elsewhere — confirm against the full file.
    """
    global trainingResult

    # Data source bound to this rank's training .csv file
    csvSource = FileDataSource(
        trainDatasetFileNames[rankId],
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature and dependent-variable tables, loaded through a merged table
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    dependentValues = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
    combined = MergedNumericTable(features, dependentValues)
    csvSource.loadDataBlock(combined)

    # Local-node training algorithm with its two inputs
    localAlgorithm = training.Distributed(step1Local)
    localAlgorithm.input.set(training.data, features)
    localAlgorithm.input.set(training.dependentVariables, dependentValues)

    # Partial result of the local-node training
    pres = localAlgorithm.compute()

    # Archive for serializing the partial results required by step 2
    dataArch = InputDataArchive()
def testModel():
    """Predict with the trained multiple linear regression model and print the results.

    Reads the model from the module-level ``trainingResult``, stores the
    algorithm output in the module-level ``predictionResult`` and prints both
    the predictions and the ground truth loaded from ``testDatasetFileName``.
    """
    global trainingResult, predictionResult

    # Data source bound to the test .csv file
    csvSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature and ground-truth tables, loaded together through a merged table
    features = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    groundTruth = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
    combined = MergedNumericTable(features, groundTruth)
    csvSource.loadDataBlock(combined)

    # Linear regression prediction algorithm: test features plus trained model
    predictor = prediction.Batch()
    predictor.input.setTable(prediction.data, features)
    predictor.input.setModel(prediction.model, trainingResult.get(training.model))

    # Compute the predictions, then print them next to the ground truth
    predictionResult = predictor.compute()
    printNumericTable(
        predictionResult.get(prediction.prediction),
        "Linear Regression prediction results: (first 10 rows):",
        10,
    )
    printNumericTable(groundTruth, "Ground truth (first 10 rows):", 10)