import numpy as np
from numpy.testing import assert_array_almost_equal
from daal.data_management import HomogenNumericTable
from daal.algorithms.linear_regression import training as linear_training
from daal.algorithms.linear_regression import prediction as linear_prediction

def get_daal_prediction(x=np.array([1, 2, 3]), y=np.array([1, 2, 3])):
    ntX = HomogenNumericTable(x)
    ntY = HomogenNumericTable(y)
    # Train a linear regression model on (x, y)
    lr_train = linear_training.Batch()
    lr_train.input.set(linear_training.data, ntX)
    lr_train.input.set(linear_training.dependentVariables, ntY)
    result = lr_train.compute()
    model = result.get(linear_training.model)
    # Predict on the training data with the trained model
    lr_predict = linear_prediction.Batch()
    lr_predict.input.setModel(linear_prediction.model, model)
    lr_predict.input.setTable(linear_prediction.data, ntX)
    result = lr_predict.compute()
    np_predicted = getNumpyArray(result.get(linear_prediction.prediction))
    # The predictions should reproduce the dependent variable we trained on
    assert_array_almost_equal(y, np_predicted)
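# The getNumpyArray helper used above is not shown in the snippet. A minimal
# sketch of one plausible implementation, assuming the standard pydaal
# BlockDescriptor API:
import numpy as np
from daal.data_management import BlockDescriptor, readOnly

def getNumpyArray(table):
    # Read every row of the NumericTable into a block descriptor and copy the
    # exposed buffer into a standalone numpy array before releasing the block
    block = BlockDescriptor()
    table.getBlockOfRows(0, table.getNumberOfRows(), readOnly, block)
    array = block.getArray().copy()
    table.releaseBlockOfRows(block)
    return array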
def testModel():
    global testGroundTruth, predictionResult
    # Initialize FileDataSource to retrieve the test data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    # Create Numeric Tables for testing data and labels
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)
    # Retrieve the data from the input file
    testDataSource.loadDataBlock(mergedData)
    # Get the dictionary and update it with additional information about the data
    dictionary = testData.getDictionary()
    # Mark the first two features as continuous and the third as categorical
    dictionary[0].featureType = features.DAAL_CONTINUOUS
    dictionary[1].featureType = features.DAAL_CONTINUOUS
    dictionary[2].featureType = features.DAAL_CATEGORICAL
    # Create an algorithm object for decision forest classification prediction
    # with the default method
    algorithm = prediction.Batch(nClasses)
    # Pass the testing data set and the trained model, then compute predictions
    # (this continuation was missing; it follows the pattern of the other snippets)
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model,
                             trainingResult.get(classifier.training.model))
    predictionResult = algorithm.compute()
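# For context: the names above would typically come from imports along these
# lines (a sketch; the snippet does not show its header, so the exact module
# paths are assumptions based on the pydaal package layout):
from daal.algorithms import classifier
from daal.algorithms.decision_forest.classification import prediction
from daal.data_management import (
    DataSourceIface, FileDataSource, HomogenNumericTable,
    MergedNumericTable, NumericTableIface, features
)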
import numpy as np
from daal.data_management import (
    HomogenNumericTable, KeyValueDataCollection, NumericTableIface
)

# Fragment: for each of nBlocks column partitions of a CSR matrix, collect the
# local row indices that have a non-zero in that partition. The enclosing
# function header and the outer loop over rows were missing; they are
# reconstructed here, with the signature assumed from the variables used.
def computeOutBlocks(nBlocks, nRows, rowOffsets, colIndices, dataBlockPartition):
    # blockIdFlags[k * nRows + i] == 1 when row i has a non-zero entry whose
    # column falls into partition k (CSR indices are 1-based)
    blockIdFlags = [0] * (nBlocks * nRows)
    for i in range(nRows):
        for j in range(int(rowOffsets[i] - 1), int(rowOffsets[i + 1] - 1)):
            for k in range(1, nBlocks + 1):
                if dataBlockPartition[k - 1] <= colIndices[j] - 1 < dataBlockPartition[k]:
                    blockIdFlags[(k - 1) * nRows + i] = 1

    # Count the flagged rows per block
    nNotNull = [0] * nBlocks
    for i in range(nBlocks):
        for j in range(nRows):
            nNotNull[i] += blockIdFlags[i * nRows + j]

    # Gather the flagged row indices into one single-column table per block
    result = KeyValueDataCollection()
    for i in range(nBlocks):
        indicesTable = HomogenNumericTable(
            1, int(nNotNull[i]), NumericTableIface.doAllocate, ntype=np.intc)
        indices = indicesTable.getArray()
        indexId = 0
        for j in range(nRows):
            if blockIdFlags[i * nRows + j]:
                indices[indexId] = int(j)
                indexId += 1
        result[i] = indicesTable
    return result
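# The flag-then-gather pattern above (mark which rows touch each block, then
# collect the marked row indices per block) can be seen in miniature with
# plain numpy. This toy is an illustration only, not part of the sample:
import numpy as np

# One row of flags per block; a 1 in column j means local row j feeds that block
blockIdFlags = np.array([[1, 0, 1, 0],
                         [0, 1, 1, 1]])
outIndices = [np.flatnonzero(flags) for flags in blockIdFlags]
print(outIndices)  # [array([0, 2]), array([1, 2, 3])]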
def testModel():
    global predictionResult, groundTruthLabels
    # Initialize FileDataSource to retrieve the input data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName, DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    # Create Numeric Tables for testing data and labels
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    groundTruthLabels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, groundTruthLabels)
    # Retrieve the data from the input file
    testDataSource.loadDataBlock(mergedData)
    # Create an algorithm object to predict SVM values
    algorithm = svm.prediction.Batch()
    algorithm.parameter.kernel = kernel
    # Pass the testing data set and the trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model,
                             trainingResult.get(classifier.training.model))
    # Predict SVM values
    predictionResult = algorithm.compute()
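# The kernel object assigned to algorithm.parameter.kernel is defined elsewhere
# in the sample. A linear kernel is one common choice; a sketch, assuming the
# standard pydaal kernel_function module:
from daal.algorithms import kernel_function
import daal.algorithms.kernel_function.linear

kernel = kernel_function.linear.Batch()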
def trainModel():
    global trainingResult
    # Initialize FileDataSource to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        trainDatasetFileName, DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    # Create Numeric Tables for training data and labels
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(trainData, trainGroundTruth)
    # Retrieve the data from the input file
    trainDataSource.loadDataBlock(mergedData)
    # Create an algorithm object to train the AdaBoost model
    algorithm = training.Batch()
    # Pass the training data set and dependent values to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)
    # Train the AdaBoost model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
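# Here training.Batch() refers to the AdaBoost module; a sketch of the header
# this snippet assumes (module paths per the pydaal package layout):
from daal.algorithms.adaboost import prediction, training
from daal.algorithms import classifier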
# Fragment from a decision tree classification training example; the opening
# of trainModel() (data source creation and the trainData table) was missing
# and is reconstructed here following the same pattern as the other snippets.
def trainModel():
    global trainingResult
    # Initialize FileDataSource to retrieve the training data from a .csv file
    trainDataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    # Create Numeric Tables for training data and labels
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(trainData, trainGroundTruth)
    # Retrieve the data from the input file
    trainDataSource.loadDataBlock(mergedData)
    # Initialize FileDataSource to retrieve the pruning data from a .csv file
    pruneDataSource = FileDataSource(
        pruneDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    # Create Numeric Tables for pruning data and labels
    pruneData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    pruneGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    pruneMergedData = MergedNumericTable(pruneData, pruneGroundTruth)
    # Retrieve the data from the input file
    pruneDataSource.loadDataBlock(pruneMergedData)
    # Create an algorithm object to train the decision tree classification model
    algorithm = training.Batch(nClasses)
    # Pass the training data set, labels, and pruning data to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)
    algorithm.input.setTable(training.dataForPruning, pruneData)
    algorithm.input.setTable(training.labelsForPruning, pruneGroundTruth)
    # Train the model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
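# In this snippet, training.dataForPruning and training.labelsForPruning are
# specific to decision tree classification; the header would look roughly like
# this (a sketch, with module paths assumed from the pydaal package layout):
from daal.algorithms.decision_tree.classification import prediction, training
from daal.algorithms import classifier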
def trainModel():
    # Read the training data set from a .csv file and create tensors for the
    # input data and ground truth
    trainingData = readTensorFromCSV(trainDatasetFile)
    trainingGroundTruth = readTensorFromCSV(trainGroundTruthFile, True)
    # Configure the SGD optimization solver used to train the neural network
    sgdAlgorithm = optimization_solver.sgd.Batch(fptype=np.float32)
    # Set the learning rate for the optimization solver
    learningRate = 0.001
    sgdAlgorithm.parameter.learningRateSequence = HomogenNumericTable(
        1, 1, NumericTableIface.doAllocate, learningRate)
    # Set the batch size and the number of iterations over the training data
    sgdAlgorithm.parameter.batchSize = batchSize
    sgdAlgorithm.parameter.nIterations = int(
        trainingData.getDimensionSize(0) / sgdAlgorithm.parameter.batchSize)
    # Create an algorithm to train the neural network
    net = training.Batch(sgdAlgorithm)
    sampleSize = trainingData.getDimensions()
    sampleSize[0] = batchSize
    # Configure the neural network topology and initialize the network
    topology = configureNet()
    net.initialize(sampleSize, topology)
    # Pass the training data set and dependent values to the algorithm
    net.input.setInput(training.data, trainingData)
    net.input.setInput(training.groundTruth, trainingGroundTruth)
    # Train the network and return the result (this last step is assumed; the
    # original snippet ended at the setInput call for the training data)
    return net.compute()
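# Downstream, the training result is typically turned into a prediction model.
# A sketch, assuming the neural-network API as used in the pydaal samples
# (getPredictionModel_Float32 derives a float32 inference model):
trainingResult = trainModel()
trainedModel = trainingResult.get(training.model)
predictionModel = trainedModel.getPredictionModel_Float32()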
def testModel():
    global predictionResult, testGroundTruth
    # Initialize FileDataSource to retrieve the test data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName, DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    # Create Numeric Tables for testing data and labels
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)
    # Retrieve the data from the input file
    testDataSource.loadDataBlock(mergedData)
    # Create an algorithm object for AdaBoost prediction with the default method
    algorithm = prediction.Batch()
    # Pass the testing data set and the trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model,
                             trainingResult.get(classifier.training.model))
    # Compute prediction results (a Result object from classifier.prediction)
    predictionResult = algorithm.compute()
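# With a helper like getNumpyArray from the first snippet, the prediction
# result can be checked against the ground truth (illustrative only):
import numpy as np
from daal.algorithms import classifier

predicted = getNumpyArray(predictionResult.get(classifier.prediction.prediction))
truth = getNumpyArray(testGroundTruth)
accuracy = np.mean(predicted.ravel() == truth.ravel())
print("Accuracy: {:.2%}".format(accuracy))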
def loadData(fileName):
    # Initialize FileDataSource to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        fileName, DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    # Create Numeric Tables for training data and dependent variables
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    dependentVar = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(data, dependentVar)
    # Retrieve the data from the input file
    trainDataSource.loadDataBlock(mergedData)
    # Mark the categorical features in the table's dictionary
    dictionary = data.getDictionary()
    for idx in categoricalFeaturesIndices:
        dictionary[idx].featureType = features.DAAL_CATEGORICAL
    return data, dependentVar
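# A typical call site, reusing the trainDatasetFileName global from the
# snippets above:
trainData, trainDependentVar = loadData(trainDatasetFileName)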