# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_add_features_throws_if_num_data_unequal(self):
    """add_features_from must raise LightGBMError when row counts differ."""
    wide = lgb.Dataset(np.random.random((100, 1))).construct()
    narrow = lgb.Dataset(np.random.random((10, 1))).construct()
    # Merging feature columns is only defined for equal-length datasets,
    # so combining 100 rows with 10 rows has to be rejected.
    with self.assertRaises(lgb.basic.LightGBMError):
        wide.add_features_from(narrow)
# NOTE(review): orphaned fragment — the first two lines below are trailing
# arguments of an assertRaises(..., get_cv_result, ...) call whose opening is
# not visible in this chunk, and lines further down appear duplicated once
# (likely a bad paste). Code preserved byte-for-byte; comments only.
params_obj_class_1_verbose, metrics=obj_multi_alias,
fobj=dummy_obj, feval=constant_metric)
# multiclass default metric without num_class
self.assertRaises(lgb.basic.LightGBMError, get_cv_result,
params_obj_verbose)
for metric_multi_alias in obj_multi_aliases + ['multi_logloss']:
# multiclass metric alias
res = get_cv_result(params_obj_class_3_verbose, metrics=metric_multi_alias)
# each metric contributes a '-mean' and a '-stdv' entry, hence length 2
self.assertEqual(len(res), 2)
self.assertIn('multi_logloss-mean', res)
# multiclass metric
res = get_cv_result(params_obj_class_3_verbose, metrics='multi_error')
self.assertEqual(len(res), 2)
self.assertIn('multi_error-mean', res)
# non-valid metric for multiclass objective
self.assertRaises(lgb.basic.LightGBMError, get_cv_result,
params_obj_class_3_verbose, metrics='binary_logloss')
params_class_3_verbose = {'num_class': 3, 'verbose': -1}
# non-default num_class for default objective
self.assertRaises(lgb.basic.LightGBMError, get_cv_result,
params_class_3_verbose)
# no metric with non-default num_class for custom objective
res = get_cv_result(params_class_3_verbose, fobj=dummy_obj)
self.assertEqual(len(res), 0)
for metric_multi_alias in obj_multi_aliases + ['multi_logloss']:
# multiclass metric alias for custom objective
res = get_cv_result(params_class_3_verbose, metrics=metric_multi_alias, fobj=dummy_obj)
self.assertEqual(len(res), 2)
self.assertIn('multi_logloss-mean', res)
# multiclass metric for custom objective
res = get_cv_result(params_class_3_verbose, metrics='multi_error', fobj=dummy_obj)
self.assertEqual(len(res), 2)
# NOTE(review): duplicated region starts here — the lines below repeat the
# block above almost verbatim (this occurrence lacks the multi_error
# assertIn before it); probably an accidental double paste.
params_obj_verbose)
for metric_multi_alias in obj_multi_aliases + ['multi_logloss']:
# multiclass metric alias
res = get_cv_result(params_obj_class_3_verbose, metrics=metric_multi_alias)
self.assertEqual(len(res), 2)
self.assertIn('multi_logloss-mean', res)
# multiclass metric
res = get_cv_result(params_obj_class_3_verbose, metrics='multi_error')
self.assertEqual(len(res), 2)
self.assertIn('multi_error-mean', res)
# non-valid metric for multiclass objective
self.assertRaises(lgb.basic.LightGBMError, get_cv_result,
params_obj_class_3_verbose, metrics='binary_logloss')
params_class_3_verbose = {'num_class': 3, 'verbose': -1}
# non-default num_class for default objective
self.assertRaises(lgb.basic.LightGBMError, get_cv_result,
params_class_3_verbose)
# no metric with non-default num_class for custom objective
res = get_cv_result(params_class_3_verbose, fobj=dummy_obj)
self.assertEqual(len(res), 0)
for metric_multi_alias in obj_multi_aliases + ['multi_logloss']:
# multiclass metric alias for custom objective
res = get_cv_result(params_class_3_verbose, metrics=metric_multi_alias, fobj=dummy_obj)
self.assertEqual(len(res), 2)
self.assertIn('multi_logloss-mean', res)
# multiclass metric for custom objective
res = get_cv_result(params_class_3_verbose, metrics='multi_error', fobj=dummy_obj)
self.assertEqual(len(res), 2)
self.assertIn('multi_error-mean', res)
# binary metric with non-default num_class for custom objective
self.assertRaises(lgb.basic.LightGBMError, get_cv_result,
params_class_3_verbose, metrics='binary_error', fobj=dummy_obj)
# NOTE(review): fragment — the six lines below are trailing arguments of a
# LightGBM C-API dataset-creation call whose opening (and the enclosing
# method's def line) is outside this chunk.
ctypes.c_uint(ncol),
num_per_col,
ctypes.c_uint(num_sample_row),
ctypes.c_uint(row_count),
parameters,
ctypes.byref(self.handle)))
# Stream the data into the native dataset in chunks of `blocksize` rows to
# bound peak memory while materializing vaex expressions.
blocks = int(math.ceil(row_count / blocksize))
dtype = np.float64
for i in range(blocks):
i1 = i * blocksize
i2 = min(row_count, (i+1) * blocksize)
# Evaluate each feature expression for rows [i1, i2); the transpose + copy
# yields a contiguous (rows, features) array as the C API expects.
data = np.array([df.evaluate(k, i1=i1, i2=i2).astype(dtype) for k in self.features]).T.copy()
ctypemap = {np.float64: ctypes.c_double, np.float32: ctypes.c_float}
capi_typemap = {np.float64: lightgbm.basic.C_API_DTYPE_FLOAT64, np.float32: lightgbm.basic.C_API_DTYPE_FLOAT32}
# Push this chunk starting at row offset i1; _safe_call raises on a
# nonzero C-API return code.
lightgbm.basic._safe_call(lib.LGBM_DatasetPushRows(self.handle,
data.ctypes.data_as(ctypes.POINTER(ctypemap[dtype])),
ctypes.c_uint(capi_typemap[dtype]),
ctypes.c_uint32(i2-i1),
ctypes.c_uint32(ncol),
ctypes.c_uint32(i1),
))
# Attach the label column (if any) after all feature rows are pushed.
if label is not None:
self.label_data = self.df.evaluate(label)
self.set_label(self.label_data)
import ctypes
import math
import warnings
import vaex
import lightgbm
import numpy as np
import tempfile
import base64
import vaex.serialize
from . import state
import traitlets
from . import generate
# Handle to LightGBM's native shared library; used for direct C-API calls
# (e.g. lib.LGBM_DatasetPushRows elsewhere in this file).
lib = lightgbm.basic._LIB
# NOTE(review): the class docstring opened below is never closed within this
# chunk — the rest of the class body lies outside the visible region, so the
# definition is preserved byte-for-byte.
@vaex.serialize.register
@generate.register
class LightGBMModel(state.HasState):
'''The LightGBM algorithm.
This class provides an interface to the LightGBM algorithm, with some optimizations
for better memory efficiency when training large datasets. The algorithm itself is
not modified at all.
LightGBM is a fast gradient boosting algorithm based on decision trees and is
mainly used for classification, regression and ranking tasks. It is under the
umbrella of the Distributed Machine Learning Toolkit (DMTK) project of Microsoft.
For more information, please visit https://github.com/Microsoft/LightGBM/.
# NOTE(review): fragment — the enclosing function's def line is missing; this
# body relies on names (filename, num_class_, features, feature_infos,
# objective_, segmentation_obj, pandas_categorical_, os, lgb) defined outside
# this view. It writes a LightGBM model file by hand and reloads it as a
# Booster via the 'model_str' parameter.
f = open(filename, "w+")
f.write("tree\n"+
"version=v2\n"+
"num_class="+num_class_+"\n"+
"num_tree_per_iteration="+num_class_+"\n"+
"label_index=0\n"+
"max_feature_idx="+str(len(features)-1)+"\n"+
"objective="+objective_+"\n"+
"feature_names="+" ".join(features)+"\n"+
"feature_infos="+" ".join(feature_infos)+"\n"+ #feature_infos is minimum value to maximum value ratio of every features
#tree_sizes=??????????
"\n")
get_tree_string(segmentation=segmentation_obj, temp_file = f)
f.write("pandas_categorical:"+pandas_categorical_+"\n")
f.close()
# NOTE(review): the handle from open(filename, "r") is never closed, and the
# f.close() two lines below closes f a second time (redundant). Prefer
# with-blocks once the enclosing function signature is restored.
newgbm = lgb.basic.Booster(params = {'model_str' : open(filename, "r").read()})
f.close()
os.remove(filename)
return newgbm