How to use the `datatable.DataTable` function from the Python datatable package

To help you get started, we’ve selected a few datatable examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgboost_dtinput.py View on Github external
dtest = xgb.DMatrix(dtdata_X_test, dtdata_y_test, nthread=-1)
        print ("dt->DMatrix2 Time: %s seconds" % (str(time.time() - tmp)))

        print("Training with '%s'" % param['tree_method'])
        tmp = time.time()
        res_tmp = {}
        xgb.train(param, dtrain, iterations, evals=[(dtrain, "train"),(dtest, "test")], evals_result=res_tmp)
        res['2'] = res_tmp['train']['error']
        print ("Train Time: %s seconds" % (str(time.time() - tmp)))
    if HAVE_DT and do_dt_likeDAI:

        # convert to column-major contiguous in memory to mimic persistent column-major state
        # do_cccont = True leads to prepare2 time of about 1.4s for 1000000 rows * 50 columns
        # do_cccont = False leads to prepare2 time of about 0.000548 for 1000000 rows * 50 columns
        tmp = time.time()
        dtdata_X_train = dt.DataTable(X_train_cc)
        dtdata_X_test = dt.DataTable(X_test_cc)
        dtdata_y_train = dt.DataTable(y_train_cc)
        dtdata_y_test = dt.DataTable(y_test_cc)
        print ("dt prepare2 Time: %s seconds" % (str(time.time() - tmp)))

        #test = dtdata_X_train.tonumpy()
        #print(test)

        print ("dt->DMatrix Start")
        # omp way
        tmp = time.time()
        dtrain = xgb.DMatrix(dtdata_X_train.tonumpy(), dtdata_y_train.tonumpy(), nthread=-1)
        print ("dt->DMatrix1 Time: %s seconds" % (str(time.time() - tmp)))
        tmp = time.time()
        dtest = xgb.DMatrix(dtdata_X_test.tonumpy(), dtdata_y_test.tonumpy(), nthread=-1)
        print ("dt->DMatrix2 Time: %s seconds" % (str(time.time() - tmp)))
github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgboost_dtinput.py View on Github external
print ("np->DMatrix2 Time: %s seconds" % (str(time.time() - tmp)))

        print("Training with '%s'" % param['tree_method'])
        tmp = time.time()
        res_tmp = {}
        xgb.train(param, dtrain, iterations, evals=[(dtrain, "train"),(dtest, "test")], evals_result=res_tmp)
        res['1'] = res_tmp['train']['error']
        print("Train Time: %s seconds" % (str(time.time() - tmp)))
    if HAVE_DT and do_dt:

        # convert to column-major contiguous in memory to mimic persistent column-major state
        # do_cccont = True leads to prepare2 time of about 1.4s for 1000000 rows * 50 columns
        # do_cccont = False leads to prepare2 time of about 0.000548 for 1000000 rows * 50 columns
        tmp = time.time()
        dtdata_X_train = dt.DataTable(X_train_cc)
        dtdata_X_test = dt.DataTable(X_test_cc)
        dtdata_y_train = dt.DataTable(y_train_cc)
        dtdata_y_test = dt.DataTable(y_test_cc)
        print ("dt prepare2 Time: %s seconds" % (str(time.time() - tmp)))

        #test = dtdata_X_train.tonumpy()
        #print(test)

        print ("dt->DMatrix Start")
        # omp way
        tmp = time.time()
        # below takes about 0.47s - 0.53s independent of do_ccont
        dtrain = xgb.DMatrix(dtdata_X_train, dtdata_y_train, nthread=-1)
        print ("dt->DMatrix1 Time: %s seconds" % (str(time.time() - tmp)))
        tmp = time.time()
        dtest = xgb.DMatrix(dtdata_X_test, dtdata_y_test, nthread=-1)
        print ("dt->DMatrix2 Time: %s seconds" % (str(time.time() - tmp)))
github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgboost_dtinput.py View on Github external
print("Training with '%s'" % param['tree_method'])
        tmp = time.time()
        res_tmp = {}
        xgb.train(param, dtrain, iterations, evals=[(dtrain, "train"),(dtest, "test")], evals_result=res_tmp)
        res['2'] = res_tmp['train']['error']
        print ("Train Time: %s seconds" % (str(time.time() - tmp)))
    if HAVE_DT and do_dt_likeDAI:

        # convert to column-major contiguous in memory to mimic persistent column-major state
        # do_cccont = True leads to prepare2 time of about 1.4s for 1000000 rows * 50 columns
        # do_cccont = False leads to prepare2 time of about 0.000548 for 1000000 rows * 50 columns
        tmp = time.time()
        dtdata_X_train = dt.DataTable(X_train_cc)
        dtdata_X_test = dt.DataTable(X_test_cc)
        dtdata_y_train = dt.DataTable(y_train_cc)
        dtdata_y_test = dt.DataTable(y_test_cc)
        print ("dt prepare2 Time: %s seconds" % (str(time.time() - tmp)))

        #test = dtdata_X_train.tonumpy()
        #print(test)

        print ("dt->DMatrix Start")
        # omp way
        tmp = time.time()
        dtrain = xgb.DMatrix(dtdata_X_train.tonumpy(), dtdata_y_train.tonumpy(), nthread=-1)
        print ("dt->DMatrix1 Time: %s seconds" % (str(time.time() - tmp)))
        tmp = time.time()
        dtest = xgb.DMatrix(dtdata_X_test.tonumpy(), dtdata_y_test.tonumpy(), nthread=-1)
        print ("dt->DMatrix2 Time: %s seconds" % (str(time.time() - tmp)))

        print("Training with '%s'" % param['tree_method'])
github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgboost_dtinput.py View on Github external
print("Training with '%s'" % param['tree_method'])
        tmp = time.time()
        res_tmp = {}
        xgb.train(param, dtrain, iterations, evals=[(dtrain, "train"),(dtest, "test")], evals_result=res_tmp)
        res['1'] = res_tmp['train']['error']
        print("Train Time: %s seconds" % (str(time.time() - tmp)))
    if HAVE_DT and do_dt:

        # convert to column-major contiguous in memory to mimic persistent column-major state
        # do_cccont = True leads to prepare2 time of about 1.4s for 1000000 rows * 50 columns
        # do_cccont = False leads to prepare2 time of about 0.000548 for 1000000 rows * 50 columns
        tmp = time.time()
        dtdata_X_train = dt.DataTable(X_train_cc)
        dtdata_X_test = dt.DataTable(X_test_cc)
        dtdata_y_train = dt.DataTable(y_train_cc)
        dtdata_y_test = dt.DataTable(y_test_cc)
        print ("dt prepare2 Time: %s seconds" % (str(time.time() - tmp)))

        #test = dtdata_X_train.tonumpy()
        #print(test)

        print ("dt->DMatrix Start")
        # omp way
        tmp = time.time()
        # below takes about 0.47s - 0.53s independent of do_ccont
        dtrain = xgb.DMatrix(dtdata_X_train, dtdata_y_train, nthread=-1)
        print ("dt->DMatrix1 Time: %s seconds" % (str(time.time() - tmp)))
        tmp = time.time()
        dtest = xgb.DMatrix(dtdata_X_test, dtdata_y_test, nthread=-1)
        print ("dt->DMatrix2 Time: %s seconds" % (str(time.time() - tmp)))
github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgboost_dtinput.py View on Github external
print ("dt->DMatrix2 Time: %s seconds" % (str(time.time() - tmp)))

        print("Training with '%s'" % param['tree_method'])
        tmp = time.time()
        res_tmp = {}
        xgb.train(param, dtrain, iterations, evals=[(dtrain, "train"),(dtest, "test")], evals_result=res_tmp)
        res['2'] = res_tmp['train']['error']
        print ("Train Time: %s seconds" % (str(time.time() - tmp)))
    if HAVE_DT and do_dt_likeDAI:

        # convert to column-major contiguous in memory to mimic persistent column-major state
        # do_cccont = True leads to prepare2 time of about 1.4s for 1000000 rows * 50 columns
        # do_cccont = False leads to prepare2 time of about 0.000548 for 1000000 rows * 50 columns
        tmp = time.time()
        dtdata_X_train = dt.DataTable(X_train_cc)
        dtdata_X_test = dt.DataTable(X_test_cc)
        dtdata_y_train = dt.DataTable(y_train_cc)
        dtdata_y_test = dt.DataTable(y_test_cc)
        print ("dt prepare2 Time: %s seconds" % (str(time.time() - tmp)))

        #test = dtdata_X_train.tonumpy()
        #print(test)

        print ("dt->DMatrix Start")
        # omp way
        tmp = time.time()
        dtrain = xgb.DMatrix(dtdata_X_train.tonumpy(), dtdata_y_train.tonumpy(), nthread=-1)
        print ("dt->DMatrix1 Time: %s seconds" % (str(time.time() - tmp)))
        tmp = time.time()
        dtest = xgb.DMatrix(dtdata_X_test.tonumpy(), dtdata_y_test.tonumpy(), nthread=-1)
        print ("dt->DMatrix2 Time: %s seconds" % (str(time.time() - tmp)))
github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgboost_dtinput.py View on Github external
print("Training with '%s'" % param['tree_method'])
        tmp = time.time()
        res_tmp = {}
        xgb.train(param, dtrain, iterations, evals=[(dtrain, "train"),(dtest, "test")], evals_result=res_tmp)
        res['2'] = res_tmp['train']['error']
        print ("Train Time: %s seconds" % (str(time.time() - tmp)))
    if HAVE_DT and do_dt_likeDAI:

        # convert to column-major contiguous in memory to mimic persistent column-major state
        # do_cccont = True leads to prepare2 time of about 1.4s for 1000000 rows * 50 columns
        # do_cccont = False leads to prepare2 time of about 0.000548 for 1000000 rows * 50 columns
        tmp = time.time()
        dtdata_X_train = dt.DataTable(X_train_cc)
        dtdata_X_test = dt.DataTable(X_test_cc)
        dtdata_y_train = dt.DataTable(y_train_cc)
        dtdata_y_test = dt.DataTable(y_test_cc)
        print ("dt prepare2 Time: %s seconds" % (str(time.time() - tmp)))

        #test = dtdata_X_train.tonumpy()
        #print(test)

        print ("dt->DMatrix Start")
        # omp way
        tmp = time.time()
        dtrain = xgb.DMatrix(dtdata_X_train.tonumpy(), dtdata_y_train.tonumpy(), nthread=-1)
        print ("dt->DMatrix1 Time: %s seconds" % (str(time.time() - tmp)))
        tmp = time.time()
        dtest = xgb.DMatrix(dtdata_X_test.tonumpy(), dtdata_y_test.tonumpy(), nthread=-1)
        print ("dt->DMatrix2 Time: %s seconds" % (str(time.time() - tmp)))

        print("Training with '%s'" % param['tree_method'])
        tmp = time.time()
github bgrimstad / splinter / python / bspline.py View on Github external
else:
			dataTable = dataTableOrFileName
			self._handle = SPLINTER.call(SPLINTER.getHandle().bspline_init, dataTable.getHandle(), degree)
		
		
if __name__ == "__main__":
    import SPLINTER
    SPLINTER.load("/home/anders/SPLINTER/build/release/libsplinter-matlab-1-4.so")

    from datatable import DataTable

    def f(x):
        # Bilinear test surface: f(x0, x1) = x0 * x1.
        return x[0] * x[1]

    # Sample f on the 10x10 integer grid [0, 9] x [0, 9].
    table = DataTable()
    for u in range(10):
        for v in range(10):
            table.addSample([u, v], f([u, v]))

    # Degree-1 B-spline fitted to the sampled surface.
    spline = BSpline(table, 1)
    # A linear spline interpolates the bilinear surface, so evaluating at
    # (0.9*u, 0.9*v) is expected to print a value equal to 0.81*u*v.
    for u in range(10):
        for v in range(10):
            print(f"{spline.eval([0.9 * u, 0.9 * v])} == {0.81 * u * v}")

    print(spline.evalJacobian([3, 3]))
    print(spline.evalHessian([3, 3]))

    # Round-trip the spline through serialization.
    spline.save("test.bspline")
    reloaded = BSpline("test.bspline")

    # NOTE(review): this evaluates the original spline, not `reloaded`;
    # presumably the intent was to sanity-check the loaded copy — confirm.
    print(spline.eval([2, 3]))
github h2oai / driverlessai-recipes / transformers / targetencoding / ExpandingMean.py View on Github external
def transform(self, X: dt.Frame):
    """Replace each row's group with its precomputed mean target encoding.

    Joins X against the per-group means built at fit time, keeps only the
    joined mean column, and fills rows whose group was unseen during
    fitting with the overall dataset mean.
    """
    # NOTE(review): dt.join matches on the join frame's key columns —
    # assumes self._group_means was keyed by the fitter; confirm there.
    joined = X[:, :, dt.join(self._group_means)]
    # Last column of the join result is the group-mean column.
    group_means = joined[:, -1]
    # Unmatched groups come back as NA; substitute the global mean.
    filled = group_means.to_pandas().fillna(self.dataset_mean)
    # dt.DataTable is the legacy alias of dt.Frame in the datatable package.
    return dt.DataTable(filled)