def test_GenParity1(self):
    SYNDATASETS_DIR = h2o.make_syn_dir()
    parityPl = h2o.find_file('syn_scripts/parity.pl')

    # two row dataset gets this. Avoiding it for now
    # java.lang.ArrayIndexOutOfBoundsException: 1
    #   at hex.rf.Data.sample_fair(Data.java:149)
    # always match the run below!
    print "\nAssuming two row dataset is illegal. avoiding"

    for x in xrange(10, 100, 10):
        shCmdString = "perl " + parityPl + " 128 4 " + str(x) + " quad " + SYNDATASETS_DIR
        h2o.spawn_cmd_and_wait('parity.pl', shCmdString.split())
        # algorithm for creating the path and filename is hardwired in parity.pl
        csvFilename = "parity_128_4_" + str(x) + "_quad.data"

    trees = 6
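    # --- Hypothetical continuation (not in the original snippet): parse the last
    # generated parity file and run RF on it, reusing the h2i.import_parse and
    # h2o_cmd.runRF helpers that appear in the later snippets. The exact keyword
    # arguments and hex key here are assumptions.
    csvPathname = SYNDATASETS_DIR + '/' + csvFilename
    parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=csvFilename + '.hex')
    h2o_cmd.runRF(parseResult=parseResult, ntrees=trees, timeoutSecs=30)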
def test_rf_1ktrees_fvec(self):
    SYNDATASETS_DIR = h2o.make_syn_dir()

    # always match the run below!
    # just using one file for now
    for x in [500]:
        shCmdString = "perl " + h2o.find_file("syn_scripts/parity.pl") + " 128 4 " + str(x) + " quad " + SYNDATASETS_DIR
        h2o.spawn_cmd_and_wait('parity.pl', shCmdString.split(), 4)
        csvFilename = "parity_128_4_" + str(x) + "_quad.data"

    # always match the gen above!
    for trial in range(1, 5):
        sys.stdout.write('.')
        sys.stdout.flush()
        csvFilename = "parity_128_4_" + str(500) + "_quad.data"
        csvPathname = SYNDATASETS_DIR + '/' + csvFilename
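        # --- Hypothetical trial body (not in the original snippet): parse the
        # generated file and time an RF build, growing the tree count with each
        # trial. The ntrees and timeoutSecs values are illustrative assumptions.
        parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=csvFilename + '.hex')
        start = time.time()
        h2o_cmd.runRF(parseResult=parseResult, ntrees=trial * 250, timeoutSecs=600)
        print "\ntrial", trial, "RF with", trial * 250, "trees took", time.time() - start, "seconds"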
def test_rf_predict_fvec(self):
    h2b.browseTheCloud()
    SYNDATASETS_DIR = h2o.make_syn_dir()

    trees = 6
    timeoutSecs = 20
    hex_key = 'iris2.csv.hex'

    parseResult = h2i.import_parse(bucket='smalldata', path='iris/iris2.csv', schema='put', hex_key=hex_key)
    h2o_cmd.runRF(parseResult=parseResult, ntrees=trees, destination_key="iris_rf_model", timeoutSecs=timeoutSecs)

    print "Use H2O GeneratePredictionsPage with an H2O-generated model and the same data key. Inspect/Summary result"
    start = time.time()
    predict = h2o.nodes[0].generate_predictions(model_key="iris_rf_model", data_key=hex_key,
        prediction='predict.hex')
    print "generate_predictions on", hex_key, "took", time.time() - start, "seconds"
    print "predict:", h2o.dump_json(predict)

    csvPredictPathname = SYNDATASETS_DIR + "/" + "iris2.predict.csv"
    h2o.nodes[0].csv_download(src_key='predict.hex', csvPathname=csvPredictPathname)
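    # --- Hypothetical sanity check (not in the original snippet): count the data
    # rows in the downloaded prediction CSV, assuming it has one header row.
    predictRows = sum(1 for line in open(csvPredictPathname)) - 1  # minus header
    print "downloaded", predictRows, "prediction rows to", csvPredictPathname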
def test_GLM2_mnist(self):
    if not SCIPY_INSTALLED:
        return

    SYNDATASETS_DIR = h2o.make_syn_dir()

    csvFilelist = [
        (10000, 500, 'cA', 60),
    ]

    trial = 0
    for (rowCount, colCount, hex_key, timeoutSecs) in csvFilelist:
        trialStart = time.time()

        # PARSE test****************************************
        csvFilename = 'syn_' + "binary" + "_" + str(rowCount) + 'x' + str(colCount) + '.csv'
        csvPathname = SYNDATASETS_DIR + "/" + csvFilename
        write_syn_dataset(csvPathname, rowCount, colCount)

        start = time.time()
        parseResult = h2i.import_parse(path=csvPathname, schema='put',
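
# --- write_syn_dataset is referenced above but its body isn't shown in these
# snippets. A hypothetical minimal version, assuming a dense CSV of random 0/1
# features with a binary response in the last column:
def write_syn_dataset(csvPathname, rowCount, colCount, SEED=None):
    r = random.Random(SEED)
    dsf = open(csvPathname, 'w')
    for i in range(rowCount):
        rowData = [r.choice([0, 1]) for j in range(colCount)]
        rowData.append(r.choice([0, 1]))  # response column
        dsf.write(",".join(map(str, rowData)) + "\n")
    dsf.close()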
def test_exec2_row_range(self):
    SYNDATASETS_DIR = h2o.make_syn_dir()
    tryList = [
        (1000000, 5, 'cA', 200),
    ]

    # h2b.browseTheCloud()
    for (rowCount, colCount, hex_key, timeoutSecs) in tryList:
        SEEDPERFILE = random.randint(0, sys.maxint)
        csvFilename = 'syn_' + str(SEEDPERFILE) + "_" + str(rowCount) + 'x' + str(colCount) + '.csv'
        csvPathname = SYNDATASETS_DIR + '/' + csvFilename

        print "\nCreating random", csvPathname
        write_syn_dataset(csvPathname, rowCount, colCount, SEEDPERFILE)

        start = time.time()
        parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key,
def test_rf_predict3_fvec(self):
    SYNDATASETS_DIR = h2o.make_syn_dir()
    timeoutSecs = 600
    predictHexKey = 'predict_0.hex'
    predictCsv = 'predict_0.csv'
    actualCsv = 'actual_0.csv'

    if 1 == 1:
        y = 4  # last col
        response = 'response'
        skipSrcOutputHeader = 1
        skipPredictHeader = 1
        trees = 40
        bucket = 'smalldata'
        csvPathname = 'iris/iris2.csv'
        hexKey = 'iris2.csv.hex'
        # translate = {'setosa': 0.0, 'versicolor': 1.0, 'virginica': 2.0}
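
# --- Hypothetical helper (not in the original snippet): compare a downloaded
# prediction CSV against the actual responses, assuming the predicted class is
# in the first column of the prediction file, the actual numeric response is in
# the last column of the actual file, and skipPredictHeader/skipSrcOutputHeader
# give the header rows to drop.
def count_mismatches(predictCsvPath, actualCsvPath, skipPredictHeader=1, skipSrcOutputHeader=1):
    predicted = [line.strip().split(',')[0] for line in open(predictCsvPath)][skipPredictHeader:]
    actual = [line.strip().split(',')[-1] for line in open(actualCsvPath)][skipSrcOutputHeader:]
    return sum(1 for (p, a) in zip(predicted, actual) if float(p) != float(a))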
def test_PCA_many_cols(self):
    SYNDATASETS_DIR = h2o.make_syn_dir()
    tryList = [
        (10000, 10, 'cA', 300),
        (10000, 50, 'cB', 300),
        (10000, 100, 'cC', 300),
        # (10000, 500, 'cH', 300),
        # (10000, 1000, 'cI', 300),
    ]

    ### h2b.browseTheCloud()
    for (rowCount, colCount, hex_key, timeoutSecs) in tryList:
        print (rowCount, colCount, hex_key, timeoutSecs)
        SEEDPERFILE = random.randint(0, sys.maxint)
        # csvFilename = 'syn_' + str(SEEDPERFILE) + "_" + str(rowCount) + 'x' + str(colCount) + '.csv'
        csvFilename = 'syn_' + "binary" + "_" + str(rowCount) + 'x' + str(colCount) + '.csv'
        csvPathname = SYNDATASETS_DIR + '/' + csvFilename
def test_summary2_uniform_int_w_NA(self):
    SYNDATASETS_DIR = h2o.make_syn_dir()
    M = 100
    tryList = [
        # expected per column: (colname, min, 25th, 50th, 75th, max)
        (ROWS, 1, 'B.hex', 1, 1000 * M, ('C1', 1.0 * M, 250.0 * M, 500.0 * M, 750.0 * M, 1000.0 * M)),
        (ROWS, 1, 'B.hex', 1, 1000, ('C1', 1.0, 250.0, 500.0, 750.0, 1000.0)),
        (ROWS, 1, 'x.hex', 1, 20000, ('C1', 1.0, 5000.0, 10000.0, 15000.0, 20000.0)),
        (ROWS, 1, 'x.hex', -5000, 0, ('C1', -5000.0, -3750.0, -2500.0, -1250.0, 0)),
        (ROWS, 1, 'x.hex', -100000, 100000, ('C1', -100000.0, -50000.0, 0, 50000.0, 100000.0)),
        # (ROWS, 1, 'A.hex', 1, 101, ('C1', 1.0, 26.0, 51.0, 76.0, 101.0)),
        # (ROWS, 1, 'A.hex', -99, 99, ('C1', -99, -49.0, 0, 49.0, 99)),
        (ROWS, 1, 'B.hex', 1, 10000, ('C1', 1.0, 2501.0, 5001.0, 7501.0, 10000.0)),
        (ROWS, 1, 'B.hex', -100, 100, ('C1', -100.0, -50.0, 0.0, 50.0, 100.0)),
        (ROWS, 1, 'C.hex', 1, 100000, ('C1', 1.0, 25001.0, 50001.0, 75001.0, 100000.0)),
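
# --- Where the expected quartiles above come from (illustrative only, not part
# of the test): for a uniform integer range [lo, hi], the 25/50/75th percentiles
# sit roughly at the linear interpolation points.
def uniform_quartiles(lo, hi):
    span = float(hi - lo)
    return (lo + 0.25 * span, lo + 0.50 * span, lo + 0.75 * span)

# e.g. uniform_quartiles(1, 20000) -> (5000.75, 10000.5, 15000.25), close to the
# (5000.0, 10000.0, 15000.0) expected values listed for x.hex above.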
def test_summary2_unifiles2(self):
    SYNDATASETS_DIR = h2o.make_syn_dir()
    # new with 1000 bins. copy expected from R
    tryList = [
        # expected per column: (colname, min, 25th, 50th, 75th, max)
        ('breadth.csv', 'b.hex', False, [('C1', None, None, None, None, None)], 'smalldata', 'quantiles'),
        # ('wonkysummary.csv', 'b.hex', False, [('X1', 7, 22, 876713, 100008, 1000046)], 'smalldata', None),
        ('wonkysummary.csv', 'b.hex', True, [('X1', 7.00, None, None, None, 1000046.0)], 'smalldata', None),
        ('covtype.data', 'c.hex', False, [('C1', None, None, None, None, None)], 'home-0xdiag-datasets', 'standard'),
    ]

    timeoutSecs = 10
    trial = 1
    n = h2o.nodes[0]
    lenNodes = len(h2o.nodes)
    x = 0
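
# --- Hypothetical helper (not part of the snippet above): the None entries in
# the expected tuples mean "don't check this statistic". A None-tolerant,
# relative-tolerance comparison might look like this:
def close_enough(expected, actual, tol=0.01):
    if expected is None:
        return True
    return abs(expected - actual) <= tol * max(abs(expected), 1.0)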
def test_many_fp_formats_libsvm_fvec(self):
    SYNDATASETS_DIR = h2o.make_syn_dir()
    tryList = [
        (10, 10, 'cA', 30, 'sparse50'),
        (100, 10, 'cB', 30, 'sparse'),
        (100000, 100, 'cC', 30, 'sparse'),
        (1000, 10, 'cD', 30, 'sparse50'),
        (100, 100, 'cE', 30, 'sparse'),
        (100, 100, 'cF', 30, 'sparse50'),
    ]

    # h2b.browseTheCloud()
    for (rowCount, colCount, hex_key, timeoutSecs, distribution) in tryList:
        NUM_CASES = h2o_util.fp_format()
        for sel in [random.randint(0, NUM_CASES - 1)]:  # len(caseList)
            SEEDPERFILE = random.randint(0, sys.maxint)
            csvFilename = "syn_%s_%s_%s_%s.csv" % (SEEDPERFILE, sel, rowCount, colCount)