# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this file's indentation appears to have been stripped; the lines
# below are a fragment of a larger test body (the enclosing def is above this
# chunk), so names like kwargs, parseResult, jobids, results, DELETE_KEYS come
# from code that is not visible here.
# Kick off 5 async GBM builds (noPoll), poke the 'models' endpoint, then cancel
# all jobs and check the resulting frame summaries.
for j in range(5):
# FIX! apparently we can't reuse a model key after a cancel
kwargs['destination_key'] = 'GBMBad' + str(j)
# rjson error in poll_url: Job was cancelled by user!
# noPoll=True: start the GBM build asynchronously and just collect its job id
GBMFirstResult = h2o_cmd.runGBM(parseResult=parseResult, noPoll=True, **kwargs)
jobids.append(GBMFirstResult['job_key'])
h2o.check_sandbox_for_errors()
# try ray's 'models' request to see if anything blows up
modelsParams = {
'key': None,
'find_compatible_frames': 0,
'score_frame': None
}
modelsResult = h2o.nodes[0].models(timeoutSecs=10, **modelsParams)
print "modelsResult:", h2o.dump_json(modelsResult)
# have to pass the job id
# for j in jobids:
# h2o.nodes[0].jobs_cancel(key=j)
# blanket-cancel everything started above instead of per-job cancels
h2o_jobs.cancelAllJobs()
# PUB-361. going to wait after cancel before reusing keys
time.sleep(3)
# am I getting a subsequent parse job cancelled?
h2o_jobs.showAllJobs()
if DELETE_KEYS:
h2i.delete_keys_from_import_result(pattern=csvFilename, importResult=importResult)
# summaries of column 0 for the s0/s1/s2 split frames
rSummary = h2o_cmd.runSummary(key='s0.hex', cols='0')
# h2o_cmd.infoFromSummary(rSummary)
sSummary = h2o_cmd.runSummary(key='s1.hex', cols='0')
# h2o_cmd.infoFromSummary(sSummary)
# NOTE(review): sSummary is immediately overwritten below, so the s1.hex
# summary is never used by the (disabled) comparison
sSummary = h2o_cmd.runSummary(key='s2.hex', cols='0')
# h2o_cmd.infoFromSummary(sSummary)
# since there are no NAs in covtype, r.hex and s.hex should be identical?
# disabled block: 1==0 is always false, so the JsonDiff comparison never runs
if 1==0:
print "Comparing summary of r.hex to summary of s.hex"
df = h2o_util.JsonDiff(rSummary, sSummary, with_values=True)
# time can be different
print "df.difference:", h2o.dump_json(df.difference)
self.assertLess(len(df.difference), 2)
print "results from the individual exec expresssions (ignore last which was an apply)"
print "results:", results
# hard-coded expected values for the exec expression results on covtype
self.assertEqual(results, [0.0, 0.0, 0.0, 1859.0, 581012.0, 581012.0, 2959.365300544567, 1859.0, 1859.0])
# Train a small RF on poker1000 and fetch a per-tree view of the model.
# NOTE(review): from the iris2 parse onward this looks like a fragment of a
# different test spliced in -- it uses `trees`, `timeoutSecs`, and
# SYNDATASETS_DIR, none of which are defined in the visible lines. Confirm
# against the original test files before relying on this block.
def test_tree_view(self):
parseResult = h2i.import_parse(bucket='smalldata', path='poker/poker1000', hex_key='poker1000.hex', schema='put')
h2o_cmd.runRF(parseResult=parseResult, trees=50, model_key="model0", timeoutSecs=10)
# range(1): only tree 0 is viewed; presumably left parameterized so more
# trees could be viewed later
for n in range(1):
a = h2o_cmd.runRFTreeView(n=n, data_key='poker1000.hex', model_key="model0", timeoutSecs=10)
print (h2o.dump_json(a))
# --- predict flow: build an RF model on iris2, then score the same frame ---
hex_key = 'iris2.csv.hex'
parseResult = h2i.import_parse(bucket='smalldata', path='iris/iris2.csv', schema='put', hex_key=hex_key)
h2o_cmd.runRF(parseResult=parseResult, ntrees=trees, destination_key="iris_rf_model", timeoutSecs=timeoutSecs)
print "Use H2O GeneratePredictionsPage with a H2O generated model and the same data key. Inspect/Summary result"
start = time.time()
predict = h2o.nodes[0].generate_predictions(model_key="iris_rf_model", data_key=hex_key,
prediction='predict.hex')
print "generate_predictions end on ", hex_key, " took", time.time() - start, 'seconds'
print "predict:", h2o.dump_json(predict)
# download the prediction frame as csv for offline comparison
csvPredictPathname = SYNDATASETS_DIR + "/" + "iris2.predict.csv"
h2o.nodes[0].csv_download(src_key='predict.hex', csvPathname=csvPredictPathname)
inspect = h2o_cmd.runInspect(key='predict.hex')
print "inspect:", h2o.dump_json(inspect)
# print h2o.dump_json(predict)
# no min/max any more with enums?
# expected stats for column 0 of the prediction frame; most entries are
# commented out pending the enum-vs-real question above
expectedCols = {
# "max": 2.0,
# "mean": 1.0,
# "min": 0.0,
"naCnt": 0,
# "name": 0,
# Enum or real?
# "type": "Real",
}
predictCols = inspect['cols'][0]
# keys whose inspected value disagrees with the expectation
# NOTE(review): diffKeys is computed but never asserted on in the visible lines
diffKeys = [k for k in expectedCols if predictCols[k] != expectedCols[k]]
# Repeatedly import and (presumably) parse the same src key to probe src-key
# locking ("fast locks") behavior. AVOID_BUG (defined outside this chunk)
# toggles between importing once up front vs re-importing on every trial.
# NOTE(review): the trial loop body is visibly truncated -- the parse call the
# comments refer to is not in the visible lines.
def test_exec2_fast_locks_overlap(self):
csvPathname = 'iris/iris2.csv'
src_key='iris.csv'
if not AVOID_BUG:
# need the key name (pattern) to feed to parse)
(importResult, importPattern) = h2i.import_only(bucket='smalldata', path=csvPathname, schema='put',
src_key=src_key, timeoutSecs=10)
# just as a reminder of what these returns look like
print "importResult:", h2o.dump_json(importResult)
print "importPattern:", h2o.dump_json(importPattern)
y = 4
lastHexKey = None
for trial in range (1, 100):
if AVOID_BUG:
# need the key name (pattern) to feed to parse)
# re-import each trial so the src key is fresh (works around the bug)
(importResult, importPattern) = h2i.import_only(bucket='smalldata', path=csvPathname, schema='put',
src_key=src_key, timeoutSecs=10)
# just as a reminder of what these returns look like
print "importResult:", h2o.dump_json(importResult)
print "importPattern:", h2o.dump_json(importPattern)
# make sure each parse is unique dest key (not in use)
hex_key = "iris2_" + str(trial) + ".hex"
# what if we kicked off another parse without waiting for it? I think the src key gets locked
# so we'd get lock issues on the src_key
# Ask every node for its iostatus statistics and pick out the histogram
# entries with a 10-second window.
# NOTE(review): everything from the `else:` at the bottom of the histogram
# loop onward looks like fragments of two other tests spliced in (an RF-in-a-
# loop test using modelKeyDict/trial, then a maprfs import test using
# csvFilenameAll) -- those names are not defined in the visible lines.
def test_iostatus(self):
# wait a bit first?
time.sleep(5)
# Ask each node for iostatus statistics
for node in h2o.nodes:
stats = node.iostatus()
h2o.verboseprint(h2o.dump_json(stats))
histogram = stats['histogram']
# example histogram entry:
# {
# u'i_o': u'TCP',
# u'peak_bytes_/_sec': 199690496.78920883,
# u'effective_bytes_/_sec': 21850666.666666668,
# u'r_w': u'write',
# u'cloud_node_idx': 2,
# u'window': 10
# }
print "\nProbing node:", str(node.h2o_addr) + ":" + str(node.port)
for k in histogram:
### print k
if k['window'] == 10:
i_o = k['i_o']
# NOTE(review): this rebinds `node` (the outer loop variable) to an
# int index -- if the loops nest as the original indentation implied,
# later iterations would see the index instead of the node object
node = k['cloud_node_idx']
r_w = k['r_w']
else:
# NOTE(review): spliced-in fragment starts here (see block comment above)
csvPathname = 'iris/iris2.csv'
start = time.time()
parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, schema='put')
# rfView=False: don't block on the RF view; model key comes back directly
rfResult = h2o_cmd.runRF(parseResult=parseResult, trees=6, timeoutSecs=10, rfView=False)
print "RF #%d" % trial, "started on ", csvPathname, 'took', time.time() - start, 'seconds'
print "rfResult", h2o.dump_json(rfResult)
model_key = rfResult['destination_key']
print "model_key:", model_key
# each trial must get a unique auto-generated model key
if model_key in modelKeyDict:
raise Exception("same model_key used in RF #%d that matches prior RF #%d" % (trial, modelKeyDict[model_key]))
modelKeyDict[model_key] = trial
# just show the jobs still going, if any. maybe none, because short (iris)
a = h2o.nodes[0].jobs_admin()
print "jobs_admin():", h2o.dump_json(a)
csvFilenameList = random.sample(csvFilenameAll,8)
# Alternatively: do the list in order! Note the order is easy to hard
else:
csvFilenameList = csvFilenameAll
# save the first, for all comparisons, to avoid slow drift with each iteration
importFolderPath = "datasets"
trial = 0
for csvFilename in csvFilenameList:
# creates csvFilename.hex from file in hdfs dir
csvPathname = importFolderPath + "/" + csvFilename
timeoutSecs = 1000
# do an import first, because we want to get the size of the file
(importResult, importPattern) = h2i.import_only(path=csvPathname, schema="maprfs", timeoutSecs=timeoutSecs)
print "importResult:", h2o.dump_json(importResult)
succeeded = importResult['files']
fails = importResult['fails']
if len(succeeded) < 1:
raise Exception("Should have imported at least 1 key for %s" % csvPathname)
# just do a search: verify the requested path is among the imported keys
foundIt = None
for f in succeeded:
if csvPathname in f:
foundIt = f
break
if not foundIt:
raise Exception("Should have found %s in the imported keys for %s" % (importPattern, csvPathname))
def test_parse_specific_case1(self):
SYNDATASETS_DIR = h2o.make_syn_dir()
hex_key = "a.hex"
for (dataset, expNumRows, expNumCols, expNaCnt, expType) in tryList:
csvFilename = 'specific_' + str(expNumRows) + "x" + str(expNumCols) + '.csv'
csvPathname = SYNDATASETS_DIR + '/' + csvFilename
write_syn_dataset(csvPathname, dataset)
parseResult = h2i.import_parse(path=csvPathname, schema='put', header=0,
hex_key=hex_key, timeoutSecs=10, doSummary=False)
inspect = h2o_cmd.runInspect(None, parseResult['destination_key'], timeoutSecs=60)
print "inspect:", h2o.dump_json(inspect)
numRows = inspect['numRows']
self.assertEqual(numRows, expNumRows, msg='Wrong numRows: %s Expected: %s' % (numRows, expNumRows))
numCols = inspect['numCols']
self.assertEqual(numCols, expNumCols, msg='Wrong numCols: %s Expected: %s' % (numCols, expNumCols))
# this is required for the test setup
assert(len(expNaCnt)>=expNumCols)
assert(len(expType)>=expNumCols)
for k in range(expNumCols):
naCnt = inspect['cols'][k]['naCnt']
self.assertEqual(expNaCnt[k], naCnt, msg='col %s naCnt %d should be %s' % (k, naCnt, expNaCnt[k]))
stype = inspect['cols'][k]['type']
self.assertEqual(expType[k], stype, msg='col %s type %s should be %s' % (k, stype, expType[k]))
# NOTE(review): fragment -- the dict lines below are the tail of a params dict
# whose opening (and the enclosing def) are above this chunk; csvPathname,
# paramDict, params, response come from that missing code.
'response': response,
}
colX = h2o_util.pickRandParams(paramDict, params)
kwargs = params.copy()
timeoutSecs = 120
# change response to factor
execExpr = 'covtype.hex[,54+1] = factor(covtype.hex[,54+1] != 5)' # turn 7-class problem into binomial such that AUC can work below..
resultExec, ncols = h2e.exec_expr(execExpr=execExpr)
start = time.time()
# run Naive Bayes on covtype with the randomized params
bayesResult = h2o.nodes[0].naive_bayes(timeoutSecs=timeoutSecs, source='covtype.hex', **kwargs)
print "bayes end on ", csvPathname, 'took', time.time() - start, 'seconds'
print "bayes result:", h2o.dump_json(bayesResult)
# pull the returned model apart for the (off-screen) checks below
nb_model = bayesResult['nb_model']
ncats = nb_model['ncats']
nnums = nb_model['nnums']
pcond = nb_model['pcond']
pprior = nb_model['pprior']
rescnt = nb_model['rescnt']
modelClassDist = nb_model['_modelClassDist']
names = nb_model['_names']
domains = nb_model['_domains']
priorClassDist = nb_model['_priorClassDist']
model_key = nb_model['_key']
# is it an error to get std dev of 0 after predicting?
print "Doing predict with same dataset, and the bayes model"