Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_Cloud(self):
    """Repeatedly build, verify and tear down a fixed-size cloud.

    Ten trials are run; each one starts a cloud of 5 nodes, prints how long
    the build took, checks the cloud size, waits 5 seconds and tears it down.
    """
    # FIX! weird timeout H2O exceptions with >8? maybe shouldn't
    # don't know if we care
    ports_per_node = 2  # NOTE(review): not referenced anywhere in this method
    node_count = 5
    # The build timeout scales with the node count but never drops below 30s.
    build_timeout = max(30, 10 * node_count)

    for trial in range(10):
        h2o.verboseprint("Trying cloud of", node_count)
        # progress dot, flushed right away so it shows up while the cloud builds
        sys.stdout.write('.')
        sys.stdout.flush()

        began = time.time()
        h2o.init(node_count, retryDelaySecs=2, timeoutSecs=build_timeout, java_heap_GB=1)
        elapsed = time.time() - began
        # single parenthesized argument: prints the same text as the bare print statement
        print("trial #%d: Build cloud of %d in %d secs" % (trial, node_count, elapsed))

        h2o.verify_cloud_size()
        time.sleep(5)
        h2o.tear_down_cloud()
if __name__ == '__main__':
def changeTokens(self,rows,tokenCase):
    """Wrap the '|'-separated tokens of each row in an open/close pair.

    The pair is looked up in self.tokenChangeDict[tokenCase]. Rows that are
    empty, or whose first non-space/tab character is '#', are passed through
    unchanged. Every resulting row is sent to h2o.verboseprint.

    Returns a new list with one entry per input row.
    """
    opener, closer = self.tokenChangeDict[tokenCase]
    # what each '|' turns into: close the previous token, reopen the next one
    between = closer + '|' + opener

    wrapped = []
    for row in rows:
        # don't quote lines that start with #
        # can quote lines start with some spaces or tabs? maybe
        untouched = re.match(r'^[ \t]*#', row) or re.match(r'^$', row)
        if not untouched:
            row = re.sub('^', opener, row)
            row = re.sub(r'\|', between, row)
            row = re.sub('$', closer, row)
        h2o.verboseprint(row)
        wrapped.append(row)
    return wrapped
# Fragment of a test body (the enclosing def is not visible here).
# The file name encodes the dataset kind ("binary") and its row x column size.
csvFilename = 'syn_' + "binary" + "_" + str(rowCount) + 'x' + str(colCount) + '.csv'
csvPathname = SYNDATASETS_DIR + '/' + csvFilename
print "Creating random", csvPathname
write_syn_dataset(csvPathname, rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE)
# NOTE(review): csvPathnameFull is not used in the lines visible here.
csvPathnameFull = h2i.find_folder_and_filename(None, csvPathname, returnFullPath=True)
# Parse with doSummary=False; the summary is requested explicitly further down.
parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key, timeoutSecs=30, doSummary=False)
print "Parse result['destination_key']:", parseResult['destination_key']
inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
print "\n" + csvFilename
# NOTE(review): numRows / numCols are read but not used in the lines visible here.
numRows = inspect["numRows"]
numCols = inspect["numCols"]
summaryResult = h2o_cmd.runSummary(key=hex_key)
h2o.verboseprint("summaryResult:", h2o.dump_json(summaryResult))
# only one column
column = summaryResult['summaries'][0]
colname = column['colname']
# The reported column name must match the first entry of `expected`.
self.assertEqual(colname, expected[0])
# Pull the remaining per-column fields out of the summary.
coltype = column['type']
nacnt = column['nacnt']
stats = column['stats']
stattype= stats['type']
# FIX! we should compare mean and sd to expected?
mean = stats['mean']
sd = stats['sd']
print "colname:", colname, "mean (2 places):", h2o_util.twoDecimals(mean)
def parseFile(self, bucket, pathname, timeoutSecs, header, **kwargs):
    """Parse ``pathname`` from ``bucket`` and record how long the call took.

    The parse is issued through h2i.import_parse with schema='local' and
    timeoutSecs=180. The elapsed wall-clock seconds are stored on the result
    under the 'python_call_timer' key before it is returned.
    """
    # this can get redirected
    # NOTE(review): `schema` is computed but the call below passes the literal
    # 'local'; `timeoutSecs`, `header` and `kwargs` are not forwarded either.
    # Confirm whether that is intentional.
    schema = None if USE_LOCAL else 's3n'

    began = time.time()
    parseResult = h2i.import_parse(
        bucket=bucket,
        path=pathname,
        schema='local',
        timeoutSecs=180,
    )
    elapsed = time.time() - began

    h2o.verboseprint("parse took {0} sec".format(elapsed))
    parseResult['python_call_timer'] = elapsed
    return parseResult
def parseS3File(self, s3bucket, filename, **kwargs):
    """Parse ``filename`` from ``s3bucket`` via h2o_cmd.parseS3File, timing the call.

    Extra keyword arguments are forwarded unchanged. The elapsed wall-clock
    seconds are stored on the result under the 'python_call_timer' key
    before it is returned.
    """
    began = time.time()
    result = h2o_cmd.parseS3File(bucket=s3bucket, filename=filename, **kwargs)
    elapsed = time.time() - began

    h2o.verboseprint("py-S3 parse took {0} sec".format(elapsed))
    result['python_call_timer'] = elapsed
    return result
def test_B_putfile_and_getfile_to_all_nodes(self):
    """Put one file to every node, read it back and diff it against the source.

    For each node in h2o.nodes the file returned by file_to_put() is stored
    with put_file, fetched again with get_key, and compared to the local file
    through self.diff.
    """
    csvfile = file_to_put()
    # enumerate supplies the node index that is reported in the verbose log
    for nodeTry, node in enumerate(h2o.nodes):
        # progress dot, flushed right away
        sys.stdout.write('.')
        sys.stdout.flush()

        h2o.verboseprint("put_file", csvfile, "to", node)
        key = node.put_file(csvfile)
        h2o.verboseprint("put_file ok for node", nodeTry)

        # single parenthesized argument: prints the same text as the bare print statement
        print("starting get_key..this is the same as the original source?")
        r = node.get_key(key)

        # The context manager closes the local file even when self.diff raises,
        # so a failing comparison no longer leaks the handle.
        with open(csvfile) as f:
            self.diff(r, f)
        h2o.verboseprint("put_file filesize ok")
str(set) + "_" + \
str(eolCase) + "_" + \
str(tokenCase) + "_" + \
str(sepCase) + \
'.data'
# Fragment of a test body (the enclosing def and loops are not visible here).
# Write the transformed rows for this case out to csvPathname.
self.writeRows(csvPathname,newRows2,eol)
# Request single-quote handling from the parser only when the opening token
# for this tokenCase contains a "'" character.
if "'" in self.tokenChangeDict[tokenCase][0]:
single_quotes = 1
else:
single_quotes = 0
# noPrint is the inverse of h2o.verbose.
parseResult = h2i.import_parse(path=csvPathname, schema='put', single_quotes=single_quotes,
noPrint=not h2o.verbose)
# When DO_RF is set, also run a single-tree RF on the parsed result.
if DO_RF:
h2o_cmd.runRF(parseResult=parseResult, trees=1, timeoutSecs=30, retryDelaySecs=0.1)
# NOTE(review): `set` here is presumably a variable bound outside this view
# that shadows the builtin — confirm.
h2o.verboseprint("Set", set)
# progress dot, flushed right away
sys.stdout.write('.')
sys.stdout.flush()
# Fragment of a test body (the enclosing def is not visible here).
# Parse csvPathname from the 'smalldata' bucket with doSummary=False; the
# summary is requested explicitly further down.
parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname,
schema='put', hex_key=hex_key, timeoutSecs=10, doSummary=False)
print "Parse result['destination_key']:", parseResult['destination_key']
# We should be able to see the parse result?
inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
print "\n" + csvFilename
# NOTE(review): numRows / numCols are read but not used in the lines visible here.
numRows = inspect["numRows"]
numCols = inspect["numCols"]
# okay to get more cols than we want
# okay to vary MAX_QBINS because we adjust the expected accuracy
summaryResult = h2o_cmd.runSummary(key=hex_key, max_qbins=MAX_QBINS)
h2o.verboseprint("summaryResult:", h2o.dump_json(summaryResult))
summaries = summaryResult['summaries']
# scipyCol counts the columns skipped in the else-branch below.
scipyCol = 0
# Walk expected column specs and summary columns in lockstep; zip stops at the shorter one.
for expected, column in zip(expectedCols, summaries):
colname = column['colname']
if expected[0]:
# NOTE(review): the trailing ", colname, expected[0]" builds a discarded tuple;
# it is not passed to assertEqual as a failure message.
self.assertEqual(colname, expected[0]), colname, expected[0]
else:
# if the expected colname is None (or otherwise falsy), skip it (so we don't barf on strings on the h2o quantile page)
scipyCol += 1
continue
# Ask for the median when DO_MEDIAN is set, otherwise the 0.999 quantile.
quantile = 0.5 if DO_MEDIAN else .999
# h2o has problem if a list of columns (or dictionary) is passed to 'column' param
q = h2o.nodes[0].quantiles(source_key=hex_key, column=column['colname'],
quantile=quantile, max_qbins=MAX_QBINS, multiple_pass=2, interpolation_type=7) # for comparing to summary2
# Fragment of a test body (the enclosing def is not visible here).
# csvFilename is defined before this fragment begins.
csvPathname = SYNDATASETS_DIR + '/' + csvFilename
print "Creating random", csvPathname
write_syn_dataset(csvPathname, rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE)
# NOTE(review): csvPathnameFull is not used in the lines visible here.
csvPathnameFull = h2i.find_folder_and_filename(None, csvPathname, returnFullPath=True)
# Parse with doSummary=False; the summary is requested explicitly further down.
parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key, timeoutSecs=60, doSummary=False)
print "Parse result['destination_key']:", parseResult['destination_key']
inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
print "\n" + csvFilename
# NOTE(review): numRows / numCols are read but not used in the lines visible here.
numRows = inspect["numRows"]
numCols = inspect["numCols"]
summaryResult = h2o_cmd.runSummary(key=hex_key, max_qbins=MAX_QBINS)
h2o.verboseprint("summaryResult:", h2o.dump_json(summaryResult))
# only one column
column = summaryResult['summaries'][0]
colname = column['colname']
# The reported column name must match the first entry of `expected`.
self.assertEqual(colname, expected[0])
# Pull the remaining per-column fields out of the summary.
coltype = column['type']
nacnt = column['nacnt']
stats = column['stats']
stattype= stats['type']
# FIX! we should compare mean and sd to expected?
mean = stats['mean']
sd = stats['sd']
# Fragment of a cloud-building routine (the enclosing def is not visible here).
# assume the remote user has a /home/ (linux targets?)
# This only affects import folder path name generation by python tests
if paramsToUse['username']:
paramsToUse['h2o_remote_buckets_root'] = "/home/" + paramsToUse['username']
h2o.verboseprint("All build_cloud_with_hosts params:", paramsToUse)
#********************
# `hosts` is rebound at module scope by the assignments below.
global hosts
# Update: special case paramsToUse['ip'] = ["127.0.0.1"] and use the normal build_cloud
# this allows all the tests in testdir_host to be run with a special config that points to 127.0.0.1
# hosts should be None for everyone if normal build_cloud is desired
if paramsToUse['ip']== ["127.0.0.1"]:
hosts = None
else:
h2o.verboseprint("About to RemoteHost, likely bad ip if hangs")
hosts = []
# Create one RemoteHost per configured ip, all sharing the same credentials.
for h in paramsToUse['ip']:
h2o.verboseprint("Connecting to:", h)
# expand any ~ or ~user in the string
key_filename = paramsToUse['key_filename']
if key_filename: # don't try to expand if None
key_filename=os.path.expanduser(key_filename)
hosts.append(h2o.RemoteHost(addr=h,
username=paramsToUse['username'], password=paramsToUse['password'], key_filename=key_filename))
# done with these, don't pass to build_cloud
paramsToUse.pop('ip') # this was the list of ip's from the config file, replaced by 'hosts' to build_cloud
# we want to save username in the node info. don't pop
# paramsToUse.pop('username')
paramsToUse.pop('password')