How to use the h2o.beta_features flag (a module-level attribute, not a function) in h2o

To help you get started, we’ve selected a few h2o examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github h2oai / h2o-2 / py / testdir_single_jvm / test_exec2_operators.py View on Github external
def test_exec2_operators(self):
        h2o.beta_features = True
        bucket = ''
        csvPathname = 'testdata/airlines/year2013.csv'
        hexKey = 'i.hex'
        parseResult = h2i.import_parse(bucket=bucket, path=csvPathname, schema='put', hex_key=hexKey)

        for resultKey, execExpr in initList:
            h2e.exec_expr(h2o.nodes[0], execExpr, resultKey=None, timeoutSecs=4)
        start = time.time()
        # h2e.exec_expr_list_rand(len(h2o.nodes), exprList, 'r1.hex', maxTrials=200, timeoutSecs=10)
        h2e.exec_expr_list_rand(len(h2o.nodes), exprList, None, maxTrials=200, timeoutSecs=10)

        h2o.check_sandbox_for_errors()
        print "exec end on ", "operators" , 'took', time.time() - start, 'seconds'
github h2oai / h2o-2 / py / testdir_multi_jvm / test_rf_kaggle_fvec.py View on Github external
def test_cs_training(self):
        """Parse the Kaggle credit-sample training set and fit a 100-tree RF on it."""
        h2o.beta_features = True  # use the fvec code path
        parsed = h2i.import_parse(
            bucket='smalldata',
            path='kaggle/creditsample-training.csv.gz',
            schema='put',
            timeoutSecs=120)
        h2o_cmd.runRF(
            parseResult=parsed,
            ntrees=100,
            max_depth=100,
            timeoutSecs=500,
            response='SeriousDlqin2yrs')
github h2oai / h2o-2 / py / testdir_release / c7 / test_c7_fvec.py View on Github external
def test_c7_rel(self):
        """Release test c7: build the path to a large 0xcustomer dataset.

        NOTE(review): this excerpt is truncated — the parse/model steps that
        presumably follow the path setup are not visible here.
        """
        print "Running with h2o.beta_features=True for all"
        h2o.beta_features = True

        DO_INSPECT = True
        print "Since the python is not necessarily run as user=0xcust..., can't use a  schema='put' here"
        print "Want to be able to run python as jenkins"
        print "I guess for big 0xcust files, we don't need schema='put'"
        print "For files that we want to put (for testing put), we can get non-private files"

        # Choose the import folder based on who is running the test; the
        # non-kevin (CI) path is the 0xcustomer datasets mount.
        csvFilename = 'part-00000b'
        if getpass.getuser()=='kevin':
            # NOTE(review): trailing '/' here produces a double slash in
            # csvPathname below — harmless on POSIX filesystems.
            importFolderPath = '/home/hduser/data/'
        else:
            importFolderPath = '/mnt/0xcustomer-datasets/c2'

        csvPathname = importFolderPath + "/" + csvFilename

        # FIX! does 'separator=' take ints or ?? hex format
github h2oai / h2o-2 / py / testdir_multi_jvm / test_rand_inspect.py View on Github external
# NOTE(review): fragment — this code is mid-method; 'origInspect',
# 'destination_key', 'good_choices' and 'inspect_and_check' are defined
# outside this excerpt.
num_rows = origInspect['num_rows']
        num_cols = origInspect['num_cols']

        lenNodes = len(h2o.nodes)
        for trial in range (10):
            h2p.green_print("\nTrial", trial)
            # we want to use the boundary conditions, so have two level of random choices
            offset = good_choices(num_rows)
            view = good_choices(num_cols)
            # randomize the node used
            nodeX = random.randint(0,lenNodes-1)
            print "nodeX:", nodeX, "offset:", offset, "view:", view
            # First pass with the old (non-beta) Inspect ...
            h2o.beta_features = False
            inspect_and_check(nodeX,destination_key,offset,view,origInspect)
            print "trying Inspect2 by flipping h2o.nodes[0].beta_features"
            # ... then the same check with Inspect2 (beta) enabled.
            h2o.beta_features = True
            # delay between the two inspects...bug around not getting autoframe in storeview?
            time.sleep(1)  # give the autoframe time to appear
            inspect_and_check(nodeX,destination_key,offset,view,origInspect)
            h2o.beta_features = False

            # a fvec frame should have been created in the storeView
            time.sleep(1)  # settle before counting keys

            # loop looking for the autoframe to show up
            # o = len(origStoreViewResult['keys'])
            o = h2i.count_keys_at_all_nodes()
            retry = 0
            okay = False
            # NOTE(review): 'retry' and 'okay' are not updated in this excerpt;
            # loop exit must rely on code below the cut.
            while retry==0 or not okay:
                newStoreViewResult = h2o_cmd.runStoreView(offset=0, view=1024, timeoutSecs=60)
                ## p = len(newStoreViewResult['keys'])
github h2oai / h2o-2 / py / testdir_multi_jvm / test_rf_hhp_2_fvec.py View on Github external
def test_rf_hhp_2_fvec(self):
        """Parse the hhp predict dataset and run a small 6-tree RF over it."""
        h2o.beta_features = True
        # NAs cause CM to zero..don't run for now
        dataset = 'hhp_9_17_12.predict.data.gz'
        parsed = h2i.import_parse(bucket='smalldata', path=dataset, schema='put')
        h2o_cmd.runRF(parseResult=parsed, trees=6, timeoutSecs=30)
github h2oai / h2o-2 / py / testdir_single_jvm / test_from_import_fvec.py View on Github external
# NOTE(review): fragment — mid-method; 'csvPathname', 'csvFilename' and
# 'importFolderPath' are bound outside this excerpt.
(importResult, importPattern) = h2i.import_only(bucket='home-0xdiag-datasets', path=csvPathname, schema='local', timeoutSecs=50)
            # Kick off the parse without polling; summary is skipped because the
            # parse result json isn't correct yet in noPoll mode.
            parseResult = h2i.import_parse(bucket='home-0xdiag-datasets', path=csvPathname, schema='local', hex_key='c.hex', 
                timeoutSecs=500, noPoll=True, doSummary=False) # can't do summary until parse result is correct json

            h2o.check_sandbox_for_errors()

            print "\nparseResult", h2o.dump_json(parseResult)

            # wait for it to show up in jobs?
            time.sleep(2)
            # no pattern waits for all
            h2o_jobs.pollWaitJobs(pattern=None, timeoutSecs=300, pollTimeoutSecs=10, retryDelaySecs=5)

            # hack it because no response from Parse2
            if h2o.beta_features:
                # Parse2 gives no usable response in noPoll mode, so fake the
                # result dict with the known destination key.
                parseResult = {'destination_key': 'c.hex'}

            else:
                print csvFilename, 'parse time:', parseResult['response']['time']
                print "Parse result['destination_key']:", parseResult['destination_key']
                inspect = h2o_cmd.runInspect(key=parseResult['destination_key'], timeoutSecs=30)

            # Force the beta path for the follow-up inspect regardless of branch.
            h2o.beta_features = True
            inspect = h2o_cmd.runInspect(key='c.hex', timeoutSecs=30)
            h2o.check_sandbox_for_errors()

            # have to avoid this on nflx data. colswap with exec
            # Exception: rjson error in gbm: Argument 'response' error: Only integer or enum/factor columns can be classified
            if importFolderPath=='manyfiles-nflx-gz':
                execExpr = 'c.hex=colSwap(c.hex,378,(c.hex[378]>15 ? 1 : 0))'
                resultExec = h2o_cmd.runExec(expression=execExpr)
github h2oai / h2o-2 / py / testdir_release / c4 / test_c4_four_billion_rows.py View on Github external
def test_four_billion_rows(self):
        h2o.beta_features = True
        timeoutSecs = 1500

        importFolderPath = "billions"
        csvFilenameList = [
            "four_billion_rows.csv",
            ]
        for csvFilename in csvFilenameList:
            csvPathname = importFolderPath + "/" + csvFilename
            start = time.time()

            # Parse*********************************
            parseResult = h2i.import_parse(bucket='home-0xdiag-datasets', path=csvPathname, schema='local',
                timeoutSecs=timeoutSecs, pollTimeoutSecs=180)
            elapsed = time.time() - start
            print "Parse result['destination_key']:", parseResult['destination_key']
            print csvFilename, "completed in", elapsed, "seconds.", "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)
github h2oai / h2o-2 / py / testdir_single_jvm_fvec / test_KMeans_covtype_fvec.py View on Github external
def test_KMeans_covtype_fvec(self):
        """Parse covtype via the fvec path and inspect its row/col counts.

        NOTE(review): truncated excerpt — the body of the trial loop at the
        end is not visible here.
        """
        h2o.beta_features = True
        # (filename, per-file timeout in seconds)
        csvFilenameList = [
            ('covtype.data', 800),
            ]

        importFolderPath = "standard"
        for csvFilename, timeoutSecs in csvFilenameList:
            # creates csvFilename.hex from file in importFolder dir 
            csvPathname = importFolderPath + "/" + csvFilename
            # NOTE(review): the per-file 'timeoutSecs' from the list is unused
            # here; a hard-coded 2000 is passed instead.
            parseResult = h2i.import_parse(bucket='home-0xdiag-datasets', path=csvPathname,
                timeoutSecs=2000, pollTimeoutSecs=60)
            inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])
            print "\n" + csvPathname, \
                "    numRows:", "{:,}".format(inspect['numRows']), \
                "    numCols:", "{:,}".format(inspect['numCols'])

            for trial in range(3):
github h2oai / h2o-2 / py / testdir_multi_jvm / test_GBM_params_rand2.py View on Github external
# hack
# NOTE(review): fragment — mid-method; 'timeoutSecs', 'start', 'testKey',
# 'testFilename', 'parseTestResult', 'parseTrainResult' and
# 'define_gbm_params' are bound outside this excerpt.
            if h2o.beta_features:
                # Parse2 gives no destination key back, so wait on jobs and
                # fill it in by hand.
                h2j.pollWaitJobs(timeoutSecs=timeoutSecs, pollTimeoutSecs=timeoutSecs)
                print "Filling in the parseTestResult['destination_key'] for h2o"
                parseTestResult['destination_key'] = testKey

            elapsed = time.time() - start
            print "test parse end on ", testFilename, 'took', elapsed, 'seconds',\
                "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)
            print "test parse result:", parseTestResult['destination_key']

            # GBM (train iterate)****************************************
            inspect = h2o_cmd.runInspect(key=parseTestResult['destination_key'])
            paramsDict = define_gbm_params()
            for trial in range(3):
                h2o.beta_features = True
                # translate it (only really need to do once . out of loop?
                h2o_cmd.runInspect(key=parseTrainResult['destination_key'])
                ### h2o_cmd.runSummary(key=parsTraineResult['destination_key'])

                # use this to set any defaults you want if the pick doesn't set
                params = {
                    'response': 54, 
                    'ignored_cols_by_name': 
                    '0,1,2,3,4', 
                    'ntrees': 2,
                    'validation': parseTestResult['destination_key'],
                }
                h2o_gbm.pickRandGbmParams(paramsDict, params)
                print "Using these parameters for GBM: ", params
                kwargs = params.copy()
github h2oai / h2o-2 / py / testdir_multi_jvm / test_rf_big_rand_tree_fvec.py View on Github external
def test_rf_big_rand_tree_fvec(self):
        """Write a synthetic 5000x1000 dataset and set up keys/seed for an RF run.

        NOTE(review): truncated excerpt — the RF invocation that follows the
        key/seed setup is not visible here.
        """
        h2o.beta_features = True
        SYNDATASETS_DIR = h2o.make_syn_dir()
        csvFilename = "syn.csv"
        csvPathname = SYNDATASETS_DIR + '/' + csvFilename

        rowCount = 5000
        colCount = 1000
        write_syn_dataset(csvPathname, rowCount, colCount)

        for trial in range (1):
            # make sure all key names are unique, when we re-put and re-parse (h2o caching issues)
            src_key = csvFilename + "_" + str(trial)
            hex_key = csvFilename + "_" + str(trial) + ".hex"
            seed = random.randint(0,sys.maxint)
            # some cols can be dropped due to constant 0 or 1. make sure data set has all 0's and all 1's above
            # to guarantee no dropped cols!
            # kwargs = {'ntree': 3, 'depth': 50, 'seed': seed}