How to use the xgboost.XGBRegressor class in xgboost

To help you get started, we’ve selected a few xgboost.XGBRegressor examples, based on popular ways it is used in public projects.

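Before the project snippets, here is a minimal, self-contained sketch of the typical workflow: construct an XGBRegressor, fit it on training data, then predict and score on a held-out split. The synthetic data and the hyperparameter values below are illustrative assumptions, not taken from any of the projects shown.

import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split

# Synthetic regression data (illustrative only)
rng = np.random.RandomState(0)
X = rng.rand(500, 10)
y = X @ rng.rand(10) + 0.1 * rng.rand(500)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Hyperparameter values here are arbitrary examples, not recommendations
model = xgb.XGBRegressor(n_estimators=100, max_depth=4, learning_rate=0.1)
model.fit(X_train, y_train)

print(model.predict(X_test[:5]))    # predictions for a few held-out rows
print(model.score(X_test, y_test))  # R^2 on the held-out split

All of the snippets below rely on this same scikit-learn style API (fit, predict, score), often combined with utilities such as GridSearchCV or KFold.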

github dmlc / xgboost / tests / python-gpu / test_gpu_prediction.py
        X_train, y_train = X[:tr_size, :], y[:tr_size]
        X_test, y_test = X[tr_size:, :], y[tr_size:]

        # First with cpu_predictor
        params = {'tree_method': 'gpu_hist',
                  'predictor': 'cpu_predictor',
                  'n_jobs': -1,
                  'seed': 123}
        m = xgb.XGBRegressor(**params).fit(X_train, y_train)
        cpu_train_score = m.score(X_train, y_train)
        cpu_test_score = m.score(X_test, y_test)

        # Now with gpu_predictor
        params['predictor'] = 'gpu_predictor'

        m = xgb.XGBRegressor(**params).fit(X_train, y_train)
        gpu_train_score = m.score(X_train, y_train)
        gpu_test_score = m.score(X_test, y_test)

        assert np.allclose(cpu_train_score, gpu_train_score)
        assert np.allclose(cpu_test_score, gpu_test_score)

github slundberg / shap / tests / explainers / test_tree.py
def test_xgboost_direct():
    try:
        import xgboost
    except Exception as e:
        print("Skipping test_xgboost_direct!")
        return
    import shap

    N = 100
    M = 4
    X = np.random.randn(N,M)
    y = np.random.randn(N)  

    model = xgboost.XGBRegressor()
    model.fit(X, y)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    assert np.allclose(shap_values[0,:], _brute_force_tree_shap(explainer.model, X[0,:]))

github dmlc / xgboost / demo / guide-python / sklearn_examples.py
y = iris['target']
X = iris['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
    predictions = xgb_model.predict(X[test_index])
    actuals = y[test_index]
    print(confusion_matrix(actuals, predictions))

print("Boston Housing: regression")
boston = load_boston()
y = boston['target']
X = boston['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
    predictions = xgb_model.predict(X[test_index])
    actuals = y[test_index]
    print(mean_squared_error(actuals, predictions))

print("Parameter optimization")
y = boston['target']
X = boston['data']
xgb_model = xgb.XGBRegressor()
clf = GridSearchCV(xgb_model,
                   {'max_depth': [2,4,6],
                    'n_estimators': [50,100,200]}, verbose=1)
clf.fit(X,y)
print(clf.best_score_)
print(clf.best_params_)

# The sklearn API models are picklable
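
The snippet ends on a comment about picklability; as a quick illustration of what that means (this continuation is ours, reusing the clf name from the grid search above, and is not part of the original demo), the fitted estimator can be round-tripped through Python's standard pickle module:

import pickle

# Serialize the fitted GridSearchCV (including the XGBRegressor inside it) and restore it
payload = pickle.dumps(clf)
restored = pickle.loads(payload)
print(restored.best_params_)  # same best parameters as before serialization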

github mlflow / mlflow-apps / gbt-regression / train_gbt.py
def train(args, pandasData):

	# Split data into a labels dataframe and a features dataframe
	labels = pandasData[args.label_col].values
	features = pandasData[args.feat_cols].values

	# Hold out test_percent of the data for testing.  We will use the rest for training.
	trainingFeatures, testFeatures, trainingLabels, testLabels = train_test_split(features, labels, test_size=args.test_percent)
	ntrain, ntest = len(trainingLabels), len(testLabels)
	print("Split data randomly into 2 sets: {} training and {} test instances.".format(ntrain, ntest))

	# We will use a GBT regressor model.
	xgbr = xgb.XGBRegressor(max_depth = args.m_depth, learning_rate = args.learning_rate, n_estimators = args.n_trees)

	# Here we train the model and keep track of how long it takes.
	start_time = time()
	xgbr.fit(trainingFeatures, trainingLabels, eval_metric = args.loss)

	# Calculating the score of the model.
	r2_score_training = xgbr.score(trainingFeatures, trainingLabels)
	r2_score_test = 0
	if args.test_percent != 0:
		r2_score_test = xgbr.score(testFeatures, testLabels)
	timed = time() - start_time
	print("Training set score:", r2_score_training)
	if args.test_percent != 0:
		print("Test set score:", r2_score_test)

	#Logging the parameters for viewing later. Can be found in the folder mlruns/.

github produvia / ai-platform / tasks / time-series / time-series-forecasting / a65761f6-78d4-4fa7-988c-4ac6e7c07421 / src / runner.py
def grid_search(self, xtr, ytr):
        gbm = xgb.XGBRegressor()
        reg_cv = GridSearchCV(gbm,
                              {"colsample_bytree": self.colsample_bytree, "min_child_weight": self.min_child_weight,
                               'max_depth': self.max_depth, 'n_estimators': self.n_estimators}, verbose=1)
        reg_cv.fit(xtr, ytr)
        return reg_cv

github BayesWitnesses / m2cgen / tools / generate_code_examples.py
        tree.DecisionTreeClassifier(**TREE_PARAMS),
        utils.train_model_classification,
    ),
    (
        "regression", "random_forest",
        ensemble.RandomForestRegressor(**FOREST_PARAMS),
        utils.train_model_regression,
    ),
    (
        "classification", "random_forest",
        ensemble.RandomForestClassifier(**FOREST_PARAMS),
        utils.train_model_classification,
    ),
    (
        "regression", "xgboost",
        xgboost.XGBRegressor(**XGBOOST_PARAMS),
        utils.train_model_regression,
    ),
    (
        "classification", "xgboost",
        xgboost.XGBClassifier(**XGBOOST_PARAMS),
        utils.train_model_classification,
    ),
    (
        "regression", "lightgbm",
        lightgbm.LGBMRegressor(**LIGHT_GBM_PARAMS),
        utils.train_model_regression,
    ),
    (
        "classification", "lightgbm",
        lightgbm.LGBMClassifier(**LIGHT_GBM_PARAMS),
        utils.train_model_classification,

github gilad-rubin / hypster / hypster / estimators / regression / xgboost.py
def create_model(self):
        # TODO: if learning rates are identical throughout - create a regular Classifier
        self.model_params['n_estimators'] = self.best_n_iterations
        self.model_params['learning_rate'] = self.learning_rates[0]

        self.model_params['n_jobs'] = self.model_params.pop('nthread')
        self.model_params['random_state'] = self.model_params.pop('seed')
        self.model_params['reg_lambda'] = self.model_params.pop('lambda')
        self.model_params['reg_alpha'] = self.model_params.pop('alpha')

        final_model = XGBRegressor(**self.model_params)
        #final_model = XGBRegressorLR(learning_rates=self.learning_rates, **self.model_params)
        return final_model

class XGBRegressorLR(XGBRegressor):
    def __init__(self, learning_rates = None,
                 max_depth=3, learning_rate=1, n_estimators=100,
                 verbosity=1,
                 objective="reg:squarederror", booster="gbtree", n_jobs=1, nthread=None, gamma=0,
                 min_child_weight=1, max_delta_step=0, subsample=0.8, colsample_bytree=1,
                 colsample_bylevel=1, colsample_bynode=0.8, reg_alpha=0, reg_lambda=1,
                 scale_pos_weight=1, base_score=0.5, random_state=0, seed=None,
                 missing=None, **kwargs):

        if 'learning_rates' in kwargs:
            self.learning_rates = kwargs.pop('learning_rates')
        else:
            self.learning_rates = learning_rates

        super(XGBRegressorLR, self).__init__(
            max_depth=max_depth, learning_rate=learning_rate, n_estimators=n_estimators,

github deepchem / deepchem / examples / benchmark_xgboost.py
def model_builder(model_dir_xgb):
      xgboost_model = xgboost.XGBRegressor(
          max_depth=max_depth,
          learning_rate=learning_rate,
          n_estimators=n_estimators,
          gamma=gamma,
          min_child_weight=min_child_weight,
          max_delta_step=max_delta_step,
          subsample=subsample,
          colsample_bytree=colsample_bytree,
          colsample_bylevel=colsample_bylevel,
          reg_alpha=reg_alpha,
          reg_lambda=reg_lambda,
          scale_pos_weight=scale_pos_weight,
          base_score=base_score,
          seed=seed)
      return dc.models.xgboost_models.XGBoostModel(xgboost_model, model_dir_xgb,
                                                   **esr)

github ChenglongChen / Kaggle_HomeDepot / Code / Igor&Kostia / ensemble_script_random_version.py
    # Our level 0 classifiers
    clfs = [
        ExtraTreesRegressor(n_estimators = n_trees * 20),
        BaggingRegressor(base_estimator=xgb.XGBRegressor(**xgb_params0), n_estimators=10, random_state=np.random.RandomState(2016) ),
        RandomForestRegressor(n_estimators=500, max_depth=5, min_samples_leaf=6, max_features=0.9,\
           min_samples_split=1, n_jobs= -1, random_state=2014),
        AdaBoostRegressor(base_estimator=None, n_estimators=250, learning_rate=0.03, loss='linear', random_state=20160703),
        BaggingRegressor(base_estimator=None, n_estimators=200, max_samples=1.0, max_features=1.0, bootstrap=True, bootstrap_features=False, oob_score=False, warm_start=False, n_jobs=1, random_state=None, verbose=0),
        neighbors.KNeighborsRegressor(128, weights="uniform", leaf_size=5),
        SVR(kernel='rbf', C=0.2, gamma=0.1),
        SVR(kernel='rbf', C=0.3, gamma=0.5),
        SVR(kernel='linear', C=0.2),
        SVR(kernel='poly', C=0.2, degree=2),
        GradientBoostingRegressor(n_estimators=500, max_depth=6, min_samples_split=1, min_samples_leaf=15, learning_rate=0.035, loss='ls',random_state=10),
        xgb.XGBRegressor(**xgb_params0),
        xgb.XGBRegressor(**xgb_params1),
        DecisionTreeRegressor(criterion='mse', splitter='random', max_depth=4, min_samples_split=7, min_samples_leaf=30, min_weight_fraction_leaf=0.0, max_features='sqrt', random_state=None, max_leaf_nodes=None, presort=False)
    ]
   
    # Ready for cross validation
    skf = list(StratifiedKFold(Y_dev, n_folds, shuffle=True))
    blend_train = np.zeros((X_dev.shape[0], len(clfs))) # Number of training data x Number of classifiers
    blend_test = np.zeros((X_test.shape[0], len(clfs))) # Number of testing data x Number of classifiers
     
    print('X_test.shape = %s' % str(X_test.shape))
    print('blend_train.shape = %s' % str(blend_train.shape))
    print('blend_test.shape = %s' % str(blend_test.shape))
    
    # For each classifier, we train the number of fold times (=len(skf))
    for j, clf in enumerate(clfs):
        print('Training classifier [%s]' % clf)
        print('Progress: %s' % ((j + 1.0) / len(clfs)))

github apple / coremltools / coremltools / converters / xgboost / _tree_ensemble.py
    import json
    import os
    feature_map = None
    if isinstance(model,  (_xgboost.core.Booster, _xgboost.XGBRegressor)):

        # Testing a few corner cases that we don't support
        if isinstance(model, _xgboost.XGBRegressor):
            try:
                objective = model.get_xgb_params()["objective"]
            except:
                objective = None
            if objective in ["reg:gamma", "reg:tweedie"]:
                raise ValueError("Regression objective '%s' not supported for export." % objective)

        # Now use the booster API.
        if isinstance(model, _xgboost.XGBRegressor):
            # Name change in 0.7
            if hasattr(model, 'get_booster'):
                model = model.get_booster()
            else:
                model = model.booster()

        # Xgboost sometimes has feature names in there. Sometimes does not.
        if (feature_names is None) and (model.feature_names is None):
            raise ValueError("Feature names not present in the model. Must be provided during conversion.")
        if feature_names is None:
            feature_names = model.feature_names

        xgb_model_str = model.get_dump(with_stats=True, dump_format = 'json')

        if model.feature_names: