How to use the lightgbm.sklearn.LGBMClassifier class in lightgbm

To help you get started, we’ve selected a few LGBMClassifier examples based on popular ways the class is used in public projects.

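If you're new to the scikit-learn interface, the core workflow is a plain fit/predict cycle. Here is a minimal, self-contained sketch; the dataset and parameter values are illustrative, not taken from the projects below:

import lightgbm as lgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Illustrative data; any (n_samples, n_features) array-like works.
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = lgb.sklearn.LGBMClassifier(n_estimators=50)  # same class as lgb.LGBMClassifier
clf.fit(X_train, y_train)
labels = clf.predict(X_test)        # hard class labels
probas = clf.predict_proba(X_test)  # shape (n_samples, n_classes)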

github microsoft / LightGBM / tests / python_package_test / test_sklearn.py
# Excerpt from LightGBM's own sklearn test suite; assumes module-level
# imports of numpy as np and lightgbm as lgb.
def test_pandas_sparse(self):
    import pandas as pd
    X = pd.DataFrame({"A": pd.SparseArray(np.random.permutation([0, 1, 2] * 100)),
                      "B": pd.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60)),
                      "C": pd.SparseArray(np.random.permutation([True, False] * 150))})
    y = pd.Series(pd.SparseArray(np.random.permutation([0, 1] * 150)))
    X_test = pd.DataFrame({"A": pd.SparseArray(np.random.permutation([0, 2] * 30)),
                           "B": pd.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1] * 15)),
                           "C": pd.SparseArray(np.random.permutation([True, False] * 30))})
    if pd.__version__ >= '0.24.0':
        for dtype in pd.concat([X.dtypes, X_test.dtypes, pd.Series(y.dtypes)]):
            self.assertTrue(pd.api.types.is_sparse(dtype))
    gbm = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y)
    pred_sparse = gbm.predict(X_test, raw_score=True)
    if hasattr(X_test, 'sparse'):
        pred_dense = gbm.predict(X_test.sparse.to_dense(), raw_score=True)
    else:
        pred_dense = gbm.predict(X_test.to_dense(), raw_score=True)
    np.testing.assert_allclose(pred_sparse, pred_dense)
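Note that pd.SparseArray was deprecated in pandas 1.0 in favor of pd.arrays.SparseArray and removed in later releases. A minimal sketch of the same sparse-input check against current pandas, with illustrative column contents:

import numpy as np
import pandas as pd
import lightgbm as lgb

X = pd.DataFrame({"A": pd.arrays.SparseArray(np.random.permutation([0, 1, 2] * 100)),
                  "B": pd.arrays.SparseArray(np.random.permutation([0.0, 0.1, 0.2, -0.1, 0.2] * 60))})
y = np.random.permutation([0, 1] * 150)

gbm = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y)
# Predictions on sparse and densified input should match.
pred_sparse = gbm.predict(X, raw_score=True)
pred_dense = gbm.predict(X.sparse.to_dense(), raw_score=True)
np.testing.assert_allclose(pred_sparse, pred_dense)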
github slundberg / shap / tests / explainers / test_tree.py
def test_lightgbm_constant_multiclass():
    # note: this test used to fail with lightgbm 2.2.1 with error:
    # ValueError: zero-size array to reduction operation maximum which has no identity
    # on TreeExplainer when trying to compute max nodes:
    # max_nodes = np.max([len(t.values) for t in self.trees])
    # (training on a constant target yields trees with no splits)
    # The test does not fail with the latest lightgbm 2.2.3, however
    try:
        import lightgbm
    except ImportError:
        print("Skipping test_lightgbm_constant_multiclass!")
        return
    import shap

    # train a lightgbm model on a constant target
    X, Y = shap.datasets.iris()
    Y.fill(1)
    model = lightgbm.sklearn.LGBMClassifier(num_classes=3, objective="multiclass")
    model.fit(X, Y)

    # explain the model's predictions using SHAP values
    shap_values = shap.TreeExplainer(model).shap_values(X)
github slundberg / shap / tests / explainers / test_tree.py
def test_lightgbm_binary():
    try:
        import lightgbm
    except ImportError:
        print("Skipping test_lightgbm_binary!")
        return
    import shap
    from sklearn.model_selection import train_test_split

    # train lightgbm model
    X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.adult(), test_size=0.2, random_state=0)
    model = lightgbm.sklearn.LGBMClassifier()
    model.fit(X_train, Y_train)

    # explain the model's predictions using SHAP values
    shap_values = shap.TreeExplainer(model).shap_values(X_test)

    # validate the structure of the shap values: a list of ndarrays, one per class
    assert isinstance(shap_values, list)
    assert len(shap_values) == 2

    # ensure the plot works for the first class
    shap.dependence_plot(0, shap_values[0], X_test, show=False)
github microsoft / LightGBM / tests / python_package_test / test_sklearn.py
# Excerpt from a larger test method; X, y, np, pd, and lgb are defined
# earlier in the file.
X_test = pd.DataFrame({"A": np.random.permutation(['a', 'b', 'e'] * 20),  # unseen category
                       "B": np.random.permutation([1, 3] * 30),
                       "C": np.random.permutation([0.1, -0.1, 0.2, 0.2] * 15),
                       "D": np.random.permutation([True, False] * 30),
                       "E": pd.Categorical(np.random.permutation(['z', 'y'] * 30),  # pd.np alias removed in pandas 2.0
                                           ordered=True)})
np.random.seed()  # reset seed
cat_cols_actual = ["A", "B", "C", "D"]
cat_cols_to_store = cat_cols_actual + ["E"]
X[cat_cols_actual] = X[cat_cols_actual].astype('category')
X_test[cat_cols_actual] = X_test[cat_cols_actual].astype('category')
cat_values = [X[col].cat.categories.tolist() for col in cat_cols_to_store]
gbm0 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y)
pred0 = gbm0.predict(X_test, raw_score=True)
pred_prob = gbm0.predict_proba(X_test)[:, 1]
gbm1 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, pd.Series(y), categorical_feature=[0])
pred1 = gbm1.predict(X_test, raw_score=True)
gbm2 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y, categorical_feature=['A'])
pred2 = gbm2.predict(X_test, raw_score=True)
gbm3 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y, categorical_feature=['A', 'B', 'C', 'D'])
pred3 = gbm3.predict(X_test, raw_score=True)
gbm3.booster_.save_model('categorical.model')
gbm4 = lgb.Booster(model_file='categorical.model')
pred4 = gbm4.predict(X_test)
gbm5 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y, categorical_feature=['A', 'B', 'C', 'D', 'E'])
pred5 = gbm5.predict(X_test, raw_score=True)
gbm6 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y, categorical_feature=[])
pred6 = gbm6.predict(X_test, raw_score=True)
self.assertRaises(AssertionError,
                  np.testing.assert_allclose,
                  pred0, pred1)
# ... (excerpt truncated)
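A key detail in this test: categorical_feature can be passed to fit as column indices or column names, and columns that already have pandas category dtype are picked up automatically, since the sklearn wrapper defaults to categorical_feature='auto'. A minimal sketch with illustrative data:

import pandas as pd
import lightgbm as lgb

X = pd.DataFrame({"A": pd.Categorical(["a", "b", "c", "a"] * 25),
                  "B": list(range(100))})
y = [0, 1] * 50

# "A" already has category dtype, so it is treated as categorical
# without passing categorical_feature explicitly.
gbm = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y)
print(gbm.predict_proba(X)[:5])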
github closest-git / LiteMORT / tests / python_package_test / test_sklearn.py
# Excerpt: compares LiteMORT's fit/predict API with LightGBM's sklearn
# wrapper on the same categorical data; isMORT, params, X, y, and X_test
# are defined earlier in the file.
if isMORT:
    mort0 = LiteMORT(params).fit(X, y)
    pred0 = list(mort0.predict(X_test))
    mort1 = LiteMORT(params).fit(X, y, categorical_feature=[0])
    pred1 = list(mort1.predict(X_test))
    mort2 = LiteMORT(params).fit(X, y, categorical_feature=['A'])
    pred2 = list(mort2.predict(X_test))
    mort3 = LiteMORT(params).fit(X, y, categorical_feature=['A', 'B', 'C', 'D'])
    pred3 = list(mort3.predict(X_test))
else:
    clf = lgb.sklearn.LGBMClassifier()
    gbm_ = clf.fit(X, y)
    gbm0 = lgb.sklearn.LGBMClassifier().fit(X, y)
    pred0 = list(gbm0.predict(X_test))
    gbm1 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=[0])
    pred1 = list(gbm1.predict(X_test))
    gbm2 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['A'])
    pred2 = list(gbm2.predict(X_test))
    gbm3 = lgb.sklearn.LGBMClassifier().fit(X, y, categorical_feature=['A', 'B', 'C', 'D'])
    pred3 = list(gbm3.predict(X_test))
    gbm3.booster_.save_model('categorical.model')
    gbm4 = lgb.Booster(model_file='categorical.model')
    pred4 = list(gbm4.predict(X_test))
    pred_prob = list(gbm0.predict_proba(X_test)[:, 1])
    np.testing.assert_almost_equal(pred_prob, pred4)
    input("...")  # pauses for manual inspection
#np.testing.assert_almost_equal(pred0, pred1)
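One detail worth noting in the LightGBM branch: for a binary objective, a raw lgb.Booster reloaded from a saved model returns positive-class probabilities from predict, which is why pred4 is compared against predict_proba(...)[:, 1] rather than against predict. A self-contained round-trip sketch; the filename and data are illustrative:

import numpy as np
import lightgbm as lgb
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, random_state=0)
sk_model = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y)

sk_model.booster_.save_model('binary.model')  # illustrative filename
booster = lgb.Booster(model_file='binary.model')

# The raw Booster predicts positive-class probabilities, matching
# predict_proba's second column.
np.testing.assert_allclose(sk_model.predict_proba(X)[:, 1], booster.predict(X))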
github Ashton-Sidhu / aethos / aethos / model_analysis / model_explanation.py
# Excerpt from aethos's model-explanation helper; self.model,
# self.x_train, and self.x_test are set elsewhere in the class.
elif learner == "kernel":
    if hasattr(self.model, "predict_proba"):
        func = self.model.predict_proba
    else:
        func = self.model.predict

    self.explainer = shap.KernelExplainer(func, self.x_train)
else:
    raise ValueError(f"Learner: {learner} is not supported yet.")

self.expected_value = self.explainer.expected_value
self.shap_values = np.array(self.explainer.shap_values(self.x_test)).astype(float)

# For a binary LGBMClassifier, shap returns one array per class;
# keep the positive class when expected_value is a single scalar.
# (np.float was removed in numpy 1.24; the builtin float works here.)
if isinstance(self.model, lgb.sklearn.LGBMClassifier) and isinstance(
    self.expected_value, float
):
    self.shap_values = self.shap_values[1]

# Calculate misclassified values
self.misclassified_values = self._calculate_misclassified()

# As per SHAP guidelines, test data needs to be dense for plotting functions
self.x_test_array = self.x_test.values
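The shap_values[1] indexing above reflects the older shap convention for binary classifiers, where shap_values returns one array per class and index 1 is the positive class. A self-contained sketch of the same selection; the list-per-class return shape is an assumption about the installed shap version, so the shape check hedges against it:

import numpy as np
import shap
import lightgbm as lgb
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, n_features=5, random_state=0)
model = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y)

shap_values = np.array(shap.TreeExplainer(model).shap_values(X)).astype(float)
if shap_values.ndim == 3 and shap_values.shape[0] == 2:
    # one (n_samples, n_features) array per class: keep the positive class
    shap_values = shap_values[1]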
github gilad-rubin / hypster / hypster / estimators / classification / lightgbm.py
def create_model(self):
    # TODO: if learning rates are identical throughout, create a regular Classifier
    if "is_unbalance" in self.model_params:
        is_unbalance = self.model_params.pop("is_unbalance")
        self.model_params["class_weight"] = "balanced" if is_unbalance else None

    self.model_params['n_estimators'] = self.best_n_iterations
    self.model_params["learning_rate"] = self.learning_rates[0]  # TODO: change

    final_model = LGBMClassifier(**self.model_params)
    return final_model
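The is_unbalance translation exists because is_unbalance is a native LightGBM booster parameter, while the sklearn wrapper documents class_weight as the preferred way to reweight classes. A usage sketch of the same mapping, with an illustrative parameter dict:

from lightgbm import LGBMClassifier

# Illustrative parameter dict, e.g. from a hyperparameter search.
model_params = {"num_leaves": 31, "is_unbalance": True}

# Map the native flag onto the sklearn wrapper's class_weight argument.
if "is_unbalance" in model_params:
    is_unbalance = model_params.pop("is_unbalance")
    model_params["class_weight"] = "balanced" if is_unbalance else None

final_model = LGBMClassifier(**model_params)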