How to use the lightgbm.Booster class in lightgbm

To help you get started, we’ve selected a few lightgbm examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github microsoft / LightGBM / tests / python_package_test / test_basic.py View on Github external
def test_cegb_affects_behavior(self):
        """Train a baseline booster and boosters with harsh CEGB penalties.

        Builds a random 100x5 dataset, trains a plain booster for 10
        iterations and saves its model text, then trains one booster per
        CEGB penalty setting — presumably to compare the resulting model
        texts against the baseline (the comparison is cut off in this
        excerpt).
        """
        X = np.random.random((100, 5))
        X[:, [1, 3]] = 0  # zero out columns 1 and 3
        y = np.random.random(100)
        names = ['col_%d' % i for i in range(5)]
        ds = lgb.Dataset(X, feature_name=names).construct()
        ds.set_label(y)
        base = lgb.Booster(train_set=ds)
        for k in range(10):
            base.update()  # one boosting iteration per call
        # NamedTemporaryFile is used only to reserve a unique file name;
        # the file is deleted on close, then save_model re-creates it.
        with tempfile.NamedTemporaryFile() as f:
            basename = f.name
        base.save_model(basename)
        with open(basename, 'rt') as f:
            basetxt = f.read()
        # Set extremely harsh penalties, so CEGB will block most splits.
        cases = [{'cegb_penalty_feature_coupled': [50, 100, 10, 25, 30]},
                 {'cegb_penalty_feature_lazy': [1, 2, 3, 4, 5]},
                 {'cegb_penalty_split': 1}]
        for case in cases:
            booster = lgb.Booster(train_set=ds, params=case)
            for k in range(10):
                booster.update()
            with tempfile.NamedTemporaryFile() as f:  # NOTE(review): snippet truncated here in this excerpt
github microsoft / LightGBM / tests / python_package_test / test_basic.py View on Github external
# NOTE(review): fragment starts mid-test — the enclosing `def` and the code
# that trained `bst` for 20 iterations are not visible in this excerpt.
self.assertEqual(bst.current_iteration(), 20)
        self.assertEqual(bst.num_trees(), 20)
        self.assertEqual(bst.num_model_per_iteration(), 1)

        bst.save_model("model.txt")
        pred_from_matr = bst.predict(X_test)
        # Reserve a unique temp file name (file is deleted on close),
        # then write the test set in svmlight format under that name.
        with tempfile.NamedTemporaryFile() as f:
            tname = f.name
        with open(tname, "w+b") as f:
            dump_svmlight_file(X_test, y_test, f)
        # Booster.predict accepts a data file path as well as a matrix.
        pred_from_file = bst.predict(tname)
        os.remove(tname)
        np.testing.assert_allclose(pred_from_matr, pred_from_file)

        # check saved model persistence
        bst = lgb.Booster(params, model_file="model.txt")
        os.remove("model.txt")
        pred_from_model_file = bst.predict(X_test)
        # we need to check the consistency of model file here, so test for exact equal
        np.testing.assert_array_equal(pred_from_matr, pred_from_model_file)

        # check early stopping is working. Make it stop very early, so the scores should be very close to zero
        pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 1.5}
        pred_early_stopping = bst.predict(X_test, **pred_parameter)
        # scores likely to be different, but prediction should still be the same
        np.testing.assert_array_equal(np.sign(pred_from_matr), np.sign(pred_early_stopping))

        # test that shape is checked during prediction
        bad_X_test = X_test[:, 1:]
        bad_shape_error_msg = "The number of features in data*"
        np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg,
                                       bst.predict, bad_X_test)
github microsoft / LightGBM / tests / python_package_test / test_basic.py View on Github external
def test_add_features_same_booster_behaviour(self):
        """Check that add_features_from reproduces the unsplit dataset.

        For each split point j, features [0, j) and [j, 5) are constructed
        as two datasets and merged with ``add_features_from``; a booster
        trained on the merged dataset must produce a model file identical
        to one trained on the original, unsplit dataset.
        """
        self.maxDiff = None  # show full diff if the model texts differ
        X = np.random.random((100, 5))
        X[:, [1, 3]] = 0  # zero out columns 1 and 3
        names = ['col_%d' % i for i in range(5)]
        for j in range(1, 5):
            d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct()
            d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct()
            d1.add_features_from(d2)
            d = lgb.Dataset(X, feature_name=names).construct()
            y = np.random.random(100)
            d1.set_label(y)
            d.set_label(y)
            b1 = lgb.Booster(train_set=d1)
            b = lgb.Booster(train_set=d)
            for k in range(10):
                b.update()
                b1.update()
            # mkstemp avoids the name-reuse race of the old
            # NamedTemporaryFile pattern (name freed on close, then
            # re-created) and the files are removed after use instead
            # of being leaked.
            dfd, dname = tempfile.mkstemp()
            os.close(dfd)
            d1fd, d1name = tempfile.mkstemp()
            os.close(d1fd)
            try:
                b1.save_model(d1name)
                b.save_model(dname)
                with open(dname, 'rt') as df:
                    dtxt = df.read()
                with open(d1name, 'rt') as d1f:
                    d1txt = d1f.read()
            finally:
                os.remove(dname)
                os.remove(d1name)
            self.assertEqual(dtxt, d1txt)
github microsoft / LightGBM / tests / python_package_test / test_basic.py View on Github external
# NOTE(review): fragment starts mid-method — the enclosing `def` and the
# creation of X, y and names are not visible in this excerpt.
ds = lgb.Dataset(X, feature_name=names).construct()
        ds.set_label(y)
        base = lgb.Booster(train_set=ds)
        for k in range(10):
            base.update()  # one boosting iteration per call
        # Reserve a unique temp file name (file is deleted on close),
        # then save the baseline model text under that name.
        with tempfile.NamedTemporaryFile() as f:
            basename = f.name
        base.save_model(basename)
        with open(basename, 'rt') as f:
            basetxt = f.read()
        # Set extremely harsh penalties, so CEGB will block most splits.
        cases = [{'cegb_penalty_feature_coupled': [50, 100, 10, 25, 30]},
                 {'cegb_penalty_feature_lazy': [1, 2, 3, 4, 5]},
                 {'cegb_penalty_split': 1}]
        for case in cases:
            booster = lgb.Booster(train_set=ds, params=case)
            for k in range(10):
                booster.update()
            with tempfile.NamedTemporaryFile() as f:
                casename = f.name
            booster.save_model(casename)
            with open(casename, 'rt') as f:
                casetxt = f.read()
            # CEGB penalties must have changed the trained model.
            self.assertNotEqual(basetxt, casetxt)
github Koziev / chatbot / ruchatbot / trainers / lgb_relevancy.py View on Github external
# NOTE(review): fragment starts mid-function and is truncated at the end;
# the surrounding definitions (vectorize_sample_x, run_mode, task, ...)
# are not visible in this excerpt.
vectorize_sample_x(X_data, 0, premise_shingles, question_shingles, xgb_relevancy_shingle2id)

        y_pred = lgb_relevancy.predict(X_data)
        print('{}\n\n'.format(y_pred[0]))

if run_mode == 'query2':
    # Manual check of the model on questions entered in the console.
    # The list of premises is read from the given file.

    # Load the trained model's data.
    with open(os.path.join(tmp_folder, config_filename), 'r') as f:
        model_config = json.load(f)

    tokenizer = PhraseSplitter.create_splitter(model_config['lemmatize'])

    # Restore the trained booster from its saved model file.
    lgb_relevancy = lightgbm.Booster(model_file=model_config['model_filename'])

    xgb_relevancy_shingle2id = model_config['shingle2id']
    xgb_relevancy_shingle_len = model_config['shingle_len']
    xgb_relevancy_nb_features = model_config['nb_features']
    xgb_relevancy_lemmalize = model_config['lemmatize']

    premises = []

    prompt = ':> '
    added_phrases = set()
    if task in 'relevancy partial_relevancy'.split():
        # Search for the best premise relevant to the entered question.
        prompt = 'question:> '

        if True:
            for fname in ['profile_facts_1.dat']:  # NOTE(review): snippet truncated here in this excerpt
github mljar / mljar-supervised / supervised / models / learner_lightgbm.py View on Github external
def load(self, json_desc):
        """Restore learner state from a JSON description and reload the model.

        Each attribute falls back to its current value when the matching
        key is absent from ``json_desc``; the LightGBM booster is then
        re-created from ``model_file_path``.
        """
        for attr_name in ("library_version", "algorithm_name",
                          "algorithm_short_name", "uid", "model_file",
                          "model_file_path", "params"):
            setattr(self, attr_name,
                    json_desc.get(attr_name, getattr(self, attr_name)))

        log.debug("LightgbmLearner load model from %s" % self.model_file_path)
        self.model = lgb.Booster(model_file=self.model_file_path)
github electricbrainio / hypermax / research / atpe_research_1 / simulation.py View on Github external
def executeLightGBMModel(params, model=None):
    """Run a cached LightGBM model for the given simulation parameters.

    Lazily loads the text-extraction booster into the module-level
    ``lightGBMModel`` on first use; the rest of the function is cut off
    in this excerpt.
    """
    global lightGBMModel
    if model == 'textextraction':
        # Load the booster once and cache it at module scope.
        if lightGBMModel is None:
            lightGBMModel = lgb.Booster(model_file='LightGBM_model_text_extraction.txt')

        vectorKeys = [# They are in this order for a reason - that's what was in our training data file.
            'layer_0.max_depth',
            'layer_0.min_data_in_leaf',
            'layer_0.boosting_rounds',
            'layer_1.input_window',
            'layer_0.num_leaves',
            'layer_1.min_data_in_leaf',
            'layer_1.boosting_rounds',
            'layer_1.learning_rate',
            'layer_1.num_leaves',
            'layer_0.bagging_fraction',
            'layer_1.max_depth',
            'layer_0.learning_rate',
            'layer_0.input_window',
            'layer_0.feature_fraction']  # NOTE(review): snippet truncated after this list in this excerpt
github xadrianzetx / fullstack.ai / backend / models.py View on Github external
def _load_models(self):
        """Load one LightGBM booster per cross-validation fold.

        Returns:
            list: ``lgb.Booster`` objects, one per artifact file found in
            the package's ``/assets/models`` directory.

        Raises:
            ValueError: if the number of artifact files differs from
            ``self._n_folds``.
        """
        models = []
        path = '/assets/models'

        # localize model artifacts for all folds
        abs_path = pkg_resources.resource_filename('backend', path)
        # sort for a deterministic fold order -- os.listdir returns
        # entries in arbitrary, filesystem-dependent order
        artifacts = sorted(os.listdir(abs_path))

        if len(artifacts) != self._n_folds:
            raise ValueError('Number of model artifacts does not match n_folds')

        for artifact in artifacts:
            # load model from artifact
            arti_path = os.path.join(abs_path, artifact)
            clf = lgb.Booster(model_file=arti_path)
            models.append(clf)

        return models
github Koziev / chatbot / ruchatbot / bot / lgb_req_interpretation.py View on Github external
def load(self, models_folder):
        """Load config, LightGBM model, vectorizer and the no-expansion
        phrase set from *models_folder*."""
        self.logger.info('Loading LGB_ReqInterpretation model files')

        config_path = os.path.join(models_folder, 'lgb_req_interpretation.config')
        with open(config_path, 'r') as cfg_file:
            self.model_config = json.load(cfg_file)

        def _local_path(key):
            # Resolve a file name referenced by the config to a path
            # inside models_folder (only the basename is kept).
            return os.path.join(models_folder, os.path.basename(self.model_config[key]))

        self.model = lightgbm.Booster(model_file=_local_path('model_filename'))

        with open(_local_path('vectorizer_filename'), 'rb') as vec_file:
            self.vectorizer = pickle.load(vec_file)

        self.no_expansion_phrases = set(self.model_config['no_expansion_phrases'])
github electricbrainio / hypermax / hypermax / algorithms / atpe_optimizer.py View on Github external
def __init__(self):
        """Load the packaged feature scalers and per-parameter LightGBM models.

        NOTE(review): relies on class attributes ``atpeModelFeatureKeys``
        and ``atpeParameters`` defined outside this excerpt; ``__init__``
        may also continue past the last visible line.
        """
        scalingModelData = json.loads(pkg_resources.resource_string(__name__, "../atpe_models/scaling_model.json"))
        self.featureScalingModels = {}
        for key in self.atpeModelFeatureKeys:
            # Rebuild each StandardScaler from stored statistics instead of
            # fitting it; scale_/mean_/var_ are assigned directly.
            self.featureScalingModels[key] = sklearn.preprocessing.StandardScaler()
            self.featureScalingModels[key].scale_ = numpy.array(scalingModelData[key]['scales'])
            self.featureScalingModels[key].mean_ = numpy.array(scalingModelData[key]['means'])
            self.featureScalingModels[key].var_ = numpy.array(scalingModelData[key]['variances'])

        self.parameterModels = {}
        self.parameterModelConfigurations = {}
        for param in self.atpeParameters:
            # Booster needs a real file path, so the packaged model bytes are
            # written to a closed temp file before loading.
            modelData = pkg_resources.resource_string(__name__, "../atpe_models/model-" + param + '.txt')
            with hypermax.file_utils.ClosedNamedTempFile(modelData) as model_file_name:
                self.parameterModels[param] = lightgbm.Booster(model_file=model_file_name)

            configString = pkg_resources.resource_string(__name__, "../atpe_models/model-" + param + '-configuration.json')
            data = json.loads(configString)
            self.parameterModelConfigurations[param] = data

        self.lastATPEParameters = None
        self.lastLockedParameters = []
        self.atpeParamDetails = None