# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# --- Fragment 1: neural-network train/predict with stratified K-fold CV ---
# NOTE(review): scraped excerpt — indentation was lost and the body is cut off
# mid fit_generator() call; not runnable as-is.
def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
n_est=100, batch_size=1024, retrain=True):
# Derive the model name by stripping two extensions from the test-prediction
# filename (e.g. "model.sub.csv" -> "model").
model_name = os.path.splitext(os.path.splitext(os.path.basename(predict_test_file))[0])[0]
# Per-model DEBUG log file named "<model_name>.log".
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
level=logging.DEBUG,
filename='{}.log'.format(model_name))
logging.info('Loading training and test data...')
X, y = load_data(train_file)  # project helper; presumably returns (features, labels) — TODO confirm
X_tst, _ = load_data(test_file)  # test-set labels are discarded
dims = X.shape[1]  # input dimensionality for the network
logging.info('{} dims'.format(dims))
logging.info('Loading CV Ids')
# N_FOLD and SEED are module-level constants defined elsewhere in the file.
cv = StratifiedKFold(n_splits=N_FOLD, shuffle=True, random_state=SEED)
p = np.zeros_like(y)  # out-of-fold predictions, one per training row
p_tst = np.zeros((X_tst.shape[0],))  # accumulated test-set predictions
for i, (i_trn, i_val) in enumerate(cv.split(X, y), 1):  # folds numbered from 1
logging.info('Training model #{}'.format(i))
clf = nn_model(dims)  # project helper building the network for `dims` inputs
# Train from a batch generator (Keras fit_generator-style API).
clf.fit_generator(generator=batch_generator(X[i_trn],
y[i_trn],
batch_size,
# NOTE(review): excerpt truncated here — the fit_generator call is incomplete.
# --- Fragment 2: duplicate scraped excerpt of the same NN train_predict ---
# NOTE(review): identical to the preceding fragment except it extends one line
# further (the `True),` generator argument); still truncated and not runnable.
def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
n_est=100, batch_size=1024, retrain=True):
# Model name = test-prediction basename with two extensions stripped.
model_name = os.path.splitext(os.path.splitext(os.path.basename(predict_test_file))[0])[0]
# Per-model DEBUG log file.
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
level=logging.DEBUG,
filename='{}.log'.format(model_name))
logging.info('Loading training and test data...')
X, y = load_data(train_file)  # project helper
X_tst, _ = load_data(test_file)  # labels unused for test data
dims = X.shape[1]  # feature count / network input size
logging.info('{} dims'.format(dims))
logging.info('Loading CV Ids')
cv = StratifiedKFold(n_splits=N_FOLD, shuffle=True, random_state=SEED)  # N_FOLD/SEED defined elsewhere
p = np.zeros_like(y)  # out-of-fold predictions
p_tst = np.zeros((X_tst.shape[0],))  # test predictions
for i, (i_trn, i_val) in enumerate(cv.split(X, y), 1):
logging.info('Training model #{}'.format(i))
clf = nn_model(dims)  # project helper
clf.fit_generator(generator=batch_generator(X[i_trn],
y[i_trn],
batch_size,
True),
# NOTE(review): excerpt truncated here — remaining fit_generator arguments
# and the rest of the CV loop are missing.
# --- Fragment 3: LightGBM train/predict with stratified K-fold CV ---
# NOTE(review): scraped excerpt — indentation lost; cut off inside the
# `params` dict, so the training loop is missing.
def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
n_est=100, n_leaf=200, lrate=.1, n_min=8, subcol=.3, subrow=.8,
subrow_freq=100, n_stop=100, retrain=True):
# Model name = test-prediction basename with two extensions stripped.
model_name = os.path.splitext(os.path.splitext(os.path.basename(predict_test_file))[0])[0]
# Per-model DEBUG log file.
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
level=logging.DEBUG,
filename='{}.log'.format(model_name))
logging.info('Loading training and test data...')
X, y = load_data(train_file)  # project helper
X_tst, _ = load_data(test_file)
logging.info('Loading CV Ids')
cv = StratifiedKFold(n_splits=N_FOLD, shuffle=True, random_state=SEED)
# LightGBM hyperparameters, mapped from the function arguments:
# lrate -> learning_rate, n_leaf -> num_leaves, subcol -> feature_fraction,
# subrow/subrow_freq -> bagging, n_min -> min_child_samples.
params = {'random_state': SEED,
'n_jobs': -1,
'objective': 'binary',
'boosting': 'gbdt',
'learning_rate': lrate,
'num_leaves': n_leaf,
'feature_fraction': subcol,
'bagging_fraction': subrow,
'bagging_freq': subrow_freq,
'verbosity': -1,
'min_child_samples': n_min,
# NOTE(review): excerpt truncated here — dict is unclosed and the CV/training
# loop that would use n_est/n_stop/retrain is missing.
# --- Fragment 4: XGBoost regression fold loop (headerless) ---
# NOTE(review): scraped excerpt starting mid-function — the enclosing `def`
# and the definitions of depth/lrate/subrow/subcol/sublev/weight/offset/n_fold
# are not visible; truncated after the early-stopping log line.
# XGBoost parameters ("reg:linear" is the legacy pre-1.0 alias for squared-error
# regression; 'silent' is likewise a legacy option).
params = {'objective': "reg:linear",
'max_depth': depth,
'eta': lrate,
'subsample': subrow,
'colsample_bytree': subcol,
'colsample_bylevel': sublev,
'min_child_weight': weight,
'silent': 1,
'nthread': 10,
'seed': SEED}
logging.info('Loading training and test data...')
X, y = load_data(train_file)  # project helper
y = np.log(y + offset)  # log-transform the target; `offset` presumably guards log(0) — TODO confirm
X_tst, _ = load_data(test_file)
xgtst = xgb.DMatrix(X_tst)  # test matrix built once, reused per fold
logging.info('Loading CV Ids')
# Legacy scikit-learn (<0.18) cross_validation.KFold signature: iterable itself.
cv = KFold(len(y), n_folds=n_fold, shuffle=True, random_state=SEED)
p_val = np.zeros(X.shape[0])  # out-of-fold predictions
p_tst = np.zeros(X_tst.shape[0])  # accumulated test predictions
for i, (i_trn, i_val) in enumerate(cv, 1):  # folds numbered from 1
xgtrn = xgb.DMatrix(X[i_trn], label=y[i_trn])
xgval = xgb.DMatrix(X[i_val], label=y[i_val])
logging.info('Training model #{}'.format(i))
watchlist = [(xgtrn, 'train'), (xgval, 'val')]  # eval sets for xgb.train
# First fold apparently determines the best iteration via early stopping.
if i == 1:
logging.info('Training with early stopping')
# NOTE(review): excerpt truncated here — the xgb.train call and the rest of
# the fold loop are missing.
# --- Fragment 5: second copy of the XGBoost fold-loop excerpt ---
# NOTE(review): the first line below is a stray continuation of a
# logging.basicConfig(...) call whose opening lines were cut off by the scrape.
filename='{}.log'.format(model_name))
# set xgb parameters
# (same legacy "reg:linear" / 'silent' parameter set as the fragment above;
# depth/lrate/subrow/subcol/sublev/weight come from the missing signature.)
params = {'objective': "reg:linear",
'max_depth': depth,
'eta': lrate,
'subsample': subrow,
'colsample_bytree': subcol,
'colsample_bylevel': sublev,
'min_child_weight': weight,
'silent': 1,
'nthread': 10,
'seed': SEED}
logging.info('Loading training and test data...')
X, y = load_data(train_file)  # project helper
y = np.log(y + offset)  # log-transform target
X_tst, _ = load_data(test_file)
xgtst = xgb.DMatrix(X_tst)
logging.info('Loading CV Ids')
# Legacy scikit-learn cross_validation.KFold API (n_folds, iterable directly).
cv = KFold(len(y), n_folds=n_fold, shuffle=True, random_state=SEED)
p_val = np.zeros(X.shape[0])  # out-of-fold predictions
p_tst = np.zeros(X_tst.shape[0])  # test predictions
for i, (i_trn, i_val) in enumerate(cv, 1):
xgtrn = xgb.DMatrix(X[i_trn], label=y[i_trn])
xgval = xgb.DMatrix(X[i_val], label=y[i_val])
logging.info('Training model #{}'.format(i))
watchlist = [(xgtrn, 'train'), (xgval, 'val')]
# NOTE(review): excerpt truncated here — training call and fold bookkeeping missing.
# --- Fragment 6: duplicate of the LightGBM train_predict excerpt ---
# NOTE(review): same scraped function as the earlier LightGBM fragment, this
# time extending through the end of the `params` dict; everything after the
# dict (the CV training loop) is still missing.
def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
n_est=100, n_leaf=200, lrate=.1, n_min=8, subcol=.3, subrow=.8,
subrow_freq=100, n_stop=100, retrain=True):
# Model name = test-prediction basename with two extensions stripped.
model_name = os.path.splitext(os.path.splitext(os.path.basename(predict_test_file))[0])[0]
# Per-model DEBUG log file.
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
level=logging.DEBUG,
filename='{}.log'.format(model_name))
logging.info('Loading training and test data...')
X, y = load_data(train_file)  # project helper
X_tst, _ = load_data(test_file)
logging.info('Loading CV Ids')
cv = StratifiedKFold(n_splits=N_FOLD, shuffle=True, random_state=SEED)  # N_FOLD/SEED from module scope
# LightGBM binary-classification parameters, evaluated by AUC.
params = {'random_state': SEED,
'n_jobs': -1,
'objective': 'binary',
'boosting': 'gbdt',
'learning_rate': lrate,
'num_leaves': n_leaf,
'feature_fraction': subcol,
'bagging_fraction': subrow,
'bagging_freq': subrow_freq,
'verbosity': -1,
'min_child_samples': n_min,
'metric': 'auc'}
# NOTE(review): excerpt ends here — the loop consuming n_est/n_stop/retrain is missing.