How to use the statsmodels.api.add_constant function in statsmodels

To help you get started, we've selected a few statsmodels.api.add_constant examples based on popular ways the function is used in public projects. add_constant adds a column of ones to an array or DataFrame so that a subsequently fitted model (e.g. OLS) estimates an intercept term.
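Before the project snippets, here is a minimal, self-contained sketch of the typical pattern (the data and variable names are illustrative):

import numpy as np
import statsmodels.api as sm

# Illustrative data: y depends linearly on x, plus noise.
rng = np.random.default_rng(0)
x = rng.normal(size=100)
y = 2.0 + 3.0 * x + rng.normal(scale=0.5, size=100)

# add_constant prepends a column of ones, so OLS fits an intercept;
# without it the regression line is forced through the origin.
X = sm.add_constant(x)
results = sm.OLS(y, X).fit()
print(results.params)    # [intercept, slope], roughly [2.0, 3.0]
print(results.summary())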

github Shen-Lab / DeepAffinity / baseline_models / RNN_features / baseline_ridge / baseline.py View on Github external
print(results.summary())


print("kinase error:")
y_pred_kinase = clf.predict(feature_kinase)
print(mean_squared_error(label_kinase,y_pred_kinase))

results = sm.OLS(y_pred_kinase,sm.add_constant(label_kinase)).fit()
print(results.summary())


print("GPCR error:")
y_pred_GPCR = clf.predict(feature_GPCR)
print(mean_squared_error(label_GPCR,y_pred_GPCR))

results = sm.OLS(y_pred_GPCR,sm.add_constant(label_GPCR)).fit()
print(results.summary())

print("channel error:")
y_pred_channel = clf.predict(feature_channel)
print(mean_squared_error(label_channel,y_pred_channel))

results = sm.OLS(y_pred_channel,sm.add_constant(label_channel)).fit()
print(results.summary())


#########  Saving model
ridge_pkl_filename = 'ridge_20182101.pkl'
ridge_model_pkl = open(ridge_pkl_filename, 'wb')
pickle.dump(clf,ridge_model_pkl)
ridge_model_pkl.close()
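For completeness, the pickled ridge model can be restored later with pickle.load; a minimal sketch reusing the filename written above:

import pickle

# Reload the saved ridge model and reuse it for prediction.
with open('ridge_20182101.pkl', 'rb') as ridge_model_pkl:
    clf = pickle.load(ridge_model_pkl)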
github Shen-Lab / DeepAffinity / Joint_models / marginalized_attention / joint_warm_start / joint-Model.py View on Github external
for i in range(num_bins):
        if i==0:
          y_pred = model.predict([channel_protein[0:size],channel_compound[0:size]])
        elif i < num_bins-1:
          temp = model.predict([channel_protein[(i*size):((i+1)*size)],channel_compound[(i*size):((i+1)*size)]])
          y_pred = np.concatenate((y_pred,temp), axis=0)
        else:
          temp = model.predict([channel_protein[length_channel-size:length_channel],channel_compound[length_channel-size:length_channel]])
          y_pred = np.concatenate((y_pred,temp[size-length_channel+(i*size):size]), axis=0)

er=0
for i in range(length_channel):
  er += (y_pred[i]-channel_IC50[i])**2

mse = er/length_channel
print(mse)

results = sm.OLS(y_pred,sm.add_constant(channel_IC50)).fit()
print(results.summary())
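Each of these blocks regresses predictions on true labels: sm.OLS(y_pred, sm.add_constant(channel_IC50)) fits y_pred ≈ a + b·IC50, so the slope, intercept, and R² in the summary describe how well the predictions track the labels. The explicit loop above is just an element-wise MSE; a vectorized equivalent and the key fit statistics (a sketch, assuming the arrays flatten to 1-D):

mse = np.mean((np.ravel(y_pred) - np.ravel(channel_IC50)) ** 2)  # same as the loop above
intercept, slope = results.params   # a and b in y_pred ≈ a + b * IC50
print('R^2:', results.rsquared)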



print("error on train")
size = 64
length_train = len(train_protein)
print(length_train)
num_bins = math.ceil(length_train/size)
for i in range(num_bins):
        if i==0:
          y_pred = model.predict([train_protein[0:size],train_compound[0:size]])
        elif i < num_bins-1:
          temp = model.predict([train_protein[(i*size):((i+1)*size)],train_compound[(i*size):((i+1)*size)]])
          y_pred = np.concatenate((y_pred,temp), axis=0)
        else:
          # final partial bin, mirroring the channel loop above
          temp = model.predict([train_protein[length_train-size:length_train],train_compound[length_train-size:length_train]])
          y_pred = np.concatenate((y_pred,temp[size-length_train+(i*size):size]), axis=0)
github Shen-Lab / DeepAffinity / baseline_models / RNN_features / baseline_ridge / baseline.py View on Github external
results = sm.OLS(y_pred_train,sm.add_constant(label_train)).fit()
print(results.summary())

print("test error:")
y_pred_test = clf.predict(feature_test)
print(mean_squared_error(label_test,y_pred_test))

results = sm.OLS(y_pred_test,sm.add_constant(label_test)).fit()
print(results.summary())

print("ER error:")
y_pred_ER = clf.predict(feature_ER)
print(mean_squared_error(label_ER,y_pred_ER))

results = sm.OLS(y_pred_ER,sm.add_constant(label_ER)).fit()
print(results.summary())


print("kinase error:")
y_pred_kinase = clf.predict(feature_kinase)
print(mean_squared_error(label_kinase,y_pred_kinase))

results = sm.OLS(y_pred_kinase,sm.add_constant(label_kinase)).fit()
print(results.summary())


print("GPCR error:")
y_pred_GPCR = clf.predict(feature_GPCR)
print(mean_squared_error(label_GPCR,y_pred_GPCR))

results = sm.OLS(y_pred_GPCR,sm.add_constant(label_GPCR)).fit()
github cutright / DVH-Analytics-Bokeh / dvh / modules / main / regression.py View on Github external
self.update_residual_y_axis_label()

            included_vars = [key for key in list(self.correlation.data['1']) if self.multi_var_reg_vars[key]]
            included_vars.sort()

            for n in GROUP_LABELS:
                if self.time_series.current_dvh_group[n]:
                    x = []
                    x_count = len(self.correlation.data[n][list(self.correlation.data[n])[0]]['data'])
                    for i in range(x_count):
                        current_x = []
                        for k in included_vars:
                            current_x.append(self.correlation.data[n][k]['data'][i])
                        x.append(current_x)
                    x = sm.add_constant(x)  # explicitly add constant to calculate intercept
                    y = self.correlation.data[n][self.y.value]['data']

                    fit = sm.OLS(y, x).fit()

                    coeff = fit.params
                    coeff_p = fit.pvalues
                    r_sq = fit.rsquared
                    model_p = fit.f_pvalue

                    coeff_str = ["%0.3E" % i for i in coeff]
                    coeff_p_str = ["%0.3f" % i for i in coeff_p]
                    r_sq_str = ["%0.3f" % r_sq]
                    model_p_str = ["%0.3f" % model_p]

                    getattr(self.sources, 'multi_var_coeff_results_%s' % n).data = {'var_name': ['Constant'] + included_vars,
                                                                                    'coeff': coeff.tolist(),
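Since add_constant is called with the default prepend=True, the first entry of fit.params is the intercept, which is why the variable-name list above starts with 'Constant'. A toy check of that alignment, with illustrative data:

import statsmodels.api as sm

x = [[1.0, 2.0], [2.0, 3.5], [3.0, 5.1], [4.0, 6.8]]
y = [1.0, 2.1, 2.9, 4.2]
fit = sm.OLS(y, sm.add_constant(x)).fit()
print(fit.params)   # intercept first ('Constant'), then one coefficient per column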
github williamleif / socialsent / socialsent / historical / explore_polarities.py View on Github external
def trend_estimate(y):
    X = np.arange(len(y))
    X = sm.add_constant(X)
    mod = sm.OLS(y, X)
    res = mod.fit()
    return res
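A hypothetical call, fitting a linear trend to a short series and reading off the estimate:

res = trend_estimate([1.0, 2.1, 2.9, 4.2, 5.1])
print(res.params)   # [intercept, slope]; the slope is the estimated trend per step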
github MacroConnections / DIVE-backend / dive / worker / statistics / fit.py View on Github external
def reg_m(y, x, estimator, weights=None):
    ones = np.ones(len(x[0]))
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for ele in x[1:]:
        X = sm.add_constant(np.column_stack((ele, X)))

    if estimator=='ols':
        return sm.OLS(y, X).fit()

    elif estimator=='wls':
        return sm.WLS(y, X, weights).fit()

    elif estimator=='gls':
        return sm.GLS(y, X).fit()

    return None
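Note that reg_m stacks a column of ones into X by hand and then calls sm.add_constant on every subsequent stack; with the default has_constant='skip', add_constant leaves a matrix unchanged when it already contains a constant column, so no duplicate is added. A small sketch of the three has_constant modes:

import numpy as np
import statsmodels.api as sm

X = np.column_stack((np.arange(5.0), np.ones(5)))   # already has a constant column
sm.add_constant(X)                        # default 'skip': returned unchanged
sm.add_constant(X, has_constant='add')    # adds another ones column anyway
# sm.add_constant(X, has_constant='raise')  # raises ValueError instead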
github statsmodels / statsmodels / examples / incomplete / glsar.py View on Github external
model0if = GLSAR(Y, X, 2)
res = model0if.iterative_fit(6)
print('iterativefit beta', res.params)
results.tvalues   # XXX is this correct? it does equal params/bse
# but isn't the same as the AR example (which was wrong in the first place..)
print(results.t_test([0, 1]))  # are sd and t correct? vs
print(results.f_test(np.eye(2)))


rhotrue = np.array([0.5, 0.2])
nlags = np.size(rhotrue)
beta = np.array([0.1, 2])
noiseratio = 0.5
nsample = 2000
x = np.arange(nsample)
X1 = sm.add_constant(x, prepend=False)

wnoise = noiseratio * np.random.randn(nsample + nlags)
#.. noise = noise[1:] + rhotrue*noise[:-1] # wrong this is not AR

#.. find my drafts for univariate ARMA functions
# generate AR(p)
if np.size(rhotrue) == 1:
    # replace with scipy.signal.lfilter, keep for testing
    arnoise = np.zeros(nsample + 1)
    for i in range(1, nsample + 1):
        arnoise[i] = rhotrue * arnoise[i - 1] + wnoise[i]
    noise = arnoise[1:]
    an = signal.lfilter([1], np.hstack((1, -rhotrue)), wnoise[1:])
    print('simulate AR(1) difference', np.max(np.abs(noise - an)))
else:
    noise = signal.lfilter([1], np.hstack((1, -rhotrue)), wnoise)[nlags:]
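The snippet stops after simulating the AR noise; presumably the response is then built from the design and the noise and handed to GLSAR. A sketch of that step, under those assumptions:

# Assumed continuation: response = linear signal + AR(2) noise,
# then GLSAR with integer rho iteratively re-estimates the AR coefficients.
y = np.dot(X1, beta) + noise
mod = GLSAR(y, X1, rho=nlags)
res = mod.iterative_fit(maxiter=6)
print('estimated rho ', mod.rho, ' true rho ', rhotrue)
print('estimated beta', res.params, ' true beta', beta)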
github dssg / bikeshare / data / ipython_explore / GLM_Model.py View on Github external
bikes_slots_available = np.asarray(list(zip(bikes_available, slots_available)))  # list() needed under Python 3

# Creating Lags of Bike and Slot Variables
bikes_available_lag0 = bikes_available[1:]
bikes_available_lag1 = bikes_available[0:len(bikes_available)-1]
slots_available_lag1 = slots_available[0:len(slots_available)-1]
bikes_slots_available = bikes_slots_available[1:]

# Calculate the lag-1 log-odds ratio
phat_lag1 = (bikes_available_lag1) / (bikes_available_lag1+slots_available_lag1)

logodds_lag1 = np.log( phat_lag1 / (1-phat_lag1) )


# Add Constant to Exogenous Variables
logodds_lag1 = sm.add_constant(logodds_lag1, prepend=False)


# Fit Binomial Regression.  Coefficients constant in time

glm_binom = sm.GLM(bikes_slots_available, logodds_lag1, family=sm.families.Binomial())

res = glm_binom.fit()

print(res.summary())
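Worth noting: with family=sm.families.Binomial(), statsmodels accepts a two-column endog of (success, failure) counts, which is exactly what bikes_slots_available supplies (bikes available as successes, empty slots as failures). A minimal standalone sketch with made-up counts:

import numpy as np
import statsmodels.api as sm

counts = np.array([[8, 2], [5, 5], [1, 9]])               # (successes, failures)
exog = sm.add_constant(np.array([0.0, 0.5, 1.0]), prepend=False)
print(sm.GLM(counts, exog, family=sm.families.Binomial()).fit().params)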


# This model includes month effects and one previous time point
##### previous time point is very significant predictor; months are not

import datetime
github statsmodels / statsmodels / statsmodels / examples / ex_sandwich2.py View on Github external
urlretrieve('http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta', 'srs.dta')
print('downloading file')
srs = dta.genfromdta("srs.dta")
#    from statsmodels.datasets import webuse
#    srs = webuse('srs', 'http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/')
#    #does currently not cache file

y = srs['api00']
# older numpy doesn't reorder
#x = srs[['growth', 'emer', 'yr_rnd']].view(float).reshape(len(y), -1)
#force sequence
x = np.column_stack([srs[ii] for ii in ['growth', 'emer', 'yr_rnd']])
group = srs['dnum']

#xx = sm.add_constant(x, prepend=True)
xx = sm.add_constant(x, prepend=False) #const at end for Stata compatibility

#remove nan observation
mask = (xx!=-999.0).all(1)   #nan code in dta file
mask.shape
y = y[mask]
xx = xx[mask]
group = group[mask]

#run OLS

res_srs = sm.OLS(y, xx).fit()
print('params    ', res_srs.params)
print('bse_OLS   ', res_srs.bse)

#get cluster robust standard errors and compare with STATA
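One way to get those cluster-robust standard errors directly in statsmodels is the fit-time cov_type option, clustering on the district identifier (a sketch):

# Cluster-robust covariance, clustered on `group`.
res_clu = sm.OLS(y, xx).fit(cov_type='cluster', cov_kwds={'groups': group})
print('bse_cluster', res_clu.bse)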
github trislett / TFCE_mediation / tfce_mediation / misc_scripts / tm_massunivariatemodels.py View on Github external
			if equal_lengths(len_list): # nested blocks (add specified models!)
				mixed_blocks = np.random.permutation(np.random.permutation(len_list))
				rotate_groups = []
				for m in mixed_blocks:
					rotate_groups.append(index_groups[uniq_groups == unique_blocks[m]])
				index_groups = np.array(rotate_groups).flatten()
		else:
			index_groups = np.random.permutation(list(range(n)))

		if medtype == 'I':
			EXOG_A = sm.add_constant(np.column_stack((leftvar, strip_ones(exog_vars))))

			EXOG_A = EXOG_A[index_groups]

			EXOG_B = np.column_stack((leftvar, rightvar))
			EXOG_B = sm.add_constant(np.column_stack((EXOG_B, strip_ones(exog_vars))))

			#pathA
			t_valuesA = full_glm_results(endog_arr, EXOG_A, only_tvals=True)[1,:]
			#pathB
			t_valuesB = full_glm_results(endog_arr, EXOG_B, only_tvals=True)[1,:]

		elif medtype == 'M':
			EXOG_A = sm.add_constant(np.column_stack((leftvar, strip_ones(exog_vars))))

			EXOG_A = EXOG_A[index_groups]

			EXOG_B = np.column_stack((rightvar, leftvar))
			EXOG_B = sm.add_constant(np.column_stack((EXOG_B, strip_ones(exog_vars))))

			#pathA
			t_valuesA = full_glm_results(endog_arr, EXOG_A, only_tvals=True)[1,:]