How to use the statsmodels.api function in statsmodels

To help you get started, we’ve selected a few statsmodels examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github statsmodels / statsmodels / examples / python / glm_weights.py View on Github external
res_o.pearson_chi2 / res_o.df_resid

# ### condensed data (unique observations with frequencies)
#
# Combining identical observations and using frequency weights to take
# into account the multiplicity of observations produces exactly the same
# results. Some result attributes will differ when we want to have
# information about the observation and not about the aggregate of all
# identical observations. For example, residuals do not take
# ``freq_weights`` into account.

# Poisson GLM fitted on the condensed data ``dc``: each row is weighted by
# its ``freq`` column through ``freq_weights``.
glm = smf.glm(
    'affairs ~ rate_marriage + age + yrs_married',
    data=dc,
    family=sm.families.Poisson(),
    freq_weights=np.asarray(dc['freq']))
res_f = glm.fit()
print(res_f.summary())

# Pearson chi-squared statistic divided by the residual degrees of freedom
# (bare expression: only displayed when run interactively / in a notebook).
res_f.pearson_chi2 / res_f.df_resid

# ### condensed using ``var_weights`` instead of ``freq_weights``
#
# Next, we compare ``var_weights`` to ``freq_weights``. It is a common
# practice to incorporate ``var_weights`` when the endogenous variable
# reflects averages and not identical observations.
# I do not see a theoretical reason why it produces the same results (in
# general).
#
# This produces the same results but ``df_resid`` differs from the
# ``freq_weights`` example because ``var_weights`` do not change the number
github photon-team / photon / DemoFiles / BAGDR / param_wise_bruteForce_backup.py View on Github external
covs = data_dict['covs']

    # create PHOTON hyperpipe
    my_pipe, metrics = setup_model()

    # shuffle targets if running a permutation test
    if perm_test == True:
        print('PERMUTATION TEST: SHUFFLING TARGETS NOW!')
        np.random.shuffle(targets)

    # remove confounders from target data (age, gender, site, ICV)
    if covs_out:
        import statsmodels.api as sm
        ols_X = covs
        ols_X = sm.add_constant(ols_X)
        ols_model = sm.OLS(targets, ols_X)
        ols_results = ols_model.fit()
        targets = np.asarray(ols_results.resid)
        print('Removing covariates from targets.')

    # fit PHOTON model
    results = my_pipe.fit(data, targets)
    results_tree = results.result_tree

    # get feature importance
    if getImp:
        importance_scores = get_feature_importance(results=results, feature_names=snp_names, data=data, targets=targets, roiName=roiName)
    else:
        importance_scores = []

    # TEST SET -> Test
    #best_config_performance_test = results_tree.get_best_config_performance_validation_set(outer_cv_fold=1)     # when outer fold is active
github dmnfarrell / pandastable / pandastable / stats.py View on Github external
s = self.table.multiplerowlist
        if len(s) == 0:
            sub = data.index
        else:
            sub = data.index[s]
        self.sub = sub        
        y,X = dmatrices(formula, data=data, return_type='dataframe')
        self.X = X
        self.y = y
        Xf = X.ix[sub]
        yf = y.ix[sub]

        if est == 'ols':
            #model = smf.ols(formula=formula, data=s)
            model = sm.OLS(yf, Xf)
        elif est == 'gls':
            model = sm.GLS(y, X)
        elif est == 'logit':
            model = sm.Logit(y, X)
        return model
github statsmodels / statsmodels / examples / python / wls.py View on Github external
# ## WLS Estimation
# 
# ### Artificial data: Heteroscedasticity 2 groups 
# 
# Model assumptions:
# 
#  * Misspecification: true model is quadratic, estimate only linear
#  * Independent noise/error term
#  * Two groups for error variance, low and high variance groups

# Simulate 50 observations from a quadratic model whose noise standard
# deviation is three times larger in the last 40% of the sample.
nsample = 50
x = np.linspace(0, 20, nsample)
X = np.column_stack((x, (x - 5)**2))
X = sm.add_constant(X)
beta = [5., 0.5, -0.01]
sig = 0.5
# ``w`` scales the error standard deviation: 1 for the low-variance group,
# 3 for the high-variance group.
w = np.ones(nsample)
# Integer (floor) division: a float slice index raises TypeError on Python 3.
w[nsample * 6 // 10:] = 3
y_true = np.dot(X, beta)
e = np.random.normal(size=nsample)
y = y_true + sig * w * e
# Drop the quadratic column so the estimated model is deliberately
# misspecified (linear only).
X = X[:, [0, 1]]


# ### WLS knowing the true variance ratio of heteroscedasticity

# ``w`` is the error standard deviation ratio, while WLS weights must be
# proportional to the inverse of the error *variance*, hence 1 / w**2.
mod_wls = sm.WLS(y, X, weights=1. / (w ** 2))
res_wls = mod_wls.fit()
print(res_wls.summary())
github enigmampc / catalyst / zipline / optimize / example.py View on Github external
@batch_transform
def ols_transform(data, spreads):
    """Regress PEP prices on KO prices and z-score the latest spread.

    Fits ``PEP ~ beta * KO + intercept`` by OLS, computes the most recent
    residual spread, and compares it with the last 10 previously recorded
    spreads.

    Parameters
    ----------
    data : object exposing ``data.price['PEP']`` and ``data.price['KO']``
    spreads : list
        History of earlier spreads. The new spread is appended to it
        (the list is mutated in place).

    Returns
    -------
    float
        The z-score of the new spread relative to the last 10 entries of
        ``spreads``, or ``nan`` while ``spreads`` holds 10 or fewer entries.
    """
    p0 = data.price['PEP']
    # prepend=False pins the constant to the LAST column so that ``params``
    # unpacks as (slope, intercept). The library default prepends the
    # constant, which would silently swap the two values.
    p1 = sm.add_constant(data.price['KO'], prepend=False)
    beta, intercept = sm.OLS(p0, p1).fit().params

    spread = (data.price['PEP'] - (beta * data.price['KO'] + intercept))[-1]

    if len(spreads) > 10:
        recent = spreads[-10:]
        z_score = (spread - np.mean(recent)) / np.std(recent)
    else:
        z_score = np.nan

    # Record the spread only after scoring, so it is not part of its own
    # reference window.
    spreads.append(spread)

    return z_score
github Capnode / Algoloop / Algorithm.Python / PythonPackageTestAlgorithm.py View on Github external
def statsmodels_test():
    """Fit an OLS model to noisy synthetic quadratic data and print the summary."""
    nsample = 100
    coefficients = numpy.array([1, 0.1, 10])

    # Regressors: x and x**2 on an evenly spaced grid over [0, 10].
    grid = numpy.linspace(0, 10, nsample)
    regressors = numpy.column_stack((grid, grid**2))
    noise = numpy.random.normal(size=nsample)

    design = sm.add_constant(regressors)
    response = numpy.dot(design, coefficients) + noise

    fitted = sm.OLS(response, design).fit()
    print("statsmodels tests >>>", fitted.summary())
github dupadhyaya / sipPython / cases2 / mtcars_case_blank1.py View on Github external
#Case Study on mtcars dataset in Python	download data

#Download data
import statsmodels.api as sm
#https://vincentarelbundock.github.io/Rdatasets/datasets.html
# Fetch the 'mtcars' dataset from the Rdatasets collection and keep its
# data table under the name ``mtcars``.
dataset_mtcars = sm.datasets.get_rdataset(
    dataname='mtcars',
    package='datasets',
)
mtcars = dataset_mtcars.data
# Peek at the first rows (only displayed when run interactively).
mtcars.head()
#structure
github statsmodels / statsmodels / tools / validate_docstrings.py View on Github external
def examples_errors(self):
        """Run the doctests found in ``self.raw_doc`` and return the runner's output.

        The examples execute with ``np``, ``pd`` and ``sm`` predefined in their
        globals. Whatever the doctest runner writes for each test is captured
        and concatenated into a single string.
        """
        option_flags = (doctest.NORMALIZE_WHITESPACE
                        | doctest.IGNORE_EXCEPTION_DETAIL)
        namespace = {'np': numpy, 'pd': pandas, 'sm': statsmodels.api}
        doctest_runner = doctest.DocTestRunner(optionflags=option_flags)
        tests = doctest.DocTestFinder().find(
            self.raw_doc, self.name, globs=namespace)

        captured = []
        for case in tests:
            # One fresh buffer per test; the runner writes through ``out``.
            sink = StringIO()
            doctest_runner.run(case, out=sink.write)
            captured.append(sink.getvalue())
        return ''.join(captured)
github msproteomicstools / msproteomicstools / msproteomicstoolslib / math / Smoothing.py View on Github external
def _initialize(self, data1, data2):
        try:
            import statsmodels.api as sm
            lowess = sm.nonparametric.lowess
        except ImportError:
            print("===================================")
            print("Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'")
            print("===================================")

        # NOTE: delta parameter is only available from statsmodels > 0.5.0
        delta = (max(data1) - min(data1)) * 0.01
        frac = 0.1
        
        if len(data1) < 100:
            frac = 1.0

        k = 0
        while k <= 10:
            k += 1
            # Input data is y/x -> needs switch