How to use the statsmodels.api.datasets module in statsmodels

To help you get started, we’ve selected a few statsmodels examples based on popular ways the library is used in public projects.

github statsmodels / statsmodels / examples / python / glm.py
# ## GLM: Gamma for proportional count response
#
# ### Load data
#
#  In the example above, we printed the ``NOTE`` attribute to learn about
#  the Star98 dataset. statsmodels datasets ship with other useful
#  information. For example:

print(sm.datasets.scotland.DESCRLONG)

#  Load the data and add a constant to the exogenous variables:

data2 = sm.datasets.scotland.load()
data2.exog = sm.add_constant(data2.exog, prepend=False)
print(data2.exog[:5, :])
print(data2.endog[:5])

# ### Fit and summary

glm_gamma = sm.GLM(data2.endog, data2.exog, family=sm.families.Gamma())
glm_results = glm_gamma.fit()
print(glm_results.summary())
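
# The results object exposes the usual post-estimation quantities; a short
# sketch (not part of the original snippet) showing the coefficient
# estimates and the first few fitted values:
print(glm_results.params)
print(glm_results.fittedvalues[:5])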

# ## GLM: Gaussian distribution with a noncanonical link
#
# ### Artificial data

nobs2 = 100
x = np.arange(nobs2)
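
#  The excerpt is truncated here; a minimal completion of the
#  artificial-data example, assuming the current ``links.Log`` spelling
#  for the log link:

X = np.column_stack((x, x**2))
X = sm.add_constant(X, prepend=False)
lny = np.exp(-(0.03 * x + 0.0001 * x**2 - 1.0)) + 0.001 * np.random.rand(nobs2)

gauss_log = sm.GLM(lny, X,
                   family=sm.families.Gaussian(sm.families.links.Log()))
gauss_log_results = gauss_log.fit()
print(gauss_log_results.summary())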
github jseabold / statsmodels-tutorial / generic_mle.py
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

print(sm.datasets.spector.NOTE)

data = sm.datasets.spector.load_pandas()
exog = sm.add_constant(data.exog, prepend=True)
endog = data.endog

sm_probit = sm.Probit(endog, exog).fit()

# * To create your own likelihood model, you just need to override the
# ``loglike`` method.

class MyProbit(GenericLikelihoodModel):
    def loglike(self, params):
        exog = self.exog
        endog = self.endog
        # probit log-likelihood, with signs q = +/-1 taken from endog
        q = 2 * endog - 1
        return stats.norm.logcdf(q * np.dot(exog, params)).sum()
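
# A quick usage sketch (not from the original snippet): fit MyProbit and
# compare with the built-in Probit estimates; the optimizer's default
# start_params may need adjusting.
my_probit_results = MyProbit(endog, exog).fit()
print(my_probit_results.params)
print(sm_probit.params)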
github statsmodels / statsmodels / examples / python / statespace_varmax.py
#
# This is a brief introduction notebook to VARMAX models in statsmodels.
# The VARMAX model is generically specified as:
# $$
# y_t = \nu + A_1 y_{t-1} + \dots + A_p y_{t-p} + B x_t + \epsilon_t +
# M_1 \epsilon_{t-1} + \dots + M_q \epsilon_{t-q}
# $$
#
# where $y_t$ is a $k_{\text{endog}} \times 1$ vector.

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

dta = sm.datasets.webuse('lutkepohl2', 'https://www.stata-press.com/data/r12/')
dta.index = dta.qtr
endog = dta.loc['1960-04-01':'1978-10-01',
                ['dln_inv', 'dln_inc', 'dln_consump']]

# ## Model specification
#
# The `VARMAX` class in statsmodels allows estimation of VAR, VMA, and
# VARMA models (through the `order` argument), optionally with a constant
# term (via the `trend` argument). Exogenous regressors may also be included
# (as usual in statsmodels, by the `exog` argument), and in this way a time
# trend may be added. Finally, the class allows measurement error (via the
# `measurement_error` argument) and allows specifying either a diagonal or
# unstructured innovation covariance matrix (via the `error_cov_type`
# argument).

# ## Example 1: VAR
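
# The excerpt ends here; a minimal sketch of Example 1, fitting a VAR(2)
# with a constant on two of the series and the third as an exogenous
# regressor (the variable split is an assumption, not shown in the excerpt):
exog = endog['dln_consump']
mod = sm.tsa.VARMAX(endog[['dln_inv', 'dln_inc']], order=(2, 0),
                    trend='c', exog=exog)
res = mod.fit(maxiter=1000, disp=False)
print(res.summary())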
github statsmodels / statsmodels / docs / source / plots / graphics_functional_hdrboxplot.py
# coding: utf-8

#Load the El Nino dataset. It consists of 60 years' worth of Pacific Ocean
#sea surface temperature data.

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
data = sm.datasets.elnino.load(as_pandas=False)

#Create an HDR functional boxplot. We see that the years 1982-83 and 1997-98
#are outliers; these are the years when El Nino (a climate pattern
#characterized by warming of the sea surface and higher air pressures)
#occurred with unusual intensity.

fig = plt.figure()
ax = fig.add_subplot(111)
fig, res = sm.graphics.hdrboxplot(data.raw_data[:, 1:],
                                  labels=data.raw_data[:, 0].astype(int),
                                  ax=ax)

ax.plot([0, 10], [25, 25])  # horizontal reference line at 25 C
ax.set_xlabel("Month of the year")
ax.set_ylabel("Sea surface temperature (C)")
ax.set_xticks(np.arange(13, step=3) - 1)
github nyoka-pmml / nyoka / examples / statsmodels / exponential_smoothing / stats_models / docs / source / plots / load_macrodata.py
import statsmodels.api as sm
import pandas as pd
dta = sm.datasets.macrodata.load_pandas().data
dates = sm.tsa.datetools.dates_from_range('1959Q1', '2009Q3')
index = pd.DatetimeIndex(dates)
dta.set_index(index, inplace=True)
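
# A similar quarterly index can be built with pandas alone (a sketch, not
# from the original file; whether to stamp quarter starts or ends should be
# checked against `dates`):
alt_index = pd.period_range('1959Q1', '2009Q3', freq='Q').to_timestamp(how='end')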
github jseabold / statsmodels-tutorial / discrete_choice.py
# A 1974 *Redbook* survey asked women about extramarital affairs.

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import logit, probit, poisson, ols

print(sm.datasets.fair.SOURCE)

print(sm.datasets.fair.NOTE)

dta = sm.datasets.fair.load_pandas().data

dta['affair'] = (dta['affairs'] > 0).astype(float)
print(dta.head(10))

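# The tutorial excerpt is truncated here; a minimal sketch of fitting a
# logit on the binary `affair` indicator (the regressor choice is
# illustrative, not from the original file):
affair_mod = logit('affair ~ age + yrs_married + educ + religious',
                   data=dta).fit()
print(affair_mod.summary())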
github statsmodels / statsmodels / examples / python / recursive_ls.py
# The `RecursiveLS` model allows imposing linear restrictions on
# the parameter vectors, and can be constructed using the formula interface.

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from pandas_datareader.data import DataReader

np.set_printoptions(suppress=True)

# ## Example 1: Copper
#
# We first consider parameter stability in the copper dataset (description
# below).

print(sm.datasets.copper.DESCRLONG)

dta = sm.datasets.copper.load_pandas().data
dta.index = pd.date_range('1951-01-01', '1975-01-01', freq='AS')
endog = dta['WORLDCONSUMPTION']

# To the regressors in the dataset, we add a column of ones for an
# intercept
exog = sm.add_constant(
    dta[['COPPERPRICE', 'INCOMEINDEX', 'ALUMPRICE', 'INVENTORYINDEX']])

# First, construct and fit the model, and print a summary. The
# `RecursiveLS` model computes the regression parameters recursively, so
# there are as many estimates as there are datapoints; the summary table
# only presents the regression parameters estimated on the entire sample.
# Except for small effects from initialization of the recursions, these
# estimates are equivalent to OLS estimates.
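
# The excerpt stops before the model code; a minimal sketch:
mod = sm.RecursiveLS(endog, exog)
res = mod.fit()
print(res.summary())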
github DUanalytics / pyAnalytics / 42-dataIE / data-pd_csv_excel.py
# Python: write data to file (CSV, Excel, Google Sheets)

import statsmodels.api as sm
iris = sm.datasets.get_rdataset(dataname='iris', package='datasets')
iris.data
iris.data.head()
df1 = iris.data
type(df1)
#https://vincentarelbundock.github.io/Rdatasets/datasets.html
mtcars = sm.datasets.get_rdataset(dataname='mtcars', package='datasets')
mtcars.data
mtcars.data.head()
df2 = mtcars.data
type(df2)

# Export to Excel
import pandas as pd
# Check the cwd; files below are saved there unless a full path is given.
df2.to_excel('exceloutput.xlsx')
# Saving fails if the target file is open in Excel.
df1.to_excel('exceloutput.xlsx', 'iris')  # second argument is the sheet name
df2.to_excel('exceloutput.xlsx', engine='xlsxwriter')
df1.to_excel("E:/pywork/pydata/exceloutput2.xlsx", 'iris')
# Note the direction of '/' and check that the folders exist.
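
# Each call above overwrites the whole file; to keep both frames in one
# workbook, write them through a shared ExcelWriter (a sketch; the file
# name is arbitrary):
with pd.ExcelWriter('datasets.xlsx') as writer:
    df1.to_excel(writer, sheet_name='iris')
    df2.to_excel(writer, sheet_name='mtcars')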
github blaze / blaze / samples / ooc-groupby.py
def statsmodel_stream(stream):
    import csv
    import statsmodels.api as sm
    from dynd import ndt  # in the original file these come from module scope
    data = getattr(sm.datasets, stream)
    f = open(data.PATH, 'rb')
    if stream == 'randhie':
        # For a description of this dataset, see:
        # http://statsmodels.sourceforge.net/devel/datasets/generated/randhie.html
        f.readline()   # read out the headers line
        dtypes = ('{mdvis: string, lncoins: float32, idp: int32,'
                  ' lpi: float32, fmde: float32, physlm: float32,'
                  ' disea: float32, hlthg: int32, hlthf: int32,'
                  ' hlthp: int32}')
    else:
        raise NotImplementedError(
            "Importing this dataset has not been implemented yet")

    sreader = csv.reader(f)
    dtype = ndt.type(dtypes)
    return sreader, dtype
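
# Hypothetical usage sketch (names are assumptions, not from the file):
reader, dtype = statsmodel_stream('randhie')
print(dtype)
print(next(reader))  # first data row after the skipped header line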
github statsmodels / statsmodels / statsmodels / sandbox / tsa / examples / example_var.py
"""
Look at some macro plots, then do some VARs and IRFs.
"""

import numpy as np
import statsmodels.api as sm
import scikits.timeseries as ts  # legacy, pre-pandas timeseries package
import scikits.timeseries.lib.plotlib as tplt

data = sm.datasets.macrodata.load(as_pandas=False)
data = data.data


### Create Timeseries Representations of a few vars

dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1),
    end_date=ts.Date('Q', year=2009, quarter=3))

ts_data = data[['realgdp', 'realcons', 'cpi']].view(float).reshape(-1, 3)
# Append civilian employment: (1 - unemployment rate) * population.
ts_data = np.column_stack((ts_data, (1 - data['unemp'] / 100) * data['pop']))
ts_series = ts.time_series(ts_data, dates)


fig = tplt.tsfigure()
fsp = fig.add_tsplot(221)
fsp.tsplot(ts_series[:,0],'-')