statsmodels require Python3.10.* (that is an older version)
from scipy import interpolate
import scipy as sp
import sklearn
import statsmodels as sm
from statsmodels.gam.api import GLMGam, BSplines
from statsmodels.gam.generalized_additive_model import LogitGam
from statsmodels.gam.tests.test_penalized import df_autos
from patsy import dmatrices
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data=pd.read_csv('data/heart_deasease.csv')
data.head(2)
X=data.copy()
X.drop(['chd','row.names','famhist', 'obesity'],axis=1,inplace=True)
y=data.chd
data_heart=pd.read_csv('data/heart_deasease.csv')
data_heart.drop(['row.names','famhist', 'obesity'],axis=1,inplace=True)
data_heart.columns
#'sbp', 'tobacco', 'ldl', 'adiposity', 'typea', 'alcohol', 'age'
x_spline = data_heart[['ldl','age', 'adiposity','typea']]
bs = BSplines(x_spline, df=[5,5,5,5], degree=[3, 3,3,3])
gam_bs = GLMGam.from_formula('chd ~ ldl + age + adiposity + typea', data=data_heart,
smoother=bs)#, alpha=alpha)
res_bs = gam_bs.fit()
#alpha = np.array((10000000.0, 10000000.0, 215.44346900318845, 46415.88833612782))
gam_bs_poiss = GLMGam.from_formula('chd ~ ldl + age + adiposity + typea', data=data_heart,
smoother=bs, #alpha=alpha,
family=sm.genmod.families.family.Poisson())
#statsmodels.gam.generalized_additive_model.LogitGam
res_bs_poiss = gam_bs_poiss.fit()
#print(res_bs_poiss.summary())
#gam_bs.select_penweight()[0]
#gam_bs.select_penweight_kfold()[0]
#res_bs_poiss.__dict__['_results'].__dict__['model'].__dict__.keys()
X
sbp | tobacco | ldl | adiposity | typea | alcohol | age | |
---|---|---|---|---|---|---|---|
0 | 160 | 12.00 | 5.73 | 23.11 | 49 | 97.20 | 52 |
1 | 144 | 0.01 | 4.41 | 28.61 | 55 | 2.06 | 63 |
2 | 118 | 0.08 | 3.48 | 32.28 | 52 | 3.81 | 46 |
3 | 170 | 7.50 | 6.41 | 38.03 | 51 | 24.26 | 58 |
4 | 134 | 13.60 | 3.50 | 27.78 | 60 | 57.34 | 49 |
... | ... | ... | ... | ... | ... | ... | ... |
457 | 214 | 0.40 | 5.98 | 31.72 | 64 | 0.00 | 58 |
458 | 182 | 4.20 | 4.41 | 32.10 | 52 | 18.72 | 52 |
459 | 108 | 3.00 | 1.59 | 15.23 | 40 | 26.64 | 55 |
460 | 118 | 5.40 | 11.61 | 30.79 | 64 | 23.97 | 40 |
461 | 132 | 0.00 | 4.82 | 33.41 | 62 | 0.00 | 46 |
462 rows × 7 columns
alpha = np.array((1.0, 1.0, 1.0))#, 215.4446900318845, 46415.88833612782))
#gam_bs = LogitGam.from_formula('chd ~ ldl + age + adiposity + typea', data=data_heart)#, smoother=bs,alpha=alpha)
y,X=dmatrices('chd ~ ldl + age + adiposity', data=data_heart,return_type='dataframe')
x_spline2 = data_heart[['ldl','age','adiposity']]
bs2 = BSplines(x_spline2, df=[8,8,8], degree=[3, 3,3])
log_m=LogitGam(y, smoother=bs2,alpha=alpha)
#res_lm=log_m.fit()
fit=log_m.fit_regularized()
Optimization terminated successfully (Exit mode 0) Current function value: 0.5426514025933599 Iterations: 173 Function evaluations: 173 Gradient evaluations: 173
#fig, ax_lst = plt.subplots(2, 3) # 3 rows, 1 column; numbered row-wise
for i in range(3):
res_bs.plot_partial(i, cpr=True)
alp=gam_bs_poiss.select_penweight_kfold(alphas=None, cv_iterator=None, cost=None, k_folds=5, k_grid=4)
alp
((46415.88833612782, 10000000.0, 215.44346900318845, 46415.88833612782), <statsmodels.gam.gam_cross_validation.gam_cross_validation.MultivariateGAMCVPath at 0x19ec662f910>)