from scipy import interpolate
import scipy as sp
import sklearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter("ignore")
Heart disease
Vowel
Both datasets are from ESLII: https://hastie.su.domains/ElemStatLearn/
data_heart = pd.read_csv('data/heart_deasease.csv')  # South African heart disease data (ESLII)
X_heart = data_heart.copy()
# Drop the target, the row index, and two features not used here.
X_heart.drop(['chd', 'row.names', 'famhist', 'obesity'], axis=1, inplace=True)
y_heart = data_heart.chd
data_vowel = pd.read_csv('data/vowel.csv')  # vowel recognition data (ESLII)
X_vowel = data_vowel.copy()
X_vowel.drop(['row.names', 'y'], axis=1, inplace=True)
y_vowel = data_vowel.y
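A quick sanity check on shapes and class balance (the numbers in the comments assume the ESLII versions of the files):
# X_heart should have 7 features after the drops; the ESLII heart file has 462 rows.
print(X_heart.shape, X_vowel.shape)
# chd prevalence is roughly 0.35, so a majority-class baseline scores roughly 0.65.
print(y_heart.value_counts(normalize=True))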
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(penalty=None)  # unpenalized fit; the string 'none' was removed in scikit-learn 1.4
columns=['sbp']
X=X_heart[columns]
y=y_heart
X_predict=X_heart[columns]
lr.fit(X,y)
plt.scatter(X_heart[columns], lr.predict_proba(X_predict)[:, 1])
plt.title('P(chd | {})'.format(columns))
plt.ylabel('Predicted Probability')
plt.xlabel(columns[0])  # a list would render as "['sbp']", so label with the column name
lr.score(X_predict, y)
0.6666666666666666
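That 0.667 barely beats guessing the majority class. A minimal baseline check (assuming the ESLII file, where chd prevalence is about 0.35):
from sklearn.dummy import DummyClassifier
# Majority-class baseline: accuracy equals the frequency of the most common class.
dummy = DummyClassifier(strategy='most_frequent').fit(X, y)
dummy.score(X, y)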
X = X_heart  # now use all seven remaining features
y = y_heart
X_predict = X_heart
lr.fit(X, y)
lr.predict_proba(X_predict)
lr.score(X_predict, y)
0.7164502164502164
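Both scores above are computed on the training data, so they are optimistic. A hedged check with 5-fold cross-validation (a sketch; max_iter is raised only to avoid convergence warnings on the unscaled features):
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(LogisticRegression(penalty=None, max_iter=1000),
                            X_heart, y_heart, cv=5)
print(cv_scores.mean(), cv_scores.std())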
from sklearn.inspection import PartialDependenceDisplay
features = [0, 2, (0, 2)]  # sbp, ldl, and their joint partial dependence
PartialDependenceDisplay.from_estimator(lr, X, features)
<sklearn.inspection._plot.partial_dependence.PartialDependenceDisplay at 0x2539e29d3f0>
lr_full = LogisticRegression(penalty='l2')  # L2 (ridge) penalty, the scikit-learn default
lr_full.fit(X, y)
coef = pd.Series(lr_full.coef_[0], index=lr_full.feature_names_in_)
coef
sbp          0.005275
tobacco      0.073312
ldl          0.190721
adiposity   -0.011366
typea        0.038391
alcohol      0.001693
age          0.055438
dtype: float64
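These coefficients are on the log-odds scale; exponentiating turns them into odds ratios per unit increase of each feature:
# e.g. exp(0.0733) ~ 1.08: each additional unit of tobacco multiplies the odds of chd by about 1.08
np.exp(coef)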
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X_vowel)
X_scaled = scaler.transform(X_vowel)
lr_vowel=LogisticRegression(penalty='l2')
lr_vowel.fit(X_scaled, y_vowel)
pipe = make_pipeline(StandardScaler(), LogisticRegression())
pipe.fit(X_vowel, y_vowel);
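Because the pipeline scales inside fit, resampling procedures never leak test-fold statistics into the scaler. A minimal sketch of leakage-free scoring:
from sklearn.model_selection import cross_val_score
# Each CV fold refits the scaler on its own training split only.
cross_val_score(make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000)),
                X_vowel, y_vowel, cv=5).mean()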
from mlxtend.plotting import plot_decision_regions
# Hold features 2..9 at their scaled means (~0) so the 2-D region plot varies only features 0 and 1.
fill = dict(pd.Series(X_scaled.mean(axis=0)[2:], index=range(2, 10)))
plot_decision_regions(X_scaled, y_vowel.values,
clf=lr_vowel, filler_feature_values=fill,legend=2)
<AxesSubplot:>
X_scaled.mean(axis=0)[2:]
array([ 0.00000000e+00, 5.38289951e-17, -2.69144976e-17, 2.69144976e-17, -1.34572488e-17, -8.07434927e-17, 0.00000000e+00, -2.69144976e-17])
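The scaled columns have mean zero up to floating-point error; the standard deviations should likewise all be 1:
X_scaled.std(axis=0)  # expected: an array of ones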
from sklearn.neighbors import KNeighborsClassifier
from IPython.display import display, HTML
clf = KNeighborsClassifier(n_neighbors=15)
clf.fit(X_scaled, y_vowel)  # fit on the scaled features, matching the plotted data
plot_decision_regions(X_scaled, y_vowel.values,
clf=clf, filler_feature_values=fill,legend=2)
plt.title('Train data kNN with K={}'.format(clf.n_neighbors))
Text(0.5, 1.0, 'Train data kNN with K=15')
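Training accuracy for a few values of K shows how the fit loosens as K grows (a sketch on training data only; small K overfits):
for k in (1, 5, 15, 45):
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_scaled, y_vowel)
    print(k, knn.score(X_scaled, y_vowel))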
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# LinearDiscriminantAnalysis(solver='svd', shrinkage=None, priors=None, n_components=None, store_covariance=False, tol=0.0001)
lda=LinearDiscriminantAnalysis()
lda.fit(X_scaled, y_vowel)  # fit on the scaled features, matching the plotted data
plot_decision_regions(X_scaled, y_vowel.values,
clf=lda, filler_feature_values=fill,legend=2)
plt.title('Linear Discriminant Analysis');
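LDA also provides a supervised projection; plotting the first two discriminant coordinates gives the classic ESLII vowel picture (a sketch, reusing the lda fitted on X_scaled above):
X_lda = lda.transform(X_scaled)
plt.scatter(X_lda[:, 0], X_lda[:, 1], c=y_vowel, cmap='tab20', s=10)
plt.xlabel('Coordinate 1')
plt.ylabel('Coordinate 2')
plt.title('Vowel data in the first two canonical coordinates');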
def f(x):
    x_points = [ 0,  1,  2,  3,  4,  5]
    y_points = [12, 14, 22, 39, 27, 15]
    # make_interp_spline returns a BSpline; evaluate it by calling it directly.
    spl = interpolate.make_interp_spline(x_points, y_points, k=3)
    return spl(x)
print(f(1.25))
14.718750000000004
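Evaluating f on a dense grid visualizes the interpolating spline against its knots (a sketch; f is vectorized because BSpline accepts array input):
xs = np.linspace(0, 5, 200)
plt.plot(xs, f(xs), label='cubic spline')
plt.scatter([0, 1, 2, 3, 4, 5], [12, 14, 22, 39, 27, 15], label='knots')
plt.legend();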