# Claim severity modelling: fit log-link GLMs with three different response
# distributions, compare them by AIC, and check whether either covariate can
# be dropped from the inverse Gaussian model.

# Input data ----

# Change the path to the location of the data file on your machine.
file <- "C:/Users/krizp/Documents/vyuka MFF/NMFM402 - Nezivotni pojisteni 2/Cviceni/CV5/reseni/data_claimSeverity.csv"

# Read the data: semicolon-separated values with a header row.
ClaimData <- read.csv(file, header = TRUE, sep = ";")

# Declare the covariates as factors so that glm() does not treat them as
# numerical variables.
ClaimData <- within(ClaimData, {
  CarType <- factor(CarType)
  DriverAge <- factor(DriverAge)
})

# Model fitting ----

# Same linear predictor and log link in all three models; only the response
# distribution differs.
model.GammaLog <- glm(
  Claim ~ CarType + DriverAge,
  data = ClaimData,
  family = Gamma("log")
)
model.NormalLog <- glm(
  Claim ~ CarType + DriverAge,
  data = ClaimData,
  family = gaussian("log")
)
model.InverseGaussianLog <- glm(
  Claim ~ CarType + DriverAge,
  data = ClaimData,
  family = inverse.gaussian("log")
)

# Model comparison ----

# Compare the AIC of the three models.
AIC(model.GammaLog)
AIC(model.NormalLog)
AIC(model.InverseGaussianLog)

# Variable selection ----

# Backward stepwise selection based on minimization of AIC for the chosen
# model.
backwards <- step(model.InverseGaussianLog)

# Test whether CarType can be dropped by comparing the nested models with an
# F test.
model.reduced <- update(model.InverseGaussianLog, . ~ . - CarType)
anova(model.reduced, model.InverseGaussianLog, test = "F")

# Test whether DriverAge can be dropped by comparing the nested models with an
# F test. Note: model.reduced is overwritten here.
model.reduced <- update(model.InverseGaussianLog, . ~ . - DriverAge)
anova(model.reduced, model.InverseGaussianLog, test = "F")