# Claim severity GLMs: read the claim data, fit gamma, normal and inverse
# gaussian GLMs (each with a log link and with its canonical link), and
# compute predictions on the response scale for every row of the data.

file <- "data_claimSeverity.csv"

# read data (semicolon-separated file with a header row):
ClaimData <- read.csv(file, header = TRUE, sep = ";")

# declare columns as factors, will not be treated as numerical variables
# in GLM:
ClaimData <- within(ClaimData, {
  CarType <- factor(CarType)
  DriverAge <- factor(DriverAge)
})

# GLM with gamma distribution with log link = multiplicative tariff structure
model.GammaLog <- glm(
  Claim ~ CarType + DriverAge,
  data = ClaimData,
  family = Gamma("log")
)
summary(model.GammaLog)

# GLM with gamma distribution with inverse link = canonical link
model.GammaInverse <- glm(
  Claim ~ CarType + DriverAge,
  data = ClaimData,
  family = Gamma("inverse")
)

# GLM with normal distribution with log link = multiplicative tariff structure
model.NormalLog <- glm(
  Claim ~ CarType + DriverAge,
  data = ClaimData,
  family = gaussian("log")
)

# GLM with normal distribution with identity link = canonical link
# NOTE: the misspelling "Idnetity" is kept on purpose; the plotting code
# further down refers to the model and its prediction column by this name.
model.NormalIdnetity <- glm(
  Claim ~ CarType + DriverAge,
  data = ClaimData,
  family = gaussian("identity")
)

# GLM with inverse gaussian distribution with log link = multiplicative
# tariff structure
model.InverseGaussianLog <- glm(
  Claim ~ CarType + DriverAge,
  data = ClaimData,
  family = inverse.gaussian("log")
)

# GLM with inverse gaussian distribution with 1/mu^2 link = canonical link
model.InverseGaussianCanonical <- glm(
  Claim ~ CarType + DriverAge,
  data = ClaimData,
  family = inverse.gaussian("1/mu^2")
)

# drop the response column to make data for prediction; selecting by name
# (instead of by position 3) keeps this correct if the column order changes:
newData <- ClaimData[setdiff(names(ClaimData), "Claim")]

# make prediction (type = "response" returns values on the scale of Claim,
# not on the scale of the linear predictor)
newData$Y.GammaLog <- predict(
  model.GammaLog, newData, type = "response"
)
newData$Y.GammaInverse <- predict(
  model.GammaInverse, newData, type = "response"
)
newData$Y.NormalLog <- predict(
  model.NormalLog, newData, type = "response"
)
newData$Y.NormalIdnetity <- predict(
  model.NormalIdnetity, newData, type = "response"
)
newData$Y.InverseGaussianLog <- predict(
  model.InverseGaussianLog, newData, type = "response"
)
newData$Y.InverseGaussianCanonical <- predict(
  model.InverseGaussianCanonical, newData, type = "response"
)

# insert
# predictions from simple models from Practical1
# (hard-coded values, one per row of newData, so newData must have 12 rows)
newData$Y.BaileySimon <- c(
  2176, 1751, 1491, 1493, 2079, 1674, 1425, 1427, 2456, 1977, 1684, 1686
)
newData$Y.MarginSums <- c(
  2170, 1749, 1490, 1490, 2076, 1673, 1425, 1425, 2454, 1977, 1685, 1685
)
newData$Y.LogLinear <- c(
  2182, 1759, 1500, 1501, 2063, 1663, 1417, 1419, 2444, 1970, 1680, 1682
)

newData

# Compare all predictions in one plot. Colour encodes the error distribution
# (blue = gamma, green = normal, red = inverse gaussian, black = simple
# methods); for the GLMs, pch 15 marks the log link and pch 20 the canonical
# link. The x offsets (+0.1, +0.2, +0.4) shift each group sideways so that
# markers of the same risk class do not sit on top of each other.
plot(
  newData$Y.GammaLog,
  pch = 15, col = "blue", ylim = c(1200, 2600),
  xlab = "risk class", ylab = "expected claim size"
)
axis(side = 1, at = 1:12)
points(newData$Y.GammaInverse, pch = 20, col = "blue")
points(
  x = (1:12) + 0.1, y = newData$Y.NormalLog,
  pch = 15, col = "green"
)
points(
  x = (1:12) + 0.1, y = newData$Y.NormalIdnetity,
  pch = 20, col = "green"
)
points(
  x = (1:12) + 0.2, y = newData$Y.InverseGaussianLog,
  pch = 15, col = "red"
)
points(
  x = (1:12) + 0.2, y = newData$Y.InverseGaussianCanonical,
  pch = 20, col = "red"
)
points(
  x = (1:12) + 0.4, y = newData$Y.BaileySimon,
  pch = 0, col = "black"
)
points(
  x = (1:12) + 0.4, y = newData$Y.MarginSums,
  pch = 1, col = "black"
)
points(
  x = (1:12) + 0.4, y = newData$Y.LogLinear,
  pch = 2, col = "black"
)
legend(
  "topright",
  legend = c(
    "GammaLog", "GammaInverse", "NormalLog ", "NormalIdnetity",
    "InverseGaussianLog", "InverseGaussianCanonical",
    "BaileySimon ", "MarginSums ", "LogLinear"
  ),
  col = c(
    "blue", "blue", "green", "green", "red", "red",
    "black", "black", "black"
  ),
  pch = c(15, 20, 15, 20, 15, 20, 0, 1, 2)
)

#### Comments to results:
#### Predictions made by different GLM models vary, so the choice of an
#### appropriate error distribution and link function is important. We can
#### see less variability among models with log link function (coloured
#### squares on the graph) compared to models with canonical links (coloured
#### circles on the graph). This is because the models in the latter group
#### differ not only by error function, but also by link function. Recall
#### that the link function postulates the type of dependence of the
#### predictions on the linear predictors (determined by the combination of
#### individual risk factors).
#### Predictions made by simple methods from the Practical 1 are closer to
#### GLMs with log link than those with canonical link. The reason is that
#### the simple methods are based on multiplicative tariff structure, which
#### corresponds to log link function. Interpretation of tariff structure
#### for canonical (other than logarithmic) function is rather difficult and
#### far less straightforward.

##############################################################################
########################### WEIGHTS FOR POISSON ##############################
##############################################################################

# Artificial exposure: every claim is multiplied by w and w is stored as the
# exposure, so Claim / Exposure reproduces the original Claim value.
w <- 100
ClaimData2 <- within(ClaimData, {
  Claim <- w * Claim
  Exposure <- w
})

# The three Poisson fits below share the log link (= multiplicative tariff
# structure) and differ only in how Exposure enters the model.
# NOTE(review): Claim / Exposure is presumably not integer-valued, in which
# case the poisson family emits "non-integer" warnings - confirm this is
# acceptable for the exercise.

# W1: response Claim / Exposure, no weights and no offset
model.PoissonLogW1 <- glm(
  Claim / Exposure ~ CarType + DriverAge,
  data = ClaimData2,
  family = poisson("log")
)
summary(model.PoissonLogW1)

# W2: response Claim, Exposure enters as offset(log(Exposure))
model.PoissonLogW2 <- glm(
  Claim ~ CarType + DriverAge + offset(log(Exposure)),
  data = ClaimData2,
  family = poisson("log")
)
summary(model.PoissonLogW2)

# W3: response Claim / Exposure, Exposure enters as prior weights
model.PoissonLogW3 <- glm(
  Claim / Exposure ~ CarType + DriverAge,
  weights = Exposure,
  data = ClaimData2,
  family = poisson("log")
)
summary(model.PoissonLogW3)

############################### AND GAMMA? ###################################

# The same three variants with a gamma distribution and log link
# (= multiplicative tariff structure).

# W1: response Claim / Exposure, no weights and no offset
model.GammaLogW1 <- glm(
  Claim / Exposure ~ CarType + DriverAge,
  data = ClaimData2,
  family = Gamma("log")
)
summary(model.GammaLogW1)

# W2: response Claim, Exposure enters as offset(log(Exposure))
model.GammaLogW2 <- glm(
  Claim ~ CarType + DriverAge + offset(log(Exposure)),
  data = ClaimData2,
  family = Gamma("log")
)
summary(model.GammaLogW2)

# W3: response Claim / Exposure, Exposure enters as prior weights
model.GammaLogW3 <- glm(
  Claim / Exposure ~ CarType + DriverAge,
  weights = Exposure,
  data = ClaimData2,
  family = Gamma("log")
)
summary(model.GammaLogW3)