# Path of the claim severity data set (semicolon-separated, with header row).
file <- "data_claimSeverity.csv"

# Load the raw observations.
ClaimData <- read.csv(file, header = TRUE, sep = ";")

# Turn the rating variables into factors so that the GLMs below treat them as
# categorical risk classes rather than as numerical covariates.
rating.columns <- c("CarType", "DriverAge")
ClaimData[rating.columns] <- lapply(ClaimData[rating.columns], factor)

# Fit six severity GLMs that share the linear predictor CarType + DriverAge.
# Each of the three error distributions (gamma, normal, inverse Gaussian) is
# combined once with the log link (multiplicative tariff structure) and once
# with its canonical link.

# Gamma errors, log link.
model.GammaLog <- glm(
  formula = Claim ~ CarType + DriverAge,
  family = Gamma("log"),
  data = ClaimData
)
summary(model.GammaLog)

# Gamma errors, inverse (canonical) link.
model.GammaInverse <- glm(
  formula = Claim ~ CarType + DriverAge,
  family = Gamma("inverse"),
  data = ClaimData
)

# Normal errors, log link.
model.NormalLog <- glm(
  formula = Claim ~ CarType + DriverAge,
  family = gaussian("log"),
  data = ClaimData
)

# Normal errors, identity (canonical) link.
model.NormalIdnetity <- glm(
  formula = Claim ~ CarType + DriverAge,
  family = gaussian("identity"),
  data = ClaimData
)

# Inverse Gaussian errors, log link.
model.InverseGaussianLog <- glm(
  formula = Claim ~ CarType + DriverAge,
  family = inverse.gaussian("log"),
  data = ClaimData
)

# Inverse Gaussian errors, 1/mu^2 (canonical) link.
model.InverseGaussianCanonical <- glm(
  formula = Claim ~ CarType + DriverAge,
  family = inverse.gaussian("1/mu^2"),
  data = ClaimData
)


# Build the data set used for prediction: all columns of ClaimData except the
# response. The response is removed by name (not by position) so the result
# does not depend on where the Claim column sits in the CSV file.
newData <- ClaimData[setdiff(names(ClaimData), "Claim")]

# Fitted expected claim sizes (type = "response", i.e. on the scale of the
# response rather than of the linear predictor) for each of the six GLMs.
newData$Y.GammaLog <- predict(model.GammaLog, newData, type = "response")
newData$Y.GammaInverse <- predict(model.GammaInverse, newData, type = "response")
newData$Y.NormalLog <- predict(model.NormalLog, newData, type = "response")
newData$Y.NormalIdnetity <- predict(model.NormalIdnetity, newData, type = "response")
newData$Y.InverseGaussianLog <- predict(model.InverseGaussianLog, newData, type = "response")
newData$Y.InverseGaussianCanonical <- predict(model.InverseGaussianCanonical, newData, type = "response")

# Predictions of the simple methods from Practical 1, entered by hand.
# NOTE(review): these 12 values are assumed to be in the same row order as
# newData (one value per risk class) - confirm against Practical 1.
newData$Y.BaileySimon <- c(2176, 1751, 1491, 1493, 2079, 1674, 1425, 1427, 2456, 1977, 1684, 1686)
newData$Y.MarginSums <- c(2170, 1749, 1490, 1490, 2076, 1673, 1425, 1425, 2454, 1977, 1685, 1685)
newData$Y.LogLinear <- c(2182, 1759, 1500, 1501, 2063, 1663, 1417, 1419, 2444, 1970, 1680, 1682)

# Show the full comparison table.
newData


# Plot every set of predictions against the risk class index.
# Colour encodes the error distribution (blue = gamma, green = normal,
# red = inverse Gaussian); filled squares (pch 15) are log-link models and
# filled circles (pch 20) canonical-link models. The open black symbols are
# the simple methods from Practical 1.
with(newData, {
  # Positions of the 12 risk classes on the x axis; the later groups are
  # shifted slightly to the right so that their symbols do not overlap.
  risk.class <- 1:12

  plot(
    Y.GammaLog,
    pch = 15, col = "blue", ylim = c(1200, 2600),
    xlab = "risk class", ylab = "expected claim size"
  )
  axis(side = 1, at = risk.class)
  points(Y.GammaInverse, pch = 20, col = "blue")

  points(x = risk.class + 0.1, y = Y.NormalLog, pch = 15, col = "green")
  points(x = risk.class + 0.1, y = Y.NormalIdnetity, pch = 20, col = "green")

  points(x = risk.class + 0.2, y = Y.InverseGaussianLog, pch = 15, col = "red")
  points(x = risk.class + 0.2, y = Y.InverseGaussianCanonical, pch = 20, col = "red")

  points(x = risk.class + 0.4, y = Y.BaileySimon, pch = 0, col = "black")
  points(x = risk.class + 0.4, y = Y.MarginSums, pch = 1, col = "black")
  points(x = risk.class + 0.4, y = Y.LogLinear, pch = 2, col = "black")
})

# Legend entries are listed in the same order as the point layers above.
legend(
  "topright",
  legend = c(
    "GammaLog", "GammaInverse",
    "NormalLog ", "NormalIdnetity",
    "InverseGaussianLog", "InverseGaussianCanonical",
    "BaileySimon ", "MarginSums ", "LogLinear"
  ),
  col = rep(c("blue", "green", "red", "black"), times = c(2, 2, 2, 3)),
  pch = c(15, 20, 15, 20, 15, 20, 0, 1, 2)
)


#### Comments to results:
#### Predictions made by different GLM models vary, so the choice of appropriate error distribution and link function is important. We can see
#### less variability among models with log link function (coloured squares on the graph) compared to models with canonical links (coloured circles on the graph).
#### This is because the models in the latter group differ not only by error distribution, but also by link function. Recall that the link function postulates the type of
#### dependence of the predictions on the linear predictor (determined by the combination of individual risk factors). 
#### Predictions made by the simple methods from Practical 1 are closer to the GLMs with log link than to those with canonical link. The reason is that the simple methods
#### are based on multiplicative tariff structure, which corresponds to log link function. Interpretation of tariff structure for canonical (other than logarithmic)
#### function is rather difficult and far less straightforward. 



#################################################################################################################
###############################################WEIGHTS FOR POISSON#################################################
#################################################################################################################



# Artificial exposure experiment: every claim amount is multiplied by a
# constant w and an Exposure column equal to w is added, so Claim / Exposure
# reproduces the original claim size.
w <- 100
ClaimData2 <- transform(ClaimData, Claim = w * Claim, Exposure = w)

# Poisson GLM, log link, variant 1:
# response is the rate Claim / Exposure, with neither weights nor offset.
model.PoissonLogW1 <- glm(
  formula = Claim / Exposure ~ CarType + DriverAge,
  family = poisson("log"),
  data = ClaimData2
)
summary(model.PoissonLogW1)

# Poisson GLM, log link, variant 2:
# response is the scaled Claim, exposure enters as the offset log(Exposure).
model.PoissonLogW2 <- glm(
  formula = Claim ~ CarType + DriverAge + offset(log(Exposure)),
  family = poisson("log"),
  data = ClaimData2
)
summary(model.PoissonLogW2)

# Poisson GLM, log link, variant 3:
# response is the rate Claim / Exposure, exposure enters as prior weights.
model.PoissonLogW3 <- glm(
  formula = Claim / Exposure ~ CarType + DriverAge,
  family = poisson("log"),
  data = ClaimData2,
  weights = Exposure
)
summary(model.PoissonLogW3)


###############################AND GAMMA?################################################


# Same three exposure specifications as for the Poisson models, now with
# gamma errors and log link, fitted on the exposure-scaled data ClaimData2.

# Variant 1: response Claim / Exposure, with neither weights nor offset.
model.GammaLogW1 <- glm(
  formula = Claim / Exposure ~ CarType + DriverAge,
  family = Gamma("log"),
  data = ClaimData2
)
summary(model.GammaLogW1)

# Variant 2: response Claim, exposure enters as the offset log(Exposure).
model.GammaLogW2 <- glm(
  formula = Claim ~ CarType + DriverAge + offset(log(Exposure)),
  family = Gamma("log"),
  data = ClaimData2
)
summary(model.GammaLogW2)

# Variant 3: response Claim / Exposure, exposure enters as prior weights.
model.GammaLogW3 <- glm(
  formula = Claim / Exposure ~ CarType + DriverAge,
  family = Gamma("log"),
  data = ClaimData2,
  weights = Exposure
)
summary(model.GammaLogW3)