## 2. ESTIMATING CAUSAL EFFECTS WITH RANDOMIZED EXPERIMENTS
## 2.5 DO SMALL CLASSES IMPROVE STUDENT PERFORMANCE?
#
# Walk-through script: loads STAR.csv, builds a 0/1 indicator for small
# classes, computes difference-in-means estimates for reading, math and
# graduated, then repeats the comparison with tidyverse / ggstatsplot tools.
# Statements are meant to be run top to bottom; results are auto-printed.

## Set the working directory
# The two paths are per-OS examples, so only the one matching the current
# platform is run (calling both in sequence would fail on either system).
# Adjust the path to wherever STAR.csv lives on your machine.
if (.Platform$OS.type == "windows") {
  setwd("C:/user/Desktop/DSS") # example for Windows
} else {
  setwd("~/Desktop/DSS") # example of setwd() for Mac
}

## Load the dataset
star <- read.csv("STAR.csv") # reads and stores data

## Look at the data
head(star) # shows first observations

## Relational operators in R
3 == 3 # example of a TRUE statement
3 == 4 # example of a FALSE statement
star$classtype == "small" # element-wise comparison: one TRUE/FALSE per row

## Creating new variables
ifelse(star$classtype == "small", 1, 0) # creates contents of variable
star$small <- ifelse(star$classtype == "small", 1, 0) # creates new variable
head(star) # shows first observations

## Subsetting variables
mean(star$reading) # calculates the mean of reading
mean(star$reading[star$small == 1]) # mean of reading for treatment group
mean(star$reading[star$small == 0]) # mean of reading for control group

## Compute the difference-in-means estimator for reading
mean(star$reading[star$small == 1]) - mean(star$reading[star$small == 0])

## Compute the difference-in-means estimator for math
mean(star$math[star$small == 1]) - mean(star$math[star$small == 0])

## Compute the difference-in-means estimator for graduated
mean(star$graduated[star$small == 1]) - mean(star$graduated[star$small == 0])

## To help with interpretation
mean(star$graduated[star$small == 1]) # mean of graduated for treatment group
mean(star$graduated[star$small == 0]) # mean of graduated for control group
# NOTE(review): the literals below are presumably the two means printed just
# above, copied by hand -- confirm they still match if STAR.csv changes.
0.8735043 - 0.8664731 # difference-in-means estimator for graduated

## Uncertainty
# Distribution of reading scores within each group, then a two-sample t-test
# of reading by small; the result is stored so it can be inspected later.
hist(star$reading[star$small == 1])
hist(star$reading[star$small == 0])
boxplot(star$reading ~ star$small)
test <- t.test(star$reading ~ star$small)
test

## Tidyverse approach
library(tidyverse)

# Group averages per class type, plus the difference between the two groups.
# diff() on the two-row summary yields (second row - first row); with small2
# ordered 0 then 1 that is small minus non-small, recycled onto both rows.
star %>%
  mutate(small2 = if_else(classtype == "small", 1, 0)) %>%
  group_by(small2) %>%
  summarise(
    reading_avg = mean(reading),
    math_avg = mean(math),
    grad_avg = mean(graduated)
  ) %>%
  mutate(
    reading_diff = diff(reading_avg),
    math_diff = diff(math_avg),
    grad_diff = diff(grad_avg)
  )

# Boxplot of reading scores by the original classtype labels
star %>%
  ggplot(aes(y = reading, x = classtype)) +
  geom_boxplot()

library(ggstatsplot)

# Between-group comparison plots of reading by the small indicator created
# above; the second call requests ggstatsplot's "robust" test type.
star %>%
  ggbetweenstats(small, reading)
star %>%
  ggbetweenstats(small, reading, type = "robust")