vignette.Rmd

---
title: "vignette"
output:
  pdf_document: default
  html_document: default
date: '2022-12-20'
---

```{r setup, include=FALSE}
# Set the default knitr chunk option `echo` to TRUE for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE)
```

# Causal isotonic calibration

```{r}
library(causalCalibration)
# Simulate the calibration sample: a single covariate W ~ Unif(-1, 1) and a
# binary treatment A drawn with propensity plogis(2 * W).
set.seed(123)
n <- 1000
W <- runif(n, min = -1, max = 1)
pA1 <- plogis(2 * W)
A <- rbinom(n, size = 1, prob = pA1)
# The true CATE is linear in the covariate: CATE(W) = 1 + W.
CATE <- 1 + W
EY0 <- W
EY1 <- W + CATE
Y <- rnorm(n, mean = W + A * CATE, sd = 0.3)

# The initial predictor is a monotone transformation of the true CATE, so it
# is highly correlated with the true CATE, but its values are not calibrated.
tau <- exp(CATE) - 1

plot(W, tau)

# Draw a larger evaluation sample and keep only the points with |W| <= 0.99.
n <- 10000
Wnew <- runif(n, min = -1, max = 1)
Wnew <- Wnew[Wnew >= -0.99 & Wnew <= 0.99]

n <- length(Wnew)
# NOTE(review): pA1new and Anew are not referenced again in the visible part
# of this file; the rbinom() call is kept because it advances the RNG stream.
pA1new <- plogis(2 * Wnew)
Anew <- rbinom(n, size = 1, prob = pA1new)
# From here on CATE holds the true effect on the evaluation sample.
CATE <- 1 + Wnew
tau_new <- exp(CATE) - 1


library(ggplot2)
library(latex2exp)

# Calibrate a CATE predictor and plot the result.
#
# Calls causalCalibrate() on the calibration sample with `tau_pred = tau_new`,
# reads the `tau_calibrated` element of the result, and builds a line plot of
# three curves over `W_new`: the true CATE, the original predictions and the
# calibrated predictions.
#
# Arguments:
#   tau       Predictions of the initial predictor on the calibration sample.
#   tau_new   Predictions of the same predictor on the evaluation sample.
#   A, Y, EY1, EY0, pA1
#             Calibration-sample quantities, passed unchanged to
#             causalCalibrate().
#   W_new     Evaluation-sample covariate values (x-axis of the plot).
#   cate_new  True CATE at `W_new`.
#   x_lab, y_lab
#             Optional axis labels; NULL keeps the ggplot2 defaults
#             ("W" and "CATE").
#
# Returns the ggplot object. Call the helper at the top level of a chunk so
# that the returned plot is printed automatically.
plot_calibration <- function(tau, tau_new, A, Y, EY1, EY0, pA1,
                             W_new, cate_new,
                             x_lab = NULL, y_lab = NULL) {
  calibrator <- causalCalibrate(tau, A, Y, EY1, EY0, pA1, tau_pred = tau_new)
  tau_cal <- calibrator$tau_calibrated

  # Long format: one block of rows per curve, sharing the same covariate.
  curves <- data.frame(
    W = rep(W_new, times = 3),
    CATE = c(cate_new, tau_new, tau_cal),
    type = rep(c("CATE", "Original", "Calibrated"), each = length(W_new))
  )

  fig <- ggplot(curves, aes(x = W, y = CATE, color = type)) +
    geom_line() +
    ggtitle("Effect of calibration") +
    scale_x_continuous() +
    theme_bw()
  if (!is.null(x_lab)) {
    fig <- fig + labs(x = x_lab)
  }
  if (!is.null(y_lab)) {
    fig <- fig + labs(y = y_lab)
  }
  fig
}

# Scenario 1: tau = exp(CATE) - 1, a monotone transformation of the true CATE
# (tau and tau_new were defined earlier in this chunk). The x-axis shows the
# covariate w, so it is labelled as such.
plot_calibration(
  tau, tau_new, A, Y, EY1, EY0, pA1, Wnew, CATE,
  x_lab = TeX("$w$"),
  y_lab = TeX("$\\tau_0(w) = E[Y_1 - Y_0 | W = w]$")
)

# Scenario 2: the true CATE plus a sine perturbation.
tau <- 1 + W + sin(5 * W)
tau_new <- 1 + Wnew + sin(5 * Wnew)
plot_calibration(tau, tau_new, A, Y, EY1, EY0, pA1, Wnew, CATE)

# Scenario 3: the predictor equals the true CATE, 1 + W.
tau <- 1 + W
tau_new <- 1 + Wnew
plot_calibration(tau, tau_new, A, Y, EY1, EY0, pA1, Wnew, CATE)

# Scenario 4: the slope drops from 1 to 0.2 for W >= 0.
tau <- 1 + W + (W >= 0) * (-0.8 * W)
tau_new <- 1 + Wnew + (Wnew >= 0) * (-0.8 * Wnew)
plot_calibration(tau, tau_new, A, Y, EY1, EY0, pA1, Wnew, CATE)

# Scenario 5: the slope flips from 1 to -1 for W >= 0.
tau <- 1 + W + (W >= 0) * (-2 * W)
tau_new <- 1 + Wnew + (Wnew >= 0) * (-2 * Wnew)
plot_calibration(tau, tau_new, A, Y, EY1, EY0, pA1, Wnew, CATE)

# Scenario 6: the predictor is constant (equal to 1) for W >= 0.
tau <- 1 + W + (W >= 0) * (-1 * W)
tau_new <- 1 + Wnew + (Wnew >= 0) * (-1 * Wnew)
plot_calibration(tau, tau_new, A, Y, EY1, EY0, pA1, Wnew, CATE)
   

```

# Cross-calibration
 
## Cross-calibration with nnet
 
 
```{r}
library(sl3) #tlverse/sl3 on github. Used for cross fitting
library(data.table)
library(ggplot2) # loaded here so the chunk does not rely on an earlier chunk

# Simulate data: covariate W ~ Unif(-1, 1), treatment A with propensity
# plogis(1.5 * W), and outcome Y with mean W + A * CATE.
n <- 1000
W <- runif(n, -1, 1)
pA1 <- plogis(1.5 * W)
A <- rbinom(n, size = 1, pA1)
# True CATE(W) = |W|
CATE <- abs(W)
EY0 <- W
EY1 <- W + CATE
EY <- ifelse(A == 1, EY1, EY0)
Y <- rnorm(n, W + A * CATE, 0.3)
# Doubly robust (AIPW) pseudo-outcome, built from the true nuisance functions;
# it is the regression target used to learn the CATE predictor below.
zeta <- EY1 - EY0 + (A - pA1) / ((1 - pA1) * pA1) * (Y - EY)
data <- data.table(W, zeta = zeta)

# Number of cross-fitting folds; used both to build the task and to collect
# the fold-specific predictions, so the two always agree.
n_folds <- 10
task <- sl3_Task$new(
  data,
  covariates = c("W"), outcome = "zeta", folds = n_folds
)
# choose between base_learners

base_learner <- Lrnr_nnet$new(size = 25) # single layer neural net


lrnr_tau <- Lrnr_cv$new(base_learner, full_fit = TRUE)
# train predictor
# does both cross fitting and full sample fitting
lrnr_tau <- lrnr_tau$train(task)

# predictions using full predictor fit on all data
tau_no_crossfit <- lrnr_tau$predict_fold(task, "full")
# pooled out of fold predictions from cross fitted predictors
tau_out_of_fold <- lrnr_tau$predict_fold(task, "validation")
# Get full sample predictions from each fold-specific predictor
# (one column per fold).
tau_all <- do.call(cbind, lapply(seq_len(n_folds), function(k) {
  lrnr_tau$predict_fold(task, k) # fold-specific predictions
}))

# Compute calibrator from data
calibrator <- causalCalibrate(
  tau = tau_out_of_fold, A = A, Y = Y, EY1 = EY1, EY0 = EY0, pA1 = pA1
)


# tau_all should be the stacked cross-fitted prediction matrix for
# observations at which we would like to get the calibrated predictions.
tau_cal <- cross_calibrate(calibrator, tau_all)


data <- data.frame(
  W = rep(W, times = 3),
  CATE = c(CATE, tau_no_crossfit, tau_cal),
  type = rep(c("True CATE", "Uncalibrated", "Calibrated"), each = length(W))
)

ggplot(data, aes(x = W, y = CATE, color = type)) +
  geom_line() +
  ggtitle("Effect of calibration") +
  scale_x_continuous() +
  theme_bw()


# Mean-squared error against the true CATE, before and after calibration;
# a smaller second value means calibration improved the predictor.
mean((tau_no_crossfit - CATE)^2)
mean((tau_cal - CATE)^2)
 
```
 
 
## Cross-calibration with xgboost
 
 
```{r}
library(sl3) #tlverse/sl3 on github. Used for cross fitting
library(data.table)
library(ggplot2) # loaded here so the chunk does not rely on an earlier chunk

# Simulate data: covariate W ~ Unif(-1, 1), treatment A with propensity
# plogis(1.5 * W), and outcome Y with mean W + A * CATE.
n <- 1000
W <- runif(n, -1, 1)
pA1 <- plogis(1.5 * W)
A <- rbinom(n, size = 1, pA1)
# True CATE(W) = |W|
CATE <- abs(W)
EY0 <- W
EY1 <- W + CATE
EY <- ifelse(A == 1, EY1, EY0)
Y <- rnorm(n, W + A * CATE, 0.3)
# Doubly robust (AIPW) pseudo-outcome, built from the true nuisance functions;
# it is the regression target used to learn the CATE predictor below.
zeta <- EY1 - EY0 + (A - pA1) / ((1 - pA1) * pA1) * (Y - EY)
data <- data.table(W, zeta = zeta)

# Number of cross-fitting folds; used both to build the task and to collect
# the fold-specific predictions, so the two always agree.
n_folds <- 10
task <- sl3_Task$new(
  data,
  covariates = c("W"), outcome = "zeta", folds = n_folds
)
# choose between base_learners
base_learner <- Lrnr_xgboost$new(max_depth = 3, nrounds = 10) # xgboost


lrnr_tau <- Lrnr_cv$new(base_learner, full_fit = TRUE)
# train predictor
# does both cross fitting and full sample fitting
lrnr_tau <- lrnr_tau$train(task)

# predictions using full predictor fit on all data
tau_no_crossfit <- lrnr_tau$predict_fold(task, "full")
# pooled out of fold predictions from cross fitted predictors
tau_out_of_fold <- lrnr_tau$predict_fold(task, "validation")
# Get full sample predictions from each fold-specific predictor
# (one column per fold).
tau_all <- do.call(cbind, lapply(seq_len(n_folds), function(k) {
  lrnr_tau$predict_fold(task, k) # fold-specific predictions
}))

# Compute calibrator from data
calibrator <- causalCalibrate(
  tau = tau_out_of_fold, A = A, Y = Y, EY1 = EY1, EY0 = EY0, pA1 = pA1
)


# tau_all should be the stacked cross-fitted prediction matrix for
# observations at which we would like to get the calibrated predictions.
tau_cal <- cross_calibrate(calibrator, tau_all)


data <- data.frame(
  W = rep(W, times = 3),
  CATE = c(CATE, tau_no_crossfit, tau_cal),
  type = rep(c("True CATE", "Uncalibrated", "Calibrated"), each = length(W))
)

ggplot(data, aes(x = W, y = CATE, color = type)) +
  geom_line() +
  ggtitle("Effect of calibration") +
  scale_x_continuous() +
  theme_bw()


# Mean-squared error against the true CATE, before and after calibration;
# a smaller second value means calibration improved the predictor.
mean((tau_no_crossfit - CATE)^2)
mean((tau_cal - CATE)^2)
 
```