2  Parallel computing

suppressPackageStartupMessages(require(dplyr))

2.1 Last Time

The coverage probability is an operating characteristic of methods or algorithms that create confidence intervals.

If one repeatedly draws a sample of size \(N\) from the population and constructs a confidence interval, the coverage probability is the proportion of intervals that capture the population parameter of interest.

2.2 Last Time: Coverage probability

2.3 Last Time

2.4 Last Time

The framework described above suggests how one may write modular code to perform the simulation. One can write a function to perform each of the primary tasks. For example:

2.5 Last Time

set.seed(43589)
R <- 5000
out <- rep(NA, R)
for(i in 1:R){
  out[i] <- 
    parameters %>% 
    generate_data %>% 
    construct_ci %>% 
    capture_param
}
coverage_probability <- mean(out)

2.6 This time

N <- 201 # parameters

generate_data <- function(N){ 
  # Hard coded standard normal distribution
  rnorm(N) 
}

boot_ci <- function(data, summary = "median"){
  # Next slide
}

capture_median <- function(ci){
  # Hard coded 0 as parameter of interest
  1*(ci[1] < 0 & 0 < ci[2])
}

2.7 Sampling distribution by bootstrap

boot_ci <- function(data, summary = "median"){
  # Hard coded number of draws
  R <- 5000
  # Set median as default summary measure
  sm <- get(summary)
  sampdist <- rep(NA, R)
  for(i in 1:R){
    b <- sample(data, length(data), replace = TRUE)
    sampdist[i] <- sm(b)
  }
  # Hard coded symmetric density interval
  quantile(sampdist, c(0.025, 0.975))
}

2.8 Do each of the functions work

201 %>% generate_data
  [1] -0.01940201 -1.82120267  0.44799261  1.88568685  0.67635725 -0.36273872
  [7] -1.70378986  0.35027602 -0.83654532 -1.39141279 -0.24101661 -1.74344869
 [13] -0.02365455 -1.61469385 -0.23360828 -0.06272531 -0.75529350 -0.12284804
 [19] -0.09152494  0.24708076  0.71407592 -0.58219252 -0.36695261  0.07722695
 [25] -0.22586839  0.90892600  0.81913488  0.11095328  0.97262568  2.32804200
 [31]  1.17928507  0.18779521 -1.34175771 -0.15051516  0.53605514 -0.63465722
 [37] -1.81922167 -1.08815381 -0.51726338 -0.48323935 -0.01838499  0.42976036
 [43] -0.14622589 -1.69944318 -0.87690454  1.18908164  0.96358839  0.25622827
 [49]  0.50779332  1.42945153 -0.51389587 -0.64092731 -0.61622052 -0.28323201
 [55]  1.29620185 -0.88092971 -0.68194381  0.63690155 -1.01041862  0.70703162
 [61] -0.48156307 -0.13030187  0.70979352 -1.48739092 -0.26383242 -2.28538276
 [67]  0.31456107 -1.02170654 -0.36850328 -0.22155832 -0.16866231  1.90090876
 [73]  0.23821551 -1.42714481  0.51709746  0.64516767  0.25168249 -0.09314028
 [79] -1.16027715  1.27040205 -1.09094959  0.66862971 -1.50267268  1.77178427
 [85]  1.27888909 -0.03231166  0.31037266  0.19681092  0.66129702  0.73326171
 [91] -0.24470869  0.24159006 -0.67659721 -0.27563048 -0.66751692  0.01201869
 [97]  0.14347383 -1.25554444 -0.55103974  2.19861041 -0.68028542 -0.32370690
[103]  0.10659134  0.25171250  0.29102262 -0.66178149 -1.82197597  2.13134072
[109]  1.36481881  0.38534636  1.49185240 -1.79387312  0.14141156 -1.52153296
[115] -0.93853955 -0.96219574  0.93454219  1.15023355 -0.21301911  0.67353174
[121]  0.28174930 -0.40803889  0.50420010  0.02891573  0.39366123 -0.90105931
[127]  1.02345428  0.49313891 -0.12073488 -0.07716957  0.26736520  1.57248644
[133] -1.24369934 -1.10457785  1.44744054 -0.06768671 -1.52203082 -0.50504132
[139]  0.35306645 -0.94998936 -1.05371147 -0.54750406 -1.71660557  0.59338320
[145]  1.10976506  1.30182480 -0.43956408  0.62653417 -1.68467905  0.04887087
[151]  0.17388456 -2.28404018  0.77796568  1.38039939 -1.71499788 -1.82130689
[157]  0.83142598 -0.95740680 -0.04439578 -0.19135970  0.11562283  0.27448982
[163]  1.37502312  2.56074402 -1.09949989 -0.10618504 -0.71049113  0.08760942
[169] -0.22524494 -0.95888098  1.41913365  0.04465294 -0.11874464  0.50255983
[175] -1.80255389 -0.15642812 -0.80663667  0.19974461  0.83033002  0.95386830
[181]  1.64687303 -1.81886796  1.48479304 -0.81185456  1.03616289 -1.58463794
[187] -0.34734621 -0.36749540  0.21690963 -0.46992453  0.26175158  0.13580190
[193] -1.19313319 -0.82841723 -0.88931764 -0.40029615  0.02598329  2.80224206
[199] -0.30118388 -0.42805831 -0.05197986

2.9 Do each of the functions work

201 %>% generate_data %>% boot_ci
       2.5%       97.5% 
-0.21652287  0.08798463 

2.10 Do each of the functions work

c(-1,1) %>% capture_median
[1] 1
c(1,2) %>% capture_median
[1] 0
c(-2,-1) %>% capture_median
[1] 0
c(0,1) %>% capture_median
[1] 0

2.11 A single replicate

201 %>% generate_data %>% boot_ci %>% capture_median
2.5% 
   1 

2.12 With the loop

M <- 10 # Make small, we are still testing the code
captures <- rep(NA, M)
for(i in 1:M){
  captures[i] <- 
    201 %>% generate_data %>% boot_ci %>% capture_median
}
capture_prob <- mean(captures)

2.13 Code is slowish

2.13.1 Possible solutions

  • Array job on ACCRE
  • May use AWS, Digital Ocean, reddis, etc.
  • Parallel processing on personal computer or ACCRE

2.14 Foreach

require(foreach)
Loading required package: foreach

2.15 Foreach

# Sequential approach, by default returns a list
foreach(i = 1:3) %do% {
 i  
}
[[1]]
[1] 1

[[2]]
[1] 2

[[3]]
[1] 3

2.16 Foreach

# Sequential approach, by default returns a list
# Use .combine to return as different object
foreach(i = 1:3, .combine = c) %do% {
 i  
}
[1] 1 2 3

2.17 Foreach

# Sequential approach, by default returns a list
# Use .combine to return as different object
foreach(i = 1:3, .combine = cbind) %do% {
 i  
}
     result.1 result.2 result.3
[1,]        1        2        3

2.18 Foreach

# Sequential approach, by default returns a list
# Use .combine to return as different object
foreach(i = 1:3, .combine = rbind) %do% {
 i  
}
         [,1]
result.1    1
result.2    2
result.3    3

2.19 Foreach

# Sequential approach, by default returns a list
# Use .combine to return as different object
# Can also sum or multiply
foreach(i = 1:3, .combine = `+`) %do% {
 i  
}
[1] 6

2.20 Foreach

# This is a silly example for demonstration
# No one would perform this calculation in this way
g <- foreach(i = 1:30000, .combine = c) %do% {
  median(rnorm(1000))
}

2.21 Foreach with dopar

# Create parallel backend
require(doParallel)
cores_2_use <- detectCores() - 1
cl <- makeCluster(cores_2_use)
clusterSetRNGStream(cl, 2344)
registerDoParallel(cl)

g <- foreach(i = 1:30000, .combine = c) %dopar% {
 median(rnorm(1000))  
}
stopCluster(cl)

2.22 Foreach with dopar for coverage

# Create parallel backend
cl <- makeCluster(cores_2_use)
clusterSetRNGStream(cl, 2344)
registerDoParallel(cl)

captures <- foreach(
    i = 1:5000
  , .combine = c
  , .packages = c('dplyr') # Need to indicate which packages
) %dopar% {
  201 |> generate_data() |>  boot_ci() |> capture_median()
}
stopCluster(cl)

2.23 Working hard

2.24 Hands on