suppressPackageStartupMessages(require(dplyr))
2 Parallel computing
2.1 Last Time
The coverage probability is an operating characteristic of methods or algorithms that create confidence intervals.
If one repeatedly draws a sample of size \(N\) from the population and constructs a confidence interval, the coverage probability is the proportion of intervals that capture the population parameter of interest.
2.2 Last Time: Coverage probability
2.3 Last Time
2.4 Last Time
The framework described above suggests how one may write modular code to perform the simulation. One can write a function to perform each of the primary tasks. For example:
2.5 Last Time
set.seed(43589)
<- 5000
R <- rep(NA, R)
out for(i in 1:R){
<-
out[i] %>%
parameters %>%
generate_data %>%
construct_ci
capture_param
}<- mean(out) coverage_probability
2.6 This time
<- 201 # parameters
N
<- function(N){
generate_data # Hard coded standard normal distribution
rnorm(N)
}
<- function(data, summary = "median"){
boot_ci # Next slide
}
<- function(ci){
capture_median # Hard coded 0 as parameter of interest
1*(ci[1] < 0 & 0 < ci[2])
}
2.7 Sampling distribution by bootstrap
<- function(data, summary = "median"){
boot_ci # Hard coded number of draws
<- 5000
R # Set median as default summary measure
<- get(summary)
sm <- rep(NA, R)
sampdist for(i in 1:R){
<- sample(data, length(data), replace = TRUE)
b <- sm(b)
sampdist[i]
}# Hard coded symmetric density interval
quantile(sampdist, c(0.025, 0.975))
}
2.8 Do each of the functions work
201 %>% generate_data
[1] -0.01940201 -1.82120267 0.44799261 1.88568685 0.67635725 -0.36273872
[7] -1.70378986 0.35027602 -0.83654532 -1.39141279 -0.24101661 -1.74344869
[13] -0.02365455 -1.61469385 -0.23360828 -0.06272531 -0.75529350 -0.12284804
[19] -0.09152494 0.24708076 0.71407592 -0.58219252 -0.36695261 0.07722695
[25] -0.22586839 0.90892600 0.81913488 0.11095328 0.97262568 2.32804200
[31] 1.17928507 0.18779521 -1.34175771 -0.15051516 0.53605514 -0.63465722
[37] -1.81922167 -1.08815381 -0.51726338 -0.48323935 -0.01838499 0.42976036
[43] -0.14622589 -1.69944318 -0.87690454 1.18908164 0.96358839 0.25622827
[49] 0.50779332 1.42945153 -0.51389587 -0.64092731 -0.61622052 -0.28323201
[55] 1.29620185 -0.88092971 -0.68194381 0.63690155 -1.01041862 0.70703162
[61] -0.48156307 -0.13030187 0.70979352 -1.48739092 -0.26383242 -2.28538276
[67] 0.31456107 -1.02170654 -0.36850328 -0.22155832 -0.16866231 1.90090876
[73] 0.23821551 -1.42714481 0.51709746 0.64516767 0.25168249 -0.09314028
[79] -1.16027715 1.27040205 -1.09094959 0.66862971 -1.50267268 1.77178427
[85] 1.27888909 -0.03231166 0.31037266 0.19681092 0.66129702 0.73326171
[91] -0.24470869 0.24159006 -0.67659721 -0.27563048 -0.66751692 0.01201869
[97] 0.14347383 -1.25554444 -0.55103974 2.19861041 -0.68028542 -0.32370690
[103] 0.10659134 0.25171250 0.29102262 -0.66178149 -1.82197597 2.13134072
[109] 1.36481881 0.38534636 1.49185240 -1.79387312 0.14141156 -1.52153296
[115] -0.93853955 -0.96219574 0.93454219 1.15023355 -0.21301911 0.67353174
[121] 0.28174930 -0.40803889 0.50420010 0.02891573 0.39366123 -0.90105931
[127] 1.02345428 0.49313891 -0.12073488 -0.07716957 0.26736520 1.57248644
[133] -1.24369934 -1.10457785 1.44744054 -0.06768671 -1.52203082 -0.50504132
[139] 0.35306645 -0.94998936 -1.05371147 -0.54750406 -1.71660557 0.59338320
[145] 1.10976506 1.30182480 -0.43956408 0.62653417 -1.68467905 0.04887087
[151] 0.17388456 -2.28404018 0.77796568 1.38039939 -1.71499788 -1.82130689
[157] 0.83142598 -0.95740680 -0.04439578 -0.19135970 0.11562283 0.27448982
[163] 1.37502312 2.56074402 -1.09949989 -0.10618504 -0.71049113 0.08760942
[169] -0.22524494 -0.95888098 1.41913365 0.04465294 -0.11874464 0.50255983
[175] -1.80255389 -0.15642812 -0.80663667 0.19974461 0.83033002 0.95386830
[181] 1.64687303 -1.81886796 1.48479304 -0.81185456 1.03616289 -1.58463794
[187] -0.34734621 -0.36749540 0.21690963 -0.46992453 0.26175158 0.13580190
[193] -1.19313319 -0.82841723 -0.88931764 -0.40029615 0.02598329 2.80224206
[199] -0.30118388 -0.42805831 -0.05197986
2.9 Do each of the functions work
201 %>% generate_data %>% boot_ci
2.5% 97.5%
-0.21652287 0.08798463
2.10 Do each of the functions work
c(-1,1) %>% capture_median
[1] 1
c(1,2) %>% capture_median
[1] 0
c(-2,-1) %>% capture_median
[1] 0
c(0,1) %>% capture_median
[1] 0
2.11 A single replicate
201 %>% generate_data %>% boot_ci %>% capture_median
2.5%
1
2.12 With the loop
<- 10 # Make small, we are still testing the code
M <- rep(NA, M)
captures for(i in 1:M){
<-
captures[i] 201 %>% generate_data %>% boot_ci %>% capture_median
}<- mean(captures) capture_prob
2.13 Code is slowish
2.13.1 Possible solutions
- Array job on ACCRE
- May use AWS, Digital Ocean, reddis, etc.
- Parallel processing on personal computer or ACCRE
2.14 Foreach
require(foreach)
Loading required package: foreach
2.15 Foreach
# Sequential approach, by default returns a list
foreach(i = 1:3) %do% {
i }
[[1]]
[1] 1
[[2]]
[1] 2
[[3]]
[1] 3
2.16 Foreach
# Sequential approach, by default returns a list
# Use .combine to return as different object
foreach(i = 1:3, .combine = c) %do% {
i }
[1] 1 2 3
2.17 Foreach
# Sequential approach, by default returns a list
# Use .combine to return as different object
foreach(i = 1:3, .combine = cbind) %do% {
i }
result.1 result.2 result.3
[1,] 1 2 3
2.18 Foreach
# Sequential approach, by default returns a list
# Use .combine to return as different object
foreach(i = 1:3, .combine = rbind) %do% {
i }
[,1]
result.1 1
result.2 2
result.3 3
2.19 Foreach
# Sequential approach, by default returns a list
# Use .combine to return as different object
# Can also sum or multiply
foreach(i = 1:3, .combine = `+`) %do% {
i }
[1] 6
2.20 Foreach
# This is a silly example for demonstration
# No one would perform this calculation in this way
<- foreach(i = 1:30000, .combine = c) %do% {
g median(rnorm(1000))
}
2.21 Foreach with dopar
# Create parallel backend
require(doParallel)
<- detectCores() - 1
cores_2_use <- makeCluster(cores_2_use)
cl clusterSetRNGStream(cl, 2344)
registerDoParallel(cl)
<- foreach(i = 1:30000, .combine = c) %dopar% {
g median(rnorm(1000))
}stopCluster(cl)
2.22 Foreach with dopar for coverage
# Create parallel backend
<- makeCluster(cores_2_use)
cl clusterSetRNGStream(cl, 2344)
registerDoParallel(cl)
<- foreach(
captures i = 1:5000
.combine = c
, .packages = c('dplyr') # Need to indicate which packages
, %dopar% {
) 201 |> generate_data() |> boot_ci() |> capture_median()
}stopCluster(cl)