--- title: "A/B bootstrap" output: html_notebook: self_contained: TRUE editor_options: chunk_output_type: inline ---
library(tidyverse) library(magrittr) library(tictoc) library(glue) library(dabestr)
シミュレーション
操作の期間の対数正規分布を作成します。
my_rlnorm <- function(n, mean, sd){ # . : https://en.wikipedia.org/wiki/Log-normal_distribution#Arithmetic_moments location <- log(mean^2 / sqrt(sd^2 + mean^2)) shape <- sqrt(log(1 + (sd^2 / mean^2))) print(paste("location:", location)) print(paste("shape:", shape)) rlnorm(n, location, shape) } # N (A = Control) A_control <- my_rlnorm(n = 10^3, mean = 500, sd = 150) %T>% {print(glue("mean = {mean(.)}; sd = {sd(.)}"))} # N (B = Test) B_test <- my_rlnorm(n = 10^3, mean = 525, sd = 150) %T>% {print(glue("mean = {mean(.)}; sd = {sd(.)}"))}
dabestr
ツールを使用して分析に必要な形式でデータを収集し、分析を実施します。
df <- tibble(Control = A_control, Test = B_test) %>% gather(key = "group", value = "value") tic("bootstrapping") two_group_unpaired <- df %>% dabest(group, value, # The idx below passes "Control" as the control group, # and "Test" as the test group. The mean difference # will be computed as mean(Test) - mean(Control). idx = c("Control", "Test"), paired = FALSE, reps = 5000 ) toc()
結果を見てみましょう
two_group_unpaired plot(two_group_unpaired)
================================================== ====