1 Body Temperatures

> BT <- read.csv("http://www.lock5stat.com/datasets1e/BodyTemp50.csv")
> library(dplyr)
> BT %>%
+   summarize(mean(BodyTemp), sd(BodyTemp), n())
  mean(BodyTemp) sd(BodyTemp) n()
1          98.26    0.7653197  50
> library(ggplot2)
> ggplot(data = BT, mapping = aes(x = BodyTemp)) + 
+   geom_dotplot(fill = "lightblue") + 
+   theme_bw()

1.1 Null and Alternative Hypothesis

\[\begin{equation} H_O: \mu = 98.6 \\ H_A: \mu \neq 98.6 \tag{1.1} \end{equation}\]

Note: In order to construct a sampling distribution to assess the evidence in BT, one must generate new samples consistent with the null hypothesis \((\mu = 98.6)\). One way to generate samples that are consistent with the null hypothesis is to add 0.34 to all of the values in BodyTemp.

> BT <- BT %>%
+   mutate(NBT = BodyTemp + 0.34) 
> head(BT)
  BodyTemp Pulse Gender   NBT
1     97.6    69      0 97.94
2     99.4    77      1 99.74
3     99.0    75      0 99.34
4     98.8    84      1 99.14
5     98.0    71      0 98.34
6     98.9    76      1 99.24
> summarize(BT, mean(NBT))
  mean(NBT)
1      98.6

1.1.1 Generate bootstrap distribution

> B <- 10000 - 1
> xbarstarA <- numeric(B)
> xbarstar <- numeric(B)
> for(i in 1:B){
+   BSA <- sample(BT$NBT, size = 50, replace = TRUE)
+   BS <- sample(BT$BodyTemp, size = 50, replace = TRUE)
+   xbarstarA[i] <- mean(BSA)
+   xbarstar[i] <- mean(BS)
+ }
> mean(xbarstarA)
[1] 98.59951
> mean(xbarstar)
[1] 98.26071
> sd(xbarstarA)
[1] 0.1070428
> sd(xbarstar)
[1] 0.1085097
> ggplot(data = data.frame(x = xbarstarA), aes(x = x)) + 
+   geom_density(fill = "pink") + 
+   theme_bw() + 
+   geom_vline(xintercept = 98.26)

> p_value <- (sum(xbarstarA <= 98.26) + 1)/(B + 1)
> p_value
[1] 6e-04
> ggplot(data = data.frame(x = xbarstar), aes(x = x)) + 
+   geom_density(fill = "pink") + 
+   theme_bw()

1.1.2 Two bootstrap CIs

1.1.2.1 Percentile CI

> PCI <- quantile(xbarstar, probs = c(0.025, 0.975))
> PCI
  2.5%  97.5% 
98.048 98.474 

1.1.2.2 Bootstrap SE

> CIB <- mean(BT$BodyTemp) + c(-1, 1)*qnorm(.975)*sd(xbarstar)
> CIB
[1] 98.04732 98.47268

1.2 Classical Sampling Distribution

Assume \(X \sim\mathcal{N}(98.6, 0.77) \rightarrow \bar{X}\sim\mathcal{N}(98.6, 0.77/\sqrt{50} = 0.1088944)\)

1.2.1 Simulation approach

> SIMS <- 10000 - 1
> xbarC <- numeric(SIMS)
> xbarA <- numeric(SIMS)
> for(i in 1:SIMS){
+   xbarC[i] <- mean(rnorm(50, 98.26, 0.77))
+   xbarA[i] <- mean(rnorm(50, 98.26 + 0.34, 0.77))
+ }
> xbarS <- mean(xbarC)
> SES <- sd(xbarC)
> c(xbarS, SES)
[1] 98.2592156  0.1080678
> # Simulation
> ggplot(data = data.frame(x = xbarA), aes(x = x)) + 
+   geom_density(fill = "lightblue") + 
+   theme_bw()

> p_value <- (sum(xbarA <= 98.26) + 1)*2/(SIMS + 1)
> p_value
[1] 0.0026
> CIS <- quantile(xbarC, probs = c(0.025, 0.975))
> CIS
    2.5%    97.5% 
98.05016 98.47249 
> # Theoretical
> p_value <- pnorm(98.26, 98.6, 0.77/sqrt(50))*2
> p_value
[1] 0.001794503
> CIT <- mean(BT$BodyTemp) + c(-1, 1)*qnorm(0.975)*0.77/sqrt(50)
> CIT
[1] 98.04657 98.47343
> ## Or
> PASWR2::z.test(x = BT$BodyTemp, sigma.x = 0.77, mu = 98.6)

    One Sample z-test

data:  BT$BodyTemp
z = -3.1223, p-value = 0.001795
alternative hypothesis: true mean is not equal to 98.6
95 percent confidence interval:
 98.04657 98.47343
sample estimates:
mean of x 
    98.26 

1.3 Data Tweaking

> BT <- BT %>%
+   mutate(Gender = factor(Gender, levels = c(0, 1), labels = c( "Male", "Female")))
> DT::datatable(BT)
> BT %>%
+   group_by(Gender) %>%
+   summarize(mean(BodyTemp), sd(BodyTemp), n())
# A tibble: 2 × 4
  Gender `mean(BodyTemp)` `sd(BodyTemp)` `n()`
  <fctr>            <dbl>          <dbl> <int>
1   Male           98.172      0.6754751    25
2 Female           98.348      0.8505488    25
\[\begin{equation} H_O: \mu_{\text{Female}} - \mu_{\text{Male}} = 0 \\ H_O: \mu_{\text{Female}} - \mu_{\text{Male}} \neq 0 \tag{1.2} \end{equation}\]

1.3.1 Classical Approach

> t.test(BodyTemp ~ Gender, data = BT)

    Welch Two Sample t-test

data:  BodyTemp by Gender
t = -0.81021, df = 45.658, p-value = 0.422
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.6133456  0.2613456
sample estimates:
  mean in group Male mean in group Female 
              98.172               98.348 
> STUFF <- BT %>%
+   group_by(Gender) %>%
+   summarize(MA = mean(BodyTemp))
> obsdiff <- diff(tapply(BT$BodyTemp, BT$Gender, mean))
> obsdiff
Female 
 0.176 

1.3.2 Randomization Approach

> SIMS <- 10^4 - 1
> diffmeans <- numeric(SIMS)
> for(i in 1:SIMS){
+   diffmeans[i] <- diff(tapply(BT$BodyTemp, sample(BT$Gender), mean))
+ }
> mean(diffmeans)
[1] 0.00110171
> sd(diffmeans)
[1] 0.2151828
> pvalue <- ((sum(diffmeans >= obsdiff) + 1)/(SIMS + 1))*2
> pvalue
[1] 0.4402

1.3.3 Bootstrap CI

> B <- 10^4 - 1
> bsd <- numeric(B)
> for(i in 1:B){
+   bsm <- sample(BT$BodyTemp[BT$Gender == "Male"], size = 25, replace = TRUE)
+   bsf <- sample(BT$BodyTemp[BT$Gender == "Female"], size = 25, replace = TRUE)
+   bsd[i] <- mean(bsf) - mean(bsm)
+ }
> CI <- quantile(bsd, probs = c(0.025, 0.975))
> CI
  2.5%  97.5% 
-0.240  0.596