> BT <- read.csv("http://www.lock5stat.com/datasets1e/BodyTemp50.csv")
> library(dplyr)
> BT %>%
+ summarize(mean(BodyTemp), sd(BodyTemp), n())
mean(BodyTemp) sd(BodyTemp) n()
1 98.26 0.7653197 50
> library(ggplot2)
> ggplot(data = BT, mapping = aes(x = BodyTemp)) +
+ geom_dotplot(fill = "lightblue") +
+ theme_bw()
Note: In order to construct a sampling distribution to assess the evidence in BT
, one must generate new samples consistent with the null hypothesis \((\mu = 98.6)\). One way to generate samples that are consistent with the null hypothesis is to add 0.34 to all of the values in BodyTemp
.
> BT <- BT %>%
+ mutate(NBT = BodyTemp + 0.34)
> head(BT)
BodyTemp Pulse Gender NBT
1 97.6 69 0 97.94
2 99.4 77 1 99.74
3 99.0 75 0 99.34
4 98.8 84 1 99.14
5 98.0 71 0 98.34
6 98.9 76 1 99.24
> summarize(BT, mean(NBT))
mean(NBT)
1 98.6
> B <- 10000 - 1
> xbarstarA <- numeric(B)
> xbarstar <- numeric(B)
> for(i in 1:B){
+ BSA <- sample(BT$NBT, size = 50, replace = TRUE)
+ BS <- sample(BT$BodyTemp, size = 50, replace = TRUE)
+ xbarstarA[i] <- mean(BSA)
+ xbarstar[i] <- mean(BS)
+ }
> mean(xbarstarA)
[1] 98.59951
> mean(xbarstar)
[1] 98.26071
> sd(xbarstarA)
[1] 0.1070428
> sd(xbarstar)
[1] 0.1085097
> ggplot(data = data.frame(x = xbarstarA), aes(x = x)) +
+ geom_density(fill = "pink") +
+ theme_bw() +
+ geom_vline(xintercept = 98.26)
> p_value <- (sum(xbarstarA <= 98.26) + 1)/(B + 1)
> p_value
[1] 6e-04
> ggplot(data = data.frame(x = xbarstar), aes(x = x)) +
+ geom_density(fill = "pink") +
+ theme_bw()
> PCI <- quantile(xbarstar, probs = c(0.025, 0.975))
> PCI
2.5% 97.5%
98.048 98.474
> CIB <- mean(BT$BodyTemp) + c(-1, 1)*qnorm(.975)*sd(xbarstar)
> CIB
[1] 98.04732 98.47268
Assume \(X \sim\mathcal{N}(98.6, 0.77) \rightarrow \bar{X}\sim\mathcal{N}(98.6, 0.77/\sqrt{50} = 0.1088944)\)
> SIMS <- 10000 - 1
> xbarC <- numeric(SIMS)
> xbarA <- numeric(SIMS)
> for(i in 1:SIMS){
+ xbarC[i] <- mean(rnorm(50, 98.26, 0.77))
+ xbarA[i] <- mean(rnorm(50, 98.26 + 0.34, 0.77))
+ }
> xbarS <- mean(xbarC)
> SES <- sd(xbarC)
> c(xbarS, SES)
[1] 98.2592156 0.1080678
> # Simulation
> ggplot(data = data.frame(x = xbarA), aes(x = x)) +
+ geom_density(fill = "lightblue") +
+ theme_bw()
> p_value <- (sum(xbarA <= 98.26) + 1)*2/(SIMS + 1)
> p_value
[1] 0.0026
> CIS <- quantile(xbarC, probs = c(0.025, 0.975))
> CIS
2.5% 97.5%
98.05016 98.47249
> # Theoretical
> p_value <- pnorm(98.26, 98.6, 0.77/sqrt(50))*2
> p_value
[1] 0.001794503
> CIT <- mean(BT$BodyTemp) + c(-1, 1)*qnorm(0.975)*0.77/sqrt(50)
> CIT
[1] 98.04657 98.47343
> ## Or
> PASWR2::z.test(x = BT$BodyTemp, sigma.x = 0.77, mu = 98.6)
One Sample z-test
data: BT$BodyTemp
z = -3.1223, p-value = 0.001795
alternative hypothesis: true mean is not equal to 98.6
95 percent confidence interval:
98.04657 98.47343
sample estimates:
mean of x
98.26
> BT <- BT %>%
+ mutate(Gender = factor(Gender, levels = c(0, 1), labels = c( "Male", "Female")))
> DT::datatable(BT)
> BT %>%
+ group_by(Gender) %>%
+ summarize(mean(BodyTemp), sd(BodyTemp), n())
# A tibble: 2 × 4
Gender `mean(BodyTemp)` `sd(BodyTemp)` `n()`
<fctr> <dbl> <dbl> <int>
1 Male 98.172 0.6754751 25
2 Female 98.348 0.8505488 25
\[\begin{equation}
H_O: \mu_{\text{Female}} - \mu_{\text{Male}} = 0 \\
H_O: \mu_{\text{Female}} - \mu_{\text{Male}} \neq 0
\tag{1.2}
\end{equation}\]
> t.test(BodyTemp ~ Gender, data = BT)
Welch Two Sample t-test
data: BodyTemp by Gender
t = -0.81021, df = 45.658, p-value = 0.422
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.6133456 0.2613456
sample estimates:
mean in group Male mean in group Female
98.172 98.348
> STUFF <- BT %>%
+ group_by(Gender) %>%
+ summarize(MA = mean(BodyTemp))
> obsdiff <- diff(tapply(BT$BodyTemp, BT$Gender, mean))
> obsdiff
Female
0.176
> SIMS <- 10^4 - 1
> diffmeans <- numeric(SIMS)
> for(i in 1:SIMS){
+ diffmeans[i] <- diff(tapply(BT$BodyTemp, sample(BT$Gender), mean))
+ }
> mean(diffmeans)
[1] 0.00110171
> sd(diffmeans)
[1] 0.2151828
> pvalue <- ((sum(diffmeans >= obsdiff) + 1)/(SIMS + 1))*2
> pvalue
[1] 0.4402
> B <- 10^4 - 1
> bsd <- numeric(B)
> for(i in 1:B){
+ bsm <- sample(BT$BodyTemp[BT$Gender == "Male"], size = 25, replace = TRUE)
+ bsf <- sample(BT$BodyTemp[BT$Gender == "Female"], size = 25, replace = TRUE)
+ bsd[i] <- mean(bsf) - mean(bsm)
+ }
> CI <- quantile(bsd, probs = c(0.025, 0.975))
> CI
2.5% 97.5%
-0.240 0.596