Data Frame
Height <- c(70, 70, 72, 70, 73, 63, 65, 70, 64, 70)
WingSpan <- c(69, 69, 73, 71, 73, 62, 66, 69, 64, 70)
Sex <- c(rep("Male", 5), rep("Female", 5))
Name <- c("Tanner", "Kory", "Chandler", "Elliot", "Eddie",
          "Emma", "Jacqueline", "Catherine", "Andi", "Emily")
DF <- data.frame(Name = Name, HeightIn = Height, WingSpan = WingSpan, Sex = Sex)
rm(Name, Height, WingSpan, Sex)
str(DF)'data.frame':   10 obs. of  4 variables:
 $ Name    : Factor w/ 10 levels "Andi","Catherine",..: 10 9 3 5 4 7 8 2 1 6
 $ HeightIn: num  70 70 72 70 73 63 65 70 64 70
 $ WingSpan: num  69 69 73 71 73 62 66 69 64 70
 $ Sex     : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 1 1 1 1 1DF$Name <- as.character(DF$Name)
str(DF)'data.frame':   10 obs. of  4 variables:
 $ Name    : chr  "Tanner" "Kory" "Chandler" "Elliot" ...
 $ HeightIn: num  70 70 72 70 73 63 65 70 64 70
 $ WingSpan: num  69 69 73 71 73 62 66 69 64 70
 $ Sex     : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 1 1 1 1 1library(ggplot2)
ggplot(data = DF, aes(x = HeightIn, y = WingSpan, color = Sex)) +
  geom_point() + 
  geom_smooth(method = "lm") + 
  theme_bw()library(dplyr)
Res <- DF %>%
  group_by(Sex) %>%
  summarize(MH = mean(HeightIn), SH = sd(HeightIn), n = n())
Res# A tibble: 2 × 4
     Sex    MH       SH     n
  <fctr> <dbl>    <dbl> <int>
1 Female  66.4 3.361547     5
2   Male  71.0 1.414214     5knitr::kable(Res)| Sex | MH | SH | n | 
|---|---|---|---|
| Female | 66.4 | 3.361547 | 5 | 
| Male | 71.0 | 1.414214 | 5 | 
DT::datatable(Res)\(H_0: \textrm{The average height of males is the no different than the average height of females.}\)
\(H_A: \textrm{The average height of males is greater than the average height of females.}\)
Written mathematically,
\(H_0: \mu_{\textrm{males}} - \mu_{\textrm{females}} = 0\)
\(H_A: \mu_{\textrm{males}} - \mu_{\textrm{females}} > 0\)
# Using ggplot2
ggplot(data = DF, aes(sample = HeightIn, color = Sex)) +
  geom_qq() + 
  theme_bw()xbar1 <- Res[2, 2]
xbar2 <- Res[1, 2]
s1 <- Res[2, 3]
s2 <- Res[1, 3]
n1 <- Res[2, 4]
n2 <- Res[1, 4]
TS <- (xbar1 - xbar2)/(s1^2/n1 + s2^2/n2)^0.5
names(TS) <- "TestStatistic"
TS  TestStatistic
1      2.820441str(TS$TestStatistic) num 2.82nu <- (s1^2/n1 + s2^2/n2)^2 / ((s1^2/n1)^2/(n1-1) + (s2^2/n2)^2/(n2-1))
names(nu) <- "DegreesOfFreedom"
nu  DegreesOfFreedom
1         5.372921curve(dt(x, nu$DegreesOfFreedom), from = -4, to = 4, n = 500, col = "purple", ylab = "")ggplot2ggplot(data.frame(x = c(-4, 4)), aes(x=x)) +
  stat_function(fun = dt, args = list(df = nu$DegreesOfFreedom)) + 
  theme_bw() + 
  geom_vline(xintercept = TS$TestStatistic, color = "purple") + 
  geom_hline(yintercept = 0, color = "black")ts <- TS$TestStatistic
ts[1] 2.820441nu <- nu$DegreesOfFreedom
nu[1] 5.372921pvalue <- pt(ts, nu, lower = FALSE)
pvalue[1] 0.01711664The \(P(t_{\nu} = t_{5.3729213} \ge 2.820441) = 0.0171166.\)
t.test(HeightIn ~ Sex, data = DF, alternative = "less")
    Welch Two Sample t-test
data:  HeightIn by Sex
t = -2.8204, df = 5.3729, p-value = 0.01712
alternative hypothesis: true difference in means is less than 0
95 percent confidence interval:
      -Inf -1.363279
sample estimates:
mean in group Female   mean in group Male 
                66.4                 71.0 N <- 10^4 - 1
PTS <- numeric(N)
for(i in 1:N){
  PTS[i] <- t.test(HeightIn ~ sample(Sex), data = DF)$stat
}
epv <- (sum(PTS >= ts) + 1)/(N + 1)
epv[1] 1e-04ggplot(data = data.frame(x = PTS), aes(x = x)) +
  geom_density(fill = "purple") + 
  theme_bw() + 
  labs(x = expression(bar(x)[Male]-bar(x)[Female]), title = "Randomization Distribution")HeightInM <- DF$HeightIn[DF$Sex =="Male"]
HeightInM[1] 70 70 72 70 73HeightInF <- subset(DF, Sex == "Female", select = HeightIn, drop = TRUE)
HeightInF[1] 63 65 70 64 70B <- 10^4 - 1
BSD <- numeric(B)
for(i in 1:B){
  bs1 <- sample(HeightInM, size = 5, replace = TRUE)
  bs2 <- sample(HeightInF, size = 5, replace = TRUE)
  BSD[i] <- mean(bs1) - mean(bs2)
}
ggplot(data = data.frame(x = BSD), aes(x = x)) +
  geom_density(fill = "lightblue") +
  theme_bw() + 
  labs(x = substitute(paste(bar(x)[1],"*", -bar(x)[2],"*")), title = "Bootstrap Distribution")quantile(BSD, probs = c(0.05, 0.95)) 5% 95% 
2.2 7.0