# --- R Demo A: Sampling distribution of the mean ---
set.seed(123)
# 1) Create a synthetic population (e.g., salaries in ???)
N <- 100000
pop_mean <- 1900
pop_sd   <- 200
population <- rnorm(N, mean = pop_mean, sd = pop_sd)


# 2) Draw many samples and compute sample means
B <- 3000      # number of samples
n <- 30        # sample size per sample
samp_means <- replicate(B, mean(sample(population, n)))
samp_means
hist(samp_means)

# 3) Compare empirical sampling distribution to theory
mean_samp_means <- mean(samp_means)
sd_samp_means   <- sd(samp_means)           # empirical SE
se_theory       <- pop_sd / sqrt(n)         # theoretical SE (?? known)

cat(sprintf("Mean of sample means: %.2f (true ?? = %.0f)\n", mean_samp_means, pop_mean))

# 4) Plot

library(ggplot2)

df <- data.frame(samp_means = samp_means)

# plot the sampling distribution of the means with B replication 
hist(samp_means,
     freq = FALSE,                         # density
     breaks = 40,                          # num of bins
     col = "grey80", border = "grey40",    # colori
     main = sprintf("Sampling distribution of the mean (n = %d, B = %d)", n, B),
     xlab = "Sample mean", ylab = "Density")

# normal curve 
curve(dnorm(x, mean = pop_mean, sd = se_theory),
      add = TRUE, lwd = 2, col = "blue")

# Aggiungi un sottotitolo con SE empirico e teorico
mtext(sprintf("Empirical SE ??? %.1f; Theoretical SE = ??/???n = %.1f",
              sd_samp_means, se_theory), side = 3, line = 0.5, cex = 0.8)


#############

# --- R Demo B: One-sample t CI for the mean ---
set.seed(42)
# Pretend this is your sample of expected salaries (n small ??? use t)
n  <- 25
x  <- rnorm(n, mean = 1900, sd = 220)
xbar <- mean(x)
s    <- sd(x) # st. deviation in the sample
alpha <- 0.05

tcrit <- qt(1 - alpha/2, df = n - 1)
se    <- s / sqrt(n)
L <- xbar - tcrit * se
U <- xbar + tcrit * se

round(c(mean = xbar, L = L, U = U), 2)

# Nice plot of the CI on a number line
library(ggplot2)

df_ci <- data.frame(xbar = xbar, L = L, U = U)

p_ci <- ggplot(df_ci) +
  geom_segment(aes(x = L, xend = U, y = 0, yend = 0), linewidth = 3) +
  geom_point(aes(x = xbar, y = 0), size = 3) +
  geom_vline(xintercept = 1900, linetype = 2) +
  labs(title = "95% t-confidence interval for ?? (?? unknown)",
       subtitle = sprintf("x?? = %.1f, s = %.1f, n = %d", xbar, s, n),
       x = "Euros", y = "") +
  theme_minimal(base_size = 13) +
  theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())

print(p_ci)


############################################################################
set.seed(123)

# true parameters of the pop (to be used for ssimulate the data) 
mu_true <- 1900
sigma_true <- 220

# sample and n.of replication
n  <- 25     # dimension of the sample 
B  <- 200    # n. of sample(s)
conf <- 0.95 # confidence level
alpha <- 1 - conf

# Simulate B samples, compute sampling mean x??, s, t-IC
xbar <- s <- L <- U <- rep(NA_real_, B)
for (b in 1:B) {
  x <- rnorm(n, mean = mu_true, sd = sigma_true)
  xbar[b] <- mean(x)
  s[b]    <- sd(x)
  tcrit   <- qt(1 - alpha/2, df = n - 1)
  se      <- s[b] / sqrt(n)
  L[b]    <- xbar[b] - tcrit * se
  U[b]    <- xbar[b] + tcrit * se
}

# Coverage (TRUE if Confidence interval include the true parameter)
cover <- (L <= mu_true) & (U >= mu_true)
cover
coverage_rate <- mean(cover)

# 
ord <- order(xbar) # sort the mean (only for aestethic purposes)
xbar <- xbar[ord]; L <- L[ord]; U <- U[ord]; cover <- cover[ord]

# ---- figure R: "caterpillar plot" of conf. int ----
par(mar = c(4, 8, 3, 2))
plot(x = xbar, y = seq_len(B),
     xlim = range(c(L, U, mu_true)),
     pch = 16, col = ifelse(cover, "forestgreen", "firebrick"),
     yaxt = "n", ylab = "", xlab = "Mean & 95% CI",
     main = sprintf("t-CIs for ?? (n = %d, B = %d) ??? Coverage ??? %.1f%%", n, B, 100*coverage_rate))

# add vertical line on the true mean ( ?? ) 
abline(v = mu_true, lty = 2, lwd = 2)


# segments for each conf. int
segments(x0 = L, y0 = seq_len(B), x1 = U, y1 = seq_len(B),
         col = ifelse(cover, "forestgreen", "firebrick"), lwd = 2)


mtext(sprintf("Green = covers ??; Red = misses ??  |  True ?? = %.0f", mu_true), side = 3, line = 0.5, cex = 0.9)


############################################################################
###################### --- R Demo C: CI for a proportion ---

set.seed(7)
# Suppose we surveyed n people; y said "Yes" (success)
n <- 400
y <- rbinom(1, size = n, prob = 0.32)   # simulate 32% true proportion
phat <- y / n
se   <- sqrt(phat * (1 - phat) / n)
alpha <- 0.05
z <- qnorm(1 - alpha/2)
L <- phat - z * se
U <- phat + z * se

round(c(phat = phat, L = L, U = U), 3)


# --- R Demo D: Normal curve with shaded central area ---
library(ggplot2)

mu <- 0; sigma <- 1
Lz <- -1.96; Uz <- 1.96
x <- seq(-4, 4, length.out = 1000)

df <- data.frame(x = x, d = dnorm(x, mu, sigma))
df$shade <- with(df, x >= Lz & x <= Uz)

p <- ggplot(df, aes(x, d)) +
  geom_line(linewidth = 1) +
  geom_area(data = subset(df, shade), aes(y = d), alpha = 0.3) +
  labs(title = "Standard Normal with Central 95% Shaded",
       x = "z", y = "Density") +
  theme_minimal(base_size = 13)
print(p)