###############################################################################
###################### confidence interval ###################################

# Confidence interval for the mean when the population variance is known:
# the sample mean is then a Normal R.V., so the z quantile is used.

x_bar <- 2.20 # sample mean (not named `mean`, to avoid masking base::mean())
sigma <- 0.35 # st. dev. in the population (known)
n <- 11       # sample size

alpha <- 0.05 # significance level -> confidence level is 1 - alpha = 95%

# z quantile (z alpha half): leaves alpha / 2 in the upper tail
quant_Z <- qnorm(1 - alpha / 2)
quant_Z # 1.959964

# standard error of the sample mean
se <- sigma / sqrt(n)

# interval = sample mean -/+ quantile * standard error
low_bound <- x_bar - quant_Z * se
upp_bound <- x_bar + quant_Z * se

ci <- c(low_bound, upp_bound)
ci

# Confidence interval for the mean when the population variance is unknown:
# the standardized sample mean follows a Student's t with n - 1 d.o.f.

x_bar <- 50 # sample mean (not named `mean`, to avoid masking base::mean())
S <- 8      # st. dev. in the SAMPLE (S)
n <- 25     # sample size

alpha <- 0.05 # significance level -> confidence level is 1 - alpha = 95%

# degrees of freedom of the t-student (n - 1)
gdl <- n - 1
gdl

# t quantile leaving alpha / 2 in the upper tail
quant_T <- qt(1 - alpha / 2, df = gdl)
quant_T # 2.0638

# estimated standard error of the sample mean
se <- S / sqrt(n)

# interval = sample mean -/+ quantile * standard error
low_bound <- x_bar - quant_T * se
upp_bound <- x_bar + quant_T * se

ci <- c(low_bound, upp_bound)
ci

# NOTE(review): neither `ci_t` nor `l` is defined in the visible part of this
# script -- presumably a helper loaded from elsewhere; confirm it is available,
# otherwise this line stops the script with an error.
# The numeric arguments match the t-interval example above (mean 50,
# 8 / sqrt(25) = 1.6, n = 25, alpha = 0.05) -- argument meaning assumed, TODO confirm.
ci_t(l, 50, 1.6, 25, 0.05) # type: l (lower) or u (upper)

# Confidence interval for a proportion (n is big --> the sampling distribution
# of the sample proportion is approximated by a Normal R.V.)
# The approximation requires: n * p > 5 and n * (1 - p) > 5

n <- 100
p <- 25 / 100 # p is the sample proportion

n * p       # 25
n * (1 - p) # 75
# make the check explicit, so changed inputs cannot silently break the assumption
stopifnot(n * p > 5, n * (1 - p) > 5)

s <- p * (1 - p) # estimated variance of a single Bernoulli observation

se <- sqrt(s / n) # standard error of the sample proportion

alpha <- 0.05 # significance level -> confidence level is 1 - alpha = 95%

# z quantile (z alpha half): leaves alpha / 2 in the upper tail
quant_Z <- qnorm(1 - alpha / 2)
quant_Z # 1.959964

# interval = sample proportion -/+ quantile * standard error
low_bound <- p - quant_Z * se
upp_bound <- p + quant_Z * se

ci <- c(low_bound, upp_bound)
ci


############################ confidence interval with dataset ##############
# Work on the built-in mtcars data and have a first look at it
data(mtcars)
str(mtcars)
nrow(mtcars)

# Target: 95% c.i. for the mean of the variable mpg
summary(mtcars$mpg)

# Sample statistics needed for the interval
n <- nrow(mtcars)
mx <- mean(mtcars$mpg)
S <- sd(mtcars$mpg)

# Population variance unknown and n small -> Student's t distribution
alpha <- 0.05

# degrees of freedom of the t-student
gdl_X <- n - 1
gdl_X

# t quantile leaving alpha / 2 in the upper tail
quant_t <- qt(1 - alpha / 2, df = gdl_X)
quant_t

# half-width of the interval: quantile times estimated standard error
half_width <- quant_t * S / sqrt(n)
low_b <- mx - half_width
upp_b <- mx + half_width

IC95 <- c(low_b, upp_b)
IC95


##### t.test() returns the same c.i. with a single command
t.test(mtcars$mpg, conf.level = 0.95)$conf.int


################################################################################
# confidence interval with variance known --> use z (normal distrib)!

# estimate the c.i. for the variable "mpg" (dataset mtcars) with sigma (st. dev.) known
# sigma = 5 (value in the population known)
# c.i. 95%

# Install BSDA only when it is missing, so that re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("BSDA", quietly = TRUE)) {
  install.packages("BSDA")
}
library("BSDA")

# z.test() comes from BSDA; sigma.x is the known population st. dev.
z.test(mtcars$mpg, sigma.x = 5, conf.level = 0.95)$conf.int


#### confidence interval for a proportion with R
# variable am
# A binary variable signaling whether the vehicle has automatic (am = 0) or
# manual (am = 1) transmission configuration

table(mtcars$am)

# Derive the counts from the data instead of copying them by hand from the table
x_manual <- sum(mtcars$am == 1) # am = 1 --> x = 13
n_cars <- nrow(mtcars)          # n = 32 (dim of the dataset)

# sample proportion: x_manual / n_cars
# Note: with correct = FALSE prop.test() returns the Wilson score interval,
# which differs slightly from the hand-computed p -/+ z * se (Wald) interval.
prop.test(x = x_manual, n = n_cars, correct = FALSE)$conf.int


##############################################################################################
# exercise to do
# open the file "marketing campaign" (call it m.c.)
# estimate the 90% c.i. for the fruit expenditure of all the customers in the pop.
# estimate the 90% c.i. for the proportion of customers in the pop. who "complaint"