library(readxl)
# Load the cleaned dataset from the Excel workbook.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs as-is on a computer where this file exists at this location.
xlsx_path <- "C:/Users/utente aula-9B/Downloads/dataset_clean.xlsx"
data <- read_excel(xlsx_path)

# Open the data viewer to inspect the imported table.
View(data)

# select a sample 
library(dplyr)
# Sample size used for every estimate below.
n <- 20

# Simple random sample of n rows, drawn without replacement.
# NOTE(review): no seed is set, so every run draws a different sample; call
# set.seed() before this line if the results must be reproducible.
sample_rows <- sample(nrow(data), size = n, replace = FALSE)
datasample <- data[sample_rows, ]


# Point estimate of the mean salary, computed on the sample.
summary(datasample$salary)
hist(datasample$salary)
samp_mean <- mean(datasample$salary, na.rm = TRUE)

# Population mean, printed for comparison with the sample estimate.
mean(data$salary, na.rm = TRUE)

# Confidence interval (90%) for the mean salary.
# Small sample and population variance unknown --> Student t distribution.

# 1 - sample mean (point estimate computed earlier in the script)
samp_mean

# 2 - critical value of the Student t distribution (confidence level + df).
# The mean and the standard deviation are computed with na.rm = TRUE, so the
# sample size used here must be the number of non-missing salaries; using the
# nominal n would make this interval disagree with t.test() whenever the
# sample contains missing values.
alpha <- 0.10
n_obs <- sum(!is.na(datasample$salary))
df <- n_obs - 1
t_crit <- qt(1 - alpha / 2, df = df)

# 3 - standard error (S / sqrt(n)), with S the sample standard deviation
samp_sd <- sd(datasample$salary, na.rm = TRUE)
se <- samp_sd / sqrt(n_obs)

lower <- samp_mean - t_crit * se
upper <- samp_mean + t_crit * se

c(lower, upper)
# Interpretation of the c.i.:
# the procedure has a 90% confidence level, i.e. if we drew many samples and
# built an interval from each one in this way, about 90% of those intervals
# would contain the true population mean. The population mean is a fixed
# value, so a single interval either contains it or it does not.

# Same interval with a single command (t.test drops missing values itself).
t.test(datasample$salary, conf.level = 0.90)$conf.int


################################################################################
# Confidence interval (90%) for the proportion of prepared_CV in the sample.
table(datasample$prepared_CV)

# Number of "successes" in the sample.
# NOTE(review): this count was read off the table above for one particular
# draw; because the sample is random it must be updated (or derived from the
# table) whenever the sample changes.
x_success <- 18
stopifnot(x_success >= 0, x_success <= n)

p <- x_success / n # sample proportion
sampling_var <- p * (1 - p) # sample variance (S2)

# Standard error (sd / sqrt(n))
se <- sqrt(sampling_var / n)

# Ingredients: proportion (p), critical value, standard error.
# The critical value z comes from the standard normal distribution.
alpha <- 0.1
z_crit <- qnorm(1 - alpha / 2)
z_crit

# Normal-approximation (Wald) interval. A proportion lies in [0, 1], so the
# bounds are clamped: with p close to 0 or 1 and a small n, p +/- z * se can
# fall outside that range.
lower <- max(0, p - z_crit * se)
upper <- min(1, p + z_crit * se)

c(lower, upper) # conf interval at 90%

# Confidence interval with a single command.
# prop.test() returns a score interval rather than the Wald interval computed
# above, so the two results are close but not identical.
prop.test(x = x_success, n = n, conf.level = 0.90, correct = FALSE)$conf.int