# Confidence intervals on a simple random sample of the cleaned dataset:
#   1. 90% CI for the mean of `salary` (Student t, population variance unknown)
#   2. 90% CI for the proportion of `prepared_CV` (normal approximation)
# Each manual interval is followed by the built-in one-line equivalent.

library(readxl)
library(dplyr)

data <- read_excel("C:/Users/utente aula-9B/Downloads/dataset_clean.xlsx")
View(data)

# Select a sample ----
# NOTE(review): the draw is random; call set.seed() first if the results
# below need to be reproducible between runs.
n <- 20 # sample size
datasample <- data[sample(nrow(data), n, replace = FALSE), ]

# Point estimate of the mean - salary ----
summary(datasample$salary)
hist(datasample$salary)
samp_mean <- mean(datasample$salary, na.rm = TRUE)
mean(data$salary, na.rm = TRUE) # population mean, for comparison

# Confidence interval (90%) for the mean - salary ----
# Small sample and population variance unknown --> Student t distribution.

# Missing salaries are dropped by `na.rm = TRUE`, so the standard error and
# the degrees of freedom must use the number of observed values, not the
# nominal sample size. This also keeps the result in line with t.test().
n_obs <- sum(!is.na(datasample$salary))

# 1 - sample mean
samp_mean

# 2 - critical value of the Student t distribution (confidence level + df)
alpha <- 0.10
df_t <- n_obs - 1
t_crit <- qt(1 - alpha / 2, df = df_t)

# 3 - standard error (S / sqrt(n))
samp_sd <- sd(datasample$salary, na.rm = TRUE) # sample standard deviation
se <- samp_sd / sqrt(n_obs)

lower <- samp_mean - t_crit * se
upper <- samp_mean + t_crit * se
c(lower, upper)

# Interpretation: we are 90% confident that this interval contains the true
# population mean, i.e. intervals built with this procedure cover the true
# mean in 90% of repeated samples.

# Same interval with a single command
t.test(datasample$salary, conf.level = 1 - alpha)$conf.int

################################################################################

# Confidence interval (90%) for a proportion - prepared_CV ----
table(datasample$prepared_CV)

# NOTE(review): this count was read off the table above for one particular
# random draw; update it (or compute it from the table) whenever the sample
# changes, otherwise the interval below describes a different sample.
x_success <- 18
p <- x_success / n # sample proportion

sampling_var <- p * (1 - p) # sample variance (S^2) of a 0/1 variable

# standard error (sd / sqrt(n))
se <- sqrt(sampling_var / n)

# Ingredients: proportion (p), critical value, standard error.
# The critical value z comes from the standard normal distribution.
alpha <- 0.1
z_crit <- qnorm(1 - alpha / 2)
z_crit

lower <- p - z_crit * se
upper <- p + z_crit * se
c(lower, upper) # confidence interval at 90% (normal approximation)

# Confidence interval with a single command.
# prop.test(correct = FALSE) returns the Wilson score interval, so its bounds
# are close to, but not identical to, the manual interval above.
prop.test(x = x_success, n = n, conf.level = 1 - alpha, correct = FALSE)$conf.int