########################################################## ########## Linear regression model ########## ########## Conf. interval and Hypotesis test ########## ########################################################## ############## Confidence interval lm with R ############## library(readxl) food <- read_excel("food.xlsx") lm <- lm(food_exp ~ income, data=food) print(lm) summary(lm) #confidence interval computation by hand b1 <- coef(lm)[[1]] #intercept b2 <- coef(lm)[[2]] #slope se_b1<-coef(summary(lm))[1,"Std. Error"] #s.e. of intercept (b1) se_b2<-coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2) #compute conf.int for b2 and b1 alpha <- 0.05 # confidence level alpha # define n. of gdl of t-student nrow(food) #dimension of the dataset food gdl <- (nrow(food)) - 2 #remove two d.f. (since we estimated two parameters b1 and b2) quant_t <- qt(1 - alpha / 2,gdl) quant_t low_bound=b2-quant_t*(se_b2) upp_bound=b2+quant_t*(se_b2) IC_b2 <- c(low_bound,upp_bound) IC_b2 low_bound=b1-quant_t*(se_b1) upp_bound=b1+quant_t*(se_b1) IC_b1 <- c(low_bound,upp_bound) IC_b1 # in R (for the regression model) we can use the following command: # run confint after the command lm (linear model) confint(lm) #confidence interval ############## Hypotesis test linear model with R ############## library(readxl) food <- read_excel("food.xlsx") lm <- lm(food_exp ~ income, data=food) print(lm) #example1 - upper tail test # The null hypothesis is H0:beta2 = 0 #The alternative hypothesis is H1:beta2 > 0 #alpha=0.05 B2=0 # value included in the null hypotesis (assumed in the population) # B2=0 means that this coefficient is NOT significant in the population b2=coef(lm)[[2]] #slope estimated in the model se_b2= coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2) n=nrow(food) # nrow # determine residual degrees of freedom (in this case you have n-k-1 or n-2), #k is n.ro of independed variables (x vars) in the simple linear regr. model K=1) df<-lm$df.residual #(n-2 or n-k-1 this second is a general formula) t=(b2-B2)/se_b2 # stat. T t #evidence from the "sample" alpha=0.05 qt=qt(1-alpha, df=n-2) #critical value table t.student qt=qt(1-alpha, df=df) # alternatively use df qt #critical value computed from the t-stud table #t>qt -> reject Ho (upper tail test) # p-value pval = pt(t, df=df, lower.tail=FALSE) pval # is significant the coeff b2? #this test can be used only for the significance of the coeff (when we declare that the null Hyp is = 0) library(lmtest) coeftest(lm) library(car) linearHypothesis(lm, "income=0") #income is the indep. variable, you can adapt this command with the name of the x-var ## example 2 - upper tail test #The null hypothesis is H0:beta2 = 5.5 #The alternative hypothesis is H1:beta2 > 5.5 #alpha=0.01 B2=5.5 b2=coef(lm)[[2]] #slope estimated in the model se_b2= coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2) t=(b2-B2)/se_b2 # statistic T t alpha=0.01 qt=qt(1-alpha, df=n-2) #critical value table t.student qt # t>qt -> no reject Ho pval = pt(t, df=df, lower.tail=FALSE) pval #pval>alpha - no reject H0 library(car) linearHypothesis(lm, "income=5.5") ## example 3 - lower tail test - left rej area #The null hypothesis is H0:beta2 = 15 #The alternative hypothesis is H1:beta2 < 15 #alpha=0.05 B2=15 b2=coef(lm)[[2]] #slope estimated in the model se_b2= coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2) t=(b2-B2)/se_b2 # statistic T t alpha=0.05 qt=qt(alpha, df=df) #critical value table t.student qt # - reject Ho since |t| > |qt| # p-value pval = pt(t, df=df) pval ## example 4 - two tails test #The null hypothesis is H0:beta2 = 7.5 #The alternative hypothesis is H1:beta2 ??? 7.5 #alpha=0.05 B2=7.5 b2=coef(lm)[[2]] #slope estimated in the model se_b2= coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2) t=(b2-B2)/se_b2 # statistic T t alpha=0.05 qt=qt(1-alpha/2, df=df) #critical values table t.student qt # Do not reject Ho pval = 2 * pt(t, df=df, lower.tail = FALSE) # lower tail pval #p-val>alpha-> no reject Ho ### example 5 - two tails test #The null hypothesis is H0:beta = 0 #The alternative hypothesis is H1:beta ??? 0 #alpha=0.05 B2=0 b2=coef(lm)[[2]] #slope estimated in the model se_b2= coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2) t=(b2-B2)/se_b2 # statistic T t alpha=0.05 qt=qt(1-alpha/2, df=df) #critical values table t.student qt # reject H0 pval = 2 * pt(t, df=df, lower.tail = FALSE) # lower tail pval #p-val reject Ho