##########################################################
##########      Linear regression model         ##########
##########    Conf. interval and Hypotesis test ##########
##########################################################

############## Confidence interval lm with R ##############

library(readxl)
food <- read_excel("food.xlsx")

lm <- lm(food_exp ~ income, data=food) 
print(lm)
summary(lm)

#confidence interval computation by hand
b1 <- coef(lm)[[1]] #intercept
b2 <- coef(lm)[[2]] #slope
se_b1<-coef(summary(lm))[1,"Std. Error"] #s.e. of intercept (b1)
se_b2<-coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2)

#compute conf.int for b2 and b1 
alpha <- 0.05 # confidence level
alpha      

# define n. of gdl of t-student 
nrow(food) #dimension of the dataset food 
gdl <- (nrow(food)) - 2 #remove two d.f. (since we estimated two parameters b1 and b2)

quant_t <- qt(1 - alpha / 2,gdl)
quant_t      

low_bound=b2-quant_t*(se_b2)
upp_bound=b2+quant_t*(se_b2)

IC_b2 <- c(low_bound,upp_bound)
IC_b2

low_bound=b1-quant_t*(se_b1)
upp_bound=b1+quant_t*(se_b1)

IC_b1 <- c(low_bound,upp_bound)
IC_b1


# in R (for the regression model) we can use the following command: 
# run confint after the command lm (linear model)
confint(lm) #confidence interval 


############## Hypotesis test linear model with R ##############
library(readxl)
food <- read_excel("food.xlsx")

lm <- lm(food_exp ~ income, data=food) 
print(lm)

#example1 - upper tail test
# The null hypothesis is H0:beta2 = 0
#The alternative hypothesis is H1:beta2 > 0
#alpha=0.05

B2=0 # value included in the null hypotesis (assumed in the population)
# B2=0 means that this coefficient is NOT significant in the population


b2=coef(lm)[[2]] #slope estimated in the model 
se_b2= coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2)
n=nrow(food) # nrow 

# determine residual degrees of freedom (in this case you have n-k-1 or n-2), 
#k is n.ro of independed variables (x vars) in the simple linear regr. model K=1)

df<-lm$df.residual  #(n-2 or n-k-1 this second is a general formula)

t=(b2-B2)/se_b2 # stat. T 
t #evidence from the "sample"
alpha=0.05
qt=qt(1-alpha, df=n-2) #critical value table t.student
qt=qt(1-alpha, df=df) # alternatively use df 
qt #critical value computed from the t-stud table

#t>qt -> reject Ho (upper tail test) 
# p-value
pval = pt(t, df=df, lower.tail=FALSE) 
pval    

# is significant the coeff b2? 
#this test can be used only for the significance of the coeff (when we declare that the null Hyp is = 0)
library(lmtest)
coeftest(lm)

library(car)
linearHypothesis(lm, "income=0") #income is the indep. variable, you can adapt this command with the name of the x-var


## example 2 - upper tail test 
#The null hypothesis is H0:beta2 = 5.5
#The alternative hypothesis is H1:beta2 > 5.5
#alpha=0.01

B2=5.5


b2=coef(lm)[[2]] #slope estimated in the model 
se_b2= coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2)

t=(b2-B2)/se_b2 # statistic T 
t

alpha=0.01
qt=qt(1-alpha, df=n-2) #critical value table t.student
qt

# t>qt -> no reject Ho 
pval = pt(t, df=df, lower.tail=FALSE) 
pval   #pval>alpha - no reject H0 


library(car)
linearHypothesis(lm, "income=5.5")


## example 3 - lower tail test - left rej area
#The null hypothesis is H0:beta2 = 15
#The alternative hypothesis is H1:beta2 < 15
#alpha=0.05

B2=15

b2=coef(lm)[[2]] #slope estimated in the model 
se_b2= coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2)


t=(b2-B2)/se_b2 # statistic T 
t

alpha=0.05
qt=qt(alpha, df=df) #critical value table t.student
qt

# - reject Ho since |t| > |qt|
# p-value
pval = pt(t, df=df) 
pval   

## example 4 - two tails test 

#The null hypothesis is H0:beta2 = 7.5
#The alternative hypothesis is H1:beta2 ??? 7.5
#alpha=0.05

B2=7.5

b2=coef(lm)[[2]] #slope estimated in the model 
se_b2= coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2)

t=(b2-B2)/se_b2 # statistic T 
t

alpha=0.05
qt=qt(1-alpha/2, df=df) #critical values table t.student
qt

# Do not reject Ho

pval = 2 * pt(t, df=df, lower.tail = FALSE)  # lower tail 
pval  #p-val>alpha-> no reject Ho

### example 5 - two tails test
#The null hypothesis is H0:beta = 0
#The alternative hypothesis is H1:beta ??? 0
#alpha=0.05
B2=0

b2=coef(lm)[[2]] #slope estimated in the model 
se_b2= coef(summary(lm))[2,"Std. Error"] #s.e. of slope (b2)

t=(b2-B2)/se_b2 # statistic T 
t

alpha=0.05
qt=qt(1-alpha/2, df=df) #critical values table t.student
qt

# reject H0 

pval = 2 * pt(t, df=df, lower.tail = FALSE)  # lower tail 
pval  #p-val<alpha-> reject Ho