# EX_ANOVA_R 09/02/2021
#
# Exercise 1: one-way ANOVA on the built-in PlantGrowth data set.
# The script compares plant weight across the three levels of `group`
# (ctrl, trt1, trt2), then checks the model assumptions.

library(dplyr)

# Data ----
data <- PlantGrowth
head(data)
summary(data)

# Show the levels
levels(data$group)
data$group <- ordered(data$group, levels = c("ctrl", "trt1", "trt2"))

# 1 - Compute the mean of weight by group ----
tapply(data$weight, data$group, mean)

# 2 - Represent weight by group graphically with a box plot ----
boxplot(
  weight ~ group,
  data = data,
  main = "weight by group",
  xlab = "group",
  ylab = "weight",
  col = c("#00AFBB", "#E7B800", "#FC4E07")
)

# Descriptive statistics (count, mean and standard deviation per group)
data %>%
  group_by(group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )

# 3 - Compute the ANOVA ----
res.aov <- aov(weight ~ group, data = data)

# Summary of the analysis
summary(res.aov)
# Results interpretation: as the p-value is less than the significance level
# 0.05, we can conclude that there are significant differences between the
# groups, highlighted with "*" in the model summary.

# Homoscedasticity assumption ----
# Show the four diagnostic plots of the one-way ANOVA model in a 2 x 2 grid,
# then restore whatever graphical layout was active before.
old_par <- par(mfrow = c(2, 2))
plot(res.aov)
par(old_par)

# Tukey HSD multiple pairwise comparisons ----
tukey.one.way <- TukeyHSD(res.aov)
tukey.one.way

# Graphical representation of the pairwise confidence intervals.
# The test is computed once; `tukey.plot.test` is kept as an alias of the
# same result so both names remain available.
tukey.plot.test <- tukey.one.way
plot(tukey.plot.test, las = 1)

######## Focus on the normality assumption ########
# Normality plot of residuals: the quantiles of the residuals are plotted
# against the quantiles of the normal distribution, together with a reference
# line. The plot is used to check the assumption that the residuals are
# normally distributed: the points should approximately follow a straight
# line.

# Check normality of the weight data.
# Note: this looks at the pooled raw weights; the ANOVA assumption itself
# concerns the model residuals, which are examined in the Q-Q plot below.
d <- density(data$weight) # returns the density data
plot(d)                   # plots the results
shapiro.test(data$weight)

# Normal Q-Q plot of the ANOVA residuals
plot(res.aov, 2)
# As all the points fall approximately along this reference line, we can
# assume normality.
# Extract the residuals
# The residuals come from the one-way ANOVA of weight by group on PlantGrowth.
# The model is fitted here under its own name so that this check does not
# depend on workspace state and cannot be confused with the iris model that
# is assigned to `res.aov` further down.
plant_aov <- aov(weight ~ group, data = PlantGrowth)
aov_residuals <- residuals(object = plant_aov)

# Run the Shapiro-Wilk test
shapiro.test(x = aov_residuals)
# The conclusion drawn from the Q-Q plot is supported by the Shapiro-Wilk test
# on the ANOVA residuals, which finds no indication that normality is violated
# when its p-value is above 0.05.
# NOTE(review): the original notes quote "W = 0.96, p = 0.6" here -- verify
# these figures against the output printed by the call above.

# Exercise 2: one-way ANOVA on the iris data set ----
library(datasets)
data(iris)

# Let's have a look at the data set
str(iris)
summary(iris)
table(iris$Species)
levels(iris$Species)

summary(iris$Petal.Length)
tapply(iris$Petal.Length, iris$Species, mean)

# Compute the analysis of variance
res.aov <- aov(Petal.Length ~ Species, data = iris)

# Summary of the analysis
summary(res.aov)
nrow(iris)

## Further exercise to do
# Load the titanic data.
# Try to compute an ANOVA for the fare variable.
# Is it possible to conclude, at level alpha = 0.05, that the mean fare
# differs across the levels of the "class" variable?