########################## Students dataset - exercise solutions ###################
#
# Descriptive-statistics exercises on the "students" dataset: frequency tables,
# bar plots, box plots, outlier detection and variability indicators.
#
# Setup: run this script from the folder that contains students.xlsx (or adapt
# the path passed to read_excel()). If you need to change the working
# directory, do it interactively, e.g.:
# setwd("path/to/your/folder")

library(readxl)

stud <- read_excel("students.xlsx")
str(stud)

# 1 - Number of rows and number of columns ----
dim(stud)

# 2 - How many students study at least 5 weekly hours? ----
# Legend: 1 = <2 hours, 2 = 2 to 5 hours, 3 = 5 to 10 hours, 4 = >10 hours.
# "At least 5 hours" therefore means categories 3 and 4.
studytime_freq <- table(stud$studytime)
studytime_freq
# Students who study at least 5 weekly hours = sum of the frequencies of
# categories 3 and 4 (assumes studytime is stored as the numeric codes 1-4).
sum(stud$studytime >= 3, na.rm = TRUE)

# 3 - Mode of the reasons for choosing this school ----
table(stud$reason)
# The main reason is "course": this is the mode.

# 4 - Relative frequency distribution of free time after school ----
nj <- table(stud$freetime)
fj <- prop.table(nj)
fj
barplot(fj * 100,
        main = " Bar Plot of free time",
        xlab = "free time after school",
        ylab = "% Frequency",
        col = "lightblue")
# 38% of students have a score equal to 3 (labels go from 1 to 5, so 3 is an
# intermediate amount of free time); 3 is the mode.
# 10% have very high free time (category 5), while only 6.9% have very low
# free time (category 1).

# 5 - Weekend alcohol consumption ----
# Walc is numeric: from 1 (very low) to 5 (very high).
walc_freq <- table(stud$Walc)
freq <- prop.table(walc_freq)
freq
# 38% of the distribution reports very low alcohol consumption.
barplot(freq,
        main = " Bar Plot",
        xlab = "weekend alchool consumption",
        ylab = "Frequency",
        col = "blue")  # col sets the colour of the bars

# 5 - Summary statistics of the number of school absences ----
summary(stud$absences)
hist(stud$absences)
# Asymmetric distribution (positive skewness).

# 6 - Box plot of absences; outlier detection ----
# Can you detect any outliers? If yes, identify how many there are and draw a
# new box plot without them.
boxplot(stud$absences)
# Yes, there are outliers!

# Outlier identification. boxplot.stats() returns the same `out` values that
# boxplot() uses, without drawing the plot again.
outlier <- boxplot.stats(stud$absences)$out
outlier.number <- length(outlier)
outlier.number  # 21 outliers
boxplot(stud$absences, outline = FALSE)

# 7 - Box plot of absences by (workday) alcohol consumption ----
boxplot(stud$absences ~ stud$Dalc,
        outline = FALSE,
        xlab = "Alcool consumpt",
        ylab = " absences distrib")
# TODO: add a comment on this graphical representation.

# 8 - Variability indicators of the variable absences ----
# Results are stored under descriptive names so that the base functions
# mean() and sd() are not masked by data objects.
var(stud$absences, na.rm = TRUE)
absences_sd <- sd(stud$absences, na.rm = TRUE)
absences_mean <- mean(stud$absences, na.rm = TRUE)
# Coefficient of variation = standard deviation / mean.
cv <- absences_sd / absences_mean
cv

# 9 - Mean and standard deviation of absences by workday alcohol consumption ----
mean_by_dalc <- tapply(stud$absences, stud$Dalc, mean, na.rm = TRUE)
sd_by_dalc <- tapply(stud$absences, stud$Dalc, sd, na.rm = TRUE)
mean_by_dalc
sd_by_dalc
# Coefficient of variation within each Dalc group (sd / mean, as in step 8).
cv_by_dalc <- sd_by_dalc / mean_by_dalc
cv_by_dalc