## Modello Logistico Categorico ## a.s. 2024/2025 ## code per esercitazione ## Dati ANES 2020 rm(ist=ls()) load("/Users/mariagraziapittau/Library/CloudStorage/Dropbox-Universita'DegliStudiDiRomaLaSapienza/grazia.pittau@uniroma1.it/Lezioni/Statistica_Aziendale/R/Categorico/nes20_recode.Rdata") #attach(dati) names(nes20) require(arm) require(car) ### Distribuzione della variabile latente al variare del valore del predittore lineare ## considero il primo cupoint c=0 par(mfrow=c(3,1)) x1<- seq(-6, 6, length=100) plx1 <- dnorm(x1, -0.7) plot(x1, plx1, type="l", lty=2, xlab="", ylab="Density", main="Predittore lineare =-0.7") abline(v=-0.8, lty=1) ## valore della media abline(v=0, lty=2) ## valore del primo cutpoint plx2 <- dnorm(x1, 1.2) plot(x1, plx2, type="l", lty=2, xlab="", ylab="Density", main="Predittore lineare =1.2") abline(v=1.2, lty=1) ##valore della media abline(v=0, lty=2) ##valore del primo cutpoint plx3 <- dnorm(x1, 2.8) plot(x1, plx3, type="l", lty=2, xlab="", ylab="Density", main="Predittore lineare =2.8") abline(v=2.8, lty=1) ##valore della media abline(v=0, lty=2)##vlore del primo cutpoint ## le relative probabilita' cambiano plogis(0.7) plogis(0) ## Homosexual job discrimination ## V201414x job_discrimation ## R FAVOR/OPPOSE LAWS PROTECT GAYS LESBIANS AGAINST JOB DISCRIMINATION ##gayjobscale Il R si dichiara contrario o favorevole al fatto che la legge deve ##proteggere gli omossessuoli da discriminazioni nel lavoro ## 1. Favor Strongly (fortemente a favore) ## 2. Favor Not Strongly (ragionevolmente a favore ) ## 3. Oppose Not Strongly (ragionevolmente contro) ## 4. Oppose Strongly (fortemente contro) ##POST: HOW MUCH INCREASE/DECREASE IN GOVERMENT HELP PAYING FOR HEALTH CARE ## 1. A great deal ## 2. A moderate amount ## 3. A little nes20$increase_health_care<-recode(nes20$increase_health_care, "-9:-1=NA; else=nes20$increase_health_care") table(nes20$increase_health_care, exclude=NULL) nes20$increase_health_care<-4-nes20$increase_health_care ##male: variabile dummy che risponde alla domanda "di che sesso sei?": ##1. Uomo ##o. Donna ##married: variabile categorica che risponde alla domanda "in questo momento sei sposato?" ##1. Sposato ##0. No table(nes20$marital_status) ## Are you now married, widowed, divorced, separated or never married? ## -9. Refused ## -8. Don’t know ## 1. Married: spouse present ## 2. Married: spouse absent {VOL - video/phone only} ## 3. Widowed ## 4. Divorced ##5. Separated ##6. Never married nes20$married<-recode(nes20$marital_status, "-9:-8=NA; 1:2=1; 3:8=0") table(nes20$married, exclude=NULL) ##employ: variabile categorica che risponde alla domanda: "in questo momento sei occupato?" ##1. Si ##0. No ## Stato occupazionale ## V201534x: R OCCUPATION STATUS 1 DIGIT ##-2. Refused/Don’t know/Inapplicable ## 1. R working now (if also retired, disabled, homemaker or student, working 20 or more hrs/wk) ## 2. R temporarily laid off ## 4. R unemployed ## 5. R retired (if also working, working <20 hrs/wk) ##6. R permanently disabled (if also working, working <20 hrs/wk) 7. R homemaker (if also working, working <20 hrs/wk/incl nonworkg rs both homemaker and student) ## 8. R student (if also working, working <20 hrs/wk) table(nes20$occup_status) nes20$employ<-recode(nes20$occup_status, "1=1; 2:8=0; -2=NA") table(nes20$employ, exclude=NULL) ##reddito: variabile categorica che risponde alla domanda "in che fascia di reddito (espresso in US$ annuali) sei?". ##Le risposte vanno da un minimo di 1 a un massimo di 22. ##la considero come continua ## table(nes20$hh_income, exclude=NULL) z=22 mean.z<-mean(nes20$hh_income, na.rm=TRUE) sd.z<-sd(nes20$hh_income, na.rm=TRUE) z.income<-(z-mean.z)/(2*sd.z) ##partyid table(nes20$party_id, exclude=NULL) ### democratico vs repubblicano R<-2 mean.R<-mean(nes20$party_id, na.rm=TRUE) sd.R<-sd(nes20$party_id, na.rm=TRUE) z.R<-(R-mean.R)/(2*sd.R) ##Valuto la variabile outcome ## da 1 a 4---fortemente favorevole--fortemente contrario table(nes20$job_discrimination) nes20$job_discrimination<-recode(nes20$job_discrimination, "-2= NA; else=nes20$job_discrimination") ## Considero 1 sfavorevole e 4 favorevole nes20$job_discrimination<-5-nes20$job_discrimination ### 1.Stima e sintesi del modello multinomiale logistico ### approssimazione lineare fit.1 <- lm (job_discrimination ~ rescale(hh_income) + male, data=nes20) display(fit.1) ## Al crescere del reddito aumenta la prob di essere totalmente favorevole ## I ricchi sono piu' favorevoli ## Gli uomini meno favorevoli ### approssimazione con il modello logistico nes20$y.binary <- ifelse(nes20$job_discrimination>2, 1, 0) ##1==favorevoli 0==non favorevoli fit.2 <- glm (y.binary ~ rescale(hh_income)+ male, family = binomial(link = "logit"), data=nes20) display(fit.2) ## i segni non cambiano ma gli errori standard sono piu' ampi ## modello categorico y.cat in factor fit.3 <- polr(factor(job_discrimination) ~ rescale(hh_income) + male, method = c("logistic"), data=nes20) display(fit.3) ##stessi segni ma errori standard piu' bassi ##non abbiamo la devianza nulla quindi si confrontano due modelli ## Introduco l'identificazione politica fit.3a <- polr (factor(job_discrimination) ~rescale(hh_income)+ male+ rescale(party_id), method = c("logistic"), data=nes20) display(fit.3a) ## I repubblicani molto contrari rispetto ai democratici ##introduco altri predittori fit.4 <- polr (factor(job_discrimination) ~ rescale(hh_income) + male + employ + rescale(party_id) + rescale(hh_income):rescale(party_id), data=nes20) display(fit.4) ##Devianza 10805 contro 13096 di fit.3 coefs.fit.4 <- as.data.frame(coef(fit.4)) coefs <- rbind(coefs.fit.4[1, ], coefs.fit.4[2, ], c(0, 0), coefs.fit.4[3, ], c(0, 0), coefs.fit.4[4, ], coefs.fit.4[5, ])[,1] se<-summary(fit.4)$coefficients[,2] stnd.err <- rbind(se[1], se[2], c(0, 0), se[3], c(0,0), se[4], se[5])[,1] Labels <- c("Reddito", "Uomini", "Donne", "Occupati", "Disoccupati", "Identificazione politica", "Reddito e Id. politica") op <- par(mar = c(5,5,4,2) + 0.1) par(op) coefplot(coefs, stnd.err, varnames = Labels, mar = c(2, 8, 6, 2), cex.pts = .75, CI = 1, main="Modello categorico") par(op) dev.off() ##Esempi ## Republicano, mascio, disoccupato, reddito=12, partyid=7 ## Reddito Inc=20 mean_Income=mean(nes20$hh_income, na.rm=TRUE) sd_Income=sd(nes20$hh_income, na.rm=TRUE) z_income=(Inc-mean_Income)/(2*sd_Income) ##Party identification Partyid=11 mean_Partyid=mean(nes20$party_id, na.rm=TRUE) sd_Partyid=sd(nes20$party_id, na.rm=TRUE) z_Partyid=(Partyid-mean_Partyid)/(2*sd_Partyid) ## Predittore lineare xibeta<-coef(fit.4)["rescale(hh_income)"]*z_income + coef(fit.4)["male"]*1+ coef(fit.4)["employ"]*0 + coef(fit.4)["rescale(party_id)"]*z_Partyid + coef(fit.4)["rescale(hh_income):rescale(party_id)"]*z_income*z_Partyid ##Religione-- R ATTEND RELIGIOUS SERVICES HOW OFTEN ##Do you go to religious services every week, almost every week, once or twice a month, a few times a year, or never? ##-9. Refused ##-8. Don’t know ##-1. Inapplicable ## 1. Every week ## 2. Almost every week ## 3. Once or twice a month 4. A few times a year ## 5. Never table(nes20$attend_how_often) nes20$weekatt2<-recode(nes20$attend_how_often, "1:2=1; 2:5=0; -9:-1=NA") table(nes20$weekatt2) ##0. Less than weekly 1. weekly atttendance ## 1819 2160 ##Altri controlli e interazione relig--reddito fit.4.1 <- polr (factor(job_discrimination) ~ rescale(hh_income) + male + married+ white + employ + college + rescale(party_id) + weekatt2 + rescale(hh_income): weekatt2, data=nes20) display(fit.4.1) ##Devianza 5789.7 ##Grafico ##ricodifico y.cat trasformandola in tre categorie nes20$y.cat <- ifelse(nes20$job_discrimination==1, 1, ifelse (nes20$job_discrimination==4, 3, 2)) ## 1 fortemente sfavorevole ## 3 fortemente favorevole ## 2 medio--neutrale nes20$y.cat1<-4-nes20$y.cat fit.5 <- polr (factor(y.cat) ~ rescale(hh_income) + male + married+ white + employ + college + rescale(party_id) + weekatt2 + rescale(hh_income): weekatt2, data=nes20) display(fit.5) ##ESEMPIO GRAFICO fit.plot <- polr (factor(y.cat) ~ party_id, data=nes20) display(fit.plot) fit.plot <- polr (factor(y.cat1) ~ hh_income, data=nes20) display(fit.plot) expected.logistic <- function (x, c1.5, c2.5, sigma){ p1.5 <- invlogit ((x-c1.5)/sigma) p2.5 <- invlogit ((x-c2.5)/sigma) return ((1*(1-p1.5) + 2*(p1.5-p2.5) + 3*p2.5)) } expected.gauss <- function (x, c1.5, c2.5, sigma){ p1.5 <- pnorm ((x-c1.5)/sigma) p2.5 <- pnorm ((x-c2.5)/sigma) return ((1*(1-p1.5) + 2*(p1.5-p2.5) + 3*p2.5)) } ##definisco c1.5 e c2.5 c1.5<-fit.plot[[2]][1]/fit.plot[[1]] c2.5<-fit.plot[[2]][2]/fit.plot[[1]] sigma<-1.6/abs(fit.plot[[1]]) plot(nes20$party_id, jitter(nes20$y.cat, 0.14), xlim=c(-25,25), xaxt="n", yaxt="n", xlab="Ideologia politica: democratici versus republicani", ylab="", main="Rappresentazione grafica dei valori osservati, \n dei cut-points e della curva stimata") yi <- c("Sfavorevole" , "Neutrale", "Favorevole" ) ## y.cat1: 3 sfavorevole, 2 medio--neutrale, 1 fortemente favorevole xi <- c("Estrem. Democratico", "Estrem. Repubblicano") axis(1, c(-20, 20), xi, tick = T, col.axis = 'black') axis(2, 1:3, yi, tick = T, col.axis = 'black') lines (rep (c1.5, 2), c(1,2)) lines (rep (c2.5, 2), c(2,3)) curve (expected.logistic (-x, c1.5, c2.5, sigma), lty=1, add=TRUE) curve (expected.gauss (-x, c1.5, c2.5, sigma), lty=2, add=TRUE, col="red") ###Alternativamente plot(nes20$party_id, jitter(nes20$y.cat1, 0.14), xlim=c(-25,25), xaxt="n", yaxt="n", xlab="Ideologia politica: democratici versus republicani", ylab="", main="Rappresentazione grafica dei valori osservati, \n dei cut-points e della curva stimata") yi <- c("Favorevole" , "Neutrale", "Sfavorevole" ) ## y.cat1: 3 sfavorevole, 2 medio--neutrale, 1 fortemente favorevole xi <- c("Estrem. Democratico", "Estrem. Repubblicano") axis(1, c(-20, 20), xi, tick = T, col.axis = 'black') axis(2, 1:3, yi, tick = T, col.axis = 'black') lines (rep (c1.5, 2), c(1,2)) lines (rep (c2.5, 2), c(2,3)) curve (expected.logistic (x, c1.5, c2.5, sigma), lty=1, add=TRUE) curve (expected.gauss (x, c1.5, c2.5, sigma), lty=2, add=TRUE, col="red") fit.6<-polr (factor(increase_health_care) ~ rescale(hh_income) + male + married+ white + employ + generation + rescale(hh_income) :white, data=nes20) display(fit.6) fit.7<-polr (factor(increase_health_care) ~ rescale(hh_income), data=nes20) display(fit.7) fit.8<-polr (factor(increase_health_care) ~ party_id, data=nes20) display(fit.8) ##https://www.quora.com/What-do-cut-points-or-thresholds-mean-when-doing-ordered-probit-or-ordered-logit-analysis-on-Stata-or-R-or-Gretl-etc ##https://stats.oarc.ucla.edu/stata/output/ordered-logistic-regression/#:~:text=Standard%20interpretation%20of%20the%20ordered,the%20model%20are%20held%20constant.