# The library() function loads the packages that we need.
library(gmodels)
# We'll use the Children of Immigrants Longitudinal Study (CILS).
CILS <- read.csv("http://www.courseserve.info/files/CILS2010.csv")
# NOTE(review): attach() is kept only in case code elsewhere in this file
# refers to bare column names; every model below passes data = CILS
# explicitly and does not depend on it.
attach(CILS)
summary(CILS)
# In logistic regression our DV is binary, coded (0,1).
# Let's look at an example from the CILS data. First, we
# look at an incomplete model, and then a complete model.
# We do this because the significance test is a Chi-squared
# test on the change in deviance (goodness of fit).
#
# Each model is fitted once and stored, so the summaries, the odds ratios,
# and the model comparison below all refer to the same fit.
# na.action has to be passed by name: written positionally as
# na.action(na.exclude) it is matched to glm()'s `weights` argument instead,
# and the requested handling of missing values is silently ignored.
reduced_model <- glm(
  V448H ~ V139 + V134 + V132 + V206,
  family = binomial(), data = CILS, na.action = na.exclude
)
full_model <- glm(
  V448H ~ V139 + V134 + V132 + V148 + V206,
  family = binomial(), data = CILS, na.action = na.exclude
)
summary(reduced_model)
summary(full_model)
# The difference between the models is the addition of V148.
# Let's translate the coefficients back into odds ratios
# (the exponentiated intercept is the baseline odds).
exp(coef(full_model))
# To test the model for significance, we use the anova() function with a
# Chi-squared test. We're comparing the added explanatory power of the
# predictors compared to an incomplete model.
# A change-in-deviance test is only valid when both models are fitted to the
# same observations, so the incomplete model is refitted on exactly the rows
# the complete model used (rows with a missing V148 are dropped from both).
# The incomplete model goes first so that the reported Df and deviance
# change are positive.
reduced_same_rows <- update(
  full_model, . ~ . - V148,
  data = model.frame(full_model)
)
anova(reduced_same_rows, full_model, test = "Chisq")
# For the sake of parsimony, we could omit a variable if it doesn't improve
# the fit, unless we think it ought to be included as a control.