Chapter 8: Generalized Linear Models and Survival Analysis
Packages required: “DAAG”, “lattice”, “MASS”, “survival”
The script that follows is designed to be executed as it stands. It sets up
functions that create Ch 8 graphs, then runs those functions. If executed by
clicking on the RStudio “Compile Notebook …” command, it will be processed
through R Markdown, generating a document that includes code, output and
graphs generated by executing the functions. If some needed packages are
missing, warning messages will appear. See further:
http://rmarkdown.rstudio.com/r_notebook_format.html
g8.1 <-
function(device="")
{
if(device!="")hardcopy(width=2.15, height=2.15, device=device)
titl <- paste("The logit or log(odds) transformation.",
"\nShown here is a plot of log(odds) versus proportion.",
"\nNotice how the range is stretched out at both ends.")
oldpar <- par(mfrow = c(1, 1), mgp = c(2.25, 0.5, 0),
mar = c(4.1,3.6,0.6,2.6), pty="s",
cex.axis=0.85, tcl=-0.25)
on.exit(par(oldpar))
p <- (1:999)/1000
gitp <- log(p/(1 - p))
plot(p, gitp, xlab = "", ylab = "", type = "l", pch = 1,
cex.lab=0.85, las=1)
mtext(side = 1, line = 2.15, "Proportion", cex=0.85)
mtext(side = 2, line = 2.15, "logit(Proportion) = log(Odds)", cex=0.85)
box()
pval <- c(0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999)
par(mgp = c(2.5, 0.5, 0))
axis(4, adj=0.075, at = log(pval/(1 - pval)), las=1,
labels = paste(pval))
cat(titl, "\n")
if(device!="")dev.off()
invisible()
}
g8.2 <-
function(device="")
{
if(device!="")hardcopy(width=2.15, height=2.15,
device=device)
titl <- paste("Plot, versus concentration, of proportion of patients",
"\nnot moving. The horizontal line is the estimate of",
"\nthe proportion of moves one would expect if the",
"\nconcentration had no effect.", sep = "")
oldpar <- par(mfrow = c(1, 1), mgp = c(2.25, 0.5, 0),
mar = c(4.1,4.1,1.1,2.1), pty="s", cex.axis=0.85, tcl=-0.25)
on.exit(par(oldpar))
if(!require("DAAG"))return("DAAG package must be installed")
z <- table(anesthetic$nomove, anesthetic$conc)
tot <- apply(z, 2, sum)
prop <- z[2, ]/(tot)
oprop <- sum(z[2, ])/sum(tot)
conc <- as.numeric(dimnames(z)[[2]])
print(prop)
print(conc)
plot(conc, prop, xlab = "Concentration", ylab = "Proportion",
xlim=c(0.5, 2.5), ylim = c(0, 1), pch = 16, axes=F)
axis(1, cex=0.9)
axis(2, at=c(0, 0.5, 1.0), cex=0.9)
axis(2, at=c(0.25, 0.75), cex=0.9)
box()
chh <- par()$cxy[2]
chw <- par()$cxy[1]
text(conc - 0.3 * chw, prop-sign(prop-0.5)*chh/4, paste(tot),
adj = 1, cex=0.65)
abline(h = oprop, lty = 2)
cat(titl, "\n")
if(device!="")dev.off()
invisible()
}
g8.3 <-
function(device="")
{
if(device!="")hardcopy(width=2.15, height=2.15, device=device)
oldpar <- par(mfrow = c(1, 1), mgp = c(2.25, 0.5, 0),
mar = c(4.1,4.1,1.1,2.1), pty="s", cex.axis=0.85, tcl=-0.25)
on.exit(par(oldpar))
tab <- table(anesthetic$nomove, anesthetic$conc)
tot <- colSums(tab) # totals at each concentration
unique.conc <- as.numeric(colnames(tab))
emplogit <- log((tab[2,]+0.5)/(tot-tab[2,]+0.5))
plot(unique.conc, log((tab[2,] +0.5)/(tab[1,]+0.5)),
xlab = "Concentration", xlim = c(0, 2.75), xaxs="i",
ylab = "Empirical logit", ylim=c(-2, 2.4), cex=1.5, pch = 16)
prop <- tab[2, ]/tot
text(unique.conc, emplogit, paste(round(prop,2)),
pos=c(2,4,2,4,4,4), cex=0.8)
abline(-6.47, 5.57)
if(device!="")dev.off()
invisible()
}
g8.4 <-
function(device=""){
if(device!="")hardcopy(width=4.2, height=2.4, device=device)
oldpar<-par(mar=c(4.6, 4.1, 1.6, 1.1), mgp=c(2.75, .75, 0),
mfrow=c(1,1), cex.axis=0.85, tcl=-0.4)
on.exit(par(oldpar))
if(!require("DAAG"))return("DAAG package must be installed")
plot(northing ~ easting, data=frogs, pch=c(1,16)[frogs$pres.abs+1],
xlab="Meters east of reference point", ylab="Meters north")
if(device!="")dev.off()
invisible()
}
g8.5 <-
function(device=""){
if(device!="")hardcopy(width=4.8, height=4.8, device=device)
pairs(frogs[,c(5:10,4)], oma=c(2,2,2,2), col="grey50", cex=0.75)
if(device!="")dev.off()
invisible()
}
g8.6 <-
function(device=""){
if(device!="")hardcopy(width=3.5, height=3.25, device=device)
oldpar<-par(mfrow=c(3,3),mar=c(4.6,4.1,1.1,1.1), mgp=c(2.25,.5, 0),
bty="L", tcl=-0.4)
on.exit(par(oldpar))
frognam<-names(frogs)
for(nam in c("distance","NoOfPools","NoOfSites")){
y<-frogs[,nam]
if(nam!="distance") ylab <- "" else ylab <- "Distance"
plot(density(y),main="",ylab=ylab, xlab=nam)
}
for(nam in c("distance","NoOfPools","NoOfSites")){
y<-frogs[,nam]
if(nam!="distance") ylab <- "" else ylab <- "Distance"
plot(density(sqrt(y)),main="",xlab=paste("sqrt(",nam,")",sep=""),
ylab=ylab)
}
for(nam in c("distance","NoOfPools","NoOfSites")){
y<-frogs[,nam]
if(nam!="distance") ylab <- "" else ylab <- "Distance"
if (nam!="NoOfSites")
plot(density(log(y)),main="",
xlab=paste("log(",nam,")",sep=""), ylab=ylab)
else
plot(density(log(y+1)),main="",
xlab=paste("log(",nam,"+1)",sep=""),ylab="")
}
if(device!="")dev.off()
invisible()
}
g8.7 <-
function(device=""){
if(device!="")hardcopy(width=4.8, height=4.8, device=device)
with(frogs,
pairs(cbind(log(distance), log(NoOfPools), NoOfSites, avrain,
altitude, meanmax+meanmin, meanmax-meanmin), col="gray",
labels=c("log(distance)", "log(NoOfPools)", "NoOfSites",
"Av. rainfall", "altitude", "meanmax+meanmin",
"meanmax-meanmin"),
panel=panel.smooth, oma=rep(1.75,4), cex=0.9))
if(device!="")dev.off()
invisible()
}
g8.8 <-
function(device=""){
if(device!="")hardcopy(width=5.2, height=1.3, device=device)
oldpar <- par(mar=c(3.1,4.1,1.1,1.1),mfrow=c(1,4),
mgp=c(2, 0.5, 0), pty="s")
on.exit(par(oldpar))
frogs$maxminSum <- with(frogs, meanmax+meanmin)
frogs$maxminDiff <- with(frogs, meanmax-meanmin)
frogs.glm <- glm(pres.abs ~ log(distance) + log(NoOfPools) +
maxminSum + maxminDiff,
family = binomial, data = frogs)
termplot(frogs.glm,data=frogs)
if(device!="")dev.off()
invisible()
}
g8.9 <-
function(device=""){
if(device!="")hardcopy(width=2, height=2, device=device)
oldpar <- par(mar=c(3.6,3.6,1.1,1.1), mgp=c(2.25,0.5,0), pty="s",
cex.axis=0.85, tcl=-0.25)
on.exit(par(oldpar))
if(!require("DAAG"))return("DAAG package must be installed")
plot(count~endtime, data=ACF1, pch=16, log="x")
if(device!="")dev.off()
invisible()
}
g8.10 <-
function(lmobj = NULL, device="")
{
if(device!="")hardcopy(width=3.5, height=2.25, device=device, trellis=TRUE,
color=F, pointsize=c(9,6))
if(!require("DAAG"))return("'DAAG' package must be installed")
if(!require("lattice"))return("'lattice' package must be installed")
mothdot <-
dotplot(habitat ~ A, data=moths, xlab="Number of moths (species A)",
panel=function(x, y, ...){
panel.dotplot(x,y, pch=1, col="black", ...)
av <- sapply(split(x,y),mean);
ypos <- unique(y)
lpoints(ypos~av, pch=3, col="black", cex=1.25)
},
key=list(text=list(c("Individual transects", "Mean")),
points=list(pch=c(1,3), cex=c(1,1.25), col=c("black","gray45")),
columns=2), scales=list(tck=0.5))
print(mothdot)
if(device!="")dev.off()
invisible()
}
g8.11 <-
function(lmobj = NULL, device="")
{
if(device!="")hardcopy(width=5.2, height=1.4, device=device)
oldpar <- par(mfrow=c(1,4), mar=c(3.1,4.1,2.1,0.6),
mgp=c(2.0,.5,0),oma=c(0,0,0,1), pty="s")
on.exit(par(oldpar))
if(!require("DAAG"))return("'DAAG' package must be installed")
lmobj <- glm(A ~ habitat + log(meters), family = quasipoisson,
data = moths, subset=habitat!="Bank")
plot(lmobj, caption=c("Resids vs Fitted",
"Normal Q-Q", "Scale-Location", "",
"Resids vs Leverage"), which=c(1:3,5), panel=panel.smooth)
if(device!="")dev.off()
invisible()
}
g8.12 <-
function(device=""){
if(device!="")hardcopy(width=2, height=2, device=device)
oldpar <- par(mar=c(4.1,4.1,1.1,1.1), mgp=c(2.25,0.4,0), pty="s", tcl=-0.25)
on.exit(par(oldpar))
p <- (1:99)/100
x <- log(p/(1-p))
u <- glm(p ~ x, family=binomial, weights=rep(100,99))
phat <- predict(u, type="response")
plot(x, hatvalues(u), type="l", ylab="Leverage", xaxt="n", yaxt="n",
ylim=c(0, 0.038), yaxs="i", xlab="Fitted proportion")
pos=c(.01, .05, .2, .5, .8, .95,.99)
axis(1, at=log(pos/(1-pos))[c(1,3,5,7)], labels=paste(pos)[c(1,3,5,7)],
cex.axis=0.7)
axis(3, at=log(pos/(1-pos))[c(2,4,6)], labels=paste(pos)[c(2,4,6)],
cex.axis=0.7)
axis(2, at=c(0,.01,.02,.03), cex.axis=.7)
x <- qnorm(p)
u <- glm(p ~ x, family=binomial(link=probit), weights=rep(100,99))
phat <- predict(u, type="response")
lines(log(phat/(1-phat)), hatvalues(u), type="l", lty=2)
x <- log(-log(1-p))
u <- glm(p ~ x, family=binomial(link=cloglog), weights=rep(100,99))
phat <- predict(u, type="response")
lines(log(phat/(1-phat)), hatvalues(u), type="l", col="gray")
xy <- par()$usr[c(2,3)]
chw <- par()$cxy[1]
chh <- par()$cxy[2]
legend(xy[1], xy[2]-0.25*chh, xjust=1, yjust=0, lty=c(1,2,1),
legend=c("logit link", "probit link", "cloglog link"),
col=c("black","black","gray"), bty="n", cex=0.8)
if(device!="")dev.off()
invisible()
}
g8.13 <-
function (device="")
{
if(device!="")hardcopy(width=4.5, height=3.5, pointsize=8, device=device)
df <- data.frame(x0 = c(1, 5, 1, 2, 14, 10, 12, 19)*30,
x1 = c(46, 58, 85, 67, 17, 85, 18, 42)*30,
fail = c(1, 0, 0, 1, 1, 0, 0, 1))
oldpar <- par(mar = par()$mar + c(0, 0, 4, 0))
on.exit(par(oldpar))
plot(c(0, 2610), c(0.85, 8.15), type = "n", xlab = "",
ylab = "Subject number", axes = F)
mtext(side = 1, line = 3.75, "Days from beginning of study", adj = 0.5)
m <- dim(df)[1]
par(las=2)
axis(2, at = (1:m), labels = paste((m:1)), lty=0)
par(las=1)
abline(v = 600, lty = 4)
abline(v = 2550)
mtext(side = 3, line = 0.5, at = c(600, 2550),
text = c("\nEnd of recruitment",
"\nEnd of study"), cex = 0.9)
lines(rep((0:8) * 300, rep(3, 9)), rep(c(0, 0.2, NA), 9),
xpd = T)
mtext(side = 1, line = 1.85, at = (0:8) * 300,
text = paste((0:8) * 300), adj = 0.5)
chw <- par()$cxy[1]
xx <- as.vector(t(cbind(df[, 1], df[, 2] - 0.25 * chw,
rep(NA, m))))
yy <- as.vector(t(cbind(matrix(rep(m:1, 2), ncol = 2),
rep(NA, m))))
lines(as.numeric(xx), as.numeric(yy))
points(df[, 1], m:1, pch = 16)
text(df[, 1]-0.25*chw, m:1, paste(df[,1]), pos=1, cex=0.75)
fail <- as.logical(df$fail)
points(df[fail, 2], (m:1)[fail], pch = 15)
points(df[!fail, 2], (m:1)[!fail], pch = 0)
text(df[, 2]+0.25*chw, m:1, paste(df[,2]), pos=1, cex=0.75)
par(xpd=TRUE)
legend(0, 11.5, pch = 16, legend = "Entry")
legend(1230, 11.5, pch = c(15, 0),
legend = c("Dead", "Censored"), ncol=2)
par(xpd=FALSE)
invisible()
if(device!="")dev.off()
}
g8.14 <-
function(device=""){
if(device!="")hardcopy(width=2.25, height=2.25, pointsize=8,
device=device)
if(!require("survival"))return("'survival' package must be installed")
if(!require("MASS"))return("'MASS' package must be installed")
oldpar <- par(mfrow = c(1, 1), mgp = c(2.5, 0.75, 0),
mar = c(4.1,4.1,2.6,1.1), pty="s")
on.exit(par(oldpar))
bloodAids <- subset(Aids2, T.categ=="blood")
bloodAids$days <- bloodAids$death-bloodAids$diag
bloodAids$dead <- as.integer(bloodAids$status=="D")
plot(survfit(Surv(days, dead) ~ sex, data=bloodAids),
col=c("black","gray"), conf.int=TRUE, lty=3,
xlab="Days from diagnosis", ylab="Survival probability")
par(new=TRUE)
plot(survfit(Surv(days, dead) ~ sex, data=bloodAids),
col=c("black","gray45"), conf.int=FALSE, lty=1, lwd=1.5, tcl=-0.35)
par(xpd=TRUE)
legend("top", legend=levels(bloodAids$sex), lty=c(1,1),
col=c("black", "gray60"), horiz=TRUE, bty="n")
if(device!="")dev.off()
}
g8.15 <-
function(device=""){
if(device!="")hardcopy(fignum, width=2.5, height=2.5, pointsize=8,
device=device)
if(!require("survival"))return("'survival' package must be installed")
if(!require("MASS"))return("'MASS' package must be installed")
oldpar <- par(mfrow = c(1, 1), mgp = c(2.75, 0.75, 0),
mar = c(4.1,4.1,1.1,1.1), pty="s")
on.exit(par(oldpar))
hsaids <- subset(Aids2, sex=="M" & T.categ=="hs")
hsaids$days <- hsaids$death-hsaids$diag
hsaids$dead <- as.integer(hsaids$status=="D")
hsaids.surv <- survfit(Surv(days, dead) ~ 1, data=hsaids)
plot(hsaids.surv, col="gray", conf.int=F, tcl=-0.4)
par(new=TRUE)
plot(hsaids.surv,col=1, conf.int=F,mark.time=F,
xlab="Days from diagnosis", ylab="Estimated survival probabality")
chw <- par()$cxy[1]
chh <- par()$cxy[2]
surv <- hsaids.surv$surv
xval <- c(200,700,1400,1900)
hat <- approx(hsaids.surv$time, surv, xout=xval)$y
for(i in 1:2) arrows(xval[i], hat[i], 0, hat[i],
length=0.05, col="gray")
lines(rep(xval[1],2), hat[1:2], col="gray")
## lines(rep(xval[3],2), hat[3:4], col="gray")
## Offset triangle 1
chw <- par()$cxy[1]
lines(xval[c(1,2,1,1)]+650, hat[c(2,2,1,2)]+0.2,col="gray40")
xy1 <- c(mean(xval[c(1,1,2)]), mean(hat[c(1,2,2)]))
arrows(xy1[1], xy1[2], xy1[1]+650, xy1[2]+0.2, col="gray40", length=0.1)
text(xval[1]-0.1*chw+650, hat[1]+0.2,
paste(round(hat[1],2)), col="gray20",cex=0.75, adj=1)
text(xval[1]+650-0.1*chw, hat[2]+0.2,
paste(round(hat[2],2)), col="gray20",cex=0.75, adj=1)
text(mean(xval[1:2])+650, hat[2]+0.2-0.5*chh,
paste(round(diff(xval[1:2]))), col="gray20", cex=0.75)
text(xval[1]+650-0.5*chw, mean(hat[1:2]+0.2), paste(round(hat[1]-hat[2],3)),
srt=90, adj=0.5, col="gray20", cex=0.75)
## Offset triangle 2
## lines(xval[c(3,4,3,3)]+500, hat[c(4,4,3,4)]+0.15,col="gray40")
## xy2 <- c(mean(xval[c(3,3,4)]), mean(hat[c(3,4,4)]))
## arrows(xy2[1], xy2[2], xy2[1]+500, xy2[2]+0.15, col="gray40")
## text(xval[3]+500-0.5*chw, mean(hat[3:4]+0.15),
## paste(round(hat[3]-hat[4],3)),
## srt=90, adj=0.5, col="gray20", cex=0.75)
## text(mean(xval[3:4])+500, hat[4]+0.15-0.5*chh,
## paste(round(diff(xval[3:4]))), col="gray20", cex=0.75)
print(round(hat,3))
if(device!="")dev.off()
invisible()
}
g8.16 <-
function(device=""){
if(device!="")hardcopy(width=3, height=2.25,
pointsize=8, device=device)
oldpar <- par(mfrow = c(1, 1), mgp = c(2.5, 0.75, 0),
mar = c(4.1,4.1,1.1,1.1), pty="s")
on.exit(par(oldpar))
bloodAids<-subset(Aids2,T.categ=="blood")
bloodAids$days<-bloodAids$death-bloodAids$diag
bloodAids$dead<-as.integer(bloodAids$status=="D")
bloodAids.coxph <- coxph(Surv(days, dead) ~ sex, data = bloodAids)
plot(cox.zph(bloodAids.coxph), pch=0.9, tcl=-0.4)
if(device!="")dev.off()
}
pkgs <- c("DAAG", "lattice", "MASS", "survival")
z <- sapply(pkgs, require, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)
if(any(!z)){
notAvail <- paste(names(z)[!z], collapse=", ")
warning(paste("The following packages should be installed:", notAvail))
}
g8.1()
## The logit or log(odds) transformation.
## Shown here is a plot of log(odds) versus proportion.
## Notice how the range is stretched out at both ends.
g8.2()
## 0.8 1 1.2 1.4 1.6 2.5
## 0.1429 0.2000 0.6667 0.6667 1.0000 1.0000
## [1] 0.8 1.0 1.2 1.4 1.6 2.5
## Plot, versus concentration, of proportion of patients
## not moving. The horizontal line is the estimate of
## the proportion of moves one would expect if the
## concentration had no effect.
g8.3()
g8.4()
g8.5()
g8.6()
g8.7()
g8.8()
g8.9()
g8.10()
g8.11()
g8.12()
g8.13()
g8.14()
g8.15()
## [1] 0.752 0.326 0.115 0.087
g8.16()