Chapter 8: Generalized Linear Models and Survival Analysis
Packages required: “DAAG”, “lattice”, “MASS”, “survival”

The script that follows is designed to be executed as it stands. It sets up
functions that create Ch 8 graphs, then runs those functions. If executed by
clicking on the RStudio “Compile Notebook …” command, it will be processed
through R Markdown, generating a document that includes code, output and
graphs generated by executing the functions. If some needed packages are
missing, warning messages will appear. See further:
http://rmarkdown.rstudio.com/r_notebook_format.html

g8.1 <-
function(device="")
{
  if(device!="")hardcopy(width=2.15, height=2.15, device=device)
  titl <- paste("The logit or log(odds) transformation.",
                "\nShown here is a plot of log(odds) versus proportion.",
                "\nNotice how the range is stretched out at both ends.")
  oldpar <- par(mfrow = c(1, 1), mgp = c(2.25, 0.5, 0), 
                mar = c(4.1,3.6,0.6,2.6), pty="s",
                cex.axis=0.85, tcl=-0.25)
  on.exit(par(oldpar))
  p <- (1:999)/1000
  gitp <- log(p/(1 - p))
  plot(p, gitp, xlab = "", ylab = "", type = "l", pch = 1,
       cex.lab=0.85, las=1)
  mtext(side = 1, line = 2.15, "Proportion", cex=0.85)
  mtext(side = 2, line = 2.15, "logit(Proportion) = log(Odds)", cex=0.85)
  box()
  pval <- c(0.001, 0.01, 0.1, 0.5, 0.9, 0.99, 0.999)
  par(mgp = c(2.5, 0.5, 0))
  axis(4, adj=0.075, at = log(pval/(1 - pval)), las=1,
       labels = paste(pval))
  cat(titl, "\n")
  if(device!="")dev.off()
  invisible()
}

g8.2 <-
function(device="")
{
  if(device!="")hardcopy(width=2.15, height=2.15,
                         device=device)
  titl <- paste("Plot, versus concentration, of proportion of patients",
                "\nnot moving. The horizontal line is the estimate of",
                "\nthe proportion of moves one would expect if the",
                "\nconcentration had no effect.", sep = "")
  oldpar <- par(mfrow = c(1, 1), mgp = c(2.25, 0.5, 0), 
                mar = c(4.1,4.1,1.1,2.1), pty="s", cex.axis=0.85, tcl=-0.25)
  on.exit(par(oldpar))
  if(!require("DAAG"))return("DAAG package must be installed")  
  z <- table(anesthetic$nomove, anesthetic$conc)
  tot <- apply(z, 2, sum)
  prop <- z[2,  ]/(tot)
  oprop <- sum(z[2,  ])/sum(tot)
  conc <- as.numeric(dimnames(z)[[2]])
  print(prop)
  print(conc)
  plot(conc, prop, xlab = "Concentration", ylab = "Proportion",
       xlim=c(0.5, 2.5), ylim = c(0, 1), pch = 16, axes=F)
  axis(1, cex=0.9)
  axis(2, at=c(0, 0.5, 1.0), cex=0.9)
  axis(2, at=c(0.25, 0.75), cex=0.9)
  box()
  chh <- par()$cxy[2]
  chw <- par()$cxy[1]
  text(conc - 0.3 * chw, prop-sign(prop-0.5)*chh/4, paste(tot),
       adj = 1, cex=0.65)
  abline(h = oprop, lty = 2)
  cat(titl, "\n")
  if(device!="")dev.off()
  invisible()
}

g8.3 <-
function(device="")
{
  if(device!="")hardcopy(width=2.15, height=2.15, device=device)
  oldpar <- par(mfrow = c(1, 1), mgp = c(2.25, 0.5, 0), 
                mar = c(4.1,4.1,1.1,2.1), pty="s", cex.axis=0.85, tcl=-0.25)
  on.exit(par(oldpar))
  tab <- table(anesthetic$nomove, anesthetic$conc)
  tot <- colSums(tab)               # totals at each concentration
  unique.conc <- as.numeric(colnames(tab))
  emplogit <- log((tab[2,]+0.5)/(tot-tab[2,]+0.5))
  plot(unique.conc, log((tab[2,] +0.5)/(tab[1,]+0.5)),
       xlab = "Concentration", xlim = c(0, 2.75), xaxs="i",
       ylab = "Empirical logit", ylim=c(-2, 2.4), cex=1.5, pch = 16)
  prop <- tab[2,  ]/tot
  text(unique.conc, emplogit, paste(round(prop,2)),
       pos=c(2,4,2,4,4,4), cex=0.8) 
  abline(-6.47, 5.57)
  if(device!="")dev.off()
  invisible()
}

g8.4 <-
function(device=""){
    if(device!="")hardcopy(width=4.2, height=2.4, device=device)
    oldpar<-par(mar=c(4.6, 4.1, 1.6, 1.1), mgp=c(2.75, .75, 0),
                mfrow=c(1,1), cex.axis=0.85, tcl=-0.4)
    on.exit(par(oldpar))
    if(!require("DAAG"))return("DAAG package must be installed")  
    plot(northing ~ easting, data=frogs, pch=c(1,16)[frogs$pres.abs+1],
         xlab="Meters east of reference point", ylab="Meters north")
    if(device!="")dev.off()
    invisible()
  }

g8.5 <-
function(device=""){
    if(device!="")hardcopy(width=4.8, height=4.8, device=device)
    pairs(frogs[,c(5:10,4)], oma=c(2,2,2,2), col="grey50", cex=0.75)
    if(device!="")dev.off()
    invisible()
  }

g8.6 <-
function(device=""){
    if(device!="")hardcopy(width=3.5, height=3.25, device=device)
    oldpar<-par(mfrow=c(3,3),mar=c(4.6,4.1,1.1,1.1), mgp=c(2.25,.5, 0),
                bty="L", tcl=-0.4)
    on.exit(par(oldpar))
    frognam<-names(frogs)
    for(nam in c("distance","NoOfPools","NoOfSites")){
      y<-frogs[,nam]
      if(nam!="distance") ylab <- "" else ylab <- "Distance"
      plot(density(y),main="",ylab=ylab, xlab=nam)
    }
    for(nam in c("distance","NoOfPools","NoOfSites")){
      y<-frogs[,nam]
      if(nam!="distance") ylab <- "" else ylab <- "Distance"
      plot(density(sqrt(y)),main="",xlab=paste("sqrt(",nam,")",sep=""),
           ylab=ylab)
    }
    for(nam in c("distance","NoOfPools","NoOfSites")){
      y<-frogs[,nam]
      if(nam!="distance") ylab <- "" else ylab <- "Distance"
      if (nam!="NoOfSites")
        plot(density(log(y)),main="",
             xlab=paste("log(",nam,")",sep=""), ylab=ylab)
      else
        plot(density(log(y+1)),main="",
             xlab=paste("log(",nam,"+1)",sep=""),ylab="")
    }
    if(device!="")dev.off()
    invisible()
  }

g8.7 <-
function(device=""){
    if(device!="")hardcopy(width=4.8, height=4.8, device=device)
with(frogs, 
  pairs(cbind(log(distance), log(NoOfPools), NoOfSites, avrain,
           altitude, meanmax+meanmin, meanmax-meanmin), col="gray",
        labels=c("log(distance)", "log(NoOfPools)", "NoOfSites", 
                 "Av. rainfall", "altitude", "meanmax+meanmin", 
                  "meanmax-meanmin"),
        panel=panel.smooth, oma=rep(1.75,4), cex=0.9))
    if(device!="")dev.off()
    invisible()
  }

g8.8 <-
function(device=""){
    if(device!="")hardcopy(width=5.2, height=1.3, device=device)
    oldpar <- par(mar=c(3.1,4.1,1.1,1.1),mfrow=c(1,4),
                  mgp=c(2, 0.5, 0), pty="s")
    on.exit(par(oldpar))
    frogs$maxminSum <- with(frogs, meanmax+meanmin)
    frogs$maxminDiff <- with(frogs, meanmax-meanmin)
    frogs.glm <- glm(pres.abs ~ log(distance) + log(NoOfPools) + 
                     maxminSum + maxminDiff, 
                     family = binomial, data = frogs)
    termplot(frogs.glm,data=frogs)
    if(device!="")dev.off()
    invisible()
  }

g8.9 <-
function(device=""){
    if(device!="")hardcopy(width=2, height=2, device=device)
    oldpar <- par(mar=c(3.6,3.6,1.1,1.1), mgp=c(2.25,0.5,0), pty="s",
                  cex.axis=0.85, tcl=-0.25)
    on.exit(par(oldpar))
    if(!require("DAAG"))return("DAAG package must be installed")  
    plot(count~endtime, data=ACF1, pch=16, log="x")
    if(device!="")dev.off()
    invisible()
  }

g8.10 <-
function(lmobj = NULL, device="")
{
  if(device!="")hardcopy(width=3.5, height=2.25, device=device, trellis=TRUE,
                         color=F, pointsize=c(9,6))
  if(!require("DAAG"))return("'DAAG' package must be installed")
  if(!require("lattice"))return("'lattice' package must be installed")  
 mothdot <- 
 dotplot(habitat ~ A, data=moths, xlab="Number of moths (species A)", 
        panel=function(x, y, ...){ 
          panel.dotplot(x,y, pch=1, col="black", ...) 
          av <- sapply(split(x,y),mean);
          ypos <- unique(y)
          lpoints(ypos~av, pch=3, col="black", cex=1.25)          
          }, 
        key=list(text=list(c("Individual transects", "Mean")),
          points=list(pch=c(1,3), cex=c(1,1.25), col=c("black","gray45")),
          columns=2), scales=list(tck=0.5))
  print(mothdot)
  if(device!="")dev.off()
  invisible()
}

g8.11 <-
function(lmobj = NULL, device="")
{
  if(device!="")hardcopy(width=5.2, height=1.4, device=device)
  oldpar <- par(mfrow=c(1,4), mar=c(3.1,4.1,2.1,0.6),
                mgp=c(2.0,.5,0),oma=c(0,0,0,1), pty="s")
  on.exit(par(oldpar))
  if(!require("DAAG"))return("'DAAG' package must be installed")  
  lmobj <- glm(A ~ habitat + log(meters), family = quasipoisson, 
               data = moths, subset=habitat!="Bank")
  plot(lmobj, caption=c("Resids vs Fitted", 
                "Normal Q-Q", "Scale-Location", "",
                "Resids vs Leverage"), which=c(1:3,5), panel=panel.smooth)
  if(device!="")dev.off()
  invisible()
}

g8.12 <-
function(device=""){
    if(device!="")hardcopy(width=2, height=2, device=device)
    oldpar <- par(mar=c(4.1,4.1,1.1,1.1), mgp=c(2.25,0.4,0), pty="s", tcl=-0.25)
    on.exit(par(oldpar))
    p <- (1:99)/100
    x <- log(p/(1-p))
    u <- glm(p ~ x, family=binomial, weights=rep(100,99))
    phat <- predict(u, type="response")
    plot(x, hatvalues(u), type="l", ylab="Leverage", xaxt="n", yaxt="n",
         ylim=c(0, 0.038), yaxs="i", xlab="Fitted proportion")
    pos=c(.01, .05, .2, .5, .8, .95,.99)
    axis(1, at=log(pos/(1-pos))[c(1,3,5,7)], labels=paste(pos)[c(1,3,5,7)],
         cex.axis=0.7)
    axis(3, at=log(pos/(1-pos))[c(2,4,6)], labels=paste(pos)[c(2,4,6)],
         cex.axis=0.7)
    axis(2, at=c(0,.01,.02,.03), cex.axis=.7)
    x <- qnorm(p)
    u <- glm(p ~ x, family=binomial(link=probit), weights=rep(100,99))
    phat <- predict(u, type="response")
    lines(log(phat/(1-phat)), hatvalues(u), type="l", lty=2)
    x <- log(-log(1-p))
    u <- glm(p ~ x, family=binomial(link=cloglog), weights=rep(100,99))
    phat <- predict(u, type="response")
    lines(log(phat/(1-phat)), hatvalues(u), type="l", col="gray")
    xy <- par()$usr[c(2,3)]
    chw <- par()$cxy[1]
    chh <- par()$cxy[2]
    legend(xy[1], xy[2]-0.25*chh, xjust=1, yjust=0, lty=c(1,2,1),
           legend=c("logit link", "probit link", "cloglog link"),
           col=c("black","black","gray"), bty="n", cex=0.8)
    if(device!="")dev.off()
    invisible()
  }

g8.13 <-
function (device="") 
{
  if(device!="")hardcopy(width=4.5, height=3.5, pointsize=8, device=device)
  df <- data.frame(x0 = c(1, 5, 1, 2, 14, 10, 12, 19)*30,
                   x1 = c(46, 58, 85, 67, 17, 85, 18, 42)*30,
                   fail = c(1, 0, 0, 1, 1, 0, 0, 1))
  oldpar <- par(mar = par()$mar + c(0, 0, 4, 0))
  on.exit(par(oldpar))
  plot(c(0, 2610), c(0.85, 8.15), type = "n", xlab = "",
       ylab = "Subject number", axes = F)
  mtext(side = 1, line = 3.75, "Days from beginning of study", adj = 0.5)
  m <- dim(df)[1]
  par(las=2)
  axis(2, at = (1:m), labels = paste((m:1)), lty=0)
  par(las=1)
  abline(v = 600, lty = 4)
  abline(v = 2550)
  mtext(side = 3, line = 0.5, at = c(600, 2550),
        text = c("\nEnd of recruitment", 
          "\nEnd of study"), cex = 0.9)
  lines(rep((0:8) * 300, rep(3, 9)), rep(c(0, 0.2, NA), 9), 
        xpd = T)
  mtext(side = 1, line = 1.85, at = (0:8) * 300,
        text = paste((0:8) * 300), adj = 0.5)
  chw <- par()$cxy[1]
  xx <- as.vector(t(cbind(df[, 1], df[, 2] - 0.25 * chw, 
                          rep(NA, m))))
  yy <- as.vector(t(cbind(matrix(rep(m:1, 2), ncol = 2),
                          rep(NA, m))))
  lines(as.numeric(xx), as.numeric(yy))
  points(df[, 1], m:1, pch = 16)
  text(df[, 1]-0.25*chw, m:1, paste(df[,1]), pos=1, cex=0.75)
  fail <- as.logical(df$fail)
  points(df[fail, 2], (m:1)[fail], pch = 15)
  points(df[!fail, 2], (m:1)[!fail], pch = 0)
  text(df[, 2]+0.25*chw, m:1, paste(df[,2]), pos=1, cex=0.75)
  par(xpd=TRUE)
  legend(0, 11.5, pch = 16, legend = "Entry")
  legend(1230, 11.5, pch = c(15, 0),
         legend = c("Dead", "Censored"), ncol=2)
  par(xpd=FALSE)
  invisible()
  if(device!="")dev.off()
}

g8.14 <-
function(device=""){
    if(device!="")hardcopy(width=2.25, height=2.25, pointsize=8,
                           device=device)
    if(!require("survival"))return("'survival' package must be installed")  
    if(!require("MASS"))return("'MASS' package must be installed")      
    oldpar <- par(mfrow = c(1, 1), mgp = c(2.5, 0.75, 0), 
                  mar = c(4.1,4.1,2.6,1.1), pty="s")    
    on.exit(par(oldpar))
    bloodAids <- subset(Aids2, T.categ=="blood")
    bloodAids$days <- bloodAids$death-bloodAids$diag
    bloodAids$dead <- as.integer(bloodAids$status=="D")
    plot(survfit(Surv(days, dead) ~ sex, data=bloodAids), 
         col=c("black","gray"), conf.int=TRUE, lty=3,
         xlab="Days from diagnosis", ylab="Survival probability")
    par(new=TRUE)
    plot(survfit(Surv(days, dead) ~ sex, data=bloodAids), 
         col=c("black","gray45"), conf.int=FALSE, lty=1, lwd=1.5, tcl=-0.35)
    par(xpd=TRUE)
    legend("top", legend=levels(bloodAids$sex), lty=c(1,1),
           col=c("black", "gray60"), horiz=TRUE, bty="n")
    if(device!="")dev.off()    
  }

g8.15 <-
function(device=""){
    if(device!="")hardcopy(fignum, width=2.5, height=2.5, pointsize=8,
                           device=device)
    if(!require("survival"))return("'survival' package must be installed")  
    if(!require("MASS"))return("'MASS' package must be installed")          
    oldpar <- par(mfrow = c(1, 1), mgp = c(2.75, 0.75, 0), 
                  mar = c(4.1,4.1,1.1,1.1), pty="s")
    on.exit(par(oldpar))
    hsaids <- subset(Aids2, sex=="M" & T.categ=="hs")
    hsaids$days <- hsaids$death-hsaids$diag
    hsaids$dead <- as.integer(hsaids$status=="D")
    hsaids.surv <- survfit(Surv(days, dead) ~ 1, data=hsaids)
    plot(hsaids.surv, col="gray", conf.int=F, tcl=-0.4)
    par(new=TRUE)
    plot(hsaids.surv,col=1, conf.int=F,mark.time=F, 
         xlab="Days from diagnosis", ylab="Estimated survival probabality")
    chw <- par()$cxy[1]
    chh <- par()$cxy[2]
    surv <- hsaids.surv$surv
    xval <- c(200,700,1400,1900)
    hat <- approx(hsaids.surv$time, surv, xout=xval)$y
    for(i in 1:2) arrows(xval[i], hat[i], 0, hat[i],
                         length=0.05, col="gray")
    lines(rep(xval[1],2), hat[1:2], col="gray")
    ##    lines(rep(xval[3],2), hat[3:4], col="gray")
    ## Offset triangle 1
    chw <- par()$cxy[1]
    lines(xval[c(1,2,1,1)]+650, hat[c(2,2,1,2)]+0.2,col="gray40")
    xy1 <- c(mean(xval[c(1,1,2)]), mean(hat[c(1,2,2)]))
    arrows(xy1[1], xy1[2], xy1[1]+650, xy1[2]+0.2, col="gray40", length=0.1)
    text(xval[1]-0.1*chw+650, hat[1]+0.2,
         paste(round(hat[1],2)), col="gray20",cex=0.75, adj=1)
    text(xval[1]+650-0.1*chw, hat[2]+0.2,
         paste(round(hat[2],2)), col="gray20",cex=0.75, adj=1)
    text(mean(xval[1:2])+650, hat[2]+0.2-0.5*chh,
         paste(round(diff(xval[1:2]))), col="gray20", cex=0.75)
    text(xval[1]+650-0.5*chw, mean(hat[1:2]+0.2), paste(round(hat[1]-hat[2],3)),
         srt=90, adj=0.5, col="gray20", cex=0.75)
    ## Offset triangle 2
    ##    lines(xval[c(3,4,3,3)]+500, hat[c(4,4,3,4)]+0.15,col="gray40")
    ##    xy2 <- c(mean(xval[c(3,3,4)]), mean(hat[c(3,4,4)]))
    ##    arrows(xy2[1], xy2[2], xy2[1]+500, xy2[2]+0.15, col="gray40")
    ##    text(xval[3]+500-0.5*chw, mean(hat[3:4]+0.15),
    ##         paste(round(hat[3]-hat[4],3)),
    ##         srt=90, adj=0.5, col="gray20", cex=0.75)    
    ##    text(mean(xval[3:4])+500, hat[4]+0.15-0.5*chh,
    ##         paste(round(diff(xval[3:4]))), col="gray20", cex=0.75)
    print(round(hat,3))
    if(device!="")dev.off()
    invisible()
  }

g8.16 <-
function(device=""){
    if(device!="")hardcopy(width=3, height=2.25,
                           pointsize=8, device=device)
    oldpar <- par(mfrow = c(1, 1), mgp = c(2.5, 0.75, 0), 
                  mar = c(4.1,4.1,1.1,1.1), pty="s")
    on.exit(par(oldpar))
    bloodAids<-subset(Aids2,T.categ=="blood") 
    bloodAids$days<-bloodAids$death-bloodAids$diag 
    bloodAids$dead<-as.integer(bloodAids$status=="D") 
    bloodAids.coxph <- coxph(Surv(days, dead) ~ sex, data = bloodAids)
    plot(cox.zph(bloodAids.coxph), pch=0.9, tcl=-0.4)
    if(device!="")dev.off()    
  }

pkgs <- c("DAAG", "lattice", "MASS", "survival")
z <- sapply(pkgs, require, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)
if(any(!z)){
  notAvail <- paste(names(z)[!z], collapse=", ")
  warning(paste("The following packages should be installed:", notAvail))
}

g8.1()

plot of chunk unnamed-chunk-1

## The logit or log(odds) transformation. 
## Shown here is a plot of log(odds) versus proportion. 
## Notice how the range is stretched out at both ends.
g8.2()
##    0.8      1    1.2    1.4    1.6    2.5 
## 0.1429 0.2000 0.6667 0.6667 1.0000 1.0000 
## [1] 0.8 1.0 1.2 1.4 1.6 2.5

plot of chunk unnamed-chunk-1

## Plot, versus concentration, of proportion of patients
## not moving. The horizontal line is the estimate of
## the proportion of moves one would expect if the
## concentration had no effect.
g8.3()

plot of chunk unnamed-chunk-1

g8.4()

plot of chunk unnamed-chunk-1

g8.5()