Code for Data Analysis And Graphics Using R, 3rd edn, CUP, 2010

Chapter 5: Regression with a Single Predictor
Packages required: “DAAG”, “grid”, “lattice”, “MCMCpack”, “boot”

The script that follows is designed to be executed as it stands. It sets up
functions that create Ch 5 graphs, then runs those functions. If executed by
clicking on the RStudio “Compile Notebook …” command, it will be processed
through R Markdown, generating a document that includes code, output and
graphs generated by executing the functions. If some needed packages are
missing, warning messages will appear. See further:
http://rmarkdown.rstudio.com/r_notebook_format.html

g5.1 <-
function(y = roller$depression, x = roller$weight, curve = c("reg"),
           fignum=5.2, device="")
{
  if(device!="")hardcopy(width=3.8, height=2.1,
                         device=device)
  titl <- paste("Diagnostic plots for the regression of Figure 5.1:",
                "\nPanel (A) plots residuals against fitted values.", 
                "\nPanel (B) is a normal probability plot of residuals.",
                "\nIf residuals follow a normal distribution",
                "the points \nshould fall close to a line.")
  oldpar <- par(mfrow = c(1, 2), mgp = c(2.25, 0.5, 0), pty="s",
                mar = par()$mar - c(1, 0.5, 0.5, 0), tcl=-0.35)
  on.exit(par(oldpar))
  u <- lm(depression ~ weight, data = roller)
  res <- resid(u)
  hat <- fitted(u)
  plot(hat, res, xlab = "Fitted values", ylab = "Residuals", pch = 16)
  mtext(side = 3, line = 0.25, "A", adj = 0)
  abline(h = 0, lty = 2)
  qqnorm(res, xlab = "Quantiles of Normal Distribution", ylab = 
         "Ordered data values", pch = 16, cex.main=1.15)
  mtext(side = 3, line = 0.25, "B", adj = 0)
  cat(titl, "\n")
  if(device!="")dev.off()
  invisible(u)
}

g5.2 <-
function(device=""){
    if(device!="")hardcopy(width=5, height=3, trellis=TRUE,
                           device=device, pointsize=c(9,5))
`qreference` <-
  function (test = NULL, m = 50, nrep = 6,
            distribution = function(x) qnorm(x, mean = ifelse(is.null(test), 0, mean(test)), sd = ifelse(is.null(test), 
                                                                                            1, sd(test))),
            seed = NULL, nrows = NULL, cex.strip = 0.75, 
            xlab = NULL, ylab = NULL) 
{
  library(lattice)
  if (!is.null(seed)) 
    set.seed(seed)
  if (!is.null(test)) {
    testnam <- deparse(substitute(test))
    m <- length(test)
    av <- mean(test)
    sdev <- sd(test)
    fac <- factor(c(rep(testnam, m), paste("reference", rep(1:(nrep - 
                                                               1), rep(m, (nrep - 1))))))
    fac <- relevel(fac, ref = testnam)
  }
  if (is.null(nrows)) 
    nrows <- floor(sqrt(nrep))
  ncols <- ceiling(nrep/nrows)
  if (is.null(test)) {
    xy <- data.frame(y = distribution(runif(m * nrep)), fac = factor(rep(1:nrep, 
                                                          rep(m, nrep))), id = factor(rep(1, m * nrep)))
    colpch <- c("black")
    qq <- qqmath(~y | fac, data = xy, par.strip.text = list(cex = 0), 
                 distribution = distribution, layout = c(ncols, nrows), 
                 xlab = "", ylab = "", aspect = 1, pch = 16)
  }
  else {
    if (length(test) > 0) {
      yy <- quantile(test, c(0.25, 0.75))
      xx <- distribution(c(0.25, 0.75))
      r <- diff(yy)/diff(xx)
    }
    xy <- data.frame(y = c(test, distribution(runif(m * (nrep - 
                       1)))), fac = fac, id = factor(rep(1:2, c(m, m * (nrep - 
                                           1)))))
    colpch <- c("black")
    if (is.null(xlab)) 
      xlab <- paste("Quantiles of", deparse(substitute(distribution)))
    if (is.null(ylab)) 
      ylab <- ""
    qq <- qqmath(~y | fac, data = xy, layout = c(ncols, nrows), 
                 groups = id, aspect = 1, xlab = xlab, ylab = "",
                 scales=list(tck=0.5),
                 pch = 16, distribution = distribution,
                 par.strip.text = list(cex = cex.strip))
  }
  print(qq)
}    
    if(!exists("roller.lm"))roller.lm <- lm(depression ~ weight, data=roller)
    qreference(residuals(roller.lm), nrep=8, nrows=2, xlab="") 
    if(device!="")dev.off()
  }

g5.3 <-
function(y = ironslag$chemical, x = ironslag$magnetic, device="")
{
  if(device!="")hardcopy(width=3.3, height=3.3,
                         device=device, pointsize=8)
  oldpar <- par(mfrow = c(2, 2), mar = c(4.6, 4.1, 2.1,1.6), pty="s",
                mgp = c(2.5, 0.75, 0),  cex = 0.85, mex = 0.85)
  on.exit(par(oldpar))
  mtext3 <- function(item="A", txt=leg[1],
                     xleft=par()$usr[1]){
    mtext(side = 3, line = 0.75, item, adj = -0.175, cex=0.9)
    mtext(side = 3, line = 0.25, txt, cex = 0.75, at=xleft, adj = 0)
  }
  titl <- paste(reg = 
                "Chemical vs Magnetic Test of Iron Content in Slag.",
                "\nThe fitted curves used the Splus loess smooth.", 
                "\nIn (d) the downward slope suggests a lower variance",
                "\nfor larger fitted values."
                )
  leg <- c("Fitted line and loess curve", 
           "Residuals from line, with smooth", 
           "Observed, vs prediction from line", 
           "Is variance constant about line?")
  plot(x, y, xlab = "Magnetic", ylab = "Chemical", type="n")
  u <- lm(y ~ x)
  abline(u$coef[1], u$coef[2])
  yhat <- predict(u)
  lines(x, yhat)
  mtext3(xleft=par()$usr[1]+0*par()$cxy[1])
  print(panel.smooth(x, y, span = 0.95, lty = 2, lwd = 1.5, pch=0))
  res <- residuals(u)
  plot(x, res, xlab = "Magnetic", ylab = "Residual", type = "n")
  mtext3(item="B", txt=leg[2], xleft=par()$usr[1]+0*par()$cxy[1])
  print(panel.smooth(x, res, span = 0.95))
  points(x, res, pch = 1, cex = 0.9, lwd = 1)
  hat <- fitted(u)
  plot(hat, y, xlab = "Predicted chemical", ylab = "Chemical",
       type = "n")
  print(panel.smooth(hat, y, span = 0.95))
  mtext3(item="C", txt=leg[3], xleft=par()$usr[1]+0*par()$cxy[1])
  yabs <- sqrt(abs(res))
  plot(hat, yabs, xlab = "Predicted chemical", ylab = "", type = "n")
  mtext(side = 2, line = 2.5, expression(sqrt(abs(residual))))
  mtext3(item="D", txt=leg[4], xleft=par()$usr[1]+0*par()$cxy[1])
  print(panel.smooth(hat, yabs, span = 0.95))
  cat(titl, "\n")
  if(device!="")dev.off()
  invisible()
}

g5.4 <-
function(form=chemical ~ magnetic, data=ironslag, device="")
{
  if(device!="")hardcopy(width=3.8, height=1.8, device=device)
  library(DAAG)
  oldpar <- par(mfrow = c(1, 2), mar = c(4.6, 4.1,2.1,1.6), tcl=-0.35,
                pty="s", mgp = c( 2.25, 0.5, 0))
  on.exit(par(oldpar))
  leg <- c("Residuals from fitted lowess curve.", 
           "Is variance about curve constant?")
  yvar <- all.vars(form)[1]
  xvar <- all.vars(form)[2]
  x <- data[,xvar]
  y <- data[,yvar]
  xy <- lowess(y ~ x)
  chemfit <- approx(xy$x, xy$y, xout=x, ties="ordered")$y
  resval <- y - chemfit
  par(mfrow=c(1,2))
  sqrtabs2 <- sqrt(abs(resval))
  plot(resval ~ x, xlab = "Magnetic", ylab = "Residual", pch = 1)
  abline(h=0, lty=2)
  mtext(side = 3, line = 0.75, "A", adj=-0.125, cex=0.9)
  mtext(side = 3, line = 0.25, leg[1], adj = 0, cex=0.75)
  plot(sqrtabs2 ~ chemfit, xlab = "Predicted chemical", ylab = 
       expression(sqrt(abs(residual))), type = "n")
  panel.smooth(chemfit, sqrtabs2, cex = 1.0)
  mtext(side = 3, line = 0.75, "B", adj=-0.125, cex=0.9)  
  mtext(side = 3, line = 0.25, leg[2], adj = 0, cex=0.75)
  if(device!="")dev.off()
  invisible()
}

g5.5 <-
function(y = softbacks$weight, x = softbacks$volume, curve = c("reg"), 
           show.fits = T, device="")
{
  if(device!="")hardcopy(width=2.25, height=2.25,
                         device=device)
  titl <- switch(curve[1],
                 reg = paste("Weight versus volume for softcover books,",
                   "\nwith fitted line."),
                 lo = paste(
                   "Weight versus volume for softcover books,", 
                   "\nwith fitted line and S-PLUS loess smooth curve."))
  oldpar <- par(mar = c(4.1,4.1,1.1,1.1), mgp = c(2.5, 0.5, 0), 
                pty="s", tcl=-0.35)
  on.exit(par(oldpar))
  u <- lm(weight ~ volume, data = softbacks)
  cat("\nCoefficients\n\n")
  options(digits=3)
  print(summary(u)$coef)
  cat("\n\n")
  print(anova(u))
  yhat <- predict(u)
  r <- cor(x, y)
  xlim <- range(x)
  xlim[2] <- xlim[2]+diff(xlim)*0.08
  plot(x, y, xlab = "Volume (cc)", xlim=xlim,
       ylab = "Weight (g)", pch = 1, ylim = range(c(y, yhat)))
  if(match("reg", curve, nomatch = 0)) {
    abline(u$coef[1], u$coef[2], lty = 1)
    z <- summary(u)$coef
    if(show.fits) {
      points(x, yhat, pch = 1, cex = 0.75)
      res <- resid(u)
      for(i in 1:length(res)) {
        resi <- res[i]
        izzy <- as.numeric(resi > 0)
        xi <- x[i]
        yhati <- yhat[i]
        yi <- y[i]
        lines(rep(xi, 2), c(yhati, yi),
              col=2-izzy, lty=2-izzy) 
        eps <- par()$cxy[1] * 0.2
        if(i == 6) {
          adji <- 1
          eps <-  - eps
        }
        else adji <- 0
        text(xi + eps, yhati + resi/2, paste(round(resi, 1)),
             adj = adji, cex = 0.65)
      }
    }
    bottomright <- par()$usr[c(2, 3)]
    chw <- par()$cxy[1]
    chh <- par()$cxy[2]
    btxt <- c(paste("a =", format(round(z[1, 1], 3)),
                    "  SE =", format(round(z[1, 2], 3))),
              paste("b =", format(round(z[2, 1], 3)),
                    "  SE =", format(round(z[2, 2], 3))))
    legend(bottomright[1],  bottomright[2], 
           legend=btxt, xjust=1, yjust=0, cex=0.8, bty="n")
  }
  cat("\n",titl,"\n")
  if(device!="")dev.off()
  invisible(u)
}

g5.6 <-
function(device=""){
    if(device!="")hardcopy(width=5.5, height=1.4, device=device)
    oldpar <- par(mfrow=c(1,4), mar=c(4.1,4.1,2.1,0.6),
                  mgp=c(2.0,.5,0),oma=c(0,0,0,1), pty="s") 
    if(!require(DAAG))return("The package 'DAAG' must be installed.")    
    softbacks.lm<-lm(weight~volume,data=softbacks)
    on.exit(par(oldpar))
    plot(softbacks.lm, which=1:4, caption=
         c("A: Residuals vs Fitted", 
           "B: Normal Q-Q", "C: Scale-Location",
           "D: Cook's distance"))
    figtxt <- "Diagnostic plots for previous figure."
    cat(figtxt, "\n")
    if(device!="")dev.off()
    invisible(softbacks.lm)
  }

g5.7 <-
function(y = roller$depression, x = roller$weight,  pred = T, 
           device="")
{
  if(device!="")hardcopy(width=2, height=2, device=device)
  titl <- paste("Lawn Depression, for Various Weights of Roller,",
                "\nwith fitted line and showing 95% pointwise",
                "\nconfidence bounds for points on the fitted line.")
  oldpar <- par(mar = c(4.1, 4.1, 1.1, 1.1),
                mgp = c(2.5, 0.5, 0), pty="s", tcl=-0.35)
  on.exit(par(oldpar))
  xlim <- c(0, max(x))
  ylim <- c(0, max(y))
  plot(x, y, xlab = "Weight of Roller (tonnes)", xlim=xlim, ylim=ylim,
       ylab = "Depression in Lawn (mm)", pch = 16)
  u <- lm(depression ~ weight, data = roller)
  abline(u$coef[1], u$coef[2], lty = 1)
  z <- summary(u)$coef
  y12 <- par()$usr[3:4]
  if(pred) {
    assign("xy", data.frame(weight = pretty(roller$weight, 
                              20)))
    hat <- predict(u, newdata = xy, interval="confidence")
    ci<-data.frame(lower=hat[,"lwr"],upper=hat[,"upr"])
    here <- ci$lower >= y12[1]
    lines(xy$weight[here], ci$lower[here], lty = 2,lwd=2,col="grey")
    here <- ci$upper < y12[2]
    lines(xy$weight[here], ci$upper[here], lty = 2,lwd=2,col="grey")
  }
  cat(titl, "\n")
  if(device!="")dev.off()
  invisible(u)
}

g5.8 <-
function(color = F, device="")
{
  if(device!="")hardcopy(width=3.4, height=2, device=device)
  figtxt<-paste("Two rubber band experiments, with different ranges",
                "\nof x-values. The dashed curves are pointwise 95%",
                "\nconfidence bounds for the fitted line.")
  oldpar <- par(tcl=-0.35, pty="s")
  on.exit(par(oldpar))
  panelci<-function(data,...)
    {
      nrows<-list(...)$nrows
      ncols<-list(...)$ncols
      if(ncols==1)axis(2)
      if(ncols==1)axis(1) else axis(3)
      x<-data$stretch; y<-data$distance
      u <- lm(y ~ x)
      upred <- predict(u, interval="confidence")
      ci <- data.frame(fit=upred[,"fit"],lower=upred[,"lwr"],
                       upper=upred[,"upr"])
      ord<-order(x)
      lines(x[ord], ci$fit[ord], lty=1, lwd=2)
      lines(lowess(x[ord], ci$upper[ord]), lty=2, lwd=2, col="grey")
      lines(lowess(x[ord], ci$lower[ord]), lty=2, lwd=2, col="grey")
    }
  xy<-rbind(elastic2,elastic1)
  nam <- c("Range of stretch 30-60 mm","Range of stretch 42-54 mm")
  trial<-rep(nam, c(dim(elastic2)[1],dim(elastic1)[1]))
  xlim<-range(elastic2$stretch)
  ylim<-range(elastic2$distance)
  xy<-split(xy,trial)
  xy<-lapply(1:length(xy),function(i){c(as.list(xy[[i]]), list(xlim=xlim,
                                                               ylim=ylim))})
                                        #  xy<-lapply(xy,function(z){z$xlim<-xlim; z$ylim<-ylim; z})
  names(xy) <- nam
  panelplot(xy,panel=panelci,totrows=1,totcols=2,
            par.strip.text=list(cex=.8), oma=c(4,4,2.5,2))
  cat(figtxt, "\n")
  mtext(side = 2, line = 2, "Distance moved (cm)", outer=TRUE)
  mtext(side=1,line=3.5,"Amount of stretch (mm)")
  if(device!="")dev.off()
}

g5.9 <-
function(df = houseprices, m = 3, x = "area", y = "sale.price", seed=47,
           dots=F, coltypes = c("gray40","gray40","gray20"), device="")
{
  if(device!="")hardcopy(width=2.25, height=2.25, device=device)
  if(!is.null(seed))set.seed(seed)
  oldpar<-par(mar=c(4.6, 4.6, 2.1, 1.1), mgp=c(2.5, 0.75, 0), 
              pty="s", tcl=-0.35)
  on.exit(par(oldpar))
  ltypes <- 1:3
  ptypes <- 2:4
  if(is.null(coltypes))coltypes <- c(2, 3, 6)
  options(digits=3)
  n <- dim(df)[1]
  rand <- sample(n)%%m+1
  xv <- df[, x]
  yv <- df[, y]
  plot(xv, yv, xlab = "Floor area", ylab = "Sale price", type = "n")
  xval <- pretty(xv, n = 20)
  houseprices.lm <- lm(yv ~ xv, data=df)
  print(anova(houseprices.lm))
  cat("\n")
  sumss<-0
  sumdf<-0
  par(lwd=2)
  for(i in sort(unique(rand))) {
    cat("\nfold", i, "\n")
    n.in <- (1:n)[rand != i]
    n.out <- (1:n)[rand == i]
    cat("Observations in test set:", n.out, "\n")
    ab <- lm(yv ~ xv, subset = n.in)$coef
    z <- xv[n.out]
    points(xv[n.out], yv[n.out], col=coltypes[i], pch = ptypes[i], cex = 1.0)
    if(dots)
      points(xv[n.out], yv[n.out], col = coltypes[i], pch = 16)
    pred <- ab[1] + ab[2] * z
    resid <- yv[n.out] - pred
    xy <- data.frame(rbind(z,pred, yv[n.out], resid
                           ), row.names=c("Floor area","Predicted price",
                                "Observed price","Residual"))
    yval <- ab[1] + ab[2] * xval
    lines(xval, yval, lwd = 2, col = coltypes[i], lty=ltypes[i])
    num <- length(n.out)
    print(xy,collab=rep("",num))
    ss <- sum(resid^2)
    sumss<-sumss+ss
    sumdf<-sumdf+num
    ms <- ss/num
    cat("\nSum of squares =", round(ss, 2), "   Mean square =", 
        round(ms, 2), "   n =", num, "\n")
  }
  print(c("Overall ms"=sumss/sumdf))
  topleft<-par()$usr[c(1,4)] + c(-1, 1.75)*par()$cxy
  par(lwd=1, xpd=T)
  legend(topleft[1],topleft[2],legend=paste("Fold",1:m," "),pch=ptypes,
         lty=ltypes,col=coltypes, cex=0.75, horiz=T, bty="n", xjust=0)
  par(col = 1, xpd=F)
  if(device!="")dev.off()
}

g5.10 <-
function(device=""){
    if(device!="")hardcopy(width=4, height=2, device=device)
    oldpar <- par(mar = c(4.1,4.1,1.6,2.1), mgp=c(2.5,0.75,0), 
                  pty="s", tcl=-0.35)
    on.exit(par(oldpar))
    par(mfrow=c(1,2))
    houseprices2.fn<-function (houseprices,index)
      {
        house.resample<-houseprices[index,]
        house.lm<-lm(sale.price~area,data=house.resample)
        houseprices$sale.price-predict(house.lm,houseprices)
                                        # resampled prediction errors
      }
    if(!require(boot))return("The package 'boot' must be installed.")
    set.seed(1111)
    n<-length(houseprices$area)
    R<-200
    houseprices.lm<-lm(sale.price~area,data=houseprices)
    houseprices2.boot<-boot(houseprices,R=R,statistic=houseprices2.fn)
    house.fac<-factor(rep(1:n,rep(R,n)))
    plot(house.fac,as.vector(houseprices2.boot$t),
         ylab="", xlab="House")
    mtext(side=2, line=3, "Prediction Errors", cex=0.9)
    mtext(side = 3, line = 0.25, "A", adj = 0)
    boot.se <- apply(houseprices2.boot$t,2,sd)
    model.se <- predict.lm(houseprices.lm,se.fit=T)$se.fit
    plot(boot.se/model.se, ylab="", xlab="House",pch=16)
    mtext(side=2, line=2.5, "Ratio of SEs\nBootstrap to Model-Based",
          cex=0.9)
    mtext(side = 3, line = 0.25, "B", adj = 0)
    abline(1,0)
    if(device!="")dev.off()
    invisible()
  }

g5.11 <-
function(sd = 2, npoints=5, nrep=4, slope=0.8,
           confint = F, stats = F, device="", type = "")
{
  if(device!="")hardcopy(width=4.5, height=2.6, device=device, pointsize=9)
  figtxt <- paste("Test for Linear Trend, versus Multiple Comparisons. The",
                  "\nsix panels are six simulated results from a straight",
                  "\nline model with slope ", slope,", sd=", sd, 
                  ", and ", nrep, " reps per level.", sep="")
  oldpar <- par(mfrow = c(2, 3), mar = c(3.1,3.1,2.6, 0.6), oma=c(0.5,0.5,1.25,0),
                mgp = c(2, 0.5, 0), xpd=TRUE)
  on.exit(par(oldpar))
  x <- rep(1:npoints, rep(nrep, npoints))
  
  for(i in 1:6) {
    y <- 100 + slope * x + rnorm(20, 0, sd)
    if(i<=3)xtxt <- "" else xtxt <- "Treatment level"
    if(i%in%c(1,4))ytxt <- "Response" else ytxt <- ""
    stripchart(y~x, xlab = xtxt, ylab = ytxt,
               vertical=TRUE, cex=1.15, xlim=c(.5,5.5))
    u <- lm(y ~ factor(x))
    z <- summary.aov(u)
    p.aov <- z[[1]][1,"Pr(>F)"]
    u <- lm(y ~ x)
    z1 <- summary(u)
    p.slope <- z1$coef[2, 4]
    if(stats)
      print(z1)
    if(i==1)mtext(side=3, line=2.5, "p-values are -", adj=0, cex=0.8)
    mtext(side = 3, line = .25, paste("Linear trend:", 
                      format(p.slope, digits = 2), "\naov: ", 
                      format(p.aov, digits = 2), sep = ""), adj = 1,cex=.8)
  }
  cat(figtxt, "\n")
  if(device!="")dev.off()
  invisible()
}

g5.12 <-
function(device=""){
    if(device!="")hardcopy(width=2.25, height=2.25, device=device)
    oldpar <- par(pty="s", tcl=-0.35)
    on.exit(par(oldpar))
    simulateLinear(sd = 2, npoints=5, nrep=4, nsets=200, seed=21)
    invisible()
    if(device!="")dev.off()
  }

g5.13 <-
function(tcol = 2, col="gray40", device="")
{
  if(device!="")hardcopy(width=4.4, height=3.2, device=device)
  titl <- paste("The above panels show (as solid curves) some alternative",
                "\npatterns of response. The dotted line shows the power",
                "\nfamily transformation that makes the response linear.", 
                "\nThe formula for this power family transformation appears",
                "\nabove the graph."
                )
  oldpar <- par(mfrow = c(2, 3), pty="s",
                mar = par()$mar - c( 1, 1, 1.0, 1),
                mgp = c(1.5, 0.5, 0), oma=c(0,1,0,1))
  on.exit(par(oldpar))
  powerplot(expr="sqrt(x)", xlab="")
  powerplot(expr="x^0.25", xlab="", ylab="")
  powerplot(expr="log(x)", xlab="", ylab="")
  powerplot(expr="x^2")
  powerplot(expr="x^4", ylab="")
  powerplot(expr="exp(x)", ylab="")
  if(device!="")dev.off()
  par(mfrow=c(1,1))
  invisible()
}

g5.14 <-
function(yvar = "heart", xvar="weight",
           species = "Cape fur seal", dset = cfseal,
           stats = F, device="")
{
  if(device!="")hardcopy(width=2.25, height=2.25, device=device)
  library(DAAG)
  pretext <- c(heart = "Heart weight", brain = "Brain weight", 
               liver = "Liver weight", weight="Body weight")[yvar]
  xtext <- c(heart = "Heart weight", brain = "Brain weight", 
             liver = "Liver weight", weight="Body weight")[xvar]
  oldpar <- par(mar = c(4.1,4.1,2.1,1.1), mgp=c(2.5,0.75,0), 
                pty="s", tcl=-0.35)
  on.exit(par(oldpar))
  x <- log(dset[,xvar])
  y <- log(dset[, yvar])
  ylim <- range(y)
  if (yvar == "heart") ylim<- log(c(82.5,1100))
  xlim <- range(x)
  if(xvar == "weight") xlim <- log(c(17,180))
  ylab <- switch(yvar,
                 heart = "Heart weight (g, log scale)",
                 brain = "Brain weight (g, log scale)",
                 liver = "Liver weight (g, log scale)",
                 weight = "Body weight (kg, log scale)")
  xlab <- switch(xvar,
                 heart = "Heart weight (g, log scale)",
                 brain = "Brain weight (g, log scale)",
                 liver = "Liver weight (g, log scale)",
                 weight = "Body weight (kg, log scale)")
  xtik <- pretty(exp(x),10)
  ytik <- pretty(exp(y), 10)
  xtik <- xtik[xtik >= exp(xlim[1]) & xtik <= exp(xlim[2])]
  ytik <- ytik[ytik >= exp(ylim[1]) & ytik <= exp(ylim[2])]
  plot(x, y, xlab = xlab, ylab = ylab, axes = F, xlim = 
       xlim, ylim = ylim, pch = 16, cex=0.85)
  axis(1, at = log(xtik), labels = paste(xtik))
  axis(2, at = log(ytik), labels = paste(ytik))
  box()
  form1 <- formula(y ~ x)
  u <- lm(form1, data = dset)
  abline(u$coef[1], u$coef[2])
  usum <- summary(u)$coef
  options(digits=3)
  print(usum)
  if(yvar == "brain") {
    u2 <- lm(form1, data = dset, subset = brain < log(1.7))
    redrange <- range(x[y < max(y)])
    yval <- u2$coef[1] + u2$coef[2] * redrange
    lines(redrange, yval, lty = 2)
    if(stats)
      print(summary(u2))
  }
  cwh <- par()$cxy
  eqn <- paste("log y =", round(usum[1, 1], 2), " [SE ",
               round(usum[1, 2], 3), "] + ", round(usum[2, 1], 3),
               " [", round(usum[2, 2], 2), "] log x")
  mtext(side=3, line=0.25, eqn, adj = 0.5, cex = 0.75)
  figtxt <- paste(pretext, " versus ", xtext,
                  " for ", length(y), " members \n of the species ",
                  species, ".", collapse = "")
  if(yvar == "logbrain")
    figtxt <- paste(figtxt, "  The dotted line shows the", 
                    "\neffect of omitting the data point",
                    "for the largest animal.")
  cat(figtxt, "\n")
  if(device!="")dev.off()
  if(stats)
    summary(u)
}

g5.15 <-
function(dset1 = pair65, dset2 = leafshape, device="", col="gray30")
{
  if(device!="")hardcopy(width=4.2, height=2.1, device=device)
  oldpar <- par(mfrow = c(1, 2), mar = c(4.1,4.1,2.1,2.1), mgp=c(2.5,.75,0),
                pty="s", tcl=-0.35)
  library(DAAG)
  on.exit(par(oldpar))
  x <- dset1$ambient
  y <- dset1$heated
  ylab <- "Stretch (heated band)"
  plot(x, y, xlab = "Stretch (band held at ambient)", 
       ylab = ylab, pch = 16)
  topleft <- par()$usr[c(1, 4)] + c(0.5, -0.5) * par()$cxy
  text(topleft[1], topleft[2], paste("r =", round(cor(x, y), 2)),
       adj = 0)
  mtext(side = 3, line = 0.4, "A", adj = 0)
  u1 <- lm(heated ~ ambient, data = dset1)
  abline(u1$coef[1], u1$coef[2])
  u2 <- lm(ambient ~ heated, data = dset1)
  abline( - u2$coef[1]/u2$coef[2], 1/u2$coef[2], lty = 2)
  x <- dset2$logpet
  y <- dset2$loglen
  plot(x, y, xlab = "Petiole length (mm)", ylab = "Leaf length (mm)", 
       axes = F, type="n")
  points(x, y, cex=0.65, col=col)
  topleft <- par()$usr[c(1, 4)] + c(0.5, -0.5) * par()$cxy
  text(topleft[1], topleft[2], paste("r =", round(cor(x, y), 3)),
       adj = 0)
  mtext(side = 3, line = 0.4, "B", adj = 0)
  xlab <- pretty(exp(x))
  xlab <- c(0.5, 1, xlab)
  xlab <- xlab[xlab > 0]
  ylab <- pretty(exp(y))
  ylab <- ylab[ylab > 0]
  axis(1, at = log(xlab), labels = paste(xlab))
  axis(2, at = log(ylab), labels = paste(ylab))
  box()
  u1 <- lm(y ~ x)
  u2 <- lm(x ~ y)
  abline(u1$coef[1], u1$coef[2])
  abline( - u2$coef[1]/u2$coef[2], 1/u2$coef[2], lty = 2)
  figtxt <- paste("Plot (A) is for a dataset that compared the stretch",
                  "\nof an elastic band that was heated with that for an",
                  "\nelastic band that was held at ambient temperature,",
                  "\nwhile (B) is for a leaf dataset. In each plot we",
                  "\nshow the regression line for y on x (solid line),",
                  "\nand the regression line for x on y (dotted line).",
                  "\nIn (A) the lines are quite similar, while in (B)",
                  "\nwhere the correlation is smaller, the lines are",
                  "\nquite different.")
  cat(figtxt, "\n")
  if(device!="")dev.off()
}

g5.16 <-
function(min=30, device=""){
    if(!require(MCMCpack))return("The package MCMCpack must be installed.")
    if(device!="")hardcopy(width=5, height=4, color=FALSE,
                           device=device, pointsize=c(8,5))
    opar <- par(mgp=c(2,.75,0), mar=c(4.1,3.6,2.6,1.1), oma=rep(0,4),
                no.readonly=TRUE)   
    mat <- matrix(c(1:6), byrow=TRUE, ncol=2)
    layout(mat, widths=rep(c(2,1),3), heights=rep(1,8))
    roller.mcmc <- MCMCregress(depression ~ weight, data=roller)
    plot(roller.mcmc, auto.layout=FALSE, ask=FALSE, col="gray40")
    par(opar)
    if (device != "") dev.off() 
  }

pkgs <- c("DAAG","grid","lattice", "MCMCpack", "boot")
z <- sapply(pkgs, require, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)

## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:DAAG':
## 
##     hills
## 
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2014 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##

if(any(!z)){
  notAvail <- paste(names(z)[!z], collapse=", ")
  warning(paste("The following packages should be installed:", notAvail))
}

g5.1()

plot of chunk unnamed-chunk-1

## Diagnostic plots for the regression of Figure 5.1: 
## Panel (A) plots residuals against fitted values. 
## Panel (B) is a normal probability plot of residuals. 
## If residuals follow a normal distribution the points 
## should fall close to a line.

g5.2()

plot of chunk unnamed-chunk-1

g5.3()

## NULL

## NULL

## NULL

plot of chunk unnamed-chunk-1

## NULL
## Chemical vs Magnetic Test of Iron Content in Slag. 
## The fitted curves used the Splus loess smooth. 
## In (d) the downward slope suggests a lower variance 
## for larger fitted values.

g5.4()

plot of chunk unnamed-chunk-1

g5.5()

## 
## Coefficients
## 
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   41.372     97.559   0.424 0.686293
## volume         0.686      0.106   6.475 0.000644
## 
## 
## Analysis of Variance Table
## 
## Response: weight
##           Df Sum Sq Mean Sq F value  Pr(>F)
## volume     1 437878  437878    41.9 0.00064
## Residuals  6  62669   10445

plot of chunk unnamed-chunk-1

## 
##  Weight versus volume for softcover books, 
## with fitted line.

g5.6()

plot of chunk unnamed-chunk-1

## Diagnostic plots for previous figure.

g5.7()

plot of chunk unnamed-chunk-1

## Lawn Depression, for Various Weights of Roller, 
## with fitted line and showing 95% pointwise 
## confidence bounds for points on the fitted line.

g5.8()

plot of chunk unnamed-chunk-1

## Two rubber band experiments, with different ranges 
## of x-values. The dashed curves are pointwise 95% 
## confidence bounds for the fitted line.

g5.9()

plot of chunk unnamed-chunk-1

## Analysis of Variance Table
## 
## Response: yv
##           Df Sum Sq Mean Sq F value Pr(>F)
## xv         1  18566   18566       8  0.014
## Residuals 13  30179    2321               
## 
## 
## fold 1 
## Observations in test set: 1 2 8 12 15 
##                     X1    X2    X3    X4   X5
## Floor area      694.00 905.0 714.0 696.0 1191
## Predicted price 196.31 229.5 199.5 196.6  274
## Observed price  192.00 215.0 220.0 255.0  375
## Residual         -4.31 -14.5  20.5  58.4  101
## 
## Sum of squares = 14160    Mean square = 2832    n = 5 
## 
## fold 2 
## Observations in test set: 3 4 5 7 11 
##                    X1     X2   X3    X4    X5
## Floor area      802.0 1366.0  716 821.0 790.0
## Predicted price 234.6  360.9  215 238.9 231.9
## Observed price  215.0  274.0  113 212.0 221.5
## Residual        -19.6  -86.9 -103 -26.9 -10.4
## 
## Sum of squares = 19315    Mean square = 3863    n = 5 
## 
## fold 3 
## Observations in test set: 6 9 10 13 14 
##                    X1     X2    X3  X4     X5
## Floor area      963.0 1018.0 887.0 771 1006.0
## Predicted price 247.5  258.2 232.6 210  255.9
## Observed price  185.0  276.0 260.0 260  293.0
## Residual        -62.5   17.8  27.4  50   37.1
## 
## Sum of squares = 8848    Mean square = 1770    n = 5 
## Overall ms 
##       2822

g5.10()

plot of chunk unnamed-chunk-1

g5.11()

plot of chunk unnamed-chunk-1

## Test for Linear Trend, versus Multiple Comparisons. The
## six panels are six simulated results from a straight
## line model with slope 0.8, sd=2, and 4 reps per level.

g5.12()

## 
## Proportion of datasets where linear p-value < aov p-value = 0.87

g5.13()

plot of chunk unnamed-chunk-1

g5.14()

##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)     1.20     0.2113     5.7 4.12e-06
## x               1.13     0.0547    20.6 1.87e-18
## Heart weight  versus  Body weight  for  30  members 
##  of the species  Cape fur seal .

g5.15()

plot of chunk unnamed-chunk-1

## Plot (A) is for a dataset that compared the stretch 
## of an elastic band that was heated with that for an 
## elastic band that was held at ambient temperature, 
## while (B) is for a leaf dataset. In each plot we 
## show the regression line for y on x (solid line), 
## and the regression line for x on y (dotted line). 
## In (A) the lines are quite similar, while in (B) 
## where the correlation is smaller, the lines are 
## quite different.

g5.16()

plot of chunk unnamed-chunk-1

Code for ‘Data Analysis And Graphics Using R’, 3rd edn, CUP, 2010

John Maindonald and John Braun

Oct 2, 2014