Chapter 7: Exploiting the Linear Model Framework
Packages required: “DAAG”, “lattice”, “splines”, “mgcv”, “MonoPoly”

The script that follows is designed to be executed as it stands. It sets up
functions that create Ch 7 graphs, then runs those functions. If executed by
clicking on the RStudio “Compile Notebook …” command, it will be processed
through R Markdown, generating a document that includes code, output and
graphs generated by executing the functions. If some needed packages are
missing, warning messages will appear. See further:
http://rmarkdown.rstudio.com/r_notebook_format.html

g7.1 <-
function(device="", pointsize=c(9,5))
{
  if(device!="")hardcopy(width=3, height=2, pointsize=pointsize,
                         trellis=TRUE, color=FALSE, device=device)
  if(!require("lattice"))return("Lattice package must be installed")
  if(!require("DAAG"))return("DAAG package must be installed")
  print(stripplot(trt ~ weight, pch=0, xlab="Weight (mg)", data=sugar,
            aspect=0.5, scales=list(tck=0.6)))
  figtxt <- paste("Weights of sugar extracted from plants"
                  )
  cat(figtxt,"\n")
  if(device!="")dev.off()
  invisible()
}

g7.2 <-
function(device="")
{
  appletaste.aov <- aov(aftertaste ~ panelist + product, 
                        data=appletaste)
  if(device!="")hardcopy(width=4, height=2.1, device=device)
  oldpar <- par(mar = c(4.1,4.1,1.1,2.1), mgp=c(2.5,0.75,0), pty="s", tcl=-0.35,
                mfrow=c(1,2))
  on.exit(par(oldpar))
  termplot(appletaste.aov, partial=TRUE, col.res="black")
  if(device!="")dev.off()
  invisible()
}

g7.3 <-
function(dset = leaftemp, device="", cex.eq=0.65)
{
  if(device!="")hardcopy(width=5.25, height=2.65, device=device)

  oldpar <- par(mar = c(4.1,3.6,3.6,0.1), mgp=c(2.5,0.75,0), pty="s", tcl=-0.35)
  figtxt <- paste("A sequence of models fitted to the plot of tempDiff",
                  "\nvs vapPress, for low, medium and high levels of CO2")
  on.exit(par(oldpar))
  options(contrasts = c("contr.treatment", "contr.poly"))
  ## Needed for S-PLUS
  attach(dset)
  yran <- range(tempDiff)
  yran[2] <- yran[2] + diff(yran) * 0.08
  leaf.lm1 <- lm(tempDiff ~ vapPress, data = dset)
  leaf.lm2 <- lm(tempDiff ~ CO2level + vapPress, data = dset)
  leaf.lm3 <- lm(tempDiff ~ CO2level + vapPress + vapPress:CO2level, data
                 = dset)
  par(fig=c(0, 0.35, 0, 0.9))
  plot(vapPress, tempDiff, xlab = "Vapour Pressure", 
       ylab = "Temperature difference", ylim = yran,
       pch = as.numeric(CO2level), cex=0.7, cex.axis=0.8, col="black")
  mtext(side = 3, line = 1.65, "A: Single line", adj = 0)
  topleft <- par()$usr[c(1, 4)] + c(cex.eq, -cex.eq) * par()$cxy
  chh <- par()$cxy[2]*0.5
  ab1 <- leaf.lm1$coef
  mtext(side=3,line=0.75,
        paste("tempDiff =", round(ab1[1], 2), round(ab1[2], 2),
              " x vapPress",sep = ""), adj=0, col="black", cex=cex.eq) 
  abline(ab1[1], ab1[2])
  par(fig=c(0.32, 0.67, 0, 0.9), new=T)
  plot(vapPress, tempDiff, xlab = "Vapour Pressure",
       ylab = "", 
       ylim = yran, pch = as.numeric(CO2level),
       cex=0.7, cex.axis=0.8)
  mtext(side = 3, line = 1.65, "B: Parallel lines", adj = 0)
  a1 <- leaf.lm2$coef[1]
  a2 <- sum(leaf.lm2$coef[1:2])
  a3 <- sum(leaf.lm2$coef[c(1,3)])
  b1 <- leaf.lm2$coef[4]
  mtext(side=3,line=.75,
        paste("Intercepts are:", round(a1, 2), round(a2,2),
              round(a3,2),sep=", ")
        , adj = 0, col = "black", cex = cex.eq)
  mtext(side=3,line=0, paste("Slope is", round(b1, 2), sep = " "), 
        adj = 0, col = "black", cex = cex.eq)
  r1 <- range(vapPress, CO2level=="low")
  r2 <- range(vapPress, CO2level=="medium")
  r3 <- range(vapPress, CO2level=="high")
  y1 <- a1 + b1 * r1
  lines(r1, y1, lty = 2, lwd = 1, col = "black")
  y2 <- a2 + b1 * r2
  lines(r2, y2, lty = 4, lwd = 1, col = "black")
  y3 <- a3 + b1 * r3
  lines(r3, y3, lty = 5, lwd = 1, col = "black")
  par(fig=c(0.64, 0.99, 0, 0.9), new=T)
  plot(vapPress, tempDiff, xlab = "Vapour Pressure",
       ylab = "", 
       ylim = yran,  pch = as.numeric(CO2level),
       cex=0.7, cex.axis=0.8)
  mtext(side = 3, line = 1.65, "C: Separate lines", adj = 0)
  print(summary(leaf.lm3))
  a1 <- leaf.lm3$coef[1]
  a2 <- sum(leaf.lm3$coef[1:2])
  a3 <- sum(leaf.lm3$coef[c(1,3)])   
  b1 <- leaf.lm3$coef[4]
  b2 <- sum(leaf.lm3$coef[4:5])
  b3 <- sum(leaf.lm3$coef[c(4,6)])
  mtext(side=3,line=.75,
        paste("Intercepts are:", round(a1, 2), round(a2,2),
              round(a3,2), sep=", ")
        , adj = 0, col = "black", cex = cex.eq)
  mtext(side=3,line=0,
        paste("Slopes are", round(b1, 2), round(b2,2),
              round(b3,2), sep=", "),adj=0, cex=cex.eq)
  y1 <- a1 + b1 * r1
  lines(r1, y1, lty = 2, lwd = 1, col = "black")
  y2 <- a2 + b2 * r2
  lines(r2, y2, lty = 4, lwd = 1, col = "black")
  y3 <- a3 + b3 * r3
  lines(r3, y3, lty = 5, lwd = 1, col = "black")
  par(fig=c(0, 1, 0, 1),new=T, mar=rep(0,4))
  plot(0:1, 0:1, bty="n", axes=F, xlab="", ylab="", type="n")
  legend(0.5, 0.98, legend=c("low","medium","high"), lty=c(4,5,7), col="black",
         pch=1:3, xjust=0.45, yjust=1, bty="n", pt.cex=1.15, ncol=3,
         text.width=0.25, cex=0.85)
  detach(dset)
  cat(figtxt, "\n")
  par(fig=c(0,1,0,1))
  if(device!="")dev.off()
  invisible()
}

g7.4 <-
function(device="")
{
  if(device!="")hardcopy(width=5.5, height=1.4,
                         device=device)
  oldpar<-par(mar=c(4.1,4.1,2.1,1.6), mfrow=c(1,4), mgp=c(2.25,.5,0), pty="s")
  on.exit(par(oldpar))
  if(!exists("leaf.lm2"))leaf.lm2 <- lm(formula = tempDiff ~ CO2level +
                                        vapPress, data = leaftemp)
  plot(leaf.lm2,caption=c("Resids vs Fitted", 
                  "Normal Q-Q", "Scale-Location", "",
                  "Resid vs Leverage"), which=c(1:3,5), cook.levels=0.12)
  figtxt<-paste("Diagnostic plots for the parallel line model.")
  print(figtxt)   
  if(device!="")dev.off()
  invisible()
}

g7.5 <-
function(device="")
{
  if(device!="")hardcopy(width=1.8, height=1.8, pointsize=7, device=device)
  oldpar <- par(mar = c(3.6,3.1,1.1,1.1), mgp=c(2.25,0.5,0), pty="s", tcl=-0.35)
  on.exit(par(oldpar))
  if(!require(DAAG))return("The package 'DAAG' must be installed.")    
  plot(grain ~ rate, data = seedrates, pch = 16, xlab="Seeding rate",
       xlim = c(50, 160), axes=F, cex=1.2, ylab="Grains per head",
       cex.axis=0.4, cex.lab=1.15)
  figtxt <- paste("Plot of number of grain per head versus seeding rate,",
                  "\nfor the barley seeding rate data, with fitted",
                  "\nquadratic curve.")
  new.df <- data.frame(rate = (1:14) * 12.5)
  atx <- seedrates$rate
  axis(1,at=atx)
  axis(2)
  box()
  seedrates.lm2 <- lm(grain ~ rate + I(rate^2), data = seedrates)
  hat2 <- predict(seedrates.lm2, newdata = new.df, interval="predict", 
                  coverage = 0.95)
  lines(new.df$rate, hat2[,"fit"], lty = 2)
  cat(figtxt,"\n")
  if(device!="")dev.off()
  invisible()
}

g7.6 <-
function(device="")
{
  if(device!="")hardcopy(width=2.25, height=2.25, pointsize=7, device=device)
  oldpar <- par(mar = c(4.1,4.1,1.1,1.1), mgp=c(2.5,0.75,0), pty="s",
                tcl=-0.35)
  on.exit(par(oldpar))
  plot(grain ~ rate, data = seedrates, pch = 16, xlim = c(50, 175), ylim
       = c(15.5, 22), xlab="Seeding rate", ylab="Grains per head",
       cex=1.2, cex.axis=0.8, cex.lab=1.15)     
  figtxt <- paste("Plot of number of grain per head versus seeding rate,",
                  "\nfor the barley seeding rate data, with fitted",
                  "quadratic curve."
                  )
  new.df <- data.frame(rate = c((4:14) * 12.5))
  seedrates.lm1 <- lm(grain ~ rate, data = seedrates)
  seedrates.lm2 <- lm(grain ~ rate + I(rate^2), data = seedrates)
  pred1 <- predict(seedrates.lm1, newdata = new.df,
                   interval="confidence")
  hat1<-data.frame(fit=pred1[,"fit"], lower=pred1[,"lwr"],
                   upper=pred1[,"upr"])
  pred2 <- predict(seedrates.lm2, newdata = new.df,
                   interval="confidence")
  hat2<-data.frame(fit=pred2[,"fit"], lower=pred2[,"lwr"],
                   upper=pred2[,"upr"])
  lines(new.df$rate, hat1$fit, lty=1)
  lines(new.df$rate, hat2$fit, lty=2, lwd=2)
  rate <- new.df$rate

  lines(lowess(rate,hat1$lower),lty=1, col="gray")
  lines(lowess(rate, hat1$upper),lty=1, col="gray")
  lines(lowess(rate,hat2$lower),lty=2, col="gray")
  lines(lowess(rate, hat2$upper),lty=2, col="gray")
  cat("\n", figtxt, "\n")
  if(device!="")dev.off()
}

g7.7 <-
function(dframe=fruitohms, lt=c(1,2), device=""){
    if(device!="")hardcopy(width=3.5, height=3.25, device=device)
    oldpar<-par(mfrow=c(2,2),mar=c(4.1,4.1,1.6,0.6), oma=c(0.6,0,0,0.6),
                mgp=c(2.5,.5,0), tcl=-0.4)
    on.exit(par(oldpar))
    if(!require(splines))return("The package 'splines' must be installed.")      
    CIplot <- 
      function(form=ohms~ns(juice,2), lty=2, lwd=2,
               label="A:", xlab="", ylab="Resistance (ohms)"){
        sides <- strsplit(deparse(form), split=" *~ *")[[1]]
        allnams <- all.names(form)
        varnams <- all.vars(form)
        pform <- formula(paste(varnams, collapse="~"))
        plot(pform, data=dframe,
             cex=0.8,xlab="", ylab=ylab, type="n")       
        basis <- allnams[! allnams %in% c("~", varnams)]
        if(basis %in% c("ns","bs", "poly")){
          fruit.lmb<-lm(form, data=dframe) 
          with(dframe,   
               points(pform, data=dframe, cex=0.65, col="grey40"))
          xx <- data.frame(juice=pretty(dframe[, varnams[2]], 50))
          xx$hat <- predict(fruit.lmb, newdata=xx, interval="confidence")
          x <- xx$juice
          with(xx, {
            lines(spline(x, hat[, "fit"]))
            lines(spline(x, hat[, "lwr"]), lty=lty, col="grey40")
            lines(spline(x, hat[, "upr"]), lty=lty, col="grey40")}
               )
          spmat <- with(dframe, eval(parse(text=sides[2])))
          knots <- attributes(spmat)$knots
          df <- dim(spmat)[2]
          if(!is.null(knots))abline(v=knots, col="gray")
          if(!is.null(knots))
            titl <- paste(label, " N-spline, ", length(knots),
                          " internal knots (d.f. = ", df, "+1)", sep="") else
          titl <- paste(label, " Polynomial (d.f. = ", df, "+1)",
                        sep="")                
          mtext(side=3,line=0.5, titl, adj=0, cex=.75)       
        }
      }
    CIplot(form=ohms~ns(juice,2), lty=2, lwd=2, label="A:")
    CIplot(form=ohms~ns(juice,3), lty=2, lwd=2, label="B:", ylab="")
    CIplot(form=ohms~poly(juice,2), lty=2, lwd=2, label="C:")
    CIplot(form=ohms~poly(juice,3), lty=2, lwd=2, label="D:", ylab="")
    if(device!="")dev.off()
  }

g7.8 <-
function(df=fruitohms, device=""){
    if(!require(splines))return("The package 'splines' must be installed.")    
    if(device!="")hardcopy(width=5.2, height=1.4, device=device)
    oldpar <- par(mfrow=c(1,4), mar=c(4.1,4.1,2.1,1.1),
                  mgp=c(2.25,.75,0),oma=c(0,0,0,1), pty="s")
    on.exit(par(oldpar))
    fruit.lmb2<-lm(ohms~ns(juice,2), data=df)
    options(digits=4)
    print(summary(fruit.lmb2))
    plot(fruit.lmb2, caption=c("Resids vs Fitted", 
                       "Normal Q-Q", "Scale-Location", "",
                       "Resids vs Leverage"), cook.levels=0.12)
    cat("\nDiagnostic plots.\n")
    if(device!="")dev.off()
  }

g7.9 <-
function(dset = fruitohms, device="", cex.eq=0.25)
{
  if(!require(splines))return("The package 'splines' must be installed.")    
  if(device!="")hardcopy(width=4.5, height=3, device=device, pointsize=7)
  oldpar <- par(mar=c(3.6,4.1,1.6,2.1), oma=c(0,.6,0,2.1),
                mgp=c(3,.75,0),pty="s", fig=c(0,0.475,0.375,1), tcl=-0.4)
  on.exit(par(oldpar))
  n <- dim(dset)[1]
  fruit.lmb2<-lm(ohms~ns(juice,2),data=dset)
  b <- round(coef(fruit.lmb2))
  modmat2 <- model.matrix(fruit.lmb2)[,2:3]
  plot(fruitohms$juice, modmat2[,1], type="p", ylim=range(modmat2),
       xlab="", ylab="Spline basis functions", las=1, xaxt="n")
  ord<-order(fruitohms$juice)
  lines(fruitohms$juice[ord], modmat2[ord,1])
  mtext(side = 3, line = 0.5, "A: Degree 2 N-spline",
        adj = 0, cex=.85)
  legend("topleft", inset=-c(0.1,0.04), paste("Intercept =", b[1]), bty="n")  
  points(fruitohms$juice, modmat2[,2], type="p", col="gray")
  lines(fruitohms$juice[ord], modmat2[ord,2], col="gray")
  par(mgp=c(3,.45,0))
  axis(4, at=modmat2[ord[n],1], labels=paste(b[2]), las=1,
       tick=FALSE)
  axis(4, at=modmat2[ord[n],2], labels=paste(b[3]), col.axis="gray",
       las=1, tick=FALSE)
  par(mgp=c(3,.75,0))
  par(mar=c(3.6,4.1,1.6,2.1), oma=c(0,.6,0,2.1),
      mgp=c(3,.75,0),pty="s", fig=c(0.475,.95,0.375,1),new=TRUE)  
  fruit.lmb3<-lm(ohms~ns(juice,3),data=dset)
  b <- round(coef(fruit.lmb3))
  modmat3 <- model.matrix(fruit.lmb3)[,2:4]
  plot(fruitohms$juice, modmat3[,1], type="p", ylim=range(modmat3),
       xlab="", ylab="", las=1, xaxt="n")
  lines(fruitohms$juice[ord], modmat3[ord,1])
  mtext(side = 3, line = 0.5, "B: Degree 3 N-spline",
        adj = 0,cex=.85)
  legend("topleft", inset=-c(0.1,0.04), paste("Intercept =", b[1]), bty="n")
  points(fruitohms$juice, modmat3[,2], type="p", col="gray30")
  lines(fruitohms$juice[ord], modmat3[ord,2], col="gray30")
  points(fruitohms$juice, modmat3[,3], type="p", col="gray")
  lines(fruitohms$juice[ord], modmat3[ord,3], col="gray", lty=2)
  par(mgp=c(3,.45,0))
  axis(4, at=modmat3[ord[n],1], labels=paste(b[2]), las=1, tick=FALSE)
  axis(4, at=modmat3[ord[n],2], labels=paste(b[3]), col.axis="gray30",
       las=1, tick=FALSE)
  axis(4, at=modmat3[ord[n],3], labels=paste(b[4]), col.axis="gray",
       las=1, tick=FALSE)
  par(pty="m")
  par(mar=c(3.6,4.1,1.6,2.1), oma=c(0,.6,0,2.1),
      mgp=c(3,.75,0), fig=c(0,.475,0,.55), new=TRUE)
  hat2 <- predict(fruit.lmb2)
  hat3 <- predict(fruit.lmb3)
  plot(hat2[ord] ~ juice[ord], data=fruitohms, xlab="", type="l",
       ylab="Fitted curve (ohms)")
  mtext(side = 1, line = 2.25, "Apparent juice content")
  par(mar=c(3.6,4.1,1.6,2.1), oma=c(0,.6,0,2.1),
      mgp=c(3,.75,0), fig=c(.475,.95,0,.55), new=TRUE)
  plot(hat3[ord] ~ juice[ord], data=fruitohms, ylab="", type="l",
       xlab="")
  mtext(side = 1, line = 2.25, "Apparent juice content")
  if(device!="")dev.off()
  invisible()
}

g7.10 <-
function(df=fruitohms, device=""){
    if(device!="")hardcopy(width=2.25, height=2.25, device=device)
    oldpar <- par(mar = c(4.1,4.1,1.1,1.1), mgp=c(2.5,0.75,0), 
                  tcl=-0.35, ask=FALSE)
    on.exit(par(oldpar))
    if(!require(MonoPoly))return('Package "MonoPoly" must be installed')
    u <- monpol(ohms~juice, data=fruitohms, degree=3)
    plot(ohms ~ juice, data=df, xlab="Apparent juice content (%)",
         ylab="Resistance (ohms)", col="gray40")
    ord <- with(fruitohms, order(juice))
    lines(fitted(u)[ord] ~ juice[ord], data=fruitohms, col=2)
    if(device!="")dev.off()
    invisible()
}

g7.11 <-
function(dset = dewpoint, device="")
{
    if(!require(splines))return("The package 'splines' must be installed.")      
    if(device!="")hardcopy(width=4.25, height=2.15, device=device)
    oldpar <- par(mfrow=c(1,2), mar=c(4.1,2.1,1.1,1.1),
                  mgp=c(2.5,.75,0), pty="s", tcl=-0.35,
                  oma=c(0,2.6,0,0))
        if(!require(mgcv))return("The package 'mgcv' must be installed.") 
ds.gam <- gam(dewpt ~ s(mintemp) + s(maxtemp), data=dewpoint) 
plot(ds.gam, se=2, ylab="")
    mtext(side=2, line=0.5, "Contribution to predicted dewpoint", outer=TRUE)
    ## Try also: plot(ds.gam, se=2, residuals=TRUE, pch=1, cex=0.4)
    figtxt <- paste("Representation of dew point (dewpoint) as the sum of",
                    "\nan effect due to maximum temperature, and an effect",
                    "\ndue to minimum temperature. The dashed lines are 95%",
                    "\nconfidence bounds.", sep = "")
par(oldpar)
    if(device!="")dev.off()
    invisible()
}

g7.12 <-
function(dset = dewpoint, device="")
{
    if(device!="")hardcopy(trellis=TRUE, color=FALSE, width=5,
                           height=2.5, device=device, pointsize=c(8,6))
    figtxt <- paste("Given plots of residuals against maximum temperature,",
                    "\nfor different ranges of minimum temperature.", sep = "")
    if(!require(splines))return("The package 'splines' must be installed.")  
    if(!require(lattice))return("The package 'lattice' must be installed.")      
    ds.gam <- gam(dewpt ~ s(mintemp) + s(maxtemp), data=dewpoint) 
    mintempRange <- equal.count(dewpoint$mintemp, number=3)
    ds.xy <- xyplot(residuals(ds.gam) ~ maxtemp|mintempRange,
                    data=dewpoint, aspect=1, layout=c(3,1),
                    scales=list(tck=0.6),
                    type=c("p","smooth"), xlab="Maximum temperature",
                    ylab="Residual", par.strip.text=list(cex=0.75), cex=0.65)
    print(ds.xy)
    cat(figtxt,"\n")
}

pkgs <- c("DAAG", "lattice", "splines", "mgcv", "MonoPoly")
z <- sapply(pkgs, require, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)
## This is mgcv 1.8-2. For overview type 'help("mgcv-package")'.
if(any(!z)){
  notAvail <- paste(names(z)[!z], collapse=", ")
  warning(paste("The following packages should be installed:", notAvail))
}

g7.1()

plot of chunk unnamed-chunk-1

## Weights of sugar extracted from plants
g7.2()

plot of chunk unnamed-chunk-1

g7.3()
## 
## Call:
## lm(formula = tempDiff ~ CO2level + vapPress + vapPress:CO2level, 
##     data = dset)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6169 -0.4734  0.0131  0.4709  1.3075 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)               1.0016     1.0829    0.92   0.3590
## CO2levelmedium            1.8451     1.3703    1.35   0.1836
## CO2levelhigh              3.6874     1.3799    2.67   0.0099
## vapPress                 -0.0219     0.5206   -0.04   0.9666
## CO2levelmedium:vapPress  -0.7379     0.6659   -1.11   0.2726
## CO2levelhigh:vapPress    -1.4122     0.6649   -2.12   0.0381
## 
## Residual standard error: 0.682 on 56 degrees of freedom
## Multiple R-squared:  0.349,  Adjusted R-squared:  0.29 
## F-statistic: 5.99 on 5 and 56 DF,  p-value: 0.000165

plot of chunk unnamed-chunk-1

## A sequence of models fitted to the plot of tempDiff 
## vs vapPress, for low, medium and high levels of CO2
g7.4()

plot of chunk unnamed-chunk-1

## [1] "Diagnostic plots for the parallel line model."
g7.5()

plot of chunk unnamed-chunk-1

## Plot of number of grain per head versus seeding rate, 
## for the barley seeding rate data, with fitted 
## quadratic curve.
g7.6()