Chapter 3: Statistical Models
Packages required: “DAAG”

The script that follows is designed to be executed as it stands. It sets up
functions that create Ch 3 graphs, then runs those functions. If executed by
clicking on the RStudio “Compile Notebook …” command, it will be processed
through R Markdown, generating a document that includes code, output and
graphs generated by executing the functions. If some needed packages are
missing, warning messages will appear. See further:
http://rmarkdown.rstudio.com/r_notebook_format.html

g3.1 <-
function(fignum=3.2, device="")
{
  if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed")    
  if(device!="")hardcopy(width=2, height=2, device=device)
  oldpar <- par(mar = c(4.1,4.1,1.1,1.1), xaxs="i", yaxs="i",
                mgp = c(2.5, 0.5, 0), pty="s")
  on.exit(par(oldpar))
  plot(depression~weight, data = roller,
       xlim=c(0,13), ylim=c(0,32),  tcl=-0.3,
       xlab = "Weight of roller (t)",  ylab = "Depression (mm)", 
       pch = 16)
  abline(0, 2.5, lty=2)
  abline(-5, 2.75)
  figtxt <- paste("Depression (mm), versus weight of roller.", sep = "")
  cat("\n",figtxt,"\n")
  if(device!="")dev.off()
}

g3.2 <-
function(y = roller$depression, x = roller$weight,
           device="")
{
  if(device!="")hardcopy(width=4.25, height=2.25, device=device)
  oldpar <- par(mfrow = c(1, 2),
                mar = c(4.1,4.6,1.6,1.1),
                mgp = c( 2.5, 0.75, 0),oma=c(0,0,0,0),pty="s")
  on.exit(par(oldpar))
  pretext <- c(reg = "A", lo = "B")
  for(curve in c("reg", "lo")) {
    plot(x, y, xlab = "Roller weight (t)", ylab = 
         "Depression in lawn (mm)", pch = 4, tcl=-0.3)
    mtext(side = 3, line = 0.25, pretext[curve], adj = 0)
    topleft <- par()$usr[c(1, 4)]
    chw <- strwidth("O")
    chh <- strheight("O")
    points(topleft[1]+rep(0.75,2)*chw,topleft[2]-c(0.5,1.3)*chh,
           pch=c(4,1))
    text(topleft[1]+rep(1.25,2)*chw, topleft[2]-c(0.5,1.3)*chh,
         c("Data values", "Fitted values"),adj=0,cex=0.65)
    text(topleft[1]+1.25*chw, topleft[2]-2*chh,"(smooth)",adj=0,cex=.65)
    if(curve[1] == "reg") {
      u <- lm(y ~ -1 + x)
      abline(0, u$coef[1])
      yhat <- predict(u)
    }
    else {
      lawn.lm<-lm(y~x+I(x^2))
      yhat<-predict(lawn.lm)
      xnew<-pretty(x,20)
      b<-lawn.lm$coef
      ynew<-b[1]+b[2]*xnew+b[3]*xnew^2
      lines(xnew,ynew)
    }
    here <- y < yhat
    yyhat <- as.vector(rbind(y[here], yhat[here], rep(NA, sum(here))))
    xx <- as.vector(rbind(x[here], x[here], rep(NA, sum(here))))
    lines(xx, yyhat, lty = 2, col="gray")
    here <- y > yhat
    yyhat <- as.vector(rbind(y[here], yhat[here], rep(NA, sum(here))))
    xx <- as.vector(rbind(x[here], x[here], rep(NA, sum(here))))
    lines(xx, yyhat, lty = 1, col="gray")
    n <- length(y)
    ns <- min((1:n)[y - yhat >= 0.75*max(y - yhat)])
    ypos <- 0.5 * (y[ns] + yhat[ns])
    chw <- par()$cxy[1]
    text(x[ns] - 0.25*chw, ypos, "+ve residual", adj = 1,cex=0.65, col="gray30")
    points(x, yhat, pch = 1)
    ns <- (1:n)[y - yhat == min(y - yhat)][1]
    ypos <- 0.5 * (y[ns] + yhat[ns])
    text(x[ns] + 0.4*chw, ypos, "-ve residual", adj = 0,cex=0.65,col="gray30")
  }
  titl <- "Lawn depression (mm) versus roller weight (t)."
  titl <- paste(titl, 
                "\nIn (a) a line has been fitted, while in (b) the",
                "\nloess method was used to fit a smoothed curve.",
                "\nResiduals (the `rough') appear as vertical lines. ",
                "\nPositive residuals are black lines, while negative ",
                "\nresiduals are dotted.")
  if(device!="")dev.off()
  invisible()
}

g3.3 <-
function(device="")
{
  if(device!="")hardcopy(width=2, height=2, device=device)
  oldpar <- par(mar = c(4.1,4.1,1.1,1.1), mgp=c(2.5,0.5,0), cex.axis=0.85)
  on.exit(par(oldpar))
  z <- seq(-3,3,length=101)
  plot(z, dnorm(z), type="l", ylab="normal density", yaxs="i", bty="L",  tcl=-0.3,
       xlab="Distance, in SDs, from the mean")
  polygon(c(z[z <= 1.0],1.0),c(dnorm(z[z <= 1.0]), dnorm(-3)), col="grey")
  chh <- par()$cxy[2]
  arrows(-1.8, 0.32, -0.25, 0.2, length=0.07, xpd=T)
  cump <- round(pnorm(1), 3)
  text(-1.8, 0.32+0.75*chh, paste("pnorm(1)\n", "=", cump), xpd=T, cex=0.8)
  if(device!="")dev.off()
  invisible()
}

g3.4 <-
function(mu = 10, sigma = 1, n = 1, m = 50, four = 4, nrep = 5, 
           seed=21, fignum=3.5, totrows=1, device="")
{
  if(device!="")hardcopy(width=5.2, height=1.4, device=device,
                         pointsize=8) 
  if(!is.null(seed))set.seed(seed)
  oldpar <- par(mgp = c(2.25, 0.5, 0),mar=par()$mar-c(1,0,1.5,0),
                pty="s")
  on.exit(par(oldpar))
  if(is.null(totrows))
    totrows <- floor(sqrt(nrep))
  totcols <- ceiling(nrep/totrows)
  z <- range(pretty(mu + (c(-3.4, 3.4) * sigma), 50))
  xy <- data.frame(x=rep(0,nrep),y=rep(0,nrep),n=rep(n,nrep),
                   mm=rep(m,nrep),four=rep(four,nrep))


  fac <- factor(  paste("Simulation", 1:nrep),
                lev <- paste("Simulation", 1:nrep) )
  xlim<-z
  ## ylim<-c(0,dnorm(0)*sqrt(n))
  ylim <- c(0,1)
  xy <- split(xy,fac)
  xy<-lapply(1:length(xy),function(i){c(as.list(xy[[i]]), list(xlim=xlim,
                                                               ylim=ylim))})
  panel.mean <- function(data, mu = 10, sigma = 1, n2 = 1, 
                         mm = 100, nrows, ncols, ...)
    {
      vline <- function(x, y, lty = 1, col = 1)
        {
          lines(c(x, x), c(0, y), lty = lty, col = col)
        }
      n2<-data$n[1]
      mm<-data$mm[1]
      our<-data$four[1]
      ## Four characters in each unit interval of x
      nmid <- round(four * 4)
      nn <- array(0, 2 * nmid + 1)    
#########################################
      z <- mu+seq(from=-3.4*sigma, to=3.4*sigma, length=mm) 
      atx<-pretty(z)
      qz <- pnorm((z - mu)/sigma)
      dz <- dnorm((z - mu)/sigma)
      chw <- sigma/four   
      chh <- strheight("O")*0.75
      htfac <- (mm * chh)/four
      if(nrows==1&&ncols==1)
        lines(z, dz * htfac)
      if(nrows==1)axis(1,at=atx)
      y <- rnorm(mm, mu, sigma/sqrt(n2))
      pos <- round((y - mu)/sigma * four)

      for(i in 1:mm) {
        nn[nmid + pos[i]] <- nn[nmid + pos[i]] + 1
        xpos <- chw * pos[i]
        text(mu + xpos, nn[nmid + pos[i]] * chh - chh/4, "x")
      }
    }
  panelplot(xy,panel=panel.mean,totrows=totrows,totcols=totcols,
            oma=rep(2.5,4))
  if(device!="")dev.off()
}

g3.5 <-
function(device="")
{
  if(device!="")hardcopy(width=4.25, height=2, device=device,
                         trellis=TRUE, color=FALSE, pointsize=c(7,3))
  trellis.par.set(layout.heights=list(top.padding=0, main=0,
                    xlab=0, axis.xlab.padding=0, bottom.padding=0),
                  plot.symbol=list(cex=0.5))  
  sampdist(sampsize = c(3, 9, 30), seed = 23, nsamp = 1000, FUN = mean, 
            sampvals = function(n) exp(rnorm(n, mean = 0.5, sd = 0.3)), 
            tck = 0.6, plot.type = "density", layout = c(3,1))
  if(device!="")dev.off()
}

g3.6 <-
function(mu = 10, sigma = 1, n = 1, m = 50, four = 4, nrep = 5, 
           seed=21, fignum=3.6, totrows=1, device="")
{
  if(device!="")hardcopy(width=5.2, height=2, trellis=TRUE, color=FALSE,
                         device=device, pointsize=c(8,6))
  if(!is.null(seed))set.seed(seed)
  if(is.null(totrows))
    totrows <- floor(sqrt(nrep))
  totcols <- ceiling(nrep/totrows)
  z <- range(pretty(mu + (c(-3.4, 3.4) * sigma), 50))
  xlim<-z
  xy <- data.frame(y = rnorm(50*nrep, 10, 1), fac=factor(rep(1:nrep,
                                                rep(50, nrep))))
  qq <- qqmath(~y|fac, data=xy, par.strip.text=list(cex=0), layout=c(5,1),
               xlab="",ylab="", aspect=1, cex=0.5, scales=list(tck=0.4))
  print(qq)
  if(device!="")dev.off()
}

g3.7 <-
function(device=""){
    if(device!="")hardcopy(width=4.5, height=2.6, trellis=T,
                        device=device)
    if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed")        
    with(pair65,
         qreference(test = heated-ambient, nrep=8, nrows=2, xlab="")
    )
    if(device!="")dev.off()
  }

g3.8 <-
function(device="")
{
  if(device!="")hardcopy(width=4.25, height=2, device=device,
                         trellis=TRUE, color=FALSE, pointsize=c(7,3))
  trellis.par.set(layout.heights=list(top.padding=0, main=0,
                    xlab=0, axis.xlab.padding=0, bottom.padding=0),
                  plot.symbol=list(cex=0.5))
  sampdist(sampsize = c(3, 9, 30), seed = 23, nsamp = 1000, FUN = mean, 
           sampvals = function(n) exp(rnorm(n, mean = 0.5, sd = 0.3)), 
           tck = 0.6, plot.type = "qq", layout = c(3,1))
  if(device!="")dev.off()
}

pkgs <- c("DAAG")
z <- sapply(pkgs, require, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)
if(any(!z)){
  notAvail <- paste(names(z)[!z], collapse=", ")
  warning(paste("The following package should be installed:", notAvail))
}

g3.1()

plot of chunk unnamed-chunk-1

## 
##  Depression (mm), versus weight of roller.
g3.2()

plot of chunk unnamed-chunk-1

g3.3()

plot of chunk unnamed-chunk-1

g3.4()

plot of chunk unnamed-chunk-1

g3.5()

plot of chunk unnamed-chunk-1

g3.6()

plot of chunk unnamed-chunk-1

g3.7()