Chapter 12, Figures 1 to 5: Multivariate Data Exploration and Discrimination
Packages required: “DAAG”, “lattice”, “ggplot2”, “MASS”

The script that follows is designed to be executed as it stands. It sets up functions that create Figures 12.1 to 12.5, then runs those functions.
If executed by clicking on the RStudio “Compile Notebook …” command, it
will be processed through R Markdown, generating a document that includes
code, output and graphs generated by executing the functions. If some
needed packages are missing, warning messages will appear. See further:
http://rmarkdown.rstudio.com/r_notebook_format.html

g12.1 <-
function(device=""){
    if(!require(lattice))return("Package 'lattice' must be installed.")     
    if(device!="") hardcopy(width=5.25, height=3.35, pointsize=c(7,4),
                            device=device, trellis=TRUE, color=TRUE)
    if(!require(DAAG))return("Package 'DAAG' must be installed.")     
    sitenames <- c("Cambarville","Bellbird","Whian Whian", "Byrangery",
                   "Conondale ","Allyn River", "Bulburin")    
    parset <- simpleTheme(pch=c(3,4,0,8,2,10,1), cex=0.85)
    par(fig=c(0, 0.5, 0, 1))
    plot.new()
    mtext(side=3, line=3, "A", adj=-0.55, cex=0.75) 
    par(fig=c(0.5, 1, 0, 1), new=TRUE)
    mtext(side=3, line=3, "B", adj=-0.55, cex=0.75) 
    colr <- c("red","blue")
    pchr <- c(3,4,0,8,2,10,1)
    ss <- expand.grid(site=1:7, sex=1:2)
    ss$sexsite <- paste(ss$sex, ss$site, sep="-")
    sexsite <- paste(possum$sex, possum$site, sep="-")
    print(splom(~ possum[, c(9:11)], panel = panel.superpose, cex=0.65,
                groups = sexsite, col = colr[ss$sex], pch = pchr[ss$site],
                varnames=c("tail\nlength","foot\nlength",
                  "ear conch\nlength"),
                par.settings=parset, axis.line.tck=0.6,
                key = list(points = list(pch=pchr),
                  text=list(sitenames),
                  columns=3, between=1,
                  between.columns=2), main=""),                
          position=c(0,0,0.502,1), newpage=FALSE)
    cloudgph <- cloud(earconch~taill+footlgth, data=possum, groups=site,
                      par.settings=parset, cex=0.8,
                      zoom=0.925, zlab=list("earconch", rot=90),                      
                      auto.key = list(text=sitenames,
                        columns=3, between=1, points=TRUE, 
                        between.columns=2))
    plot(cloudgph, position=c(0.5,0,1,1),newpage=FALSE)
    if(device!="")dev.off()
  }

g12.2 <-
function(colr=trellis.settings$superpose.symbol$col[c(2,5)],
           device=""){
    if(device!="")hardcopy(width=3.25, height=3.25, pointsize=c(7,4),
                           device=device)
    require(lattice)               
    here<-!is.na(possum$footlgth)  # We need to exclude missing values
    print(sum(!here))              # Check how many values are missing
    possum.prc <- princomp(possum[here,6:14])  # Principal components
    colr <- c("gray45", "black")
    pchr <- c(3,4,0,8,2,10,1)
    ss <- expand.grid(site=1:7, sex=1:2)
    ss$sexsite <- paste(ss$sex, ss$site, sep="-")
    sexsite <- paste(possum$sex, possum$site, sep="-")[here]
    print(xyplot(possum.prc$scores[,2] ~ possum.prc$scores[,1],
                 panel = panel.superpose, aspect="iso",
                 groups = sexsite, col = colr[ss$sex], pch = pchr[ss$site],
                 key = list(points = list(pch=pchr),
                   text=list(c("Cambarville","Bellbird","Whian Whian  ",
                     "Byrangery", "Conondale  ","Allyn River","Bulburin")),
                   columns=4, between=1, between.columns=2),
                 scales=list(tck=0.5),
                 xlab=list("1st Principal Component"),
                 ylab=list("2nd Principal Component"),
                 ))
    if(device!="")dev.off()
  }

g12.3 <-
function(colr=c("gray", "black"), device="", ntimes=3){
    if(device!="")hardcopy(width=5.5, height=2, trellis=TRUE,
                           device=device, pointsize=c(6,4))
    if(!require(DAAG))return("Package 'DAAG' must be installed.")   
    if(!require(ggplot2))return("Package 'ggplot2' must be installed.")     
    theme_set(theme_gray(base_size=7))
    ## Bootstrap principal components calculations: possum (DAAG)
    ## Sample from all rows where there are no missing values
    rowsfrom <- (1:dim(possum)[1])[complete.cases(possum[, 6:14])]
    n <- length(rowsfrom); ntimes <- 3 
    bootscores <- data.frame(matrix(0, nrow=ntimes*n, ncol=2)) 
    names(bootscores) <- c("scores1","scores2")
    allsamp <- numeric(ntimes*n)
    for (i in 1:ntimes){ 
      samprows <- sample(rowsfrom, n, replace=TRUE) 
      bootscores[n*(i-1)+(1:n), 1:2] <- 
        princomp(possum[samprows, 6:14])$scores[, 1:2] 
      allsamp[n*(i-1)+(1:n)] <- samprows
    }
    bootscores[, c("sex","Pop")] <- possum[allsamp, c("sex","Pop")]
    bootscores$sampleID <- factor(rep(1:ntimes, rep(n,ntimes)))
    print(quickplot(x=scores1, y=scores2, colour=sex, asp=1,
                    shape=Pop, facets=.~sampleID, data=bootscores)
          + scale_colour_manual(values=c("m"="black","f"="gray45")) 
          + xlab("First Principal Component") 
          + ylab("Second Principal Component"))
    if(device!="")dev.off()
  }

g12.4 <-
function(dset = leafshape17, show = "lines", color = F, device="")
{
  if(device!="") hardcopy(width=4.25, height=2.4, device=device)
  oldpar <- par(mfcol = c(1, 2), pty="s", mar=c(4.1,4.1,2.1,0.6),
                mgp=c(2.25, 0.5,0),
                tcl=-0.35, oma=c(0,.25,0,2))
  on.exit(par(oldpar))
  require(DAAG)
  fig1txt <- paste("(a) Untransformed scale")
  fig2txt <- paste("(b) Logarithmic scale, both axes")
  figtxt <- paste("Leaf length versus leaf width, for different species",
                  "\nat a North Queensland site.")
  xlab <- "Leaf width (mm)"
  ylab <- "Leaf length (mm)"
  plot(dset$bladewid, dset$bladelen, xlab = xlab, ylab = ylab, type = "n")
  points(dset$bladewid, dset$bladelen, pch=c(1,3)[dset$arch+1])
  mtext(side=3,line=0.25,"A", adj=0)
  mtext(side = 1, line = 5.5, fig1txt, adj = 0)
  plot(log10(dset$bladewid), log10(dset$bladelen), 
       pch=c(1,3)[dset$arch+1],axes = F, xlab = 
       xlab, ylab = ylab)
  xpos <- pretty(dset$bladewid)
  ypos <- pretty(dset$bladelen)
  lxpos<-log10(xpos)
  lypos<-log10(ypos)
  axis(1, at = lxpos, label = paste(xpos),cex=.65)
  axis(3, at = round(lxpos,2))
  axis(4, at = round(lypos,2))
  par(mgp = c(2.5, 0.75, 0))
  axis(2, at = lypos, label = paste(ypos), srt = 90)
  mtext(side=3,line=0.25,"B", adj=0)
  mtext(side = 4, line = 2.5, "log10(Leaf length)")
  mtext(side = 3, line = 2.25, "log10(Leaf width)")
  mtext(side = 1, line = 5.5, fig2txt, adj = 0)
  box()
  if(device!="")dev.off()
}

g12.5 <-
function(device=""){
    require(MASS)
    if(device!="") hardcopy(width=4.25, height=4.25,
                            device=device)
    here<- !is.na(possum$footlgth)
    possum<-possum[here,]
    possum.lda <- lda(site~hdlngth+skullw+totlngth+taill+
                      footlgth+earconch+eye+chest+belly, data=possum)
    scores <- predict(possum.lda)$x[,1:3]
    options(digits=4)
    pchr <- c(3,4,0,8,2,10,1)
    possum.splom <- 
splom(~ scores, 
      groups = possum$site, 
      varnames=c("LD1","LD2","LD3"),
      par.settings=simpleTheme(pch=pchr),
      key =list(text=list(c("Cambarville","Bellbird","Whian Whian","Byrangery",
                            "Conondale ","Allyn River", "Bulburin")), columns=4))
print(possum.splom)
    if(device!="")dev.off()
    invisible()
  }

## Figures 6 to 10 are in the script figs12-6to12.R

pkgs <- c("DAAG", "lattice", "ggplot2", "MASS")
z <- sapply(pkgs, require, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)
if(any(!z)){
  notAvail <- paste(names(z)[!z], collapse=", ")
  warning(paste("The following packages should be installed:", notAvail))
} 

trellis.par.set(fontsize=list(text=10, points=6))

g12.1()

plot of chunk unnamed-chunk-1

g12.2()
## [1] 1

plot of chunk unnamed-chunk-1

g12.3()

plot of chunk unnamed-chunk-1

g12.4()

plot of chunk unnamed-chunk-1

g12.5()

plot of chunk unnamed-chunk-1