#' --- #' title: "Code for 'Data Analysis And Graphics Using R', 3rd edn, CUP, 2010" #' author: "John Maindonald and John Braun" #' date: "Oct 2, 2014" #' --- #' Chapter 4: A Review of Inference Concepts #' Packages required: "DAAG","grid","lattice" #' #' The script that follows is designed to be executed as it stands. It sets up #' functions that create Ch 4 graphs, then runs those functions. If executed by #' clicking on the RStudio "Compile Notebook ..." command, it will be processed #' through R Markdown, generating a document that includes code, output and #' graphs generated by executing the functions. If some needed packages are #' missing, warning messages will appear. See further: #' http://rmarkdown.rstudio.com/r_notebook_format.html g4.1 <- function(device=""){ if(device!="")hardcopy(width=4.5, pointsize=8, device=device, height=2.2) oldpar <- par(mfrow = c(1,2), mar=par()$mar-c(.5, 0, 1.5, 0), pty="s", mgp=c(2.5, .75, 0), tcl=-0.4, xpd=T) on.exit(par(oldpar)) x <- seq(from=-4.2, to = 4.2, length.out = 50) ylim <- c(0, dnorm(0)) ylim[2] <- ylim[2]+0.1*diff(ylim) h1 <- dnorm(x) h3 <- dt(x, 3) h8 <- dt(x,8) plot(x, h1, type="l", xlab = "", xaxs="i", ylab = "", yaxs="i", bty="L", ylim=ylim) mtext(side=3,line=0.75, "A", adj=-0.2) lines(x, h8, col="grey60") mtext(side=1, line=2.5, "No. of SEMs from mean") mtext(side=2, line=2.5, "Probability density") chh <- par()$cxy[2] topleft <- par()$usr[c(1,4)] + c(0, 0.4*chh) legend(topleft[1], topleft[2], col=c("black","grey60"), lty=c(1,1), legend=c("Normal","t (8 d.f.)"), bty="n", cex=0.8) plot(x, h1, type="l", xlab = "", xaxs="i", ylab = "", yaxs="i", bty="L", ylim=ylim) mtext(side=3,line=0.75, "B", adj=-0.2) lines(x, h3, col="grey60") mtext(side=1, line=2.5, "No. of SEMs from mean") mtext(side=2, line=2.5, "Probability density") legend(topleft[1], topleft[2], col=c("black","grey60"), lty=c(1,1), legend=c("Normal","t (3 d.f.)"), bty="n", cex=0.8) if(device!="")dev.off() } g4.2 <- function(cump=0.975, device=""){ if(device!="")hardcopy(width=4.5, height=2.2, pointsize=8, device=device) oldpar <- par(mfrow = c(1,2), mar=par()$mar-c(.5, 0, 1.5, 0), tcl=-0.4, pty="s", mgp=c(2.5, .75, 0)) on.exit(par(oldpar)) x <- seq(from=-3.9, to = 3.9, length.out = 50) ylim <- c(0, dnorm(0)) plotfun <- function(cump, dfun = dnorm, qfun=qnorm, ytxt = "Normal probability density", txt1="qnorm", txt2="") { h <- dfun(x) plot(x, h, type="l", xlab = "", xaxs="i", xaxt="n", ylab = ytxt, yaxs="i", bty="L", ylim=ylim) axis(1, at=c(-2, 0), cex=0.8) axis(1, at=c((-3):3), labels=F) tailp <- 1-cump z <- qfun(cump) ztail <- pretty(c(z,4),20) htail <- dfun(ztail) polygon(c(z,z,ztail,max(ztail)), c(0,dfun(z),htail,0), col="gray") text(0, 0.5*dfun(z)+0.08*dfun(0), paste(round(tailp, 3), " + ", round(1-2*tailp,3), "\n= ", round(cump, 3), sep=""), cex=0.8) lines(rep(z, 2), c(0, dfun(z))) lines(rep(-z, 2), c(0, dfun(z)), col="gray60") chh <- par()$cxy[2] arrows(z, -2*chh,z,-0.1*chh, length=0.1, xpd=T) text(z, -3*chh, paste(txt1, "(", cump, txt2, ")", "\n= ", round(z,2), sep=""), xpd=T) x1 <- z + .3 y1 <- dfun(x1)*0.35 y0 <- dfun(0)*0.2 arrows(-2.75, y0, -x1, y1, length=0.1, col="gray60") arrows(2.75, y0, x1, y1, length=0.1) text(-2.75, y0+0.5*chh, tailp, col="gray60") text(2.75, y0+0.5*chh, tailp) } ytxt <- "t probability density (8 d.f.)" plotfun(cump=cump) mtext(side=3, line=1.5, "A", adj=-0.2) ytxt <- "t probability density (8 d.f.)" plotfun(cump=cump, dfun=function(x)dt(x, 8), qfun=function(x)qt(x, 8), ytxt=ytxt, txt1="qt", txt2=", 8") mtext(side=3, line=1.5, "B", adj=-0.2) if(device!="")dev.off() } g4.3 <- function(device="") { if(device!="")hardcopy(width=4.75, height=2.5, pointsize=8, device=device) if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed") titl <- paste( "Second versus first member, for each pair. The first", "\npanel is for the elastic band data. The second (from", "\nDarwin) is for plants of the species Reseda lutea") oldpar <- par(mfrow = c(1, 2), mar = par()$mar - c(.5, -0.5, .5, 0), pty="s", tcl=-0.4, oma=c(0,0,0,.5),mgp = c(2, 0.5, 0)) on.exit(par(oldpar)) onesamp(dset = pair65, x = "ambient", y = "heated", xlab = "Amount of stretch (ambient)", ylab = "Amount of stretch (heated)") ## Data set mignonette holds Darwin's data on Reseda lutea, from ## Darwin, Charles 1877. The Effects of Cross and Self Fertilisation ## in the Vegetable Kingdom. Appleton and Company, New York, p.118. ## Data were in five pots, holding 5,5,5,5,4 pairs of plants ## respectively. onesamp(dset = mignonette, x = "self", y = "cross", xlab = "Height of Self-fertilised Plant", ylab = "Height of Cross-Fertilised Plant", dubious = 0) cat("\n", titl, "\n") if(device!="")dev.off() invisible() } g4.4 <- function(device="") { if(device!="")hardcopy(width=2.4, height=2.4, pointsize=8, device=device) oldpar<-par(mar=c(4.1,4.1,1.1,1.1), mfrow=c(1,1), mgp=c(2.5,0.75,0)) on.exit(par(oldpar)) if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed") x2 <- chisq.test(rareplants) x2E <- stack(data.frame(t(x2$expected))) habitat <- rep(c(1,2,3), 4) plot(x2E$values ~ habitat, xlab="habitat", ylab="Expected Number of Species", tcl=-0.4, axes=FALSE, xlim=c(0.5, 3.5), pch=16) text(x2E$values ~ habitat, labels=x2E$ind, pos=rep(c(4,4,2,2),3), cex=0.85) axis(1, at=seq(1,3), labels=c("D", "W", "WD")) axis(2) box() if(device!="")dev.off() invisible() } g4.5 <- function(device="") { if(device!="")hardcopy(width=4, height=1.75, pointsize=c(9,5), device=device, trellis=TRUE) if(!require(lattice))return("Package 'lattice' is not installed -- cannot proceed") tomato <- data.frame(weight= c(1.5, 1.9, 1.3, 1.5, 2.4, 1.5, # water 1.5, 1.2, 1.2, 2.1, 2.9, 1.6, # Nutrient 1.9, 1.6, 0.8, 1.15, 0.9, 1.6), # Nutrient+24D trt = rep(c("water", "Nutrient", "Nutrient+24D"), c(6, 6, 6))) tomato$trt <- relevel(tomato$trt, ref="water") gph <- with(tomato, stripplot(trt~weight, aspect=0.35, scale=list(tck=0.6))) print(gph) figtxt <- paste("Weights of tomato plants" ) cat(figtxt,"\n") if(device!="")dev.off() invisible() } g4.6 <- function(device=""){ if(device!="")hardcopy(width=4.2, height=2.0, device=device) tomato <- data.frame(weight= c(1.5, 1.9, 1.3, 1.5, 2.4, 1.5, # water 1.5, 1.2, 1.2, 2.1, 2.9, 1.6, # Nutrient 1.9, 1.6, 0.8, 1.15, 0.9, 1.6), # Nutrient+24D trt = rep(c("water", "Nutrient", "Nutrient+24D"), c(6, 6, 6))) tomato$trt <- relevel(tomato$trt, ref="water") tomato.aov <- aov(weight~trt, data=tomato) onewayPlot(obj=tomato.aov) if(device!="")dev.off() } g4.7A <- function(figtxt = "Stripchart of rice data", mc = F, device="") { if(device!="")hardcopy(width=3, height=2.4, pointsize=8, device=device) oldpar<-par(mar=par()$mar+c(-0.5,5,-1,0), mfrow=c(1,1)) on.exit(par(oldpar)) if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed") if(!require(lattice))return("Package 'lattice' is not installed -- cannot proceed") if(mc) { figtxt <- "Simulated Data" av <- mean(take$root) sd <- sqrt(var(take$root)) take$root <- rnorm(dim(take)[1], av, sd) } lev <- c("F10", "NH4Cl", "NH4NO3", "F10 +ANU843", "NH4Cl +ANU843", "NH4NO3 +ANU843") rice$trt <- factor(rice$trt, levels=lev) gph <- dotplot(trt ~ ShootDryMass, pch=1, cex=1, las=2, xlab="Shoot dry mass (g)", data=rice, panel=function(x,y,...){panel.dotplot(x,y,...) av <- sapply(split(x,y),mean); ypos <- unique(y) lpoints(ypos~av, pch=3, col="black", cex=1.25)}, main=list("A:", cex=1.25, just="left", x=0.1, font=1)) print(gph) if(device!="")dev.off() invisible() } g4.7B <- function(figtxt = "Interaction plot of rice data", mc = F, device="") { if(device!="")hardcopy(width=4.5, height=2, pointsize=7, device=device) par(mar = c(4.6,4.1,3.1,2.6), mgp=c(2.25,0.5,0)) with(rice, interaction.plot(fert, variety, ShootDryMass, legend = FALSE, xlab="Level of first factor")) xleg <- par()$usr[2] yleg <- par()$usr[4] - 0.75 * diff(par()$usr[3:4]) ylabs <- levels(rice$variety) leginfo <- legend(xleg, yleg, bty = "n", legend = ylabs, col = 1, lty = 2:1, xjust = 1, cex = 0.8)$rect chh <- par()$cxy[2] text(leginfo$left + 0.5 * leginfo$w, leginfo$top, " variety", adj = 0.5, cex = 0.8) mtext(side=3, line=1.25, adj=-0.15, "B:") if(device!="")dev.off() invisible() } g4.8 <- function(mc = F, type="box", device="") { if(device!="")hardcopy(width=2, height=2, pointsize=8, device=device) oldpar <- par(mar = c(4.1,4.1,1.1,1.1), pty="s", mgp=c(2.5,0.75,0)) on.exit(par(oldpar)) figtxt <- "Distance travelled, relative to distance up a 20 degree ramp." if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed") plot(distance.traveled ~ starting.point, data=modelcars, xlim=c(0,12.5), tcl=-0.35, xaxs="i", xlab = "Distance up ramp (cm)", ylab="Distance traveled (cm)") if(device!="")dev.off() invisible() } g4.9 <- function(x1=two65$ambient, x2=two65$heated, nsim=2000, plotit=T, fignum="4.10", device=""){ if(device!="")hardcopy(width=2.25, height=2.25, device=device) oldpar<-par(mar=par()$mar-c(1,0,1,0), tcl=-0.3) on.exit(par(oldpar)) if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed") n1 <- length(x1) n2<-length(x2) n<-n1+n2 x<-c(x1,x2) dbar <- mean(x2)-mean(x1) z <- array(,nsim) for(i in 1:nsim){ mn <- sample(n,n2,replace=FALSE) dbardash <- mean(x[mn]) - mean(x[-mn]) z[i] <- dbardash } pval <- (sum(z>abs(dbar)) + sum (z < -abs(dbar)))/nsim if(plotit){plot(density(z), xlab="", main="", yaxs="i", ylim=c(0,0.08), cex.axis=0.8) abline(v=dbar) abline(v=-dbar, lty=2) mtext(side=3,line=0.5, text=expression(bar(italic(x))[2]-bar(italic(x))[1]), at=dbar) mtext(side=3,line=0.5, text=expression(-(bar(italic(x))[2]- bar(italic(x))[1])), at=-dbar)} print(signif(pval,3)) ## Second try for(i in 1:nsim){ mn <- sample(n,n2,replace=FALSE) dbardash <- mean(x[mn]) - mean(x[-mn]) z[i] <- dbardash } pval <- (sum(z>abs(dbar)) + sum (z < -abs(dbar)))/nsim if(plotit){lines(density(z),lty=5,lwd=2) print(signif(pval,3)) } if(device!="")dev.off() } pkgs <- c("DAAG","grid","lattice") z <- sapply(pkgs, require, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE) if(any(!z)){ notAvail <- paste(names(z)[!z], collapse=", ") warning(paste("The following packages should be installed:", notAvail)) } g4.1() g4.2() g4.3() g4.4() g4.5() g4.6() g4.7A() g4.7B() g4.8() g4.9()