Chapter 4: A Review of Inference Concepts
Packages required: “DAAG”,“grid”,“lattice”
The script that follows is designed to be executed as it stands. It sets up
functions that create Ch 4 graphs, then runs those functions. If executed by
clicking on the RStudio “Compile Notebook …” command, it will be processed
through R Markdown, generating a document that includes code, output and
graphs generated by executing the functions. If some needed packages are
missing, warning messages will appear. See further:
http://rmarkdown.rstudio.com/r_notebook_format.html
g4.1 <-
function(device=""){
if(device!="")hardcopy(width=4.5, pointsize=8,
device=device, height=2.2)
oldpar <- par(mfrow = c(1,2), mar=par()$mar-c(.5, 0, 1.5, 0),
pty="s", mgp=c(2.5, .75, 0), tcl=-0.4, xpd=T)
on.exit(par(oldpar))
x <- seq(from=-4.2, to = 4.2, length.out = 50)
ylim <- c(0, dnorm(0))
ylim[2] <- ylim[2]+0.1*diff(ylim)
h1 <- dnorm(x)
h3 <- dt(x, 3)
h8 <- dt(x,8)
plot(x, h1, type="l", xlab = "", xaxs="i", ylab = "", yaxs="i",
bty="L", ylim=ylim)
mtext(side=3,line=0.75, "A", adj=-0.2)
lines(x, h8, col="grey60")
mtext(side=1, line=2.5, "No. of SEMs from mean")
mtext(side=2, line=2.5, "Probability density")
chh <- par()$cxy[2]
topleft <- par()$usr[c(1,4)] + c(0, 0.4*chh)
legend(topleft[1], topleft[2], col=c("black","grey60"),
lty=c(1,1), legend=c("Normal","t (8 d.f.)"), bty="n", cex=0.8)
plot(x, h1, type="l", xlab = "", xaxs="i",
ylab = "", yaxs="i", bty="L", ylim=ylim)
mtext(side=3,line=0.75, "B", adj=-0.2)
lines(x, h3, col="grey60")
mtext(side=1, line=2.5, "No. of SEMs from mean")
mtext(side=2, line=2.5, "Probability density")
legend(topleft[1], topleft[2], col=c("black","grey60"),
lty=c(1,1), legend=c("Normal","t (3 d.f.)"), bty="n", cex=0.8)
if(device!="")dev.off()
}
g4.2 <-
function(cump=0.975, device=""){
if(device!="")hardcopy(width=4.5, height=2.2, pointsize=8,
device=device)
oldpar <- par(mfrow = c(1,2), mar=par()$mar-c(.5, 0, 1.5, 0), tcl=-0.4,
pty="s", mgp=c(2.5, .75, 0))
on.exit(par(oldpar))
x <- seq(from=-3.9, to = 3.9, length.out = 50)
ylim <- c(0, dnorm(0))
plotfun <- function(cump, dfun = dnorm, qfun=qnorm,
ytxt = "Normal probability density",
txt1="qnorm", txt2="")
{
h <- dfun(x)
plot(x, h, type="l", xlab = "", xaxs="i", xaxt="n",
ylab = ytxt, yaxs="i", bty="L", ylim=ylim)
axis(1, at=c(-2, 0), cex=0.8)
axis(1, at=c((-3):3), labels=F)
tailp <- 1-cump
z <- qfun(cump)
ztail <- pretty(c(z,4),20)
htail <- dfun(ztail)
polygon(c(z,z,ztail,max(ztail)), c(0,dfun(z),htail,0), col="gray")
text(0, 0.5*dfun(z)+0.08*dfun(0),
paste(round(tailp, 3), " + ", round(1-2*tailp,3),
"\n= ", round(cump, 3), sep=""), cex=0.8)
lines(rep(z, 2), c(0, dfun(z)))
lines(rep(-z, 2), c(0, dfun(z)), col="gray60")
chh <- par()$cxy[2]
arrows(z, -2*chh,z,-0.1*chh, length=0.1, xpd=T)
text(z, -3*chh, paste(txt1, "(", cump, txt2, ")", "\n= ",
round(z,2), sep=""), xpd=T)
x1 <- z + .3
y1 <- dfun(x1)*0.35
y0 <- dfun(0)*0.2
arrows(-2.75, y0, -x1, y1, length=0.1, col="gray60")
arrows(2.75, y0, x1, y1, length=0.1)
text(-2.75, y0+0.5*chh, tailp, col="gray60")
text(2.75, y0+0.5*chh, tailp)
}
ytxt <- "t probability density (8 d.f.)"
plotfun(cump=cump)
mtext(side=3, line=1.5, "A", adj=-0.2)
ytxt <- "t probability density (8 d.f.)"
plotfun(cump=cump, dfun=function(x)dt(x, 8),
qfun=function(x)qt(x, 8),
ytxt=ytxt, txt1="qt", txt2=", 8")
mtext(side=3, line=1.5, "B", adj=-0.2)
if(device!="")dev.off()
}
g4.3 <-
function(device="")
{
if(device!="")hardcopy(width=4.75, height=2.5, pointsize=8,
device=device)
if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed")
titl <- paste(
"Second versus first member, for each pair. The first",
"\npanel is for the elastic band data. The second (from",
"\nDarwin) is for plants of the species Reseda lutea")
oldpar <- par(mfrow = c(1, 2), mar = par()$mar - c(.5, -0.5, .5, 0), pty="s",
tcl=-0.4, oma=c(0,0,0,.5),mgp = c(2, 0.5, 0))
on.exit(par(oldpar))
onesamp(dset = pair65, x = "ambient", y = "heated", xlab =
"Amount of stretch (ambient)", ylab =
"Amount of stretch (heated)")
## Data set mignonette holds Darwin's data on Reseda lutea, from
## Darwin, Charles 1877. The Effects of Cross and Self Fertilisation
## in the Vegetable Kingdom. Appleton and Company, New York, p.118.
## Data were in five pots, holding 5,5,5,5,4 pairs of plants
## respectively.
onesamp(dset = mignonette, x = "self", y = "cross", xlab =
"Height of Self-fertilised Plant", ylab =
"Height of Cross-Fertilised Plant", dubious = 0)
cat("\n", titl, "\n")
if(device!="")dev.off()
invisible()
}
g4.4 <-
function(device="")
{
if(device!="")hardcopy(width=2.4, height=2.4, pointsize=8,
device=device)
oldpar<-par(mar=c(4.1,4.1,1.1,1.1), mfrow=c(1,1), mgp=c(2.5,0.75,0))
on.exit(par(oldpar))
if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed")
x2 <- chisq.test(rareplants)
x2E <- stack(data.frame(t(x2$expected)))
habitat <- rep(c(1,2,3), 4)
plot(x2E$values ~ habitat, xlab="habitat", ylab="Expected Number of Species",
tcl=-0.4, axes=FALSE, xlim=c(0.5, 3.5), pch=16)
text(x2E$values ~ habitat, labels=x2E$ind, pos=rep(c(4,4,2,2),3),
cex=0.85)
axis(1, at=seq(1,3), labels=c("D", "W", "WD"))
axis(2)
box()
if(device!="")dev.off()
invisible()
}
g4.5 <-
function(device="")
{
if(device!="")hardcopy(width=4, height=1.75, pointsize=c(9,5),
device=device, trellis=TRUE)
if(!require(lattice))return("Package 'lattice' is not installed -- cannot proceed")
tomato <-
data.frame(weight=
c(1.5, 1.9, 1.3, 1.5, 2.4, 1.5, # water
1.5, 1.2, 1.2, 2.1, 2.9, 1.6, # Nutrient
1.9, 1.6, 0.8, 1.15, 0.9, 1.6), # Nutrient+24D
trt = rep(c("water", "Nutrient", "Nutrient+24D"),
c(6, 6, 6)))
tomato$trt <- relevel(tomato$trt, ref="water")
gph <- with(tomato, stripplot(trt~weight, aspect=0.35,
scale=list(tck=0.6)))
print(gph)
figtxt <- paste("Weights of tomato plants"
)
cat(figtxt,"\n")
if(device!="")dev.off()
invisible()
}
g4.6 <-
function(device=""){
if(device!="")hardcopy(width=4.2, height=2.0, device=device)
tomato <-
data.frame(weight=
c(1.5, 1.9, 1.3, 1.5, 2.4, 1.5, # water
1.5, 1.2, 1.2, 2.1, 2.9, 1.6, # Nutrient
1.9, 1.6, 0.8, 1.15, 0.9, 1.6), # Nutrient+24D
trt = rep(c("water", "Nutrient", "Nutrient+24D"),
c(6, 6, 6)))
tomato$trt <- relevel(tomato$trt, ref="water")
tomato.aov <- aov(weight~trt, data=tomato)
onewayPlot(obj=tomato.aov)
if(device!="")dev.off()
}
g4.7A <-
function(figtxt = "Stripchart of rice data", mc
= F, device="")
{
if(device!="")hardcopy(width=3, height=2.4, pointsize=8,
device=device)
oldpar<-par(mar=par()$mar+c(-0.5,5,-1,0), mfrow=c(1,1))
on.exit(par(oldpar))
if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed")
if(!require(lattice))return("Package 'lattice' is not installed -- cannot proceed")
if(mc) {
figtxt <- "Simulated Data"
av <- mean(take$root)
sd <- sqrt(var(take$root))
take$root <- rnorm(dim(take)[1], av, sd)
}
lev <- c("F10", "NH4Cl", "NH4NO3", "F10 +ANU843", "NH4Cl +ANU843",
"NH4NO3 +ANU843")
rice$trt <- factor(rice$trt, levels=lev)
gph <- dotplot(trt ~ ShootDryMass, pch=1, cex=1, las=2,
xlab="Shoot dry mass (g)", data=rice,
panel=function(x,y,...){panel.dotplot(x,y,...)
av <- sapply(split(x,y),mean);
ypos <- unique(y)
lpoints(ypos~av, pch=3, col="black", cex=1.25)},
main=list("A:", cex=1.25, just="left", x=0.1, font=1))
print(gph)
if(device!="")dev.off()
invisible()
}
g4.7B <-
function(figtxt = "Interaction plot of rice data", mc
= F, device="")
{
if(device!="")hardcopy(width=4.5, height=2, pointsize=7,
device=device)
par(mar = c(4.6,4.1,3.1,2.6), mgp=c(2.25,0.5,0))
with(rice, interaction.plot(fert, variety, ShootDryMass,
legend = FALSE, xlab="Level of first factor"))
xleg <- par()$usr[2]
yleg <- par()$usr[4] - 0.75 * diff(par()$usr[3:4])
ylabs <- levels(rice$variety)
leginfo <- legend(xleg, yleg, bty = "n", legend = ylabs,
col = 1, lty = 2:1, xjust = 1, cex = 0.8)$rect
chh <- par()$cxy[2]
text(leginfo$left + 0.5 * leginfo$w, leginfo$top, " variety",
adj = 0.5, cex = 0.8)
mtext(side=3, line=1.25, adj=-0.15, "B:")
if(device!="")dev.off()
invisible()
}
g4.8 <-
function(mc = F, type="box", device="")
{
if(device!="")hardcopy(width=2, height=2, pointsize=8,
device=device)
oldpar <- par(mar = c(4.1,4.1,1.1,1.1), pty="s", mgp=c(2.5,0.75,0))
on.exit(par(oldpar))
figtxt <- "Distance travelled, relative to distance up a 20 degree ramp."
if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed")
plot(distance.traveled ~ starting.point, data=modelcars,
xlim=c(0,12.5), tcl=-0.35, xaxs="i",
xlab = "Distance up ramp (cm)", ylab="Distance traveled (cm)")
if(device!="")dev.off()
invisible()
}
g4.9 <-
function(x1=two65$ambient, x2=two65$heated, nsim=2000, plotit=T,
fignum="4.10", device=""){
if(device!="")hardcopy(width=2.25, height=2.25, device=device)
oldpar<-par(mar=par()$mar-c(1,0,1,0), tcl=-0.3)
on.exit(par(oldpar))
if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed")
n1 <- length(x1)
n2<-length(x2)
n<-n1+n2
x<-c(x1,x2)
dbar <- mean(x2)-mean(x1)
z <- array(,nsim)
for(i in 1:nsim){
mn <- sample(n,n2,replace=FALSE)
dbardash <- mean(x[mn]) - mean(x[-mn])
z[i] <- dbardash
}
pval <- (sum(z>abs(dbar)) + sum (z < -abs(dbar)))/nsim
if(plotit){plot(density(z), xlab="", main="", yaxs="i",
ylim=c(0,0.08), cex.axis=0.8)
abline(v=dbar)
abline(v=-dbar, lty=2)
mtext(side=3,line=0.5,
text=expression(bar(italic(x))[2]-bar(italic(x))[1]),
at=dbar)
mtext(side=3,line=0.5,
text=expression(-(bar(italic(x))[2]-
bar(italic(x))[1])), at=-dbar)}
print(signif(pval,3))
## Second try
for(i in 1:nsim){
mn <- sample(n,n2,replace=FALSE)
dbardash <- mean(x[mn]) - mean(x[-mn])
z[i] <- dbardash
}
pval <- (sum(z>abs(dbar)) + sum (z < -abs(dbar)))/nsim
if(plotit){lines(density(z),lty=5,lwd=2)
print(signif(pval,3))
}
if(device!="")dev.off()
}
pkgs <- c("DAAG","grid","lattice")
z <- sapply(pkgs, require, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)
if(any(!z)){
notAvail <- paste(names(z)[!z], collapse=", ")
warning(paste("The following packages should be installed:", notAvail))
}
g4.1()
g4.2()
g4.3()
##
## heated 14.6 16.28 6.103
##
## One Sample t-test
##
## data: d
## t = 3.113, df = 8, p-value = 0.01438
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 1.642 11.025
## sample estimates:
## mean of x
## 6.333
##
## cross 4.719 3.13 5.918
##
## One Sample t-test
##
## data: d
## t = 2.117, df = 23, p-value = 0.0453
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.05823 5.05635
## sample estimates:
## mean of x
## 2.557
##
##
## Second versus first member, for each pair. The first
## panel is for the elastic band data. The second (from
## Darwin) is for plants of the species Reseda lutea
g4.4()
g4.5()
## Weights of tomato plants
g4.6()
## [1] 1 1 1 1
g4.7A()
g4.7B()
g4.8()
g4.9()
## [1] 0.0565
## [1] 0.059