Chapter 3: Statistical Models
Packages required: “DAAG”
The script that follows is designed to be executed as it stands. It sets up
functions that create Ch 3 graphs, then runs those functions. If executed by
clicking on the RStudio “Compile Notebook …” command, it will be processed
through R Markdown, generating a document that includes code, output and
graphs generated by executing the functions. If some needed packages are
missing, warning messages will appear. See further:
http://rmarkdown.rstudio.com/r_notebook_format.html
g3.1 <-
function(fignum=3.2, device="")
{
if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed")
if(device!="")hardcopy(width=2, height=2, device=device)
oldpar <- par(mar = c(4.1,4.1,1.1,1.1), xaxs="i", yaxs="i",
mgp = c(2.5, 0.5, 0), pty="s")
on.exit(par(oldpar))
plot(depression~weight, data = roller,
xlim=c(0,13), ylim=c(0,32), tcl=-0.3,
xlab = "Weight of roller (t)", ylab = "Depression (mm)",
pch = 16)
abline(0, 2.5, lty=2)
abline(-5, 2.75)
figtxt <- paste("Depression (mm), versus weight of roller.", sep = "")
cat("\n",figtxt,"\n")
if(device!="")dev.off()
}
g3.2 <-
function(y = roller$depression, x = roller$weight,
device="")
{
if(device!="")hardcopy(width=4.25, height=2.25, device=device)
oldpar <- par(mfrow = c(1, 2),
mar = c(4.1,4.6,1.6,1.1),
mgp = c( 2.5, 0.75, 0),oma=c(0,0,0,0),pty="s")
on.exit(par(oldpar))
pretext <- c(reg = "A", lo = "B")
for(curve in c("reg", "lo")) {
plot(x, y, xlab = "Roller weight (t)", ylab =
"Depression in lawn (mm)", pch = 4, tcl=-0.3)
mtext(side = 3, line = 0.25, pretext[curve], adj = 0)
topleft <- par()$usr[c(1, 4)]
chw <- strwidth("O")
chh <- strheight("O")
points(topleft[1]+rep(0.75,2)*chw,topleft[2]-c(0.5,1.3)*chh,
pch=c(4,1))
text(topleft[1]+rep(1.25,2)*chw, topleft[2]-c(0.5,1.3)*chh,
c("Data values", "Fitted values"),adj=0,cex=0.65)
text(topleft[1]+1.25*chw, topleft[2]-2*chh,"(smooth)",adj=0,cex=.65)
if(curve[1] == "reg") {
u <- lm(y ~ -1 + x)
abline(0, u$coef[1])
yhat <- predict(u)
}
else {
lawn.lm<-lm(y~x+I(x^2))
yhat<-predict(lawn.lm)
xnew<-pretty(x,20)
b<-lawn.lm$coef
ynew<-b[1]+b[2]*xnew+b[3]*xnew^2
lines(xnew,ynew)
}
here <- y < yhat
yyhat <- as.vector(rbind(y[here], yhat[here], rep(NA, sum(here))))
xx <- as.vector(rbind(x[here], x[here], rep(NA, sum(here))))
lines(xx, yyhat, lty = 2, col="gray")
here <- y > yhat
yyhat <- as.vector(rbind(y[here], yhat[here], rep(NA, sum(here))))
xx <- as.vector(rbind(x[here], x[here], rep(NA, sum(here))))
lines(xx, yyhat, lty = 1, col="gray")
n <- length(y)
ns <- min((1:n)[y - yhat >= 0.75*max(y - yhat)])
ypos <- 0.5 * (y[ns] + yhat[ns])
chw <- par()$cxy[1]
text(x[ns] - 0.25*chw, ypos, "+ve residual", adj = 1,cex=0.65, col="gray30")
points(x, yhat, pch = 1)
ns <- (1:n)[y - yhat == min(y - yhat)][1]
ypos <- 0.5 * (y[ns] + yhat[ns])
text(x[ns] + 0.4*chw, ypos, "-ve residual", adj = 0,cex=0.65,col="gray30")
}
titl <- "Lawn depression (mm) versus roller weight (t)."
titl <- paste(titl,
"\nIn (a) a line has been fitted, while in (b) the",
"\nloess method was used to fit a smoothed curve.",
"\nResiduals (the `rough') appear as vertical lines. ",
"\nPositive residuals are black lines, while negative ",
"\nresiduals are dotted.")
if(device!="")dev.off()
invisible()
}
g3.3 <-
function(device="")
{
if(device!="")hardcopy(width=2, height=2, device=device)
oldpar <- par(mar = c(4.1,4.1,1.1,1.1), mgp=c(2.5,0.5,0), cex.axis=0.85)
on.exit(par(oldpar))
z <- seq(-3,3,length=101)
plot(z, dnorm(z), type="l", ylab="normal density", yaxs="i", bty="L", tcl=-0.3,
xlab="Distance, in SDs, from the mean")
polygon(c(z[z <= 1.0],1.0),c(dnorm(z[z <= 1.0]), dnorm(-3)), col="grey")
chh <- par()$cxy[2]
arrows(-1.8, 0.32, -0.25, 0.2, length=0.07, xpd=T)
cump <- round(pnorm(1), 3)
text(-1.8, 0.32+0.75*chh, paste("pnorm(1)\n", "=", cump), xpd=T, cex=0.8)
if(device!="")dev.off()
invisible()
}
g3.4 <-
function(mu = 10, sigma = 1, n = 1, m = 50, four = 4, nrep = 5,
seed=21, fignum=3.5, totrows=1, device="")
{
if(device!="")hardcopy(width=5.2, height=1.4, device=device,
pointsize=8)
if(!is.null(seed))set.seed(seed)
oldpar <- par(mgp = c(2.25, 0.5, 0),mar=par()$mar-c(1,0,1.5,0),
pty="s")
on.exit(par(oldpar))
if(is.null(totrows))
totrows <- floor(sqrt(nrep))
totcols <- ceiling(nrep/totrows)
z <- range(pretty(mu + (c(-3.4, 3.4) * sigma), 50))
xy <- data.frame(x=rep(0,nrep),y=rep(0,nrep),n=rep(n,nrep),
mm=rep(m,nrep),four=rep(four,nrep))
fac <- factor( paste("Simulation", 1:nrep),
lev <- paste("Simulation", 1:nrep) )
xlim<-z
## ylim<-c(0,dnorm(0)*sqrt(n))
ylim <- c(0,1)
xy <- split(xy,fac)
xy<-lapply(1:length(xy),function(i){c(as.list(xy[[i]]), list(xlim=xlim,
ylim=ylim))})
panel.mean <- function(data, mu = 10, sigma = 1, n2 = 1,
mm = 100, nrows, ncols, ...)
{
vline <- function(x, y, lty = 1, col = 1)
{
lines(c(x, x), c(0, y), lty = lty, col = col)
}
n2<-data$n[1]
mm<-data$mm[1]
our<-data$four[1]
## Four characters in each unit interval of x
nmid <- round(four * 4)
nn <- array(0, 2 * nmid + 1)
#########################################
z <- mu+seq(from=-3.4*sigma, to=3.4*sigma, length=mm)
atx<-pretty(z)
qz <- pnorm((z - mu)/sigma)
dz <- dnorm((z - mu)/sigma)
chw <- sigma/four
chh <- strheight("O")*0.75
htfac <- (mm * chh)/four
if(nrows==1&&ncols==1)
lines(z, dz * htfac)
if(nrows==1)axis(1,at=atx)
y <- rnorm(mm, mu, sigma/sqrt(n2))
pos <- round((y - mu)/sigma * four)
for(i in 1:mm) {
nn[nmid + pos[i]] <- nn[nmid + pos[i]] + 1
xpos <- chw * pos[i]
text(mu + xpos, nn[nmid + pos[i]] * chh - chh/4, "x")
}
}
panelplot(xy,panel=panel.mean,totrows=totrows,totcols=totcols,
oma=rep(2.5,4))
if(device!="")dev.off()
}
g3.5 <-
function(device="")
{
if(device!="")hardcopy(width=4.25, height=2, device=device,
trellis=TRUE, color=FALSE, pointsize=c(7,3))
trellis.par.set(layout.heights=list(top.padding=0, main=0,
xlab=0, axis.xlab.padding=0, bottom.padding=0),
plot.symbol=list(cex=0.5))
sampdist(sampsize = c(3, 9, 30), seed = 23, nsamp = 1000, FUN = mean,
sampvals = function(n) exp(rnorm(n, mean = 0.5, sd = 0.3)),
tck = 0.6, plot.type = "density", layout = c(3,1))
if(device!="")dev.off()
}
g3.6 <-
function(mu = 10, sigma = 1, n = 1, m = 50, four = 4, nrep = 5,
seed=21, fignum=3.6, totrows=1, device="")
{
if(device!="")hardcopy(width=5.2, height=2, trellis=TRUE, color=FALSE,
device=device, pointsize=c(8,6))
if(!is.null(seed))set.seed(seed)
if(is.null(totrows))
totrows <- floor(sqrt(nrep))
totcols <- ceiling(nrep/totrows)
z <- range(pretty(mu + (c(-3.4, 3.4) * sigma), 50))
xlim<-z
xy <- data.frame(y = rnorm(50*nrep, 10, 1), fac=factor(rep(1:nrep,
rep(50, nrep))))
qq <- qqmath(~y|fac, data=xy, par.strip.text=list(cex=0), layout=c(5,1),
xlab="",ylab="", aspect=1, cex=0.5, scales=list(tck=0.4))
print(qq)
if(device!="")dev.off()
}
g3.7 <-
function(device=""){
if(device!="")hardcopy(width=4.5, height=2.6, trellis=T,
device=device)
if(!require(DAAG))return("Package 'DAAG' is not installed -- cannot proceed")
with(pair65,
qreference(test = heated-ambient, nrep=8, nrows=2, xlab="")
)
if(device!="")dev.off()
}
g3.8 <-
function(device="")
{
if(device!="")hardcopy(width=4.25, height=2, device=device,
trellis=TRUE, color=FALSE, pointsize=c(7,3))
trellis.par.set(layout.heights=list(top.padding=0, main=0,
xlab=0, axis.xlab.padding=0, bottom.padding=0),
plot.symbol=list(cex=0.5))
sampdist(sampsize = c(3, 9, 30), seed = 23, nsamp = 1000, FUN = mean,
sampvals = function(n) exp(rnorm(n, mean = 0.5, sd = 0.3)),
tck = 0.6, plot.type = "qq", layout = c(3,1))
if(device!="")dev.off()
}
pkgs <- c("DAAG")
z <- sapply(pkgs, require, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)
if(any(!z)){
notAvail <- paste(names(z)[!z], collapse=", ")
warning(paste("The following package should be installed:", notAvail))
}
g3.1()
##
## Depression (mm), versus weight of roller.
g3.2()
g3.3()
g3.4()
g3.5()
g3.6()
g3.7()
g3.8()