Chapter 5: Regression with a Single Predictor
Packages required: “DAAG”, “grid”, “lattice”, “MCMCpack”, “boot”
The script that follows is designed to be executed as it stands. It sets up
functions that create Ch 5 graphs, then runs those functions. If executed by
clicking on the RStudio “Compile Notebook …” command, it will be processed
through R Markdown, generating a document that includes code, output and
graphs generated by executing the functions. If some needed packages are
missing, warning messages will appear. See further:
http://rmarkdown.rstudio.com/r_notebook_format.html
g5.1 <-
function(y = roller$depression, x = roller$weight, curve = c("reg"),
fignum=5.2, device="")
{
if(device!="")hardcopy(width=3.8, height=2.1,
device=device)
titl <- paste("Diagnostic plots for the regression of Figure 5.1:",
"\nPanel (A) plots residuals against fitted values.",
"\nPanel (B) is a normal probability plot of residuals.",
"\nIf residuals follow a normal distribution",
"the points \nshould fall close to a line.")
oldpar <- par(mfrow = c(1, 2), mgp = c(2.25, 0.5, 0), pty="s",
mar = par()$mar - c(1, 0.5, 0.5, 0), tcl=-0.35)
on.exit(par(oldpar))
u <- lm(depression ~ weight, data = roller)
res <- resid(u)
hat <- fitted(u)
plot(hat, res, xlab = "Fitted values", ylab = "Residuals", pch = 16)
mtext(side = 3, line = 0.25, "A", adj = 0)
abline(h = 0, lty = 2)
qqnorm(res, xlab = "Quantiles of Normal Distribution", ylab =
"Ordered data values", pch = 16, cex.main=1.15)
mtext(side = 3, line = 0.25, "B", adj = 0)
cat(titl, "\n")
if(device!="")dev.off()
invisible(u)
}
g5.2 <-
function(device=""){
if(device!="")hardcopy(width=5, height=3, trellis=TRUE,
device=device, pointsize=c(9,5))
`qreference` <-
function (test = NULL, m = 50, nrep = 6,
distribution = function(x) qnorm(x, mean = ifelse(is.null(test), 0, mean(test)), sd = ifelse(is.null(test),
1, sd(test))),
seed = NULL, nrows = NULL, cex.strip = 0.75,
xlab = NULL, ylab = NULL)
{
library(lattice)
if (!is.null(seed))
set.seed(seed)
if (!is.null(test)) {
testnam <- deparse(substitute(test))
m <- length(test)
av <- mean(test)
sdev <- sd(test)
fac <- factor(c(rep(testnam, m), paste("reference", rep(1:(nrep -
1), rep(m, (nrep - 1))))))
fac <- relevel(fac, ref = testnam)
}
if (is.null(nrows))
nrows <- floor(sqrt(nrep))
ncols <- ceiling(nrep/nrows)
if (is.null(test)) {
xy <- data.frame(y = distribution(runif(m * nrep)), fac = factor(rep(1:nrep,
rep(m, nrep))), id = factor(rep(1, m * nrep)))
colpch <- c("black")
qq <- qqmath(~y | fac, data = xy, par.strip.text = list(cex = 0),
distribution = distribution, layout = c(ncols, nrows),
xlab = "", ylab = "", aspect = 1, pch = 16)
}
else {
if (length(test) > 0) {
yy <- quantile(test, c(0.25, 0.75))
xx <- distribution(c(0.25, 0.75))
r <- diff(yy)/diff(xx)
}
xy <- data.frame(y = c(test, distribution(runif(m * (nrep -
1)))), fac = fac, id = factor(rep(1:2, c(m, m * (nrep -
1)))))
colpch <- c("black")
if (is.null(xlab))
xlab <- paste("Quantiles of", deparse(substitute(distribution)))
if (is.null(ylab))
ylab <- ""
qq <- qqmath(~y | fac, data = xy, layout = c(ncols, nrows),
groups = id, aspect = 1, xlab = xlab, ylab = "",
scales=list(tck=0.5),
pch = 16, distribution = distribution,
par.strip.text = list(cex = cex.strip))
}
print(qq)
}
if(!exists("roller.lm"))roller.lm <- lm(depression ~ weight, data=roller)
qreference(residuals(roller.lm), nrep=8, nrows=2, xlab="")
if(device!="")dev.off()
}
g5.3 <-
function(y = ironslag$chemical, x = ironslag$magnetic, device="")
{
if(device!="")hardcopy(width=3.3, height=3.3,
device=device, pointsize=8)
oldpar <- par(mfrow = c(2, 2), mar = c(4.6, 4.1, 2.1,1.6), pty="s",
mgp = c(2.5, 0.75, 0), cex = 0.85, mex = 0.85)
on.exit(par(oldpar))
mtext3 <- function(item="A", txt=leg[1],
xleft=par()$usr[1]){
mtext(side = 3, line = 0.75, item, adj = -0.175, cex=0.9)
mtext(side = 3, line = 0.25, txt, cex = 0.75, at=xleft, adj = 0)
}
titl <- paste(reg =
"Chemical vs Magnetic Test of Iron Content in Slag.",
"\nThe fitted curves used the Splus loess smooth.",
"\nIn (d) the downward slope suggests a lower variance",
"\nfor larger fitted values."
)
leg <- c("Fitted line and loess curve",
"Residuals from line, with smooth",
"Observed, vs prediction from line",
"Is variance constant about line?")
plot(x, y, xlab = "Magnetic", ylab = "Chemical", type="n")
u <- lm(y ~ x)
abline(u$coef[1], u$coef[2])
yhat <- predict(u)
lines(x, yhat)
mtext3(xleft=par()$usr[1]+0*par()$cxy[1])
print(panel.smooth(x, y, span = 0.95, lty = 2, lwd = 1.5, pch=0))
res <- residuals(u)
plot(x, res, xlab = "Magnetic", ylab = "Residual", type = "n")
mtext3(item="B", txt=leg[2], xleft=par()$usr[1]+0*par()$cxy[1])
print(panel.smooth(x, res, span = 0.95))
points(x, res, pch = 1, cex = 0.9, lwd = 1)
hat <- fitted(u)
plot(hat, y, xlab = "Predicted chemical", ylab = "Chemical",
type = "n")
print(panel.smooth(hat, y, span = 0.95))
mtext3(item="C", txt=leg[3], xleft=par()$usr[1]+0*par()$cxy[1])
yabs <- sqrt(abs(res))
plot(hat, yabs, xlab = "Predicted chemical", ylab = "", type = "n")
mtext(side = 2, line = 2.5, expression(sqrt(abs(residual))))
mtext3(item="D", txt=leg[4], xleft=par()$usr[1]+0*par()$cxy[1])
print(panel.smooth(hat, yabs, span = 0.95))
cat(titl, "\n")
if(device!="")dev.off()
invisible()
}
g5.4 <-
function(form=chemical ~ magnetic, data=ironslag, device="")
{
if(device!="")hardcopy(width=3.8, height=1.8, device=device)
library(DAAG)
oldpar <- par(mfrow = c(1, 2), mar = c(4.6, 4.1,2.1,1.6), tcl=-0.35,
pty="s", mgp = c( 2.25, 0.5, 0))
on.exit(par(oldpar))
leg <- c("Residuals from fitted lowess curve.",
"Is variance about curve constant?")
yvar <- all.vars(form)[1]
xvar <- all.vars(form)[2]
x <- data[,xvar]
y <- data[,yvar]
xy <- lowess(y ~ x)
chemfit <- approx(xy$x, xy$y, xout=x, ties="ordered")$y
resval <- y - chemfit
par(mfrow=c(1,2))
sqrtabs2 <- sqrt(abs(resval))
plot(resval ~ x, xlab = "Magnetic", ylab = "Residual", pch = 1)
abline(h=0, lty=2)
mtext(side = 3, line = 0.75, "A", adj=-0.125, cex=0.9)
mtext(side = 3, line = 0.25, leg[1], adj = 0, cex=0.75)
plot(sqrtabs2 ~ chemfit, xlab = "Predicted chemical", ylab =
expression(sqrt(abs(residual))), type = "n")
panel.smooth(chemfit, sqrtabs2, cex = 1.0)
mtext(side = 3, line = 0.75, "B", adj=-0.125, cex=0.9)
mtext(side = 3, line = 0.25, leg[2], adj = 0, cex=0.75)
if(device!="")dev.off()
invisible()
}
g5.5 <-
function(y = softbacks$weight, x = softbacks$volume, curve = c("reg"),
show.fits = T, device="")
{
if(device!="")hardcopy(width=2.25, height=2.25,
device=device)
titl <- switch(curve[1],
reg = paste("Weight versus volume for softcover books,",
"\nwith fitted line."),
lo = paste(
"Weight versus volume for softcover books,",
"\nwith fitted line and S-PLUS loess smooth curve."))
oldpar <- par(mar = c(4.1,4.1,1.1,1.1), mgp = c(2.5, 0.5, 0),
pty="s", tcl=-0.35)
on.exit(par(oldpar))
u <- lm(weight ~ volume, data = softbacks)
cat("\nCoefficients\n\n")
options(digits=3)
print(summary(u)$coef)
cat("\n\n")
print(anova(u))
yhat <- predict(u)
r <- cor(x, y)
xlim <- range(x)
xlim[2] <- xlim[2]+diff(xlim)*0.08
plot(x, y, xlab = "Volume (cc)", xlim=xlim,
ylab = "Weight (g)", pch = 1, ylim = range(c(y, yhat)))
if(match("reg", curve, nomatch = 0)) {
abline(u$coef[1], u$coef[2], lty = 1)
z <- summary(u)$coef
if(show.fits) {
points(x, yhat, pch = 1, cex = 0.75)
res <- resid(u)
for(i in 1:length(res)) {
resi <- res[i]
izzy <- as.numeric(resi > 0)
xi <- x[i]
yhati <- yhat[i]
yi <- y[i]
lines(rep(xi, 2), c(yhati, yi),
col=2-izzy, lty=2-izzy)
eps <- par()$cxy[1] * 0.2
if(i == 6) {
adji <- 1
eps <- - eps
}
else adji <- 0
text(xi + eps, yhati + resi/2, paste(round(resi, 1)),
adj = adji, cex = 0.65)
}
}
bottomright <- par()$usr[c(2, 3)]
chw <- par()$cxy[1]
chh <- par()$cxy[2]
btxt <- c(paste("a =", format(round(z[1, 1], 3)),
" SE =", format(round(z[1, 2], 3))),
paste("b =", format(round(z[2, 1], 3)),
" SE =", format(round(z[2, 2], 3))))
legend(bottomright[1], bottomright[2],
legend=btxt, xjust=1, yjust=0, cex=0.8, bty="n")
}
cat("\n",titl,"\n")
if(device!="")dev.off()
invisible(u)
}
g5.6 <-
function(device=""){
if(device!="")hardcopy(width=5.5, height=1.4, device=device)
oldpar <- par(mfrow=c(1,4), mar=c(4.1,4.1,2.1,0.6),
mgp=c(2.0,.5,0),oma=c(0,0,0,1), pty="s")
if(!require(DAAG))return("The package 'DAAG' must be installed.")
softbacks.lm<-lm(weight~volume,data=softbacks)
on.exit(par(oldpar))
plot(softbacks.lm, which=1:4, caption=
c("A: Residuals vs Fitted",
"B: Normal Q-Q", "C: Scale-Location",
"D: Cook's distance"))
figtxt <- "Diagnostic plots for previous figure."
cat(figtxt, "\n")
if(device!="")dev.off()
invisible(softbacks.lm)
}
g5.7 <-
function(y = roller$depression, x = roller$weight, pred = T,
device="")
{
if(device!="")hardcopy(width=2, height=2, device=device)
titl <- paste("Lawn Depression, for Various Weights of Roller,",
"\nwith fitted line and showing 95% pointwise",
"\nconfidence bounds for points on the fitted line.")
oldpar <- par(mar = c(4.1, 4.1, 1.1, 1.1),
mgp = c(2.5, 0.5, 0), pty="s", tcl=-0.35)
on.exit(par(oldpar))
xlim <- c(0, max(x))
ylim <- c(0, max(y))
plot(x, y, xlab = "Weight of Roller (tonnes)", xlim=xlim, ylim=ylim,
ylab = "Depression in Lawn (mm)", pch = 16)
u <- lm(depression ~ weight, data = roller)
abline(u$coef[1], u$coef[2], lty = 1)
z <- summary(u)$coef
y12 <- par()$usr[3:4]
if(pred) {
assign("xy", data.frame(weight = pretty(roller$weight,
20)))
hat <- predict(u, newdata = xy, interval="confidence")
ci<-data.frame(lower=hat[,"lwr"],upper=hat[,"upr"])
here <- ci$lower >= y12[1]
lines(xy$weight[here], ci$lower[here], lty = 2,lwd=2,col="grey")
here <- ci$upper < y12[2]
lines(xy$weight[here], ci$upper[here], lty = 2,lwd=2,col="grey")
}
cat(titl, "\n")
if(device!="")dev.off()
invisible(u)
}
g5.8 <-
function(color = F, device="")
{
if(device!="")hardcopy(width=3.4, height=2, device=device)
figtxt<-paste("Two rubber band experiments, with different ranges",
"\nof x-values. The dashed curves are pointwise 95%",
"\nconfidence bounds for the fitted line.")
oldpar <- par(tcl=-0.35, pty="s")
on.exit(par(oldpar))
panelci<-function(data,...)
{
nrows<-list(...)$nrows
ncols<-list(...)$ncols
if(ncols==1)axis(2)
if(ncols==1)axis(1) else axis(3)
x<-data$stretch; y<-data$distance
u <- lm(y ~ x)
upred <- predict(u, interval="confidence")
ci <- data.frame(fit=upred[,"fit"],lower=upred[,"lwr"],
upper=upred[,"upr"])
ord<-order(x)
lines(x[ord], ci$fit[ord], lty=1, lwd=2)
lines(lowess(x[ord], ci$upper[ord]), lty=2, lwd=2, col="grey")
lines(lowess(x[ord], ci$lower[ord]), lty=2, lwd=2, col="grey")
}
xy<-rbind(elastic2,elastic1)
nam <- c("Range of stretch 30-60 mm","Range of stretch 42-54 mm")
trial<-rep(nam, c(dim(elastic2)[1],dim(elastic1)[1]))
xlim<-range(elastic2$stretch)
ylim<-range(elastic2$distance)
xy<-split(xy,trial)
xy<-lapply(1:length(xy),function(i){c(as.list(xy[[i]]), list(xlim=xlim,
ylim=ylim))})
# xy<-lapply(xy,function(z){z$xlim<-xlim; z$ylim<-ylim; z})
names(xy) <- nam
panelplot(xy,panel=panelci,totrows=1,totcols=2,
par.strip.text=list(cex=.8), oma=c(4,4,2.5,2))
cat(figtxt, "\n")
mtext(side = 2, line = 2, "Distance moved (cm)", outer=TRUE)
mtext(side=1,line=3.5,"Amount of stretch (mm)")
if(device!="")dev.off()
}
g5.9 <-
function(df = houseprices, m = 3, x = "area", y = "sale.price", seed=47,
dots=F, coltypes = c("gray40","gray40","gray20"), device="")
{
if(device!="")hardcopy(width=2.25, height=2.25, device=device)
if(!is.null(seed))set.seed(seed)
oldpar<-par(mar=c(4.6, 4.6, 2.1, 1.1), mgp=c(2.5, 0.75, 0),
pty="s", tcl=-0.35)
on.exit(par(oldpar))
ltypes <- 1:3
ptypes <- 2:4
if(is.null(coltypes))coltypes <- c(2, 3, 6)
options(digits=3)
n <- dim(df)[1]
rand <- sample(n)%%m+1
xv <- df[, x]
yv <- df[, y]
plot(xv, yv, xlab = "Floor area", ylab = "Sale price", type = "n")
xval <- pretty(xv, n = 20)
houseprices.lm <- lm(yv ~ xv, data=df)
print(anova(houseprices.lm))
cat("\n")
sumss<-0
sumdf<-0
par(lwd=2)
for(i in sort(unique(rand))) {
cat("\nfold", i, "\n")
n.in <- (1:n)[rand != i]
n.out <- (1:n)[rand == i]
cat("Observations in test set:", n.out, "\n")
ab <- lm(yv ~ xv, subset = n.in)$coef
z <- xv[n.out]
points(xv[n.out], yv[n.out], col=coltypes[i], pch = ptypes[i], cex = 1.0)
if(dots)
points(xv[n.out], yv[n.out], col = coltypes[i], pch = 16)
pred <- ab[1] + ab[2] * z
resid <- yv[n.out] - pred
xy <- data.frame(rbind(z,pred, yv[n.out], resid
), row.names=c("Floor area","Predicted price",
"Observed price","Residual"))
yval <- ab[1] + ab[2] * xval
lines(xval, yval, lwd = 2, col = coltypes[i], lty=ltypes[i])
num <- length(n.out)
print(xy,collab=rep("",num))
ss <- sum(resid^2)
sumss<-sumss+ss
sumdf<-sumdf+num
ms <- ss/num
cat("\nSum of squares =", round(ss, 2), " Mean square =",
round(ms, 2), " n =", num, "\n")
}
print(c("Overall ms"=sumss/sumdf))
topleft<-par()$usr[c(1,4)] + c(-1, 1.75)*par()$cxy
par(lwd=1, xpd=T)
legend(topleft[1],topleft[2],legend=paste("Fold",1:m," "),pch=ptypes,
lty=ltypes,col=coltypes, cex=0.75, horiz=T, bty="n", xjust=0)
par(col = 1, xpd=F)
if(device!="")dev.off()
}
g5.10 <-
function(device=""){
if(device!="")hardcopy(width=4, height=2, device=device)
oldpar <- par(mar = c(4.1,4.1,1.6,2.1), mgp=c(2.5,0.75,0),
pty="s", tcl=-0.35)
on.exit(par(oldpar))
par(mfrow=c(1,2))
houseprices2.fn<-function (houseprices,index)
{
house.resample<-houseprices[index,]
house.lm<-lm(sale.price~area,data=house.resample)
houseprices$sale.price-predict(house.lm,houseprices)
# resampled prediction errors
}
if(!require(boot))return("The package 'boot' must be installed.")
set.seed(1111)
n<-length(houseprices$area)
R<-200
houseprices.lm<-lm(sale.price~area,data=houseprices)
houseprices2.boot<-boot(houseprices,R=R,statistic=houseprices2.fn)
house.fac<-factor(rep(1:n,rep(R,n)))
plot(house.fac,as.vector(houseprices2.boot$t),
ylab="", xlab="House")
mtext(side=2, line=3, "Prediction Errors", cex=0.9)
mtext(side = 3, line = 0.25, "A", adj = 0)
boot.se <- apply(houseprices2.boot$t,2,sd)
model.se <- predict.lm(houseprices.lm,se.fit=T)$se.fit
plot(boot.se/model.se, ylab="", xlab="House",pch=16)
mtext(side=2, line=2.5, "Ratio of SEs\nBootstrap to Model-Based",
cex=0.9)
mtext(side = 3, line = 0.25, "B", adj = 0)
abline(1,0)
if(device!="")dev.off()
invisible()
}
g5.11 <-
function(sd = 2, npoints=5, nrep=4, slope=0.8,
confint = F, stats = F, device="", type = "")
{
if(device!="")hardcopy(width=4.5, height=2.6, device=device, pointsize=9)
figtxt <- paste("Test for Linear Trend, versus Multiple Comparisons. The",
"\nsix panels are six simulated results from a straight",
"\nline model with slope ", slope,", sd=", sd,
", and ", nrep, " reps per level.", sep="")
oldpar <- par(mfrow = c(2, 3), mar = c(3.1,3.1,2.6, 0.6), oma=c(0.5,0.5,1.25,0),
mgp = c(2, 0.5, 0), xpd=TRUE)
on.exit(par(oldpar))
x <- rep(1:npoints, rep(nrep, npoints))
for(i in 1:6) {
y <- 100 + slope * x + rnorm(20, 0, sd)
if(i<=3)xtxt <- "" else xtxt <- "Treatment level"
if(i%in%c(1,4))ytxt <- "Response" else ytxt <- ""
stripchart(y~x, xlab = xtxt, ylab = ytxt,
vertical=TRUE, cex=1.15, xlim=c(.5,5.5))
u <- lm(y ~ factor(x))
z <- summary.aov(u)
p.aov <- z[[1]][1,"Pr(>F)"]
u <- lm(y ~ x)
z1 <- summary(u)
p.slope <- z1$coef[2, 4]
if(stats)
print(z1)
if(i==1)mtext(side=3, line=2.5, "p-values are -", adj=0, cex=0.8)
mtext(side = 3, line = .25, paste("Linear trend:",
format(p.slope, digits = 2), "\naov: ",
format(p.aov, digits = 2), sep = ""), adj = 1,cex=.8)
}
cat(figtxt, "\n")
if(device!="")dev.off()
invisible()
}
g5.12 <-
function(device=""){
if(device!="")hardcopy(width=2.25, height=2.25, device=device)
oldpar <- par(pty="s", tcl=-0.35)
on.exit(par(oldpar))
simulateLinear(sd = 2, npoints=5, nrep=4, nsets=200, seed=21)
invisible()
if(device!="")dev.off()
}
g5.13 <-
function(tcol = 2, col="gray40", device="")
{
if(device!="")hardcopy(width=4.4, height=3.2, device=device)
titl <- paste("The above panels show (as solid curves) some alternative",
"\npatterns of response. The dotted line shows the power",
"\nfamily transformation that makes the response linear.",
"\nThe formula for this power family transformation appears",
"\nabove the graph."
)
oldpar <- par(mfrow = c(2, 3), pty="s",
mar = par()$mar - c( 1, 1, 1.0, 1),
mgp = c(1.5, 0.5, 0), oma=c(0,1,0,1))
on.exit(par(oldpar))
powerplot(expr="sqrt(x)", xlab="")
powerplot(expr="x^0.25", xlab="", ylab="")
powerplot(expr="log(x)", xlab="", ylab="")
powerplot(expr="x^2")
powerplot(expr="x^4", ylab="")
powerplot(expr="exp(x)", ylab="")
if(device!="")dev.off()
par(mfrow=c(1,1))
invisible()
}
g5.14 <-
function(yvar = "heart", xvar="weight",
species = "Cape fur seal", dset = cfseal,
stats = F, device="")
{
if(device!="")hardcopy(width=2.25, height=2.25, device=device)
library(DAAG)
pretext <- c(heart = "Heart weight", brain = "Brain weight",
liver = "Liver weight", weight="Body weight")[yvar]
xtext <- c(heart = "Heart weight", brain = "Brain weight",
liver = "Liver weight", weight="Body weight")[xvar]
oldpar <- par(mar = c(4.1,4.1,2.1,1.1), mgp=c(2.5,0.75,0),
pty="s", tcl=-0.35)
on.exit(par(oldpar))
x <- log(dset[,xvar])
y <- log(dset[, yvar])
ylim <- range(y)
if (yvar == "heart") ylim<- log(c(82.5,1100))
xlim <- range(x)
if(xvar == "weight") xlim <- log(c(17,180))
ylab <- switch(yvar,
heart = "Heart weight (g, log scale)",
brain = "Brain weight (g, log scale)",
liver = "Liver weight (g, log scale)",
weight = "Body weight (kg, log scale)")
xlab <- switch(xvar,
heart = "Heart weight (g, log scale)",
brain = "Brain weight (g, log scale)",
liver = "Liver weight (g, log scale)",
weight = "Body weight (kg, log scale)")
xtik <- pretty(exp(x),10)
ytik <- pretty(exp(y), 10)
xtik <- xtik[xtik >= exp(xlim[1]) & xtik <= exp(xlim[2])]
ytik <- ytik[ytik >= exp(ylim[1]) & ytik <= exp(ylim[2])]
plot(x, y, xlab = xlab, ylab = ylab, axes = F, xlim =
xlim, ylim = ylim, pch = 16, cex=0.85)
axis(1, at = log(xtik), labels = paste(xtik))
axis(2, at = log(ytik), labels = paste(ytik))
box()
form1 <- formula(y ~ x)
u <- lm(form1, data = dset)
abline(u$coef[1], u$coef[2])
usum <- summary(u)$coef
options(digits=3)
print(usum)
if(yvar == "brain") {
u2 <- lm(form1, data = dset, subset = brain < log(1.7))
redrange <- range(x[y < max(y)])
yval <- u2$coef[1] + u2$coef[2] * redrange
lines(redrange, yval, lty = 2)
if(stats)
print(summary(u2))
}
cwh <- par()$cxy
eqn <- paste("log y =", round(usum[1, 1], 2), " [SE ",
round(usum[1, 2], 3), "] + ", round(usum[2, 1], 3),
" [", round(usum[2, 2], 2), "] log x")
mtext(side=3, line=0.25, eqn, adj = 0.5, cex = 0.75)
figtxt <- paste(pretext, " versus ", xtext,
" for ", length(y), " members \n of the species ",
species, ".", collapse = "")
if(yvar == "logbrain")
figtxt <- paste(figtxt, " The dotted line shows the",
"\neffect of omitting the data point",
"for the largest animal.")
cat(figtxt, "\n")
if(device!="")dev.off()
if(stats)
summary(u)
}
g5.15 <-
function(dset1 = pair65, dset2 = leafshape, device="", col="gray30")
{
if(device!="")hardcopy(width=4.2, height=2.1, device=device)
oldpar <- par(mfrow = c(1, 2), mar = c(4.1,4.1,2.1,2.1), mgp=c(2.5,.75,0),
pty="s", tcl=-0.35)
library(DAAG)
on.exit(par(oldpar))
x <- dset1$ambient
y <- dset1$heated
ylab <- "Stretch (heated band)"
plot(x, y, xlab = "Stretch (band held at ambient)",
ylab = ylab, pch = 16)
topleft <- par()$usr[c(1, 4)] + c(0.5, -0.5) * par()$cxy
text(topleft[1], topleft[2], paste("r =", round(cor(x, y), 2)),
adj = 0)
mtext(side = 3, line = 0.4, "A", adj = 0)
u1 <- lm(heated ~ ambient, data = dset1)
abline(u1$coef[1], u1$coef[2])
u2 <- lm(ambient ~ heated, data = dset1)
abline( - u2$coef[1]/u2$coef[2], 1/u2$coef[2], lty = 2)
x <- dset2$logpet
y <- dset2$loglen
plot(x, y, xlab = "Petiole length (mm)", ylab = "Leaf length (mm)",
axes = F, type="n")
points(x, y, cex=0.65, col=col)
topleft <- par()$usr[c(1, 4)] + c(0.5, -0.5) * par()$cxy
text(topleft[1], topleft[2], paste("r =", round(cor(x, y), 3)),
adj = 0)
mtext(side = 3, line = 0.4, "B", adj = 0)
xlab <- pretty(exp(x))
xlab <- c(0.5, 1, xlab)
xlab <- xlab[xlab > 0]
ylab <- pretty(exp(y))
ylab <- ylab[ylab > 0]
axis(1, at = log(xlab), labels = paste(xlab))
axis(2, at = log(ylab), labels = paste(ylab))
box()
u1 <- lm(y ~ x)
u2 <- lm(x ~ y)
abline(u1$coef[1], u1$coef[2])
abline( - u2$coef[1]/u2$coef[2], 1/u2$coef[2], lty = 2)
figtxt <- paste("Plot (A) is for a dataset that compared the stretch",
"\nof an elastic band that was heated with that for an",
"\nelastic band that was held at ambient temperature,",
"\nwhile (B) is for a leaf dataset. In each plot we",
"\nshow the regression line for y on x (solid line),",
"\nand the regression line for x on y (dotted line).",
"\nIn (A) the lines are quite similar, while in (B)",
"\nwhere the correlation is smaller, the lines are",
"\nquite different.")
cat(figtxt, "\n")
if(device!="")dev.off()
}
g5.16 <-
function(min=30, device=""){
if(!require(MCMCpack))return("The package MCMCpack must be installed.")
if(device!="")hardcopy(width=5, height=4, color=FALSE,
device=device, pointsize=c(8,5))
opar <- par(mgp=c(2,.75,0), mar=c(4.1,3.6,2.6,1.1), oma=rep(0,4),
no.readonly=TRUE)
mat <- matrix(c(1:6), byrow=TRUE, ncol=2)
layout(mat, widths=rep(c(2,1),3), heights=rep(1,8))
roller.mcmc <- MCMCregress(depression ~ weight, data=roller)
plot(roller.mcmc, auto.layout=FALSE, ask=FALSE, col="gray40")
par(opar)
if (device != "") dev.off()
}
pkgs <- c("DAAG","grid","lattice", "MCMCpack", "boot")
z <- sapply(pkgs, require, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:DAAG':
##
## hills
##
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2014 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
if(any(!z)){
notAvail <- paste(names(z)[!z], collapse=", ")
warning(paste("The following packages should be installed:", notAvail))
}
g5.1()
## Diagnostic plots for the regression of Figure 5.1:
## Panel (A) plots residuals against fitted values.
## Panel (B) is a normal probability plot of residuals.
## If residuals follow a normal distribution the points
## should fall close to a line.
g5.2()
g5.3()
## NULL
## NULL
## NULL
## NULL
## Chemical vs Magnetic Test of Iron Content in Slag.
## The fitted curves used the Splus loess smooth.
## In (d) the downward slope suggests a lower variance
## for larger fitted values.
g5.4()
g5.5()
##
## Coefficients
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.372 97.559 0.424 0.686293
## volume 0.686 0.106 6.475 0.000644
##
##
## Analysis of Variance Table
##
## Response: weight
## Df Sum Sq Mean Sq F value Pr(>F)
## volume 1 437878 437878 41.9 0.00064
## Residuals 6 62669 10445
##
## Weight versus volume for softcover books,
## with fitted line.
g5.6()
## Diagnostic plots for previous figure.
g5.7()
## Lawn Depression, for Various Weights of Roller,
## with fitted line and showing 95% pointwise
## confidence bounds for points on the fitted line.
g5.8()
## Two rubber band experiments, with different ranges
## of x-values. The dashed curves are pointwise 95%
## confidence bounds for the fitted line.
g5.9()
## Analysis of Variance Table
##
## Response: yv
## Df Sum Sq Mean Sq F value Pr(>F)
## xv 1 18566 18566 8 0.014
## Residuals 13 30179 2321
##
##
## fold 1
## Observations in test set: 1 2 8 12 15
## X1 X2 X3 X4 X5
## Floor area 694.00 905.0 714.0 696.0 1191
## Predicted price 196.31 229.5 199.5 196.6 274
## Observed price 192.00 215.0 220.0 255.0 375
## Residual -4.31 -14.5 20.5 58.4 101
##
## Sum of squares = 14160 Mean square = 2832 n = 5
##
## fold 2
## Observations in test set: 3 4 5 7 11
## X1 X2 X3 X4 X5
## Floor area 802.0 1366.0 716 821.0 790.0
## Predicted price 234.6 360.9 215 238.9 231.9
## Observed price 215.0 274.0 113 212.0 221.5
## Residual -19.6 -86.9 -103 -26.9 -10.4
##
## Sum of squares = 19315 Mean square = 3863 n = 5
##
## fold 3
## Observations in test set: 6 9 10 13 14
## X1 X2 X3 X4 X5
## Floor area 963.0 1018.0 887.0 771 1006.0
## Predicted price 247.5 258.2 232.6 210 255.9
## Observed price 185.0 276.0 260.0 260 293.0
## Residual -62.5 17.8 27.4 50 37.1
##
## Sum of squares = 8848 Mean square = 1770 n = 5
## Overall ms
## 2822
g5.10()
g5.11()
## Test for Linear Trend, versus Multiple Comparisons. The
## six panels are six simulated results from a straight
## line model with slope 0.8, sd=2, and 4 reps per level.
g5.12()
##
## Proportion of datasets where linear p-value < aov p-value = 0.87
g5.13()
g5.14()
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.20 0.2113 5.7 4.12e-06
## x 1.13 0.0547 20.6 1.87e-18
## Heart weight versus Body weight for 30 members
## of the species Cape fur seal .
g5.15()
## Plot (A) is for a dataset that compared the stretch
## of an elastic band that was heated with that for an
## elastic band that was held at ambient temperature,
## while (B) is for a leaf dataset. In each plot we
## show the regression line for y on x (solid line),
## and the regression line for x on y (dotted line).
## In (A) the lines are quite similar, while in (B)
## where the correlation is smaller, the lines are
## quite different.
g5.16()