## Chapter 6: Multiple Linear Regression 

## Sec 6.1: Basic Ideas: a Book Weight Example 
## Plot weight vs volume: data frame allbacks (DAAG)
par(pty="s")                       # square plotting region for this figure
library(DAAG)
## Loading required package: lattice
## unclass(cover) gives the integer codes that identify the factor levels;
## they index the two-element vectors of plotting symbols (pch) and of
## label positions (pos).
plot(weight ~ volume, data=allbacks, pch=c(16,1)[unclass(cover)])
## Label every point with its row number. The labels are derived from the
## data (seq_along) instead of a hard-coded 1:15, so the call stays correct
## if the number of rows changes.
with(allbacks, text(volume, weight, labels=seq_along(weight),
                    pos=c(2,4)[unclass(cover)]))

## [figure: plot of chunk unnamed-chunk-1]

## Fit weight on volume and area, then print the summary of the fit
allbacks.lm <- lm(weight ~ volume + area, data = allbacks)
summary(allbacks.lm)
## 
## Call:
## lm(formula = weight ~ volume + area, data = allbacks)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -104.1  -30.0  -15.5   16.8  212.3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  22.4134    58.4025    0.38  0.70786
## volume        0.7082     0.0611   11.60  7.1e-08
## area          0.4684     0.1019    4.59  0.00062
## 
## Residual standard error: 77.7 on 12 degrees of freedom
## Multiple R-squared:  0.928,  Adjusted R-squared:  0.917 
## F-statistic: 77.9 on 2 and 12 DF,  p-value: 1.34e-07
## For the coefficient estimates and SEs only, use: summary(allbacks.lm)$coef
## Switch the plot region type from "s" (set before the scatterplot) to "m"
par(pty = "m")

## Footnote code
## 5% critical value for a t-statistic with 12 d.f.
## (0.975 quantile of the t-distribution)
qt(p = 0.975, df = 12)
## [1] 2.179
## Analysis of variance table for the fitted model allbacks.lm
anova(object = allbacks.lm)
## Analysis of Variance Table
## 
## Response: weight
##           Df Sum Sq Mean Sq F value  Pr(>F)
## volume     1 812132  812132   134.7   7e-08
## area       1 127328  127328    21.1 0.00062
## Residuals 12  72373    6031
## Footnote code
## Correlation between the two explanatory variables, volume and area
cor(allbacks$volume, allbacks$area)
## [1] 0.001535
## Model matrix of allbacks.lm: intercept column plus volume and area
model.matrix(object = allbacks.lm)
##    (Intercept) volume area
## 1            1    885  382
## 2            1   1016  468
## 3            1   1125  387
## 4            1    239  371
## 5            1    701  371
## 6            1    641  367
## 7            1   1228  396
## 8            1    412    0
## 9            1    953    0
## 10           1    929    0
## 11           1   1492    0
## 12           1    419    0
## 13           1   1010    0
## 14           1    595    0
## 15           1   1034    0
## attr(,"assign")
## [1] 0 1 2
## ss 6.1.1: Omission of the intercept term
## The -1 in the formula removes the intercept from the model
allbacks.lm0 <- lm(
  weight ~ -1 + volume + area,
  data = allbacks
)
summary(allbacks.lm0)
## 
## Call:
## lm(formula = weight ~ -1 + volume + area, data = allbacks)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -112.5  -28.7  -10.5   24.6  213.8 
## 
## Coefficients:
##        Estimate Std. Error t value Pr(>|t|)
## volume   0.7289     0.0277   26.34  1.1e-12
## area     0.4809     0.0934    5.15  0.00019
## 
## Residual standard error: 75.1 on 13 degrees of freedom
## Multiple R-squared:  0.991,  Adjusted R-squared:  0.99 
## F-statistic:  748 on 2 and 13 DF,  p-value: 3.8e-14
## Display correlations between estimates of model coefficients.
## The argument is spelled out in full: the abbreviated form corr=TRUE only
## works through partial matching of summary.lm()'s `correlation` argument.
summary(allbacks.lm, correlation = TRUE)
## 
## Call:
## lm(formula = weight ~ volume + area, data = allbacks)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -104.1  -30.0  -15.5   16.8  212.3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  22.4134    58.4025    0.38  0.70786
## volume        0.7082     0.0611   11.60  7.1e-08
## area          0.4684     0.1019    4.59  0.00062
## 
## Residual standard error: 77.7 on 12 degrees of freedom
## Multiple R-squared:  0.928,  Adjusted R-squared:  0.917 
## F-statistic: 77.9 on 2 and 12 DF,  p-value: 1.34e-07
## 
## Correlation of Coefficients:
##        (Intercept) volume
## volume -0.88             
## area   -0.32        0.00
## ss 6.1.2: Diagnostic plots 
## par() returns the previous values of the settings it changes; keep them
## so the graphics state can be restored exactly, not reset to assumed values.
opar <- par(mfrow=c(2,2), pty="s")    # Get all 4 plots on one page
plot(allbacks.lm0)

## [figure: plot of chunk unnamed-chunk-1]

par(opar)                             # restore the saved mfrow and pty

## Refit the no-intercept model with row 13 of allbacks left out
allbacks.lm13 <- lm(
  weight ~ -1 + volume + area,
  data = allbacks[-13, ]
)
summary(allbacks.lm13)
## 
## Call:
## lm(formula = weight ~ -1 + volume + area, data = allbacks[-13, 
##     ])
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -61.72 -25.29   3.43  31.24  58.86 
## 
## Coefficients:
##        Estimate Std. Error t value Pr(>|t|)
## volume   0.6949     0.0163    42.6  1.8e-14
## area     0.5539     0.0527    10.5  2.1e-07
## 
## Residual standard error: 41 on 12 degrees of freedom
## Multiple R-squared:  0.997,  Adjusted R-squared:  0.997 
## F-statistic: 2.25e+03 on 2 and 12 DF,  p-value: 3.52e-16
## Sec 6.2: The Interpretation of Model Coefficients 
## ss 6.2.1: Times for Northern Irish hill races 
## Footnote Code
## Scatterplot matrices: data frame nihills (DAAG)
## Panel A: Scatterplot matrix, untransformed data, data frame nihills (DAAG)
library(lattice); library(DAAG)
## The size of the variable names in a lattice splom() is set with
## varname.cex. cex.labels is an argument of base pairs() and is silently
## ignored by splom(), so it never enlarged the labels.
splom(~ nihills[, c("dist","climb","time")], varname.cex=1.2,
      varnames=c("dist\n(miles)","climb\n(feet)", "time\n(hours)"))

## [figure: plot of chunk unnamed-chunk-1]

## Panel B: log transformed data
splom(~ log(nihills[, c("dist","climb","time")]), varname.cex=1.2,
      varnames=c("dist\n(log miles)", "climb\n(log feet)", "time\n(log hours)"))

## [figure: plot of chunk unnamed-chunk-1]

## Regress log(time) on log(dist) and log(climb), then draw the default
## plot() output for the fit on a 2 x 2 grid of square panels
nihills.lm <- lm(
  log(time) ~ log(dist) + log(climb),
  data = nihills
)
par(mfrow = c(2, 2), pty = "s")
plot(nihills.lm)