## Chapter 2: Styles of Data Analysis 

## Sec 2.1: Revealing Views of the Data 
## ss 2.1.1: Views of a single sample 
##                    Histograms and density plots 
library(DAAG)        # Ensure that the DAAG package is attached 
## Loading required package: lattice
## Form the subset of possum that holds data on females only
ftotlngth <- with(possum, totlngth[sex == "f"])

## Footnote Code
## To get a 1 by 4 layout, precede with
## (opar holds the previous settings, so that they can be restored later)
opar <- par(mfrow = c(1, 4), pty = "s")

## Two sets of class boundaries for the same data, offset by 2.5
breaks_a <- 72.5 + (0:5) * 5
breaks_b <- 75 + (0:5) * 5

## Panels A and B: frequency histograms, one for each choice of breaks
hist(ftotlngth, breaks = breaks_a, ylim = c(0, 22),
     xlab = "Total length (cm)", main = "A: Breaks at 72.5, 77.5, ...")
hist(ftotlngth, breaks = breaks_b, ylim = c(0, 22),
     xlab = "Total length (cm)", main = "B: Breaks at 75, 80, ...")

## Panels C and D: the histograms on a density scale, with the same
## density estimate overlaid.  Axis limits are taken from the density
## estimate so that the whole curve is visible in both panels.
dens <- density(ftotlngth)
xlim <- range(dens$x)
ylim <- range(dens$y)
## Use TRUE rather than T: T is an ordinary variable that can be reassigned
hist(ftotlngth, breaks = breaks_a, probability = TRUE,
     xlim = xlim, ylim = ylim, xlab = "Total length (cm)",
     main = "C: Breaks as in A")
lines(dens)
hist(ftotlngth, breaks = breaks_b, probability = TRUE,
     xlim = xlim, ylim = ylim, xlab = "Total length (cm)",
     main = "D: Breaks as in B")
lines(dens)

## plot of chunk unnamed-chunk-1

## Put back the graphics settings that were saved in opar, then make
## sure that the layout is a single panel per page
par(opar)
par(mfrow = c(1, 1))

##                     The stem-and-leaf display 
## Heights (ht) of the rowers in data frame ais
stem(ais$ht[ais$sport == "Row"])
## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   15 | 6
##   16 | 
##   16 | 5
##   17 | 4
##   17 | 5678899
##   18 | 00000011223
##   18 | 55666668899
##   19 | 123
##   19 | 58
## Footnote Code
## Use quantile() to obtain the quartiles of ht: data frame ais (DAAG package)
## The argument is spelled out in full (probs), rather than relying on
## partial matching of the abbreviation prob
quantile(ais$ht[ais$sport == "Row"], probs = c(0.25, 0.5, 0.75))
##   25%   50%   75% 
## 179.3 181.8 186.3
 # For the 50th percentile (the 2nd quartile), an alternative is median() 

##                              Boxplots 
## Base graphics boxplot function
## horizontal = TRUE lays the box out along the x-axis
boxplot(x = ftotlngth, horizontal = TRUE)

## plot of chunk unnamed-chunk-1

## Alternative: lattice graphics bwplot function
## The formula names the totlngth column of fossum (the females-only
## possum data in DAAG), so that data= is what supplies the values; the
## plot then does not depend on a vector left in the workspace.
bwplot(~ totlngth, data = fossum)

## plot of chunk unnamed-chunk-1

## ss 2.1.2: Patterns in univariate time series 
## Panel A
## London population in thousands: one value every 10 years, 1801 to 1931
londonpop <- ts(
  c(1088, 1258, 1504, 1778, 2073, 2491, 2921, 3336, 3881, 4266,
    4563, 4541, 4498, 4408),
  start = 1801, end = 1931, deltat = 10
)
## Values (on the original scale) at which the y-axis is to be labelled
ytiks <- c(1, 10, 100, 1000, 1000000, 5000000)

## Deaths on a log10 scale; the default y-axis is suppressed so that an
## axis labelled on the original scale can be added afterwards
plot(log10(measles), xlab = "", ylab = " Deaths; Population (log scale)",
     ylim = log10(c(1, 5000 * 1000)), yaxt = "n")
## Population is converted from thousands before taking logs
points(log10(londonpop * 1000), pch = 16, cex = .5)
axis(side = 2, at = log10(ytiks), labels = as.character(ytiks), las = 2)

## plot of chunk unnamed-chunk-1

## Panel B
## Untransformed deaths for the window 1840 to 1882.  The window()
## call is kept inline, as it supplies the default y-axis label.
plot(window(measles, start = 1840, end = 1882), ylim = c(0, 4600), yaxt = "n")
## Tick marks at 0, 1000, ..., 4000, labelled 0 to 4 (i.e., in thousands)
axis(side = 2, at = 1000 * (0:4), labels = as.character(0:4), las = 2)

## plot of chunk unnamed-chunk-1

## Both graphs on the one graphics page
## Panel A: upper part of the page (38% to 100%, in the y-direction)
par(fig = c(0, 1, .38, 1), cex = 0.8)
plot(log10(measles), ylim = log10(c(1, 5000 * 1000)), ylab = "log10(Deaths)")
mtext("A (1629-1939)", side = 3, line = 0.5, adj = 0)
## Panel B: window from 1840 to 1882; more complete code
## Lower part of the page (0% to 40% of the height, so the two figure
## regions overlap slightly); new=TRUE stops the next plot from
## clearing the page, so that Panel A remains
par(fig = c(0, 1, 0, .4), new = TRUE)
plot(window(measles, start = 1840, end = 1882), ylab = "Deaths")
mtext("B (1841-1881)", side = 3, line = 0.5, adj = 0)

## plot of chunk unnamed-chunk-1

## Restore default figure region (the whole page) and text size
par(fig = c(0, 1, 0, 1), cex = 1)

## ss 2.1.3: Patterns in bivariate data 
## Plot four vs one: data frame milk (DAAG)
par(pty = "s")             # square plotting region
## The same limits are used on both axes; they span all values in milk
xyrange <- range(milk)
plot(four ~ one, data = milk, pch = 16, xlim = xyrange, ylim = xyrange)
rug(milk$one, side = 1)    # x-axis rug (side = 1 is the default)
rug(milk$four, side = 2)   # y-axis rug
abline(a = 0, b = 1)       # intercept 0, slope 1: the line four = one

## plot of chunk unnamed-chunk-1

## Return from the square plotting region to the maximal one
par(pty = "m")

##                The fitting of a smooth trend curve 
## Plot ohms vs juice: data frame fruitohms (DAAG)
plot(ohms ~ juice, data = fruitohms,
     xlab = "Apparent juice content (%)", ylab = "Resistance (ohms)")
## Add a smooth curve, as in Panel B
## lowess() returns the points of the smooth, which lines() then joins
lines(lowess(fruitohms$juice, fruitohms$ohms), lwd = 2)