intro.R

## Chapter 1: A Brief Introduction to R 

## Sec 1.1: An Overview of R 
## ss 1.1.1: A Short R Session 
##                      { R} must be installed! 
##             Using the console (or command line) window 
2+2

## [1] 4

2*3*4*5          # * denotes 'multiply'

## [1] 120

sqrt(10)         # the square root of 10

## [1] 3.162

pi               # R knows about pi

## [1] 3.142

2*pi*6378        # Circumference of earth at equator (km)

## [1] 40074

3*4^ 
2

## [1] 48

3*4^2; (3*4)^2

## [1] 48

## [1] 144

##                 Entry of data at the command line 
Year <- c(1800, 1850, 1900, 1950, 2000) 
Carbon <- c(8, 54, 534, 1630, 6611) 
## Now plot Carbon as a function of Year 
plot(Carbon ~ Year, pch=16)

plot of chunk unnamed-chunk-1

##              Collection of vectors into a data frame 
fossilfuel <- data.frame(year=Year, carbon=Carbon) 
fossilfuel         # Display the contents of the data frame.

##   year carbon
## 1 1800      8
## 2 1850     54
## 3 1900    534
## 4 1950   1630
## 5 2000   6611

rm(Year, Carbon)     # The rm() function removes unwanted objects 

plot(carbon ~ year, data=fossilfuel, pch=16)

plot of chunk unnamed-chunk-1

##        The R Commander Graphical User Interface (GUI) to R 
##      The working directory and the contents of the workspace 
getwd()

## [1] "/Users/johnm1/r3/scripts"

ls()

## [1] "fossilfuel" "metadata"

##                           Quitting { R} 
# q() 

##                      Installation of packages 
install.packages("DAAG")

## 
## The downloaded binary packages are in
##  /var/folders/00/_kpyywm16hnbs2c0dvlf0mwr0000gq/T//RtmpWtVh3X/downloaded_packages

install.packages(c("magic", "schoolmath"), dependencies=TRUE)

## 
## The downloaded binary packages are in
##  /var/folders/00/_kpyywm16hnbs2c0dvlf0mwr0000gq/T//RtmpWtVh3X/downloaded_packages

## ss 1.1.2: The uses of R 
##   { R} offers an extensive collection of functions and abilities 
range(fossilfuel$carbon)

## [1]    8 6611

## 4 cities 
fourcities <- c("Toronto", "Canberra", "New York", "London") 
## display in alphabetical order 
sort(fourcities)

## [1] "Canberra" "London"   "New York" "Toronto"

## Find the number of characters in "Toronto" 
nchar("Toronto")

## [1] 7

## Find the number of characters in all four city names at once 
nchar(fourcities)

## [1] 7 8 8 6

##        { R} will give numerical or graphical data summaries 
summary(cars)

##      speed           dist    
##  Min.   : 4.0   Min.   :  2  
##  1st Qu.:12.0   1st Qu.: 26  
##  Median :15.0   Median : 36  
##  Mean   :15.4   Mean   : 43  
##  3rd Qu.:19.0   3rd Qu.: 56  
##  Max.   :25.0   Max.   :120

hist(cars$speed)

plot of chunk unnamed-chunk-1

##            { R} is an interactive programming language 
celsius <- (0:4)*10 
fahrenheit <- 9/5*celsius+32 
conversion <- data.frame(Celsius=celsius, Fahrenheit=fahrenheit) 
print(conversion)

##   Celsius Fahrenheit
## 1       0         32
## 2      10         50
## 3      20         68
## 4      30         86
## 5      40        104

## ss 1.1.3: Online Help 
?plot                # Equivalent to help(plot) 

apropos("sort")        # Try, also, apropos ("sor")

## [1] "is.unsorted"  "sort"         "sort.default" "sort.int"    
## [5] "sort.list"    "sort.POSIXlt" "sortedXyData"

 # List all functions where "sort" is part of the name  
help.search("sort")    # Note that the argument is 'sort' 
 # List functions with 'sort' in the help page title or as an alias 

example(image)

## 
## image> require(grDevices) # for colours
## 
## image> x <- y <- seq(-4*pi, 4*pi, len = 27)
## 
## image> r <- sqrt(outer(x^2, y^2, "+"))
## 
## image> image(z = z <- cos(r^2)*exp(-r/6), col  = gray((0:32)/32))

plot of chunk unnamed-chunk-1

## 
## image> image(z, axes = FALSE, main = "Math can be beautiful ...",
## image+       xlab = expression(cos(r^2) * e^{-r/6}))

plot of chunk unnamed-chunk-1

## 
## image> contour(z, add = TRUE, drawlabels = FALSE)
## 
## image> # Volcano data visualized as matrix. Need to transpose and flip
## image> # matrix horizontally.
## image> image(t(volcano)[ncol(volcano):1,])

plot of chunk unnamed-chunk-1

## 
## image> # A prettier display of the volcano
## image> x <- 10*(1:nrow(volcano))
## 
## image> y <- 10*(1:ncol(volcano))
## 
## image> image(x, y, volcano, col = terrain.colors(100), axes = FALSE)

plot of chunk unnamed-chunk-1

## 
## image> contour(x, y, volcano, levels = seq(90, 200, by = 5),
## image+         add = TRUE, col = "peru")
## 
## image> axis(1, at = seq(100, 800, by = 100))
## 
## image> axis(2, at = seq(100, 600, by = 100))
## 
## image> box()
## 
## image> title(main = "Maunga Whau Volcano", font.main = 4)

par(ask=FALSE)       # turn off the prompts  

##            Wide-ranging information access and searches 
##                 Help in finding the right package 
## ss 1.1.4: Input of data from a file 
library(DAAG)

## Loading required package: lattice
## 
## Attaching package: 'DAAG'
## 
## The following object is masked _by_ '.GlobalEnv':
## 
##     fossilfuel

datafile('fuel')   ## Places file in working directory,

## 
## Data written to file: fuel.txt

                   ## ready for input.
fossilfuel <- read.table("fuel.txt", header=TRUE) 

write.csv(fossilfuel, file='fuel.csv')
fossilfuel <- read.table("fuel.csv", header=TRUE, sep=",") 

## ss 1.1.5: R Packages 
library(DAAG) 
sessionInfo()

## R version 3.1.1 Patched (2014-08-22 r66458)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
## 
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] DAAG_1.20       lattice_0.20-29
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.4        evaluate_0.5.5      formatR_0.10       
##  [4] grid_3.1.1          htmltools_0.2.4     knitr_1.6          
##  [7] latticeExtra_0.6-26 RColorBrewer_1.0-5  rmarkdown_0.2.49   
## [10] stringr_0.6.2       tools_3.1.1

##               Data sets that accompany { R} packages 
data(package="datasets")  # Specify 'package', not 'library'. 

## ss 1.1.6: Further steps in learning R 

## Sec 1.2: Vectors, Factors and Univariate Time Series 
## ss 1.2.1: Vectors 
c(2, 3, 5, 2, 7, 1)

## [1] 2 3 5 2 7 1

c(T, F, F, F, T, T, F)

## [1]  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE

c("Canberra", "Sydney", "Canberra", "Sydney")

## [1] "Canberra" "Sydney"   "Canberra" "Sydney"

## ss 1.2.2: Concatenation -- joining vector objects 
x <- c(2, 3, 5, 2, 7, 1)  # x then holds values 2, 3, 5, 2, 7, 1 
x

## [1] 2 3 5 2 7 1

y <- c(10, 15, 12) 
y

## [1] 10 15 12

z <- c(x, y) 
z

## [1]  2  3  5  2  7  1 10 15 12

## ss 1.2.3: The use of relational operators to compare vector elements 
x <- c(3, 11, 8, 15, 12) 
x > 8

## [1] FALSE  TRUE FALSE  TRUE  TRUE

x != 8

## [1]  TRUE  TRUE FALSE  TRUE  TRUE

## ss 1.2.4: The use of square brackets to extract subsets of vectors 
x <- c(3, 11, 8, 15, 12)  
x[c(2,4)]                 # Elements in positions 2

## [1] 11 15

x[-c(2,3)]         # Remove the elements in positions 2 and 3

## [1]  3 15 12

x > 10

## [1] FALSE  TRUE FALSE  TRUE  TRUE

x[x > 10]

## [1] 11 15 12

heights <- c(Andreas=178, John=185, Jeff=183) 
heights[c("John","Jeff")]

## John Jeff 
##  185  183

## ss 1.2.5: Patterned data 
5:15

##  [1]  5  6  7  8  9 10 11 12 13 14 15

seq(from=5, to=22, by=3)  # The first value is 5.  The final

## [1]  5  8 11 14 17 20

rep(c(2,3,5), 4)

##  [1] 2 3 5 2 3 5 2 3 5 2 3 5

c(rep("female", 3), rep("male", 2))

## [1] "female" "female" "female" "male"   "male"

## ss 1.2.6: Missing values 
library(DAAG) 
nbranch <- subset(rainforest, species=="Acacia mabellae")$branch 
nbranch            # Number of small branches (2cm or less)

##  [1] NA 35 41 50 NA NA NA NA NA  4 30 13 10 17 46 92

mean(nbranch)

## [1] NA

mean(nbranch, na.rm=TRUE)

## [1] 33.8

NA == 35

## [1] NA

## Replace all NAs by -999  
nbranch[is.na(nbranch)] <- -999 
nbranch

##  [1] -999   35   41   50 -999 -999 -999 -999 -999    4   30   13   10   17
## [15]   46   92

## There is now no protection against use of the -999 values as 
## if they were legitimate numeric values 
mean(nbranch)

## [1] -353.5

## ss 1.2.7: Factors 
## Create character vector 
gender <- c(rep("female",691), rep("male",692)) 
levels(gender)     # For a character vector, this returns NULL

## NULL

## From character vector, create factor 
gender <- factor(gender) 
levels(gender)

## [1] "female" "male"

levels(gender)

## [1] "female" "male"

gender <- factor(gender, levels=c("male", "female")) 

## ss 1.2.8: Time series 
## Footnote Code
## Alternatively, obtain from data frame jobs (DAAG) 
library(DAAG) 
numjobs <- jobs$Prairies 

numjobs <- c(982,981,984,982,981,983,983,983,983,979,973,979, 
             974,981,985,987,986,980,983,983,988,994,990,999) 

numjobs <- ts(numjobs, start=1995, frequency = 12) 
plot(numjobs)

plot of chunk unnamed-chunk-1

first15 <- window(numjobs, start=1995.75, end=1996.25) 


## Sec 1.3: Data Frames and Matrices 
Cars93.summary

##         Min.passengers Max.passengers No.of.cars abbrev
## Compact              4              6         16      C
## Large                6              6         11      L
## Midsize              4              6         22      M
## Small                4              5         21     Sm
## Sporty               2              4         14     Sp
## Van                  7              8          9      V

Cars93.summary <- edit(Cars93.summary) 

##    Displaying the first few, or last few, rows of a data frame 
head(Cars93.summary, n=3)  # Display the first 3 rows (the default is 6)

##         Min.passengers Max.passengers No.of.cars abbrev
## Compact              4              6         16      C
## Large                6              6         11      L
## Midsize              4              6         22      M

 # . . . 

##                        Column and row names 
rownames(Cars93.summary)   # Extract names of rows

## [1] "Compact" "Large"   "Midsize" "Small"   "Sporty"  "Van"

colnames(Cars93.summary)   # Extract column names

## [1] "Min.passengers" "Max.passengers" "No.of.cars"     "abbrev"

## names(Cars93.summary)[3] <- "numCars" 
## names(Cars93.summary) <- c("minPass","maxPass","numCars","code") 

##                       Subsets of data frames 
Cars93.summary[1:3, 2:3]   # Rows 1-3 and columns 2-3

##         Max.passengers No.of.cars
## Compact              6         16
## Large                6         11
## Midsize              6         22

Cars93.summary[, 2:3]      # Columns 2-3 (all rows)

##         Max.passengers No.of.cars
## Compact              6         16
## Large                6         11
## Midsize              6         22
## Small                5         21
## Sporty               4         14
## Van                  8          9

Cars93.summary[, c("No.of.cars", "abbrev")]   # Cols 2-3, by name

##         No.of.cars abbrev
## Compact         16      C
## Large           11      L
## Midsize         22      M
## Small           21     Sm
## Sporty          14     Sp
## Van              9      V

Cars93.summary[, -c(2,3)]  # omit columns 2 and 3

##         Min.passengers abbrev
## Compact              4      C
## Large                6      L
## Midsize              4      M
## Small                4     Sm
## Sporty               2     Sp
## Van                  7      V

subset(Cars93.summary,  
       subset=c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE))

##         Min.passengers Max.passengers No.of.cars abbrev
## Compact              4              6         16      C
## Large                6              6         11      L

##             Data frames are a specialized type of list 
## Cities with more than 2.5 million inhabitants 
USACanada <- list(USACities=c("NY", "LA", "Chicago"),  
                  CanadaCities=c("Toronto", "Montreal"), 
                  millionsPop=c(USA=305.9, Canada=31.6)) 
 
USACanada

## $USACities
## [1] "NY"      "LA"      "Chicago"
## 
## $CanadaCities
## [1] "Toronto"  "Montreal"
## 
## $millionsPop
##    USA Canada 
##  305.9   31.6

## ss 1.3.1: Accessing the columns of data frames -- {with()} and {attach()} 
## cfseal (DAAG) has data on Cape Fur seals 
with(cfseal, c(mean(weight), median(weight)))

## [1] 54.79 46.25

with(pair65,       # stretch of rubber bands, from DAAG 
  {lenchange <- heated-ambient 
   c(mean(lenchange), median(lenchange)) 
})

## [1] 6.333 6.000

## year 
attach(fossilfuel)  # Attach data frame fossilfuel 
year

## [1] 1800 1850 1900 1950 2000

detach(fossilfuel)  # Detach data frame 

## ss 1.3.2: Aggregation, stacking and unstacking 
chickwtAvs <- with(chickwts,  
                   aggregate(weight, by=list(feed), mean)) 
names(chickwtAvs) <- c("Feed Group", "Mean Weight") 
chickwtAvs

##   Feed Group Mean Weight
## 1     casein       323.6
## 2  horsebean       160.2
## 3    linseed       218.8
## 4   meatmeal       276.9
## 5    soybean       246.4
## 6  sunflower       328.9

library(DAAG) 
head(jobs,3)

##     BC Alberta Prairies Ontario Quebec Atlantic  Date
## 1 1752    1366      982    5239   3196      947 95.00
## 2 1737    1369      981    5233   3205      946 95.08
## 3 1765    1380      984    5212   3191      954 95.17

  # . . . 
Jobs <- stack(jobs, select = 1:6)  
  # stack() concatenates selected data frame columns into a 
  # single column named "values", & adds a factor named "ind" 
  # that has the names of the concatenated columns as levels. 
head(Jobs,3)

##   values ind
## 1   1752  BC
## 2   1737  BC
## 3   1765  BC

  # . . . 

## ss 1.3.3: *~Data frames and matrices 
fossilfuelmat <- matrix(c(1800, 1850, 1900, 1950, 2000,  
                          8, 54, 534, 1630, 6611), nrow=5) 
colnames(fossilfuel) <- c("year", "carbon") 

fossilfuelmat <- cbind(year=c(1800, 1850, 1900, 1950, 2000), 
                       carbon=c(8, 54, 534, 1630, 6611)) 


## Sec 1.4: Functions, Operators and Loops 
## ss 1.4.1: Common useful built-in functions 
# all()       # returns TRUE if all values are TRUE 
# any()       # returns TRUE if any values are TRUE 
# args()      # information on the arguments to a function 
# cat()       # prints multiple objects, one after the other 
# cumprod()   # cumulative product 
# cumsum()    # cumulative sum 
# diff()      # form vector of first differences  
#             # N. B. diff(x) has one less element than x 
# history()   # displays previous commands used 
# is.factor() # returns TRUE if the argument is a factor 
# is.na()     # returns TRUE if the argument is an NA 
#             # NB also is.logical(), is.matrix(), etc. 
# length()    # number of elements in a vector or of a list 
# ls()        # list names of objects in the workspace 
# mean()      # mean of the elements of a vector 
# median()    # median of the elements of a vector 
# order()     # x[order(x)] sorts x (by default, NAs are last) 
# print()     # prints a single R object 
# range()     # minimum and maximum value elements of vector 
# sort()      # sort elements into order, by default omitting NAs 
# rev()       # reverse the order of vector elements 
# str()       # information on an R object 
# unique()    # form the vector of distinct values 
# which()     # locates 'TRUE' indices of logical vectors  
# which.max() # locates (first) maximum of a numeric vector 
# which.min() # locates (first) minimum of a numeric vector 
# with()      # do computation using columns of specified data frame 

##                       The {print()} function 
x <- 2    # Assign to x the value 2; nothing is printed 
x         # Equivalent to print(x)

## [1] 2

x*5       # Equivalent to print(x*5)

## [1] 10

(x <- 2)  # Equivalent to: x <- 2; print(x)

## [1] 2

##      Calculations in parallel across all elements of a vector 
##        Data summary functions -- {table()}and { {sapply()} 
## table()              # Form a table of counts 
## xtabs()              # Form a table of totals 

library(DAAG)      # tinting is from DAAG 
table(Sex=tinting$sex, AgeGroup=tinting$agegp)

##    AgeGroup
## Sex younger older
##   f      63    28
##   m      28    63

sapply(jobs[, -7], range)

##        BC Alberta Prairies Ontario Quebec Atlantic
## [1,] 1737    1366      973    5212   3167      941
## [2,] 1840    1436      999    5360   3257      968

##                         Utility functions 
ls(pattern="p")      # List object names that include the letter "p"

## character(0)

ls(pattern="^p")     # List object names that start with "p"

## character(0)

## ss 1.4.2: Generic functions, and the class of an object 
## ss 1.4.3: User-written functions 
mean.and.sd <- function(x){ 
    av <- mean(x) 
    sdev <- sd(x) 
    c(mean=av, SD=sdev) 
    } 

distance <- c(148,182,173,166,109,141,166) 
mean.and.sd(distance)

##   mean     SD 
## 155.00  24.68

mean.and.sd <- function(x = rnorm(10)){ 
    av <- mean(x) 
    sdev <- sd(x) 
    c(mean=av, SD=sdev) 
    } 

mean.and.sd()

##    mean      SD 
## -0.9827  0.7479

##                     The structure of functions 
## Footnote Code
## Thus, to return the mean, SD and name of the input vector 
## replace c(mean=av, SD=sdev) by 
## list(mean=av, SD=sdev, dataset = deparse(substitute(x))) 

## ss 1.4.4: If statements 
Carbon <- fossilfuel$carbon 
if (mean(Carbon) > median(Carbon)) print("Mean > Median") else  
    print("Median <= Mean")

## [1] "Median <= Mean"

dist <- c(148, 182, 173, 166, 109, 141, 166) 
dist.sort <- if (dist[1] < 150) 
                sort(dist, decreasing=TRUE) else sort(dist) 
dist.sort

## [1] 182 173 166 166 148 141 109

## ss 1.4.5: Selection and matching 
x <- rep(1:5, rep(3,5)) 
x[x %in% c(2,4)]

## [1] 2 2 2 4 4 4

match(x, c(2,4), nomatch=0)

##  [1] 0 0 0 1 1 1 0 0 0 2 2 2 0 0 0

## ss 1.4.6: Functions for working with missing values 
##         Identification of rows that include missing values 
## Which rows have missing values: data frame science (DAAG) 
science[!complete.cases(science), ]

##     State PrivPub school class  sex like Class
## 671   ACT  public     19     1 <NA>    5  19.1
## 672   ACT  public     19     1 <NA>    5  19.1

dim(science)

## [1] 1385    7

Science <- na.omit(science) 
dim(Science)

## [1] 1383    7

## ss 1.4.7: \kern-4pt* Looping 
for (i in 1:3) print(i)

## [1] 1
## [1] 2
## [1] 3

## Relative population increase in Australian states: 1917-1997 
## Data frame austpop (DAAG) 
relGrowth <- numeric(8)   # numeric(8) creates a numeric vector 
                          # with 8 elements, all set equal to 0 
for (j in seq(from=2, to=9)) { 
    relGrowth[j-1] <- (austpop[9, j]-austpop[1, j])/ 
    austpop[1, j]} 
names(relGrowth) <- names(austpop[c(-1,-10)]) 
  # We have used names() to name the elements of relGrowth 
relGrowth          # Output is with options(digits=3)

##     NSW     Vic     Qld      SA      WA     Tas      NT     ACT 
##   2.295   2.268   3.980   2.364   4.876   1.456  36.400 102.333

## Sec 1.5: Graphics in R 
demo(graphics)

## 
## 
##  demo(graphics)
##  ---- ~~~~~~~~
## 
## > #  Copyright (C) 1997-2009 The R Core Team
## > 
## > require(datasets)
## 
## > require(grDevices); require(graphics)
## 
## > ## Here is some code which illustrates some of the differences between
## > ## R and S graphics capabilities.  Note that colors are generally specified
## > ## by a character string name (taken from the X11 rgb.txt file) and that line
## > ## textures are given similarly.  The parameter "bg" sets the background
## > ## parameter for the plot and there is also an "fg" parameter which sets
## > ## the foreground color.
## > 
## > 
## > x <- stats::rnorm(50)
## 
## > opar <- par(bg = "white")
## 
## > plot(x, ann = FALSE, type = "n")

plot of chunk unnamed-chunk-1

## 
## > abline(h = 0, col = gray(.90))
## 
## > lines(x, col = "green4", lty = "dotted")
## 
## > points(x, bg = "limegreen", pch = 21)
## 
## > title(main = "Simple Use of Color In a Plot",
## +       xlab = "Just a Whisper of a Label",
## +       col.main = "blue", col.lab = gray(.8),
## +       cex.main = 1.2, cex.lab = 1.0, font.main = 4, font.lab = 3)
## 
## > ## A little color wheel.    This code just plots equally spaced hues in
## > ## a pie chart.    If you have a cheap SVGA monitor (like me) you will
## > ## probably find that numerically equispaced does not mean visually
## > ## equispaced.  On my display at home, these colors tend to cluster at
## > ## the RGB primaries.  On the other hand on the SGI Indy at work the
## > ## effect is near perfect.
## > 
## > par(bg = "gray")
## 
## > pie(rep(1,24), col = rainbow(24), radius = 0.9)

plot of chunk unnamed-chunk-1

## 
## > title(main = "A Sample Color Wheel", cex.main = 1.4, font.main = 3)
## 
## > title(xlab = "(Use this as a test of monitor linearity)",
## +       cex.lab = 0.8, font.lab = 3)
## 
## > ## We have already confessed to having these.  This is just showing off X11
## > ## color names (and the example (from the postscript manual) is pretty "cute".
## > 
## > pie.sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12)
## 
## > names(pie.sales) <- c("Blueberry", "Cherry",
## +              "Apple", "Boston Cream", "Other", "Vanilla Cream")
## 
## > pie(pie.sales,
## +     col = c("purple","violetred1","green3","cornsilk","cyan","white"))

plot of chunk unnamed-chunk-1

## 
## > title(main = "January Pie Sales", cex.main = 1.8, font.main = 1)
## 
## > title(xlab = "(Don't try this at home kids)", cex.lab = 0.8, font.lab = 3)
## 
## > ## Boxplots:  I couldn't resist the capability for filling the "box".
## > ## The use of color seems like a useful addition, it focuses attention
## > ## on the central bulk of the data.
## > 
## > par(bg="cornsilk")
## 
## > n <- 10
## 
## > g <- gl(n, 100, n*100)
## 
## > x <- rnorm(n*100) + sqrt(as.numeric(g))
## 
## > boxplot(split(x,g), col="lavender", notch=TRUE)

plot of chunk unnamed-chunk-1

## 
## > title(main="Notched Boxplots", xlab="Group", font.main=4, font.lab=1)
## 
## > ## An example showing how to fill between curves.
## > 
## > par(bg="white")
## 
## > n <- 100
## 
## > x <- c(0,cumsum(rnorm(n)))
## 
## > y <- c(0,cumsum(rnorm(n)))
## 
## > xx <- c(0:n, n:0)
## 
## > yy <- c(x, rev(y))
## 
## > plot(xx, yy, type="n", xlab="Time", ylab="Distance")

plot of chunk unnamed-chunk-1

## 
## > polygon(xx, yy, col="gray")
## 
## > title("Distance Between Brownian Motions")
## 
## > ## Colored plot margins, axis labels and titles.    You do need to be
## > ## careful with these kinds of effects.    It's easy to go completely
## > ## over the top and you can end up with your lunch all over the keyboard.
## > ## On the other hand, my market research clients love it.
## > 
## > x <- c(0.00, 0.40, 0.86, 0.85, 0.69, 0.48, 0.54, 1.09, 1.11, 1.73, 2.05, 2.02)
## 
## > par(bg="lightgray")
## 
## > plot(x, type="n", axes=FALSE, ann=FALSE)

plot of chunk unnamed-chunk-1

## 
## > usr <- par("usr")
## 
## > rect(usr[1], usr[3], usr[2], usr[4], col="cornsilk", border="black")
## 
## > lines(x, col="blue")
## 
## > points(x, pch=21, bg="lightcyan", cex=1.25)
## 
## > axis(2, col.axis="blue", las=1)
## 
## > axis(1, at=1:12, lab=month.abb, col.axis="blue")
## 
## > box()
## 
## > title(main= "The Level of Interest in R", font.main=4, col.main="red")
## 
## > title(xlab= "1996", col.lab="red")
## 
## > ## A filled histogram, showing how to change the font used for the
## > ## main title without changing the other annotation.
## > 
## > par(bg="cornsilk")
## 
## > x <- rnorm(1000)
## 
## > hist(x, xlim=range(-4, 4, x), col="lavender", main="")

plot of chunk unnamed-chunk-1

## 
## > title(main="1000 Normal Random Variates", font.main=3)
## 
## > ## A scatterplot matrix
## > ## The good old Iris data (yet again)
## > 
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", font.main=4, pch=19)

plot of chunk unnamed-chunk-1

## 
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", pch=21,
## +       bg = c("red", "green3", "blue")[unclass(iris$Species)])

plot of chunk unnamed-chunk-1

## 
## > ## Contour plotting
## > ## This produces a topographic map of one of Auckland's many volcanic "peaks".
## > 
## > x <- 10*1:nrow(volcano)
## 
## > y <- 10*1:ncol(volcano)
## 
## > lev <- pretty(range(volcano), 10)
## 
## > par(bg = "lightcyan")
## 
## > pin <- par("pin")
## 
## > xdelta <- diff(range(x))
## 
## > ydelta <- diff(range(y))
## 
## > xscale <- pin[1]/xdelta
## 
## > yscale <- pin[2]/ydelta
## 
## > scale <- min(xscale, yscale)
## 
## > xadd <- 0.5*(pin[1]/scale - xdelta)
## 
## > yadd <- 0.5*(pin[2]/scale - ydelta)
## 
## > plot(numeric(0), numeric(0),
## +      xlim = range(x)+c(-1,1)*xadd, ylim = range(y)+c(-1,1)*yadd,
## +      type = "n", ann = FALSE)

plot of chunk unnamed-chunk-1

## 
## > usr <- par("usr")
## 
## > rect(usr[1], usr[3], usr[2], usr[4], col="green3")
## 
## > contour(x, y, volcano, levels = lev, col="yellow", lty="solid", add=TRUE)
## 
## > box()
## 
## > title("A Topographic Map of Maunga Whau", font= 4)
## 
## > title(xlab = "Meters North", ylab = "Meters West", font= 3)
## 
## > mtext("10 Meter Contour Spacing", side=3, line=0.35, outer=FALSE,
## +       at = mean(par("usr")[1:2]), cex=0.7, font=3)
## 
## > ## Conditioning plots
## > 
## > par(bg="cornsilk")
## 
## > coplot(lat ~ long | depth, data = quakes, pch = 21, bg = "green3")

plot of chunk unnamed-chunk-1

## 
## > par(opar)

## ss 1.5.1: The function {plot( )} and allied functions 
plot(Brainwt ~ Bodywt, data=primates)  # plot(y ~ x) syntax

plot of chunk unnamed-chunk-1

with(primates, plot(Bodywt, Brainwt))    # plot(x, y) syntax

plot of chunk unnamed-chunk-1

##           Adding points, lines, text and axis annotation 
## Place labels on points 
plot(Brainwt ~ Bodywt, xlim=c(0, 300), data=primates) 
  # Specify xlim so that there is room for the labels 
with(primates,  
     text(Brainwt ~ Bodywt, labels=row.names(primates), pos=4))

plot of chunk unnamed-chunk-1

  # pos=4 places text to the right of the points. Other 
  # possibilities are: 1: below; 2: to the left; 3: above 

## Footnote Code
## Plot Brainwt vs Bodywt, primates data frame 
plot(Brainwt ~ Bodywt, xlim=c(0, 300), ylim=c(0,1500), data=primates) 
yoff <- c(-.125,0,0,.125,0)*par()$cxy[2] 
with(primates, text(x=Bodywt, y=Brainwt+yoff, labels=row.names(primates), pos=4))

plot of chunk unnamed-chunk-1

##                 Fine control -- parameter settings 
oldpar <- par(cex=1.25)    
# Use par(oldpar) to restore previous settings 

## ss 1.5.2: The use of color 
theta <- (1:50)*0.92 
plot(theta, sin(theta), col=1:50, pch=16, cex=4) 
points(theta, cos(theta), col=51:100, pch=15, cex=4)

plot of chunk unnamed-chunk-1

palette()            # Names of the colors in the current palette

## [1] "black"   "red"     "green3"  "blue"    "cyan"    "magenta" "yellow" 
## [8] "gray"

plot(theta, sin(theta), col=colors()[1:50], pch=16, cex=4) 
points(theta, cos(theta), col=colors()[51:100], pch=15, cex=4)

plot of chunk unnamed-chunk-1

## ss 1.5.3: The importance of aspect ratio 
## Plot sin(theta) vs theta, at regularly spaced values of theta 
## sin() expects angles to be in radians 
 # multiply angles in degrees by pi/180 to get radians 
plot((0:20)*pi/10, sin((0:20)*pi/10))

plot of chunk unnamed-chunk-1

plot((1:50)*0.92, sin((1:50)*0.92))

plot of chunk unnamed-chunk-1

par(mfrow=c(3,1))    # Gives a 3 by 1 layout of plots 
plot((1:50)*0.92, sin((1:50)*0.92)) 
par(mfrow=c(1,1)) 

## ss 1.5.4: Dimensions and other settings for graphics devices 
## ss 1.5.5: The plotting of expressions and mathematical symbols 
symbols(x=1.5, y=0, circles=1.2, xlim=c(0,3), ylim=c(-1.5,1.5), 
        bg="gray", inches=FALSE)  
  # inches=FALSE ensures that radius is in x-axis units 
text(1.5, 0.5, expression("Area" == pi*phantom("'")*italic(r)^2)) 
  # Use '==' to insert '='. 
  # Text or symbols that appear either side of '*' are juxtaposed. 
  # Notice the use of phantom("'") to insert a small space. 

## Footnote Code
## To add the double-headed arrow and associated label, specify: 
arrows(1.5, 0, 2.7, 0, length=.1, code=3)   # code=3: arrows at both ends 
  # length is the length of the arrow head (in inches!) 
text(2.1, -strheight("R"), expression(italic(r) == 1.2))

plot of chunk unnamed-chunk-1

## ss 1.5.6: Identification and location on the figure region 
plot(Brainwt ~ Bodywt, data=primates) 
with(primates, 
     identify(Brainwt ~ Bodywt, labels=row.names(primates), n=2))

plot of chunk unnamed-chunk-1

## integer(0)

  # Now click near 2 plotted points 

## ss 1.5.7: Plot methods for objects other than vectors 
## Use plot() with data frame trees (datasets) 
plot(trees)          # Gives a 3 x 3 layout of pairwise

plot of chunk unnamed-chunk-1

                     # scatterplots among the three variables 

## ss 1.5.8: Lattice (trellis) Graphics 
##  Lattice graphics versus base graphics -- {xyplot()} versus {plot()} 
## Plot Brainwt vs Bodywt, data frame primates (DAAG) 
plot(Brainwt ~ Bodywt, data=primates)       # base graphics

plot of chunk unnamed-chunk-1

 # 'base' graphics use the abilities of the graphics package 
library(lattice) 
xyplot(Brainwt ~ Bodywt, data=primates)     # lattice

invisible(plot(Brainwt ~ Bodywt, data=primates))     # Graph appears

plot of chunk unnamed-chunk-1

invisible(xyplot(Brainwt ~ Bodywt, data=primates))   # No graph 

print(xyplot(ACT ~ year, data=austpop))

plot of chunk unnamed-chunk-1

##           Panels of scatterplots -- the use of xyplot() 
trellis.device(color=FALSE) 
xyplot(ht ~ wt | sport, groups=sex, pch=c(4,1), aspect=1, data=ais, 
       auto.key=list(columns=2), subset=sport%in%c("Row","Swim")) 
dev.off()            # Close device

## pdf 
##   2

trellis.device()     # Start new device, by default with color=TRUE 

##                    Plotting columns in parallel 
xyplot(Prairies+Atlantic ~ Date, outer=TRUE, data=jobs) 

##                     Selected lattice functions 
# dotplot(factor ~ numeric,..)   # 1-dim. Display 
# stripplot(factor ~ numeric,..) # 1-dim. Display 
# barchart(character ~ numeric,..) 
# histogram( ~ numeric,..) 
# densityplot( ~ numeric,..)     # Density plot 
# bwplot(factor ~ numeric,..)    # Box and whisker plot 
# qqmath(factor ~ numeric,..)    # normal probability plots 
# splom( ~ dataframe,..)         # Scatterplot matrix  
# parallel( ~ dataframe,..)      # Parallel coordinate plots 
# cloud(numeric ~ numeric * numeric, ...)      # 3D surface 
# wireframe(numeric ~ numeric * numeric, ...)  # 3D scatterplot 

## ss 1.5.9: Good and bad graphs 
## Footnote Code
## Example of plotting with different shades of gray 
plot(1:4, 1:4, pch=16, col=c("gray20","gray40","gray60","gray80"), cex=3) 

## ss 1.5.10: Further information on graphics 

## Sec 1.6: Additional Points on the Use of R 
##                  *Workspace management strategies 
##                  Forward slashes and backslashes 
##           Setting the number of decimal places in output 
sqrt(10)

## [1] 3.162

options(digits=2)  # Change until further notice, 
sqrt(10)

## [1] 3.2

##                       Other option settings 
##                          Cosmetic issues 
##             \kern-4pt*\, Common sources of difficulty 
##                    Variable names in data sets 

## Sec 1.7: Recap 
(x <- 2)     # Equivalent to: x <- 2; print(x)

## [1] 2

## Sec 1.8: Further Reading and Study 
attach(Manitoba.lakes) 
plot(log2(area) ~ elevation, pch=16, xlim=c(170,280)) 
  # NB: Doubling the area increases log2(area) by 1.0 
text(log2(area) ~ elevation,  
     labels=row.names(Manitoba.lakes), pos=4) 
text(log2(area) ~ elevation, labels=area, pos=2) 
title("Manitoba's Largest Lakes") 
detach(Manitoba.lakes) 

rep(c(2,3,5), c(4,3,2))

## [1] 2 2 2 2 3 3 3 5 5

rep(c(2,3,5), 4:2)

## [1] 2 2 2 2 3 3 3 5 5

1000*((1+0.075)^5 - 1)  # Interest on $1000, compounded

## [1] 436

                        # annually at 7.5% p.a. for five years 

gender <- factor(c(rep("female", 91), rep("male", 92))) 
table(gender)

## gender
## female   male 
##     91     92

gender <- factor(gender, levels=c("male", "female")) 
table(gender)

## gender
##   male female 
##     92     91

gender <- factor(gender, levels=c("Male", "female"))  
                     # Note the mistake: "Male" should be "male" 
table(gender)

## gender
##   Male female 
##      0     91

table(gender, exclude=NULL)

## gender
##   Male female   <NA> 
##      0     91     92

rm(gender)           # Remove gender 

par(mfrow=c(2,2))    # 2 by 2 layout on the page 
library(MASS)        # Animals is in the MASS package

## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:DAAG':
## 
##     hills

plot(brain ~ body, data=Animals) 
plot(sqrt(brain) ~ sqrt(body), data=Animals) 
plot(I(brain^0.1) ~ I(body^0.1), data=Animals) 
  # I() forces its argument to be treated "as is" 
plot(log(brain) ~ log(body), data=Animals) 
par(mfrow=c(1,1))    # Restore to 1 figure per page 

plot(BDI ~ age, data=socsupport) 
plot(BDI ~ unclass(age), data=socsupport) 

gender1 <- with(socsupport, abbreviate(gender, 1)) 
table(gender1)       # Examine the result

## gender1
##  f  m 
## 71 24

country3 <-  with(socsupport, abbreviate(country, 3)) 
table(country3)      # Examine the result

## country3
## ast oth 
##  85  10

num <-  with(socsupport, seq(along=gender))   # Generate row numbers  
lab <- paste(gender1, country3, num, sep=":") 

x <-  c(8, 54, 534, 1630, 6611) 
seq(1, length(x))

## [1] 1 2 3 4 5

seq(along=x)

## [1] 1 2 3 4 5

head(vlt, 4)       # first few rows of vlt

##   window1 window2 window3 prize night
## 1       2       0       0     0     1
## 2       0       5       1     0     1
## 3       0       0       0     0     1
## 4       2       0       0     0     1

  # . . . 

vltcv <- stack(vlt[, 1:3]) 
head(vltcv)          # first few rows of vltcv

##   values     ind
## 1      2 window1
## 2      0 window1
## 3      0 window1
## 4      2 window1
## 5      0 window1
## 6      0 window1

table(vltcv$values, vltcv$ind)

##    
##     window1 window2 window3
##   0     141     192     200
##   1     132      23      99
##   2      30      65      11
##   3      14      46       3
##   5       7      10      14
##   6       6       1       6
##   7      15       8      12

  # More cryptically, table(vltcv) gives the same result.  

oldpar <- par(mfrow=c(2,4)) 
for (i in 2:9){ 
plot(austpop[, 1], log(austpop[, i]), xlab="Year",  
     ylab=names(austpop)[i], pch=16, ylim=c(0,10))} 
par(oldpar)

intro.R

johnm1

Wed Aug 27 21:58:54 2014