## Chapter 1: A Brief Introduction to R
## Sec 1.1: An Overview of R
## ss 1.1.1: A Short R Session
## { R} must be installed!
## Using the console (or command line) window
2+2
## [1] 4
2*3*4*5 # * denotes 'multiply'
## [1] 120
sqrt(10) # the square root of 10
## [1] 3.162
pi # R knows about pi
## [1] 3.142
2*pi*6378 # Circumference of earth at equator (km)
## [1] 40074
3*4^
2
## [1] 48
3*4^2; (3*4)^2
## [1] 48
## [1] 144
## Entry of data at the command line
Year <- c(1800, 1850, 1900, 1950, 2000)
Carbon <- c(8, 54, 534, 1630, 6611)
## Now plot Carbon as a function of Year
plot(Carbon ~ Year, pch=16)

## Collection of vectors into a data frame
fossilfuel <- data.frame(year=Year, carbon=Carbon)
fossilfuel # Display the contents of the data frame.
## year carbon
## 1 1800 8
## 2 1850 54
## 3 1900 534
## 4 1950 1630
## 5 2000 6611
rm(Year, Carbon) # The rm() function removes unwanted objects
plot(carbon ~ year, data=fossilfuel, pch=16)

## The R Commander Graphical User Interface (GUI) to R
## The working directory and the contents of the workspace
getwd()
## [1] "/Users/johnm1/r3/scripts"
ls()
## [1] "fossilfuel" "metadata"
## Quitting { R}
# q()
## Installation of packages
install.packages("DAAG")
##
## The downloaded binary packages are in
## /var/folders/00/_kpyywm16hnbs2c0dvlf0mwr0000gq/T//RtmpWtVh3X/downloaded_packages
install.packages(c("magic", "schoolmath"), dependencies=TRUE)
##
## The downloaded binary packages are in
## /var/folders/00/_kpyywm16hnbs2c0dvlf0mwr0000gq/T//RtmpWtVh3X/downloaded_packages
## ss 1.1.2: The uses of R
## { R} offers an extensive collection of functions and abilities
range(fossilfuel$carbon)
## [1] 8 6611
## 4 cities
fourcities <- c("Toronto", "Canberra", "New York", "London")
## display in alphabetical order
sort(fourcities)
## [1] "Canberra" "London" "New York" "Toronto"
## Find the number of characters in "Toronto"
nchar("Toronto")
## [1] 7
## Find the number of characters in all four city names at once
nchar(fourcities)
## [1] 7 8 8 6
## { R} will give numerical or graphical data summaries
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2
## 1st Qu.:12.0 1st Qu.: 26
## Median :15.0 Median : 36
## Mean :15.4 Mean : 43
## 3rd Qu.:19.0 3rd Qu.: 56
## Max. :25.0 Max. :120
hist(cars$speed)

## { R} is an interactive programming language
celsius <- (0:4)*10
fahrenheit <- 9/5*celsius+32
conversion <- data.frame(Celsius=celsius, Fahrenheit=fahrenheit)
print(conversion)
## Celsius Fahrenheit
## 1 0 32
## 2 10 50
## 3 20 68
## 4 30 86
## 5 40 104
## ss 1.1.3: Online Help
?plot # Equivalent to help(plot)
apropos("sort") # Try, also, apropos ("sor")
## [1] "is.unsorted" "sort" "sort.default" "sort.int"
## [5] "sort.list" "sort.POSIXlt" "sortedXyData"
# List all functions where "sort" is part of the name
help.search("sort") # Note that the argument is 'sort'
# List functions with 'sort' in the help page title or as an alias
example(image)
##
## image> require(grDevices) # for colours
##
## image> x <- y <- seq(-4*pi, 4*pi, len = 27)
##
## image> r <- sqrt(outer(x^2, y^2, "+"))
##
## image> image(z = z <- cos(r^2)*exp(-r/6), col = gray((0:32)/32))

##
## image> image(z, axes = FALSE, main = "Math can be beautiful ...",
## image+ xlab = expression(cos(r^2) * e^{-r/6}))

##
## image> contour(z, add = TRUE, drawlabels = FALSE)
##
## image> # Volcano data visualized as matrix. Need to transpose and flip
## image> # matrix horizontally.
## image> image(t(volcano)[ncol(volcano):1,])

##
## image> # A prettier display of the volcano
## image> x <- 10*(1:nrow(volcano))
##
## image> y <- 10*(1:ncol(volcano))
##
## image> image(x, y, volcano, col = terrain.colors(100), axes = FALSE)

##
## image> contour(x, y, volcano, levels = seq(90, 200, by = 5),
## image+ add = TRUE, col = "peru")
##
## image> axis(1, at = seq(100, 800, by = 100))
##
## image> axis(2, at = seq(100, 600, by = 100))
##
## image> box()
##
## image> title(main = "Maunga Whau Volcano", font.main = 4)
par(ask=FALSE) # turn off the prompts
## Wide-ranging information access and searches
## Help in finding the right package
## ss 1.1.4: Input of data from a file
library(DAAG)
## Loading required package: lattice
##
## Attaching package: 'DAAG'
##
## The following object is masked _by_ '.GlobalEnv':
##
## fossilfuel
datafile('fuel') ## Places file in working directory,
##
## Data written to file: fuel.txt
## ready for input.
fossilfuel <- read.table("fuel.txt", header=TRUE)
write.csv(fossilfuel, file='fuel.csv')
fossilfuel <- read.table("fuel.csv", header=TRUE, sep=",")
## ss 1.1.5: R Packages
library(DAAG)
sessionInfo()
## R version 3.1.1 Patched (2014-08-22 r66458)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
##
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DAAG_1.20 lattice_0.20-29
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.4 evaluate_0.5.5 formatR_0.10
## [4] grid_3.1.1 htmltools_0.2.4 knitr_1.6
## [7] latticeExtra_0.6-26 RColorBrewer_1.0-5 rmarkdown_0.2.49
## [10] stringr_0.6.2 tools_3.1.1
## Data sets that accompany { R} packages
data(package="datasets") # Specify 'package', not 'library'.
## ss 1.1.6: Further steps in learning R
## Sec 1.2: Vectors, Factors and Univariate Time Series
## ss 1.2.1: Vectors
c(2, 3, 5, 2, 7, 1)
## [1] 2 3 5 2 7 1
c(T, F, F, F, T, T, F)
## [1] TRUE FALSE FALSE FALSE TRUE TRUE FALSE
c("Canberra", "Sydney", "Canberra", "Sydney")
## [1] "Canberra" "Sydney" "Canberra" "Sydney"
## ss 1.2.2: Concatenation -- joining vector objects
x <- c(2, 3, 5, 2, 7, 1) # x then holds values 2, 3, 5, 2, 7, 1
x
## [1] 2 3 5 2 7 1
y <- c(10, 15, 12)
y
## [1] 10 15 12
z <- c(x, y)
z
## [1] 2 3 5 2 7 1 10 15 12
## ss 1.2.3: The use of relational operators to compare vector elements
x <- c(3, 11, 8, 15, 12)
x > 8
## [1] FALSE TRUE FALSE TRUE TRUE
x != 8
## [1] TRUE TRUE FALSE TRUE TRUE
## ss 1.2.4: The use of square brackets to extract subsets of vectors
x <- c(3, 11, 8, 15, 12)
x[c(2,4)] # Elements in positions 2
## [1] 11 15
x[-c(2,3)] # Remove the elements in positions 2 and 3
## [1] 3 15 12
x > 10
## [1] FALSE TRUE FALSE TRUE TRUE
x[x > 10]
## [1] 11 15 12
heights <- c(Andreas=178, John=185, Jeff=183)
heights[c("John","Jeff")]
## John Jeff
## 185 183
## ss 1.2.5: Patterned data
5:15
## [1] 5 6 7 8 9 10 11 12 13 14 15
seq(from=5, to=22, by=3) # The first value is 5. The final
## [1] 5 8 11 14 17 20
rep(c(2,3,5), 4)
## [1] 2 3 5 2 3 5 2 3 5 2 3 5
c(rep("female", 3), rep("male", 2))
## [1] "female" "female" "female" "male" "male"
## ss 1.2.6: Missing values
library(DAAG)
nbranch <- subset(rainforest, species=="Acacia mabellae")$branch
nbranch # Number of small branches (2cm or less)
## [1] NA 35 41 50 NA NA NA NA NA 4 30 13 10 17 46 92
mean(nbranch)
## [1] NA
mean(nbranch, na.rm=TRUE)
## [1] 33.8
NA == 35
## [1] NA
## Replace all NAs by -999
nbranch[is.na(nbranch)] <- -999
nbranch
## [1] -999 35 41 50 -999 -999 -999 -999 -999 4 30 13 10 17
## [15] 46 92
## There is now no protection against use of the -999 values as
## if they were legitimate numeric values
mean(nbranch)
## [1] -353.5
## ss 1.2.7: Factors
## Create character vector
gender <- c(rep("female",691), rep("male",692))
levels(gender) # For a character vector, this returns NULL
## NULL
## From character vector, create factor
gender <- factor(gender)
levels(gender)
## [1] "female" "male"
levels(gender)
## [1] "female" "male"
gender <- factor(gender, levels=c("male", "female"))
## ss 1.2.8: Time series
## Footnote Code
## Alternatively, obtain from data frame jobs (DAAG)
library(DAAG)
numjobs <- jobs$Prairies
numjobs <- c(982,981,984,982,981,983,983,983,983,979,973,979,
974,981,985,987,986,980,983,983,988,994,990,999)
numjobs <- ts(numjobs, start=1995, frequency = 12)
plot(numjobs)

first15 <- window(numjobs, start=1995.75, end=1996.25)
## Sec 1.3: Data Frames and Matrices
Cars93.summary
## Min.passengers Max.passengers No.of.cars abbrev
## Compact 4 6 16 C
## Large 6 6 11 L
## Midsize 4 6 22 M
## Small 4 5 21 Sm
## Sporty 2 4 14 Sp
## Van 7 8 9 V
Cars93.summary <- edit(Cars93.summary)
## Displaying the first few, or last few, rows of a data frame
head(Cars93.summary, n=3) # Display the first 3 rows (the default is 6)
## Min.passengers Max.passengers No.of.cars abbrev
## Compact 4 6 16 C
## Large 6 6 11 L
## Midsize 4 6 22 M
# . . .
## Column and row names
rownames(Cars93.summary) # Extract names of rows
## [1] "Compact" "Large" "Midsize" "Small" "Sporty" "Van"
colnames(Cars93.summary) # Extract column names
## [1] "Min.passengers" "Max.passengers" "No.of.cars" "abbrev"
## names(Cars93.summary)[3] <- "numCars"
## names(Cars93.summary) <- c("minPass","maxPass","numCars","code")
## Subsets of data frames
Cars93.summary[1:3, 2:3] # Rows 1-3 and columns 2-3
## Max.passengers No.of.cars
## Compact 6 16
## Large 6 11
## Midsize 6 22
Cars93.summary[, 2:3] # Columns 2-3 (all rows)
## Max.passengers No.of.cars
## Compact 6 16
## Large 6 11
## Midsize 6 22
## Small 5 21
## Sporty 4 14
## Van 8 9
Cars93.summary[, c("No.of.cars", "abbrev")] # Cols 2-3, by name
## No.of.cars abbrev
## Compact 16 C
## Large 11 L
## Midsize 22 M
## Small 21 Sm
## Sporty 14 Sp
## Van 9 V
Cars93.summary[, -c(2,3)] # omit columns 2 and 3
## Min.passengers abbrev
## Compact 4 C
## Large 6 L
## Midsize 4 M
## Small 4 Sm
## Sporty 2 Sp
## Van 7 V
subset(Cars93.summary,
subset=c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE))
## Min.passengers Max.passengers No.of.cars abbrev
## Compact 4 6 16 C
## Large 6 6 11 L
## Data frames are a specialized type of list
## Cities with more than 2.5 million inhabitants
USACanada <- list(USACities=c("NY", "LA", "Chicago"),
CanadaCities=c("Toronto", "Montreal"),
millionsPop=c(USA=305.9, Canada=31.6))
USACanada
## $USACities
## [1] "NY" "LA" "Chicago"
##
## $CanadaCities
## [1] "Toronto" "Montreal"
##
## $millionsPop
## USA Canada
## 305.9 31.6
## ss 1.3.1: Accessing the columns of data frames -- {with()} and {attach()}
## cfseal (DAAG) has data on Cape Fur seals
with(cfseal, c(mean(weight), median(weight)))
## [1] 54.79 46.25
with(pair65, # stretch of rubber bands, from DAAG
{lenchange <- heated-ambient
c(mean(lenchange), median(lenchange))
})
## [1] 6.333 6.000
## year
attach(fossilfuel) # Attach data frame fossilfuel
year
## [1] 1800 1850 1900 1950 2000
detach(fossilfuel) # Detach data frame
## ss 1.3.2: Aggregation, stacking and unstacking
chickwtAvs <- with(chickwts,
aggregate(weight, by=list(feed), mean))
names(chickwtAvs) <- c("Feed Group", "Mean Weight")
chickwtAvs
## Feed Group Mean Weight
## 1 casein 323.6
## 2 horsebean 160.2
## 3 linseed 218.8
## 4 meatmeal 276.9
## 5 soybean 246.4
## 6 sunflower 328.9
library(DAAG)
head(jobs,3)
## BC Alberta Prairies Ontario Quebec Atlantic Date
## 1 1752 1366 982 5239 3196 947 95.00
## 2 1737 1369 981 5233 3205 946 95.08
## 3 1765 1380 984 5212 3191 954 95.17
# . . .
Jobs <- stack(jobs, select = 1:6)
# stack() concatenates selected data frame columns into a
# single column named "values", & adds a factor named "ind"
# that has the names of the concatenated columns as levels.
head(Jobs,3)
## values ind
## 1 1752 BC
## 2 1737 BC
## 3 1765 BC
# . . .
## ss 1.3.3: *~Data frames and matrices
fossilfuelmat <- matrix(c(1800, 1850, 1900, 1950, 2000,
8, 54, 534, 1630, 6611), nrow=5)
colnames(fossilfuel) <- c("year", "carbon")
fossilfuelmat <- cbind(year=c(1800, 1850, 1900, 1950, 2000),
carbon=c(8, 54, 534, 1630, 6611))
## Sec 1.4: Functions, Operators and Loops
## ss 1.4.1: Common useful built-in functions
# all() # returns TRUE if all values are TRUE
# any() # returns TRUE if any values are TRUE
# args() # information on the arguments to a function
# cat() # prints multiple objects, one after the other
# cumprod() # cumulative product
# cumsum() # cumulative sum
# diff() # form vector of first differences
# # N. B. diff(x) has one less element than x
# history() # displays previous commands used
# is.factor() # returns TRUE if the argument is a factor
# is.na() # returns TRUE if the argument is an NA
# # NB also is.logical(), is.matrix(), etc.
# length() # number of elements in a vector or of a list
# ls() # list names of objects in the workspace
# mean() # mean of the elements of a vector
# median() # median of the elements of a vector
# order() # x[order(x)] sorts x (by default, NAs are last)
# print() # prints a single R object
# range() # minimum and maximum value elements of vector
# sort() # sort elements into order, by default omitting NAs
# rev() # reverse the order of vector elements
# str() # information on an R object
# unique() # form the vector of distinct values
# which() # locates 'TRUE' indices of logical vectors
# which.max() # locates (first) maximum of a numeric vector
# which.min() # locates (first) minimum of a numeric vector
# with() # do computation using columns of specified data frame
## The {print()} function
x <- 2 # Assign to x the value 2; nothing is printed
x # Equivalent to print(x)
## [1] 2
x*5 # Equivalent to print(x*5)
## [1] 10
(x <- 2) # Equivalent to: x <- 2; print(x)
## [1] 2
## Calculations in parallel across all elements of a vector
## Data summary functions -- {table()}and { {sapply()}
## table() # Form a table of counts
## xtabs() # Form a table of totals
library(DAAG) # tinting is from DAAG
table(Sex=tinting$sex, AgeGroup=tinting$agegp)
## AgeGroup
## Sex younger older
## f 63 28
## m 28 63
sapply(jobs[, -7], range)
## BC Alberta Prairies Ontario Quebec Atlantic
## [1,] 1737 1366 973 5212 3167 941
## [2,] 1840 1436 999 5360 3257 968
## Utility functions
ls(pattern="p") # List object names that include the letter "p"
## character(0)
ls(pattern="^p") # List object names that start with "p"
## character(0)
## ss 1.4.2: Generic functions, and the class of an object
## ss 1.4.3: User-written functions
mean.and.sd <- function(x){
av <- mean(x)
sdev <- sd(x)
c(mean=av, SD=sdev)
}
distance <- c(148,182,173,166,109,141,166)
mean.and.sd(distance)
## mean SD
## 155.00 24.68
mean.and.sd <- function(x = rnorm(10)){
av <- mean(x)
sdev <- sd(x)
c(mean=av, SD=sdev)
}
mean.and.sd()
## mean SD
## -0.9827 0.7479
## The structure of functions
## Footnote Code
## Thus, to return the mean, SD and name of the input vector
## replace c(mean=av, SD=sdev) by
## list(mean=av, SD=sdev, dataset = deparse(substitute(x)))
## ss 1.4.4: If statements
Carbon <- fossilfuel$carbon
if (mean(Carbon) > median(Carbon)) print("Mean > Median") else
print("Median <= Mean")
## [1] "Median <= Mean"
dist <- c(148, 182, 173, 166, 109, 141, 166)
dist.sort <- if (dist[1] < 150)
sort(dist, decreasing=TRUE) else sort(dist)
dist.sort
## [1] 182 173 166 166 148 141 109
## ss 1.4.5: Selection and matching
x <- rep(1:5, rep(3,5))
x[x %in% c(2,4)]
## [1] 2 2 2 4 4 4
match(x, c(2,4), nomatch=0)
## [1] 0 0 0 1 1 1 0 0 0 2 2 2 0 0 0
## ss 1.4.6: Functions for working with missing values
## Identification of rows that include missing values
## Which rows have missing values: data frame science (DAAG)
science[!complete.cases(science), ]
## State PrivPub school class sex like Class
## 671 ACT public 19 1 <NA> 5 19.1
## 672 ACT public 19 1 <NA> 5 19.1
dim(science)
## [1] 1385 7
Science <- na.omit(science)
dim(Science)
## [1] 1383 7
## ss 1.4.7: \kern-4pt* Looping
for (i in 1:3) print(i)
## [1] 1
## [1] 2
## [1] 3
## Relative population increase in Australian states: 1917-1997
## Data frame austpop (DAAG)
relGrowth <- numeric(8) # numeric(8) creates a numeric vector
# with 8 elements, all set equal to 0
for (j in seq(from=2, to=9)) {
relGrowth[j-1] <- (austpop[9, j]-austpop[1, j])/
austpop[1, j]}
names(relGrowth) <- names(austpop[c(-1,-10)])
# We have used names() to name the elements of relGrowth
relGrowth # Output is with options(digits=3)
## NSW Vic Qld SA WA Tas NT ACT
## 2.295 2.268 3.980 2.364 4.876 1.456 36.400 102.333
## Sec 1.5: Graphics in R
demo(graphics)
##
##
## demo(graphics)
## ---- ~~~~~~~~
##
## > # Copyright (C) 1997-2009 The R Core Team
## >
## > require(datasets)
##
## > require(grDevices); require(graphics)
##
## > ## Here is some code which illustrates some of the differences between
## > ## R and S graphics capabilities. Note that colors are generally specified
## > ## by a character string name (taken from the X11 rgb.txt file) and that line
## > ## textures are given similarly. The parameter "bg" sets the background
## > ## parameter for the plot and there is also an "fg" parameter which sets
## > ## the foreground color.
## >
## >
## > x <- stats::rnorm(50)
##
## > opar <- par(bg = "white")
##
## > plot(x, ann = FALSE, type = "n")

##
## > abline(h = 0, col = gray(.90))
##
## > lines(x, col = "green4", lty = "dotted")
##
## > points(x, bg = "limegreen", pch = 21)
##
## > title(main = "Simple Use of Color In a Plot",
## + xlab = "Just a Whisper of a Label",
## + col.main = "blue", col.lab = gray(.8),
## + cex.main = 1.2, cex.lab = 1.0, font.main = 4, font.lab = 3)
##
## > ## A little color wheel. This code just plots equally spaced hues in
## > ## a pie chart. If you have a cheap SVGA monitor (like me) you will
## > ## probably find that numerically equispaced does not mean visually
## > ## equispaced. On my display at home, these colors tend to cluster at
## > ## the RGB primaries. On the other hand on the SGI Indy at work the
## > ## effect is near perfect.
## >
## > par(bg = "gray")
##
## > pie(rep(1,24), col = rainbow(24), radius = 0.9)

##
## > title(main = "A Sample Color Wheel", cex.main = 1.4, font.main = 3)
##
## > title(xlab = "(Use this as a test of monitor linearity)",
## + cex.lab = 0.8, font.lab = 3)
##
## > ## We have already confessed to having these. This is just showing off X11
## > ## color names (and the example (from the postscript manual) is pretty "cute".
## >
## > pie.sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12)
##
## > names(pie.sales) <- c("Blueberry", "Cherry",
## + "Apple", "Boston Cream", "Other", "Vanilla Cream")
##
## > pie(pie.sales,
## + col = c("purple","violetred1","green3","cornsilk","cyan","white"))

##
## > title(main = "January Pie Sales", cex.main = 1.8, font.main = 1)
##
## > title(xlab = "(Don't try this at home kids)", cex.lab = 0.8, font.lab = 3)
##
## > ## Boxplots: I couldn't resist the capability for filling the "box".
## > ## The use of color seems like a useful addition, it focuses attention
## > ## on the central bulk of the data.
## >
## > par(bg="cornsilk")
##
## > n <- 10
##
## > g <- gl(n, 100, n*100)
##
## > x <- rnorm(n*100) + sqrt(as.numeric(g))
##
## > boxplot(split(x,g), col="lavender", notch=TRUE)

##
## > title(main="Notched Boxplots", xlab="Group", font.main=4, font.lab=1)
##
## > ## An example showing how to fill between curves.
## >
## > par(bg="white")
##
## > n <- 100
##
## > x <- c(0,cumsum(rnorm(n)))
##
## > y <- c(0,cumsum(rnorm(n)))
##
## > xx <- c(0:n, n:0)
##
## > yy <- c(x, rev(y))
##
## > plot(xx, yy, type="n", xlab="Time", ylab="Distance")

##
## > polygon(xx, yy, col="gray")
##
## > title("Distance Between Brownian Motions")
##
## > ## Colored plot margins, axis labels and titles. You do need to be
## > ## careful with these kinds of effects. It's easy to go completely
## > ## over the top and you can end up with your lunch all over the keyboard.
## > ## On the other hand, my market research clients love it.
## >
## > x <- c(0.00, 0.40, 0.86, 0.85, 0.69, 0.48, 0.54, 1.09, 1.11, 1.73, 2.05, 2.02)
##
## > par(bg="lightgray")
##
## > plot(x, type="n", axes=FALSE, ann=FALSE)

##
## > usr <- par("usr")
##
## > rect(usr[1], usr[3], usr[2], usr[4], col="cornsilk", border="black")
##
## > lines(x, col="blue")
##
## > points(x, pch=21, bg="lightcyan", cex=1.25)
##
## > axis(2, col.axis="blue", las=1)
##
## > axis(1, at=1:12, lab=month.abb, col.axis="blue")
##
## > box()
##
## > title(main= "The Level of Interest in R", font.main=4, col.main="red")
##
## > title(xlab= "1996", col.lab="red")
##
## > ## A filled histogram, showing how to change the font used for the
## > ## main title without changing the other annotation.
## >
## > par(bg="cornsilk")
##
## > x <- rnorm(1000)
##
## > hist(x, xlim=range(-4, 4, x), col="lavender", main="")

##
## > title(main="1000 Normal Random Variates", font.main=3)
##
## > ## A scatterplot matrix
## > ## The good old Iris data (yet again)
## >
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", font.main=4, pch=19)

##
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", pch=21,
## + bg = c("red", "green3", "blue")[unclass(iris$Species)])

##
## > ## Contour plotting
## > ## This produces a topographic map of one of Auckland's many volcanic "peaks".
## >
## > x <- 10*1:nrow(volcano)
##
## > y <- 10*1:ncol(volcano)
##
## > lev <- pretty(range(volcano), 10)
##
## > par(bg = "lightcyan")
##
## > pin <- par("pin")
##
## > xdelta <- diff(range(x))
##
## > ydelta <- diff(range(y))
##
## > xscale <- pin[1]/xdelta
##
## > yscale <- pin[2]/ydelta
##
## > scale <- min(xscale, yscale)
##
## > xadd <- 0.5*(pin[1]/scale - xdelta)
##
## > yadd <- 0.5*(pin[2]/scale - ydelta)
##
## > plot(numeric(0), numeric(0),
## + xlim = range(x)+c(-1,1)*xadd, ylim = range(y)+c(-1,1)*yadd,
## + type = "n", ann = FALSE)

##
## > usr <- par("usr")
##
## > rect(usr[1], usr[3], usr[2], usr[4], col="green3")
##
## > contour(x, y, volcano, levels = lev, col="yellow", lty="solid", add=TRUE)
##
## > box()
##
## > title("A Topographic Map of Maunga Whau", font= 4)
##
## > title(xlab = "Meters North", ylab = "Meters West", font= 3)
##
## > mtext("10 Meter Contour Spacing", side=3, line=0.35, outer=FALSE,
## + at = mean(par("usr")[1:2]), cex=0.7, font=3)
##
## > ## Conditioning plots
## >
## > par(bg="cornsilk")
##
## > coplot(lat ~ long | depth, data = quakes, pch = 21, bg = "green3")

##
## > par(opar)
## ss 1.5.1: The function {plot( )} and allied functions
plot(Brainwt ~ Bodywt, data=primates) # plot(y ~ x) syntax

with(primates, plot(Bodywt, Brainwt)) # plot(x, y) syntax

## Adding points, lines, text and axis annotation
## Place labels on points
plot(Brainwt ~ Bodywt, xlim=c(0, 300), data=primates)
# Specify xlim so that there is room for the labels
with(primates,
text(Brainwt ~ Bodywt, labels=row.names(primates), pos=4))

# pos=4 places text to the right of the points. Other
# possibilities are: 1: below; 2: to the left; 3: above
## Footnote Code
## Plot Brainwt vs Bodywt, primates data frame
plot(Brainwt ~ Bodywt, xlim=c(0, 300), ylim=c(0,1500), data=primates)
yoff <- c(-.125,0,0,.125,0)*par()$cxy[2]
with(primates, text(x=Bodywt, y=Brainwt+yoff, labels=row.names(primates), pos=4))

## Fine control -- parameter settings
oldpar <- par(cex=1.25)
# Use par(oldpar) to restore previous settings
## ss 1.5.2: The use of color
theta <- (1:50)*0.92
plot(theta, sin(theta), col=1:50, pch=16, cex=4)
points(theta, cos(theta), col=51:100, pch=15, cex=4)

palette() # Names of the colors in the current palette
## [1] "black" "red" "green3" "blue" "cyan" "magenta" "yellow"
## [8] "gray"
plot(theta, sin(theta), col=colors()[1:50], pch=16, cex=4)
points(theta, cos(theta), col=colors()[51:100], pch=15, cex=4)

## ss 1.5.3: The importance of aspect ratio
## Plot sin(theta) vs theta, at regularly spaced values of theta
## sin() expects angles to be in radians
# multiply angles in degrees by pi/180 to get radians
plot((0:20)*pi/10, sin((0:20)*pi/10))

plot((1:50)*0.92, sin((1:50)*0.92))

par(mfrow=c(3,1)) # Gives a 3 by 1 layout of plots
plot((1:50)*0.92, sin((1:50)*0.92))
par(mfrow=c(1,1))
## ss 1.5.4: Dimensions and other settings for graphics devices
## ss 1.5.5: The plotting of expressions and mathematical symbols
symbols(x=1.5, y=0, circles=1.2, xlim=c(0,3), ylim=c(-1.5,1.5),
bg="gray", inches=FALSE)
# inches=FALSE ensures that radius is in x-axis units
text(1.5, 0.5, expression("Area" == pi*phantom("'")*italic(r)^2))
# Use '==' to insert '='.
# Text or symbols that appear either side of '*' are juxtaposed.
# Notice the use of phantom("'") to insert a small space.
## Footnote Code
## To add the double-headed arrow and associated label, specify:
arrows(1.5, 0, 2.7, 0, length=.1, code=3) # code=3: arrows at both ends
# length is the length of the arrow head (in inches!)
text(2.1, -strheight("R"), expression(italic(r) == 1.2))

## ss 1.5.6: Identification and location on the figure region
plot(Brainwt ~ Bodywt, data=primates)
with(primates,
identify(Brainwt ~ Bodywt, labels=row.names(primates), n=2))

## integer(0)
# Now click near 2 plotted points
## ss 1.5.7: Plot methods for objects other than vectors
## Use plot() with data frame trees (datasets)
plot(trees) # Gives a 3 x 3 layout of pairwise

# scatterplots among the three variables
## ss 1.5.8: Lattice (trellis) Graphics
## Lattice graphics versus base graphics -- {xyplot()} versus {plot()}
## Plot Brainwt vs Bodywt, data frame primates (DAAG)
plot(Brainwt ~ Bodywt, data=primates) # base graphics

# 'base' graphics use the abilities of the graphics package
library(lattice)
xyplot(Brainwt ~ Bodywt, data=primates) # lattice

invisible(plot(Brainwt ~ Bodywt, data=primates)) # Graph appears

invisible(xyplot(Brainwt ~ Bodywt, data=primates)) # No graph
print(xyplot(ACT ~ year, data=austpop))

## Panels of scatterplots -- the use of xyplot()
trellis.device(color=FALSE)
xyplot(ht ~ wt | sport, groups=sex, pch=c(4,1), aspect=1, data=ais,
auto.key=list(columns=2), subset=sport%in%c("Row","Swim"))
dev.off() # Close device
## pdf
## 2
trellis.device() # Start new device, by default with color=TRUE
## Plotting columns in parallel
xyplot(Prairies+Atlantic ~ Date, outer=TRUE, data=jobs)
## Selected lattice functions
# dotplot(factor ~ numeric,..) # 1-dim. Display
# stripplot(factor ~ numeric,..) # 1-dim. Display
# barchart(character ~ numeric,..)
# histogram( ~ numeric,..)
# densityplot( ~ numeric,..) # Density plot
# bwplot(factor ~ numeric,..) # Box and whisker plot
# qqmath(factor ~ numeric,..) # normal probability plots
# splom( ~ dataframe,..) # Scatterplot matrix
# parallel( ~ dataframe,..) # Parallel coordinate plots
# cloud(numeric ~ numeric * numeric, ...) # 3D surface
# wireframe(numeric ~ numeric * numeric, ...) # 3D scatterplot
## ss 1.5.9: Good and bad graphs
## Footnote Code
## Example of plotting with different shades of gray
plot(1:4, 1:4, pch=16, col=c("gray20","gray40","gray60","gray80"), cex=3)
## ss 1.5.10: Further information on graphics
## Sec 1.6: Additional Points on the Use of R
## *Workspace management strategies
## Forward slashes and backslashes
## Setting the number of decimal places in output
sqrt(10)
## [1] 3.162
options(digits=2) # Change until further notice,
sqrt(10)
## [1] 3.2
## Other option settings
## Cosmetic issues
## \kern-4pt*\, Common sources of difficulty
## Variable names in data sets
## Sec 1.7: Recap
(x <- 2) # Equivalent to: x <- 2; print(x)
## [1] 2
## Sec 1.8: Further Reading and Study
attach(Manitoba.lakes)
plot(log2(area) ~ elevation, pch=16, xlim=c(170,280))
# NB: Doubling the area increases log2(area) by 1.0
text(log2(area) ~ elevation,
labels=row.names(Manitoba.lakes), pos=4)
text(log2(area) ~ elevation, labels=area, pos=2)
title("Manitoba's Largest Lakes")
detach(Manitoba.lakes)
rep(c(2,3,5), c(4,3,2))
## [1] 2 2 2 2 3 3 3 5 5
rep(c(2,3,5), 4:2)
## [1] 2 2 2 2 3 3 3 5 5
1000*((1+0.075)^5 - 1) # Interest on $1000, compounded
## [1] 436
# annually at 7.5% p.a. for five years
gender <- factor(c(rep("female", 91), rep("male", 92)))
table(gender)
## gender
## female male
## 91 92
gender <- factor(gender, levels=c("male", "female"))
table(gender)
## gender
## male female
## 92 91
gender <- factor(gender, levels=c("Male", "female"))
# Note the mistake: "Male" should be "male"
table(gender)
## gender
## Male female
## 0 91
table(gender, exclude=NULL)
## gender
## Male female <NA>
## 0 91 92
rm(gender) # Remove gender
par(mfrow=c(2,2)) # 2 by 2 layout on the page
library(MASS) # Animals is in the MASS package
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:DAAG':
##
## hills
plot(brain ~ body, data=Animals)
plot(sqrt(brain) ~ sqrt(body), data=Animals)
plot(I(brain^0.1) ~ I(body^0.1), data=Animals)
# I() forces its argument to be treated "as is"
plot(log(brain) ~ log(body), data=Animals)
par(mfrow=c(1,1)) # Restore to 1 figure per page
plot(BDI ~ age, data=socsupport)
plot(BDI ~ unclass(age), data=socsupport)
gender1 <- with(socsupport, abbreviate(gender, 1))
table(gender1) # Examine the result
## gender1
## f m
## 71 24
country3 <- with(socsupport, abbreviate(country, 3))
table(country3) # Examine the result
## country3
## ast oth
## 85 10
num <- with(socsupport, seq(along=gender)) # Generate row numbers
lab <- paste(gender1, country3, num, sep=":")
x <- c(8, 54, 534, 1630, 6611)
seq(1, length(x))
## [1] 1 2 3 4 5
seq(along=x)
## [1] 1 2 3 4 5
head(vlt, 4) # first few rows of vlt
## window1 window2 window3 prize night
## 1 2 0 0 0 1
## 2 0 5 1 0 1
## 3 0 0 0 0 1
## 4 2 0 0 0 1
# . . .
vltcv <- stack(vlt[, 1:3])
head(vltcv) # first few rows of vltcv
## values ind
## 1 2 window1
## 2 0 window1
## 3 0 window1
## 4 2 window1
## 5 0 window1
## 6 0 window1
table(vltcv$values, vltcv$ind)
##
## window1 window2 window3
## 0 141 192 200
## 1 132 23 99
## 2 30 65 11
## 3 14 46 3
## 5 7 10 14
## 6 6 1 6
## 7 15 8 12
# More cryptically, table(vltcv) gives the same result.
oldpar <- par(mfrow=c(2,4))
for (i in 2:9){
plot(austpop[, 1], log(austpop[, i]), xlab="Year",
ylab=names(austpop)[i], pch=16, ylim=c(0,10))}
par(oldpar)