## Chapter 1: A Brief Introduction to R 

## Sec 1.1: An Overview of R 
## ss 1.1.1: A Short R Session 
##                      { R} must be installed! 
##             Using the console (or command line) window 
2+2 
 

2*3*4*5          # * denotes 'multiply' 
sqrt(10)         # the square root of 10 
pi               # R knows about pi 
2*pi*6378        # Circumference of earth at equator (km) 

3*4^ 
2 

3*4^2; (3*4)^2 

##                 Entry of data at the command line 
Year <- c(1800, 1850, 1900, 1950, 2000) 
Carbon <- c(8, 54, 534, 1630, 6611) 
## Now plot Carbon as a function of Year 
plot(Carbon ~ Year, pch=16) 

##              Collection of vectors into a data frame 
fossilfuel <- data.frame(year=Year, carbon=Carbon) 
fossilfuel         # Display the contents of the data frame. 

rm(Year, Carbon)     # The rm() function removes unwanted objects 

plot(carbon ~ year, data=fossilfuel, pch=16) 

##        The R Commander Graphical User Interface (GUI) to R 
##      The working directory and the contents of the workspace 
getwd() 

ls() 

##                           Quitting { R} 
# q() 

##                      Installation of packages 
install.packages("DAAG") 
install.packages(c("magic", "schoolmath"), dependencies=TRUE) 

## ss 1.1.2: The uses of R 
##   { R} offers an extensive collection of functions and abilities 
range(fossilfuel$carbon) 

## 4 cities 
fourcities <- c("Toronto", "Canberra", "New York", "London") 
## display in alphabetical order 
sort(fourcities) 
## Find the number of characters in "Toronto" 
nchar("Toronto") 
 
## Find the number of characters in all four city names at once 
nchar(fourcities) 

##        { R} will give numerical or graphical data summaries 
summary(cars) 

hist(cars$speed) 

##            { R} is an interactive programming language 
celsius <- (0:4)*10 
fahrenheit <- 9/5*celsius+32 
conversion <- data.frame(Celsius=celsius, Fahrenheit=fahrenheit) 
print(conversion) 

## ss 1.1.3: Online Help 
?plot                # Equivalent to help(plot) 

apropos("sort")        # Try, also, apropos ("sor") 
 # List all functions where "sort" is part of the name  
help.search("sort")    # Note that the argument is 'sort' 
 # List functions with 'sort' in the help page title or as an alias 

example(image) 
par(ask=FALSE)       # turn off the prompts  

##            Wide-ranging information access and searches 
##                 Help in finding the right package 
## ss 1.1.4: Input of data from a file 
library(DAAG)
datafile('fuel')   ## Places file in working directory,
                   ## ready for input.
fossilfuel <- read.table("fuel.txt", header=TRUE) 

write.csv(fossilfuel, file='fuel.csv')
fossilfuel <- read.table("fuel.csv", header=TRUE, sep=",") 

## ss 1.1.5: R Packages 
library(DAAG) 
sessionInfo() 

##               Data sets that accompany { R} packages 
data(package="datasets")  # Specify 'package', not 'library'. 

## ss 1.1.6: Further steps in learning R 

## Sec 1.2: Vectors, Factors and Univariate Time Series 
## ss 1.2.1: Vectors 
c(2, 3, 5, 2, 7, 1) 
c(T, F, F, F, T, T, F) 
c("Canberra", "Sydney", "Canberra", "Sydney") 

## ss 1.2.2: Concatenation -- joining vector objects 
x <- c(2, 3, 5, 2, 7, 1)  # x then holds values 2, 3, 5, 2, 7, 1 
x 
y <- c(10, 15, 12) 
y 
z <- c(x, y) 
z 

## ss 1.2.3: The use of relational operators to compare vector elements 
x <- c(3, 11, 8, 15, 12) 
x > 8 
x != 8 

## ss 1.2.4: The use of square brackets to extract subsets of vectors 
x <- c(3, 11, 8, 15, 12)  
x[c(2,4)]                 # Elements in positions 2  

x[-c(2,3)]         # Remove the elements in positions 2 and 3 

x > 10 
x[x > 10] 

heights <- c(Andreas=178, John=185, Jeff=183) 
heights[c("John","Jeff")] 

## ss 1.2.5: Patterned data 
5:15 

seq(from=5, to=22, by=3)  # The first value is 5.  The final 

rep(c(2,3,5), 4) 

c(rep("female", 3), rep("male", 2)) 

## ss 1.2.6: Missing values 
library(DAAG) 
nbranch <- subset(rainforest, species=="Acacia mabellae")$branch 
nbranch            # Number of small branches (2cm or less) 

mean(nbranch) 
mean(nbranch, na.rm=TRUE) 

NA == 35 

## Replace all NAs by -999  
nbranch[is.na(nbranch)] <- -999 
nbranch 
## There is now no protection against use of the -999 values as 
## if they were legitimate numeric values 
mean(nbranch) 

## ss 1.2.7: Factors 
## Create character vector 
gender <- c(rep("female",691), rep("male",692)) 
levels(gender)     # For a character vector, this returns NULL 
## From character vector, create factor 
gender <- factor(gender) 
levels(gender) 

levels(gender) 

gender <- factor(gender, levels=c("male", "female")) 

## ss 1.2.8: Time series 
## Footnote Code
## Alternatively, obtain from data frame jobs (DAAG) 
library(DAAG) 
numjobs <- jobs$Prairies 

numjobs <- c(982,981,984,982,981,983,983,983,983,979,973,979, 
             974,981,985,987,986,980,983,983,988,994,990,999) 

numjobs <- ts(numjobs, start=1995, frequency = 12) 
plot(numjobs) 

first15 <- window(numjobs, start=1995.75, end=1996.25) 


## Sec 1.3: Data Frames and Matrices 
Cars93.summary 

Cars93.summary <- edit(Cars93.summary) 

##    Displaying the first few, or last few, rows of a data frame 
head(Cars93.summary, n=3)  # Display the first 3 rows (the default is 6) 
 # . . . 

##                        Column and row names 
rownames(Cars93.summary)   # Extract names of rows       
colnames(Cars93.summary)   # Extract column names 

## names(Cars93.summary)[3] <- "numCars" 
## names(Cars93.summary) <- c("minPass","maxPass","numCars","code") 

##                       Subsets of data frames 
Cars93.summary[1:3, 2:3]   # Rows 1-3 and columns 2-3 
Cars93.summary[, 2:3]      # Columns 2-3 (all rows) 
Cars93.summary[, c("No.of.cars", "abbrev")]   # Cols 2-3, by name 
Cars93.summary[, -c(2,3)]  # omit columns 2 and 3 

subset(Cars93.summary,  
       subset=c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE)) 

##             Data frames are a specialized type of list 
## Cities with more than 2.5 million inhabitants 
USACanada <- list(USACities=c("NY", "LA", "Chicago"),  
                  CanadaCities=c("Toronto", "Montreal"), 
                  millionsPop=c(USA=305.9, Canada=31.6)) 
 
USACanada 

## ss 1.3.1: Accessing the columns of data frames -- {with()} and {attach()} 
## cfseal (DAAG) has data on Cape Fur seals 
with(cfseal, c(mean(weight), median(weight))) 

with(pair65,       # stretch of rubber bands, from DAAG 
  {lenchange <- heated-ambient 
   c(mean(lenchange), median(lenchange)) 
}) 

## year 
attach(fossilfuel)  # Attach data frame fossilfuel 
year 
detach(fossilfuel)  # Detach data frame 

## ss 1.3.2: Aggregation, stacking and unstacking 
chickwtAvs <- with(chickwts,  
                   aggregate(weight, by=list(feed), mean)) 
names(chickwtAvs) <- c("Feed Group", "Mean Weight") 
chickwtAvs  

library(DAAG) 
head(jobs,3) 
  # . . . 
Jobs <- stack(jobs, select = 1:6)  
  # stack() concatenates selected data frame columns into a 
  # single column named "values", & adds a factor named "ind" 
  # that has the names of the concatenated columns as levels. 
head(Jobs,3) 
  # . . . 

## ss 1.3.3: *~Data frames and matrices 
fossilfuelmat <- matrix(c(1800, 1850, 1900, 1950, 2000,  
                          8, 54, 534, 1630, 6611), nrow=5) 
colnames(fossilfuel) <- c("year", "carbon") 

fossilfuelmat <- cbind(year=c(1800, 1850, 1900, 1950, 2000), 
                       carbon=c(8, 54, 534, 1630, 6611)) 


## Sec 1.4: Functions, Operators and Loops 
## ss 1.4.1: Common useful built-in functions 
# all()       # returns TRUE if all values are TRUE 
# any()       # returns TRUE if any values are TRUE 
# args()      # information on the arguments to a function 
# cat()       # prints multiple objects, one after the other 
# cumprod()   # cumulative product 
# cumsum()    # cumulative sum 
# diff()      # form vector of first differences  
#             # N. B. diff(x) has one less element than x 
# history()   # displays previous commands used 
# is.factor() # returns TRUE if the argument is a factor 
# is.na()     # returns TRUE if the argument is an NA 
#             # NB also is.logical(), is.matrix(), etc. 
# length()    # number of elements in a vector or of a list 
# ls()        # list names of objects in the workspace 
# mean()      # mean of the elements of a vector 
# median()    # median of the elements of a vector 
# order()     # x[order(x)] sorts x (by default, NAs are last) 
# print()     # prints a single R object 
# range()     # minimum and maximum value elements of vector 
# sort()      # sort elements into order, by default omitting NAs 
# rev()       # reverse the order of vector elements 
# str()       # information on an R object 
# unique()    # form the vector of distinct values 
# which()     # locates 'TRUE' indices of logical vectors  
# which.max() # locates (first) maximum of a numeric vector 
# which.min() # locates (first) minimum of a numeric vector 
# with()      # do computation using columns of specified data frame 

##                       The {print()} function 
x <- 2    # Assign to x the value 2; nothing is printed 
x         # Equivalent to print(x) 
x*5       # Equivalent to print(x*5) 

(x <- 2)  # Equivalent to: x <- 2; print(x) 

##      Calculations in parallel across all elements of a vector 
##        Data summary functions -- {table()}and { {sapply()} 
## table()              # Form a table of counts 
## xtabs()              # Form a table of totals 

library(DAAG)      # tinting is from DAAG 
table(Sex=tinting$sex, AgeGroup=tinting$agegp) 

sapply(jobs[, -7], range) 

##                         Utility functions 
ls(pattern="p")      # List object names that include the letter "p" 
ls(pattern="^p")     # List object names that start with "p" 

## ss 1.4.2: Generic functions, and the class of an object 
## ss 1.4.3: User-written functions 
mean.and.sd <- function(x){ 
    av <- mean(x) 
    sdev <- sd(x) 
    c(mean=av, SD=sdev) 
    } 

distance <- c(148,182,173,166,109,141,166) 
mean.and.sd(distance) 

mean.and.sd <- function(x = rnorm(10)){ 
    av <- mean(x) 
    sdev <- sd(x) 
    c(mean=av, SD=sdev) 
    } 

mean.and.sd() 

##                     The structure of functions 
## Footnote Code
## Thus, to return the mean, SD and name of the input vector 
## replace c(mean=av, SD=sdev) by 
## list(mean=av, SD=sdev, dataset = deparse(substitute(x))) 

## ss 1.4.4: If statements 
Carbon <- fossilfuel$carbon 
if (mean(Carbon) > median(Carbon)) print("Mean > Median") else  
    print("Median <= Mean") 

dist <- c(148, 182, 173, 166, 109, 141, 166) 
dist.sort <- if (dist[1] < 150) 
                sort(dist, decreasing=TRUE) else sort(dist) 
dist.sort 

## ss 1.4.5: Selection and matching 
x <- rep(1:5, rep(3,5)) 
x[x %in% c(2,4)] 

match(x, c(2,4), nomatch=0) 

## ss 1.4.6: Functions for working with missing values 
##         Identification of rows that include missing values 
## Which rows have missing values: data frame science (DAAG) 
science[!complete.cases(science), ]   

dim(science) 
Science <- na.omit(science) 
dim(Science) 

## ss 1.4.7: \kern-4pt* Looping 
for (i in 1:3) print(i) 

## Relative population increase in Australian states: 1917-1997 
## Data frame austpop (DAAG) 
relGrowth <- numeric(8)   # numeric(8) creates a numeric vector 
                          # with 8 elements, all set equal to 0 
for (j in seq(from=2, to=9)) { 
    relGrowth[j-1] <- (austpop[9, j]-austpop[1, j])/ 
    austpop[1, j]} 
names(relGrowth) <- names(austpop[c(-1,-10)]) 
  # We have used names() to name the elements of relGrowth 
relGrowth          # Output is with options(digits=3) 


## Sec 1.5: Graphics in R 
demo(graphics) 

## ss 1.5.1: The function {plot( )} and allied functions 
plot(Brainwt ~ Bodywt, data=primates)  # plot(y ~ x) syntax  

with(primates, plot(Bodywt, Brainwt))    # plot(x, y) syntax  

##           Adding points, lines, text and axis annotation 
## Place labels on points 
plot(Brainwt ~ Bodywt, xlim=c(0, 300), data=primates) 
  # Specify xlim so that there is room for the labels 
with(primates,  
     text(Brainwt ~ Bodywt, labels=row.names(primates), pos=4)) 
  # pos=4 places text to the right of the points. Other 
  # possibilities are: 1: below; 2: to the left; 3: above 

## Footnote Code
## Plot Brainwt vs Bodywt, primates data frame 
plot(Brainwt ~ Bodywt, xlim=c(0, 300), ylim=c(0,1500), data=primates) 
yoff <- c(-.125,0,0,.125,0)*par()$cxy[2] 
with(primates, text(x=Bodywt, y=Brainwt+yoff, labels=row.names(primates), pos=4)) 

##                 Fine control -- parameter settings 
oldpar <- par(cex=1.25)    
# Use par(oldpar) to restore previous settings 

## ss 1.5.2: The use of color 
theta <- (1:50)*0.92 
plot(theta, sin(theta), col=1:50, pch=16, cex=4) 
points(theta, cos(theta), col=51:100, pch=15, cex=4) 
palette()            # Names of the colors in the current palette 

plot(theta, sin(theta), col=colors()[1:50], pch=16, cex=4) 
points(theta, cos(theta), col=colors()[51:100], pch=15, cex=4) 

## ss 1.5.3: The importance of aspect ratio 
## Plot sin(theta) vs theta, at regularly spaced values of theta 
## sin() expects angles to be in radians 
 # multiply angles in degrees by pi/180 to get radians 
plot((0:20)*pi/10, sin((0:20)*pi/10)) 
plot((1:50)*0.92, sin((1:50)*0.92)) 

par(mfrow=c(3,1))    # Gives a 3 by 1 layout of plots 
plot((1:50)*0.92, sin((1:50)*0.92)) 
par(mfrow=c(1,1)) 

## ss 1.5.4: Dimensions and other settings for graphics devices 
## ss 1.5.5: The plotting of expressions and mathematical symbols 
symbols(x=1.5, y=0, circles=1.2, xlim=c(0,3), ylim=c(-1.5,1.5), 
        bg="gray", inches=FALSE)  
  # inches=FALSE ensures that radius is in x-axis units 
text(1.5, 0.5, expression("Area" == pi*phantom("'")*italic(r)^2)) 
  # Use '==' to insert '='. 
  # Text or symbols that appear either side of '*' are juxtaposed. 
  # Notice the use of phantom("'") to insert a small space. 

## Footnote Code
## To add the double-headed arrow and associated label, specify: 
arrows(1.5, 0, 2.7, 0, length=.1, code=3)   # code=3: arrows at both ends 
  # length is the length of the arrow head (in inches!) 
text(2.1, -strheight("R"), expression(italic(r) == 1.2)) 

## ss 1.5.6: Identification and location on the figure region 
plot(Brainwt ~ Bodywt, data=primates) 
with(primates, 
     identify(Brainwt ~ Bodywt, labels=row.names(primates), n=2)) 
  # Now click near 2 plotted points 

## ss 1.5.7: Plot methods for objects other than vectors 
## Use plot() with data frame trees (datasets) 
plot(trees)          # Gives a 3 x 3 layout of pairwise 
                     # scatterplots among the three variables 

## ss 1.5.8: Lattice (trellis) Graphics 
##  Lattice graphics versus base graphics -- {xyplot()} versus {plot()} 
## Plot Brainwt vs Bodywt, data frame primates (DAAG) 
plot(Brainwt ~ Bodywt, data=primates)       # base graphics 
 # 'base' graphics use the abilities of the graphics package 
library(lattice) 
xyplot(Brainwt ~ Bodywt, data=primates)     # lattice 

invisible(plot(Brainwt ~ Bodywt, data=primates))     # Graph appears 
invisible(xyplot(Brainwt ~ Bodywt, data=primates))   # No graph 

print(xyplot(ACT ~ year, data=austpop)) 

##           Panels of scatterplots -- the use of xyplot() 
trellis.device(color=FALSE) 
xyplot(ht ~ wt | sport, groups=sex, pch=c(4,1), aspect=1, data=ais, 
       auto.key=list(columns=2), subset=sport%in%c("Row","Swim")) 
dev.off()            # Close device 
trellis.device()     # Start new device, by default with color=TRUE 

##                    Plotting columns in parallel 
xyplot(Prairies+Atlantic ~ Date, outer=TRUE, data=jobs) 

##                     Selected lattice functions 
# dotplot(factor ~ numeric,..)   # 1-dim. Display 
# stripplot(factor ~ numeric,..) # 1-dim. Display 
# barchart(character ~ numeric,..) 
# histogram( ~ numeric,..) 
# densityplot( ~ numeric,..)     # Density plot 
# bwplot(factor ~ numeric,..)    # Box and whisker plot 
# qqmath(factor ~ numeric,..)    # normal probability plots 
# splom( ~ dataframe,..)         # Scatterplot matrix  
# parallel( ~ dataframe,..)      # Parallel coordinate plots 
# cloud(numeric ~ numeric * numeric, ...)      # 3D surface 
# wireframe(numeric ~ numeric * numeric, ...)  # 3D scatterplot 

## ss 1.5.9: Good and bad graphs 
## Footnote Code
## Example of plotting with different shades of gray 
plot(1:4, 1:4, pch=16, col=c("gray20","gray40","gray60","gray80"), cex=3) 

## ss 1.5.10: Further information on graphics 

## Sec 1.6: Additional Points on the Use of R 
##                  *Workspace management strategies 
##                  Forward slashes and backslashes 
##           Setting the number of decimal places in output 
sqrt(10) 
options(digits=2)  # Change until further notice, 
sqrt(10) 

##                       Other option settings 
##                          Cosmetic issues 
##             \kern-4pt*\, Common sources of difficulty 
##                    Variable names in data sets 

## Sec 1.7: Recap 
(x <- 2)     # Equivalent to: x <- 2; print(x) 


## Sec 1.8: Further Reading and Study 
attach(Manitoba.lakes) 
plot(log2(area) ~ elevation, pch=16, xlim=c(170,280)) 
  # NB: Doubling the area increases log2(area) by 1.0 
text(log2(area) ~ elevation,  
     labels=row.names(Manitoba.lakes), pos=4) 
text(log2(area) ~ elevation, labels=area, pos=2) 
title("Manitoba's Largest Lakes") 
detach(Manitoba.lakes) 

rep(c(2,3,5), c(4,3,2)) 

rep(c(2,3,5), 4:2) 

1000*((1+0.075)^5 - 1)  # Interest on $1000, compounded 
                        # annually at 7.5% p.a. for five years 

gender <- factor(c(rep("female", 91), rep("male", 92))) 
table(gender) 
gender <- factor(gender, levels=c("male", "female")) 
table(gender) 
gender <- factor(gender, levels=c("Male", "female"))  
                     # Note the mistake: "Male" should be "male" 
table(gender) 
table(gender, exclude=NULL) 
rm(gender)           # Remove gender 

par(mfrow=c(2,2))    # 2 by 2 layout on the page 
library(MASS)        # Animals is in the MASS package 
plot(brain ~ body, data=Animals) 
plot(sqrt(brain) ~ sqrt(body), data=Animals) 
plot(I(brain^0.1) ~ I(body^0.1), data=Animals) 
  # I() forces its argument to be treated "as is" 
plot(log(brain) ~ log(body), data=Animals) 
par(mfrow=c(1,1))    # Restore to 1 figure per page 

plot(BDI ~ age, data=socsupport) 
plot(BDI ~ unclass(age), data=socsupport) 

gender1 <- with(socsupport, abbreviate(gender, 1)) 
table(gender1)       # Examine the result  
country3 <-  with(socsupport, abbreviate(country, 3)) 
table(country3)      # Examine the result  

num <-  with(socsupport, seq(along=gender))   # Generate row numbers  
lab <- paste(gender1, country3, num, sep=":") 

x <-  c(8, 54, 534, 1630, 6611) 
seq(1, length(x)) 
seq(along=x) 

head(vlt, 4)       # first few rows of vlt 
  # . . . 

vltcv <- stack(vlt[, 1:3]) 
head(vltcv)          # first few rows of vltcv 
table(vltcv$values, vltcv$ind) 
  # More cryptically, table(vltcv) gives the same result.  

oldpar <- par(mfrow=c(2,4)) 
for (i in 2:9){ 
plot(austpop[, 1], log(austpop[, i]), xlab="Year",  
     ylab=names(austpop)[i], pch=16, ylim=c(0,10))} 
par(oldpar)