## Chapter 14: The R System -- Additional Topics
## Sec 14.1: Graphical User Interfaces to R
## Installing the R Commander
## install.packages("Rcmdr", dependencies=TRUE)
## ss 14.1.1: The R Commander's interface -- a guide to getting started
## library(Rcmdr)
## ss 14.1.2: The {rattle} GUI
## library(rattle)
## rattle()
## ss 14.1.3: The Creation of Simple GUIs -- the {fgui} Package
library(fgui)
## Loading required package: tools
library(DAAG); library(lattice)
## Loading required package: lattice
## Create function that does the smoothing
fitsmooth <-
function(form=mdbRain^(1/3) ~ Year, df=bomregions, myspan=0.75)
print(xyplot(form, data=df, type=c("p","smooth"), span=myspan))
## Create function that sets up a GUI interface to fitsmooth()
callsmooth <- function()gui(func=fitsmooth,
argSlider=list(myspan=c(0.1,1.5,0.025)), # start,stop,stepsize
output=NULL, callback=guiExec)
## callsmooth()
## Click on and/or move the slider to display the graph.
## Sec 14.2: Working Directories, Workspaces and the Search List
## ss 14.2.1: *The search path
search()
## [1] ".GlobalEnv" "package:DAAG" "package:lattice"
## [4] "package:fgui" "package:tools" "package:stats"
## [7] "package:graphics" "package:grDevices" "package:utils"
## [10] "package:datasets" "package:methods" "Autoloads"
## [13] "package:base"
## ss 14.2.2: Workspace management
xy <- matrix(rnorm(60000), nrow=600)
xy.rowrange <- apply(xy, 1, range)
save(xy, xy.rowrange, file="xy.RData")
rm(xy, xy.rowrange)
rm(list = ls(all=TRUE))
## Changing the Working Directory and/or Workspace
## ss 14.2.3: Utility functions
# dir() # List files in the working directory
# file.choose() # Choose a file interactively
# sessionInfo() # Print version numbers for R and for
# # attached packages
# system.file() # Show path, by default to 'package="base"'
# # Try, e.g.: system.file(package="DAAG")
# R.home() # Give the path to the R home directory
# .Library # Path to the default library.
# Sys.getenv() # Show settings of environment variables
# object.size() # Show size of R object
dir()
## [1] "advanced.html" "advanced.R" "advanced.spin.R"
## [4] "advanced.spin.Rmd" "bostonc.txt" "cps3.RData"
## [7] "eda.html" "eda.R" "exploit.html"
## [10] "exploit.R" "fossilcopy.pdf" "fossilfuel.pdf"
## [13] "fuel.csv" "fuel.txt" "fuel2.txt"
## [16] "glm.html" "glm.R" "graphics.html"
## [19] "graphics.R" "hillracesDB" "inference.html"
## [22] "inference.R" "intro.html" "intro.R"
## [25] "mlm-new.html" "mlm-new.R" "mlm.html"
## [28] "mlm.R" "models.html" "models.R"
## [31] "multilr.html" "multilr.R" "mva.html"
## [34] "mva.R" "oneBadRow.txt" "reg1.html"
## [37] "reg1.R" "regWscores.html" "regWscores.R"
## [40] "scan-demo.txt" "treebased.html" "treebased.R"
## [43] "ts.html" "ts.R" "xy.RData"
Sys.getenv("R_HOME")
## [1] "/Library/Frameworks/R.framework/Resources"
normalizePath(Sys.getenv("R_HOME"))
## [1] "/Library/Frameworks/R.framework/Versions/3.1/Resources"
system.file("misc/ViewTemps.RData", package="DAAG")
## [1] "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/DAAG/misc/ViewTemps.RData"
## Sec 14.3: R System Configuration
## ss 14.3.1: The R Windows installation directory tree
## ss 14.3.2: The Library directories
.libPaths("D:/R-2.8.1/library")
.First <- function().libPaths("D:/R-2.8.1/library")
## ss 14.3.3: The startup mechanism
## Sec 14.4: Data Input and Output
library(DAAG)
datafile("oneBadRow")
##
## Data written to file: oneBadRow.txt
datafile("scan-demo")
##
## Data written to file: scan-demo.txt
## ss 14.4.1: Input of data
## Issues for data input with {read.table()} and variants
## read.table("quote.txt")
## read.table("quote.txt", quote="'")
## Tracking errors in input data
nfields <- count.fields("oneBadRow.txt")
nfields # Number of fields, for each row
## [1] 3 4 3 3 3 3 3
## Now identify rows where the number of fields seems anomalous
(1:length(nfields))[nfields == 4]
## [1] 2
## One character string for each input row -- {readLines()}
readLines("oneBadRow.txt", n=3) # First 3 lines
## [1] "10 9 17 # First of 7 lines" "11 13 1 6"
## [3] "9 14 16"
readLines("oneBadRow.txt", n=-1) # All lines
## [1] "10 9 17 # First of 7 lines" "11 13 1 6"
## [3] "9 14 16" "12 15 14"
## [5] "8 15 15" "9 13 12"
## [7] "7 14 18"
## Input of fixed format data
## Input of large rectangular files
scan("oneBadRow.txt", skip = 1, quiet= TRUE)
## [1] 11 13 1 6 9 14 16 12 15 14 8 15 15 9 13 12 7 14 18
data.frame(scan("scan-demo.txt", skip = 1, quiet= TRUE,
what=list(ab="", col2=1, col3=1)))
## ab col2 col3
## 1 a 2 3
## 2 b 11 13
## 3 c 9 7
## Example -- input of the Boston housing data
## readLines("bostonc.txt", n=11)[10:11]
## strsplit(readLines("bostonc.txt", n=11)[10:11], split="\t")
datafile('bostonc')
##
## Data written to file: bostonc.txt
boston <- scan("bostonc.txt", n=-1, sep="\t", skip=10,
what=c(list(1,""), as.list(rep(1,19))))
colnams <- scan("bostonc.txt", skip=9, n=21, what="")
boston <- data.frame(boston)
names(boston) <- colnams
boston <- read.table("bostonc.txt", sep="\t", skip=9,
comment.char="", header=TRUE)
## ss 14.4.2: Data output
## Output of data frames
write.table(fossilfuel, file="fuel.txt")
## Redirection of screen output to a file
fossilfuel # Below, this will be written to the file
## year carbon
## 1 1800 8
## 2 1850 54
## 3 1900 534
## 4 1950 1630
## 5 2000 6611
## sink("fuel2.txt")
## fossilfuel # NB: No output on screen
## sink()
## Output to a file using {cat()}
## ss 14.4.3: Database connections
library(DAAG); library(DAAG)
library(RSQLite)
## Loading required package: DBI
driveLite <- dbDriver("SQLite")
con <- dbConnect(driveLite, dbname="hillracesDB")
dbWriteTable(con, "hills2000", hills2000, overwrite=TRUE)
## [1] TRUE
dbWriteTable(con, "nihills", nihills, overwrite=TRUE)
## [1] TRUE
dbListTables(con)
## [1] "hills2000" "nihills"
## Obtain rows 11 to 20 from the newly created nihills table
dbGetQuery(con, "select * from nihills limit 10 offset 10")
## row_names dist climb time timef
## 1 Monument Race 4.0 1000 0.4717 0.5947
## 2 Loughshannagh Horseshoe 4.3 1700 0.6469 0.8822
## 3 Rocky 4.0 1300 0.5231 0.6653
## 4 Meelbeg Meelmore 3.5 1800 0.4544 0.6086
## 5 Donard Forest 4.5 1400 0.5186 0.6433
## 6 Slieve Donard 5.5 2790 0.9483 1.2086
## 7 Flagstaff to Carling 11.0 3000 1.4569 2.0344
## 8 Slieve Bearnagh 4.0 2690 0.6878 0.7992
## 9 Seven Sevens 18.9 8775 3.9028 5.9856
## 10 Lurig Challenge 4.0 1000 0.4347 0.5756
dbDisconnect(con)
## [1] TRUE
## Sec 14.5: Functions and operators -- Some Further Details
## Issues for the writing and use of functions
## ss 14.5.1: Function arguments
## The {args()} function
args(write.table) # Version 2.10.0 of R
## function (x, file = "", append = FALSE, quote = TRUE, sep = " ",
## eol = "\n", na = "NA", dec = ".", row.names = TRUE, col.names = TRUE,
## qmethod = c("escape", "double"), fileEncoding = "")
## NULL
## The {...} argument
x <- c(1,5,7); z <- c(4,9,10,NA); u <- 1:10
# Now do calculations that use x, z and u
rm(x, z, u)
## ss 14.5.2: Character string and vector functions
substring("abracadabra",3, 8) # Extract characters 3 to 8
## [1] "racada"
nchar("abracadabra") # Count the number of characters
## [1] 11
strsplit("abracadabra", "r") # Split wherever "r" appears
## [[1]]
## [1] "ab" "acadab" "a"
strsplit("abcd", split="") # Split into separate characters
## [[1]]
## [1] "a" "b" "c" "d"
paste("ab","c","d", sep="") # Join together
## [1] "abcd"
paste(c("a","b","c"), collapse="") # Join vector elements
## [1] "abc"
## ss 14.5.3: Anonymous functions
ssfun <- function(x)sum(x^2)
sapply(elastic1, ssfun) # elastic1 is from the DAAG package
## stretch distance
## 16240 275748
sapply(elastic1, function(x)sum(x^2))
## stretch distance
## 16240 275748
growthfun <- function(x)(x[9] - x[1])/x[1]
sapply(austpop[, -c(1,10)], growthfun)
## NSW Vic Qld SA WA Tas NT ACT
## 2.295 2.268 3.980 2.364 4.876 1.456 36.400 102.333
sapply(austpop[, -c(1,10)], function(x)(x[9] - x[1])/x[1])
## NSW Vic Qld SA WA Tas NT ACT
## 2.295 2.268 3.980 2.364 4.876 1.456 36.400 102.333
## ss 14.5.4: Functions for working with dates (and times)
# Electricity Billing Dates
dd <- as.Date(c("2003-08-24","2003-11-23","2004-02-22","2004-05-03"))
diff(dd)
## Time differences in days
## [1] 91 91 71
as.Date("1/1/1960", format="%d/%m/%Y")
## [1] "1960-01-01"
as.Date("1:12:1960",format="%d:%m:%Y")
## [1] "1960-12-01"
as.Date("1960-12-1") - as.Date("1960-1-1")
## Time difference of 335 days
as.Date("31/12/1960","%d/%m/%Y")
## [1] "1960-12-31"
julian(as.Date("1/1/1970","%d/%m/%Y"))
## [1] 0
## attr(,"origin")
## [1] "1970-01-01"
julian(as.Date("1/1/2000","%d/%m/%Y"))
## [1] 10957
## attr(,"origin")
## [1] "1970-01-01"
dec1 <- as.Date("2004-12-1")
format(dec1, format="%b %d %Y")
## [1] "Dec 01 2004"
format(dec1, format="%a %b %d %Y")
## [1] "Wed Dec 01 2004"
## Labeling of graph: data frame jobs (DAAG)
startofmonth <- seq(from=as.Date("1Jan1995", format="%d%b%Y"),
by="1 month", length=24)
atdates <- seq(from=as.Date("1Jan1995", format="%d%b%Y"),
by="6 month", length=4)
datelabs <- format(atdates, "%b%y")
xyplot(BC+Alberta ~ startofmonth, data=jobs, outer=TRUE,
scales=list(x=list(at=atdates, labels=datelabs)),
auto.key=list(columns=2, between=1))

dd <- as.Date(c("2003-08-24","2003-11-23","2004-02-22","2004-05-03"))
weekdays(dd)
## [1] "Sunday" "Sunday" "Sunday" "Monday"
months(dd)
## [1] "August" "November" "February" "May"
quarters(dd)
## [1] "Q3" "Q4" "Q1" "Q2"
## ss 14.5.5: Creating Groups
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:DAAG':
##
## hills
catBP <- cut(Pima.tr2$bp, breaks=4)
table(catBP, exclude=NULL)
## catBP
## (37.9,57] (57,76] (76,95] (95,114] <NA>
## 22 170 87 8 13
## ss 14.5.6: Logical operators
c(TRUE, TRUE, FALSE) & c(FALSE, TRUE, FALSE)
## [1] FALSE TRUE FALSE
c(TRUE, TRUE, FALSE) && c(FALSE, TRUE, FALSE)
## [1] FALSE
## A vector form of {if} statement
## Operators are functions
"+"(2, 3)
## [1] 5
## Sec 14.6: Factors
fac <- factor(c("c", "b", "a"))
for (i in fac) print(i)
## [1] "c"
## [1] "b"
## [1] "a"
## Ordered factors
stress.level <- rep(c("low","medium","high"), 2)
ordf.stress <- ordered(stress.level, levels=c("low", "medium",
"high"))
ordf.stress
## [1] low medium high low medium high
## Levels: low < medium < high
ordf.stress < "medium"
## [1] TRUE FALSE FALSE TRUE FALSE FALSE
ordf.stress >= "medium"
## [1] FALSE TRUE TRUE FALSE TRUE TRUE
## Factor contrasts
contr.treatment(3)
## 2 3
## 1 0 0
## 2 1 0
## 3 0 1
contr.sum(3)
## [,1] [,2]
## 1 1 0
## 2 0 1
## 3 -1 -1
cities <- factor(c("Melbourne","Sydney","Adelaide"))
contr.sum(cities)
## [,1] [,2]
## Melbourne 1 0
## Sydney 0 1
## Adelaide -1 -1
## *Tests for main effects in the presence of interactions?
## Sec 14.7: Missing Values
x <- c(1, 6, 2, NA)
is.na(x) # TRUE for NAs, otherwise FALSE
## [1] FALSE FALSE FALSE TRUE
x == NA # All elements are set to NA
## [1] NA NA NA NA
NA == NA
## [1] NA
x <- c(1, NA, 6, 2, 10)
x > 4 # The second element will be NA
## [1] FALSE NA TRUE FALSE TRUE
x[x>4] # NB: This generates a vector of length 3
## [1] NA 6 10
## x[x > 4] <- c(101, 102)
x[!is.na(x) & x > 4] <- c(101, 102)
x
## [1] 1 NA 101 2 102
## Counting and identifying { NA}s -- the use of {table()}
with(nswdemo, table(trt, re74>0, useNA="ifany"))
##
## trt FALSE TRUE <NA>
## 0 195 65 165
## 1 131 54 112
fac_re74 <- with(nswdemo, factor(re74>0, exclude=NULL))
levels(fac_re74)
## [1] "FALSE" "TRUE" NA
levels(fac_re74) <- c("0", "gt0", "<NA>")
with(nswdemo, table(trt, fac_re74))
## fac_re74
## trt 0 gt0 <NA>
## 0 195 65 165
## 1 131 54 112
## The removal of NAs
## {NA}s in modeling functions
options()$na.action # Version 2.10.0, following startup
## [1] "na.omit"
## Sorting and ordering, where there are NAs
x <- c(1, 20, 2, NA, 22)
order(x) # By default, na.last=TRUE
## [1] 1 3 2 5 4
x[order(x)]
## [1] 1 2 20 22 NA
sort(x) # By default na.last=NA
## [1] 1 2 20 22
sort(x, na.last=TRUE)
## [1] 1 2 20 22 NA
## Sec 14.8: * Matrices and Arrays
xx <- matrix(1:6, ncol=3) # Equivalently, enter
xx
## [,1] [,2] [,3]
## [1,] 1 3 5
## [2,] 2 4 6
dim(xx)
## [1] 2 3
## Use as.vector()
x <- as.vector(xx)
## Alternatively, directly remove the dimension attribute
x <- xx
dim(x) <- NULL
## The extraction of submatrices
x34 <- matrix(1:12, ncol=4)
x34
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
x34[2:3, c(1,4)] # Extract rows 2 & 3 and columns 1 & 4
## [,1] [,2]
## [1,] 2 11
## [2,] 3 12
x34[-2, ] # Extract all rows except the second
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 3 6 9 12
x34[-2, -3] # Omit row 2 and column 3
## [,1] [,2] [,3]
## [1,] 1 4 10
## [2,] 3 6 12
x34[2, ] # The dimension attribute is dropped
## [1] 2 5 8 11
x34[2, , drop=FALSE] # Retain the dimension attribute
## [,1] [,2] [,3] [,4]
## [1,] 2 5 8 11
## Conversion of data frames and tables into matrices
## ss 14.8.1: Matrix arithmetic
## Set up example matrices G, H and B
G <- matrix(1:12, nrow=4); H <- matrix(112:101, nrow=4); B <- 1:3
G + H # Element-wise addition (X & Y both n by m)
## [,1] [,2] [,3]
## [1,] 113 113 113
## [2,] 113 113 113
## [3,] 113 113 113
## [4,] 113 113 113
G * H # Element-wise multiplication
## [,1] [,2] [,3]
## [1,] 112 540 936
## [2,] 222 642 1030
## [3,] 330 742 1122
## [4,] 436 840 1212
G %*% B # Matrix multiplication (X is n by k; B is k by m)
## [,1]
## [1,] 38
## [2,] 44
## [3,] 50
## [4,] 56
## Set up example matrices X (square, full rank) and Y
X <- matrix(c(1,1,1, -1,0,1, 2,4,2), nrow=3)
Y <- matrix(1:6, nrow=3)
solve(X, Y) # Solve XB = Y (X must be square)
## [,1] [,2]
## [1,] 2 5
## [2,] 1 1
## [3,] 0 0
## Computational Efficiency
xy <- matrix(rnorm(1500000),ncol=50)
dim(xy)
## [1] 30000 50
system.time(xy+1) # user and system are processor times
## user system elapsed
## 0.002 0.001 0.002
xy.df <- data.frame(xy)
system.time(xy.df+1)
## user system elapsed
## 0.092 0.005 0.098
## ss 14.8.2: Outer products
outer(1:4, 1:10)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 1 2 3 4 5 6 7 8 9 10
## [2,] 2 4 6 8 10 12 14 16 18 20
## [3,] 3 6 9 12 15 18 21 24 27 30
## [4,] 4 8 12 16 20 24 28 32 36 40
rbgshades <- outer(c("red","blue","green"), c("",paste(1:4)),
function(x,y)paste(x,y, sep=""))
rbgshades # Display the matrix
## [,1] [,2] [,3] [,4] [,5]
## [1,] "red" "red1" "red2" "red3" "red4"
## [2,] "blue" "blue1" "blue2" "blue3" "blue4"
## [3,] "green" "green1" "green2" "green3" "green4"
plot(rep(0:4, rep(3,5)), rep(1:3, 5), col=rbgshades, pch=15, cex=8)

## ss 14.8.3: Arrays
x <- 1:24
dim(x) <- c(2, 12); print(x)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 1 3 5 7 9 11 13 15 17 19 21 23
## [2,] 2 4 6 8 10 12 14 16 18 20 22 24
dim(x) <- c(3, 4, 2); print(x)
## , , 1
##
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
##
## , , 2
##
## [,1] [,2] [,3] [,4]
## [1,] 13 16 19 22
## [2,] 14 17 20 23
## [3,] 15 18 21 24
## Sec 14.9: Manipulations with Lists, Data Frames, Matrices and Time Series
## ss 14.9.1: Lists -- an extension of the notion of ``vector''
zz <- list("Shireen", "Peter", c("Luke","Amelia","Ted"), c(8,4,0))
zz[c(3,1)]
## [[1]]
## [1] "Luke" "Amelia" "Ted"
##
## [[2]]
## [1] "Shireen"
zz[c(3,1)]
## [[1]]
## [1] "Luke" "Amelia" "Ted"
##
## [[2]]
## [1] "Shireen"
## Return list whose only element is the vector c("Luke","Amelia")
zz[3]
## [[1]]
## [1] "Luke" "Amelia" "Ted"
## Return the vector c("Luke","Amelia","Ted")
zz[[3]]
## [1] "Luke" "Amelia" "Ted"
duo <- list(family="Braun", names=c("Matthew","Phillip"),
ages=c(14,9))
duo[["names"]] # Alternatively, specify duo$names
## [1] "Matthew" "Phillip"
## The dual identity of data frames
xyz <- data.frame(x=1:4, y=11:14, z=I(letters[1:4]))
xyz[1, ] # Returns a data frame, i.e., a list)
## x y z
## 1 1 11 a
xyz[1, , drop=TRUE]
## $x
## [1] 1
##
## $y
## [1] 11
##
## $z
## [1] "a"
unlist(xyz[1, ]) # Returns, here, a vector of atomic mode
## x y z
## "1" "11" "a"
unlist(xyz[1, , drop=TRUE])
## x y z
## "1" "11" "a"
## ss 14.9.2: Changing the shape of data frames (or matrices)
## Melting and casting
library(reshape)
Jobs <- melt(jobs, measure.vars=names(jobs)[1:6],
variable_name="Region", id.vars="Date")
head(Jobs, 3)
## Date Region value
## 1 95.00 BC 1752
## 2 95.08 BC 1737
## 3 95.17 BC 1765
jobsBack <- cast(Jobs, Date ~ Region)
head(jobsBack,3)
## Date BC Alberta Prairies Ontario Quebec Atlantic
## 1 95.00 1752 1366 982 5239 3196 947
## 2 95.08 1737 1369 981 5233 3205 946
## 3 95.17 1765 1380 984 5212 3191 954
## ss 14.9.3: * Merging data frames -- {merge()}
new.Cars93 <- merge(x=Cars93, y=Cars93.summary[, "abbrev", drop=F],
by.x="Type", by.y="row.names")
## ss 14.9.4: Joining data frames, matrices and vectors -- {cbind()}
## Conversion of tables and arrays into data frames
## ss 14.9.5: The {apply} family of functions
## The {tapply()} function
## Compare tapply() with aggregate(): data frame kiwishade (DAAG)
with(kiwishade, tapply(yield, INDEX=list(block, shade), FUN=mean))
## none Aug2Dec Dec2Feb Feb2May
## east 99.03 105.6 88.92 95.88
## north 104.03 103.6 93.70 92.03
## west 97.56 100.5 87.15 90.41
with(kiwishade, aggregate(yield, by=list(block, shade), FUN=mean))
## Group.1 Group.2 x
## 1 east none 99.03
## 2 north none 104.03
## 3 west none 97.56
## 4 east Aug2Dec 105.56
## 5 north Aug2Dec 103.59
## 6 west Aug2Dec 100.55
## 7 east Dec2Feb 88.92
## 8 north Dec2Feb 93.70
## 9 west Dec2Feb 87.15
## 10 east Feb2May 95.88
## 11 north Feb2May 92.03
## 12 west Feb2May 90.41
## The {apply()} function
## The functions {lapply()} and {sapply()}
## Uses data frame rainforest (DAAG)
sapply(rainforest[, -7], range, na.rm=TRUE)
## dbh wood bark root rootsk branch
## [1,] 4 3 8 2 0.3 4
## [2,] 56 1530 105 135 24.0 120
## ss 14.9.6: Splitting vectors and data frames into lists -- {split()}
with(Cars93.summary, split(No.of.cars, Max.passengers))
## $`4`
## [1] 14
##
## $`5`
## [1] 21
##
## $`6`
## [1] 16 11 22
##
## $`8`
## [1] 9
## Split dataframe by Max.passengers (2nd column)
split(Cars93.summary[, -2], Cars93.summary[, 2])
## $`4`
## Min.passengers No.of.cars abbrev
## Sporty 2 14 Sp
##
## $`5`
## Min.passengers No.of.cars abbrev
## Small 4 21 Sm
##
## $`6`
## Min.passengers No.of.cars abbrev
## Compact 4 16 C
## Large 6 11 L
## Midsize 4 22 M
##
## $`8`
## Min.passengers No.of.cars abbrev
## Van 7 9 V
## ss 14.9.7: Multivariate time series
jobts <- ts(jobs[,1:6], start=1995, frequency=12)
colnames(jobts)
## [1] "BC" "Alberta" "Prairies" "Ontario" "Quebec" "Atlantic"
## Subseries through to the third month of 1995
window(jobts, end=1995+2/12)
## BC Alberta Prairies Ontario Quebec Atlantic
## Jan 1995 1752 1366 982 5239 3196 947
## Feb 1995 1737 1369 981 5233 3205 946
## Mar 1995 1765 1380 984 5212 3191 954
# Rows are 1995+0/12, 1990+1/12, 1990+2/12
plot(jobts, plot.type="single") # Use one panel for all

plot(jobts, plot.type="multiple") # Separate panels.

## Alternative code for Figure 2.9A; plot as time series
plot(jobts, plot.type="single", xlim=c(1995,1997.2), lty=1:6, log="y",
xaxt="n", xlab="", ylab="Number of Jobs")
## Move label positions so that labels do not overlap
ylast <- bounce(window(jobts, 1996+11/12), d=strheight("O"), log=TRUE)
# bounce() is from DAAG
text(rep(1996+11/12,6), ylast, colnames(ylast), pos=4)
datlab <- format(seq(from=as.Date("1Jan1995", format="%d%b%Y"), by="3 month",
length=8), "%b%Y")
axis(1, at=seq(from=1995, by=0.25, length=8), datlab)

## Sec 14.10: Classes and Methods
## S3 methods and classes
print
## function (x, ...)
## UseMethod("print")
## <bytecode: 0x10094e078>
## <environment: namespace:base>
## ss 14.10.1: Printing and summarizing model objects
elastic.lm <- lm(distance ~ stretch, data=elasticband)
elastic.sum <- summary(elastic.lm)
## ss 14.10.2: Extracting information from model objects
names(elastic.lm)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
elastic.lm$coefficients
## (Intercept) stretch
## -63.571 4.554
elastic.lm[["coefficients"]]
## (Intercept) stretch
## -63.571 4.554
elastic.lm[[1]]
## (Intercept) stretch
## -63.571 4.554
coef(elastic.lm)
## (Intercept) stretch
## -63.571 4.554
x <- 1:5
class(x) <- "lm" # Inappropriate assignment of class
## print(x)
## ss 14.10.3: S4 classes and methods
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following object is masked from 'package:reshape':
##
## expand
##
## Loading required package: Rcpp
hp.lmList <- lmList(o2 ~ wattsPerKg | id, data=humanpower1)
slotNames(hp.lmList)
## [1] ".Data" "call" "pool"
slot(hp.lmList, "call")
## lmList(formula = o2 ~ wattsPerKg | id, data = humanpower1)
hp.lmList@call
## lmList(formula = o2 ~ wattsPerKg | id, data = humanpower1)
## Sec 14.11: Manipulation of Language Constructs
## ss 14.11.1: Model and graphics formulae
plot.mtcars <- function(xvar="disp", yvar="hp"){
mt.form <- paste(yvar, "~", xvar)
plot(formula(mt.form), data=mtcars)
}
## Plot using data frame mtcars (datasets)
names(mtcars)
## [1] "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am" "gear"
## [11] "carb"
plot.mtcars()

plot.mtcars(xvar="disp", yvar="mpg")

## Extraction of Variable Names from Formula Objects
all.vars(mpg ~ disp)
## [1] "mpg" "disp"
plot.mtcars <- function(form = mpg~disp){
## Include information that allows a meaningful label
mtcars.info <- c(mpg= "Miles/(US) gallon",
cyl= "Number of cylinders",
disp= "Displacement (cu.in.)",
hp= "Gross horsepower",
drat= "Rear axle ratio",
wt= "Weight (lb/1000)",
qsec= "1/4 mile time",
vs= "V/S",
am= "Transmission (0 = automatic, 1 = manual)",
gear= "Number of forward gears",
carb= "Number of carburettors")
xlab <- mtcars.info[all.vars(form)[1]]
ylab <- mtcars.info[all.vars(form)[2]]
plot(form, xlab=xlab, ylab=ylab, data=mtcars)
}
## ss 14.11.2: The use of a list to pass arguments
mean(rnorm(20))
## [1] 0.2687
do.call("mean", args=list(x=rnorm(20)))
## [1] 0.1845
simulate.distribution <-
function(distn="weibull", params=list(n=10)){
## Simulates one of: weibull, gaussian, logistic
if(distn=="weibull" & !("shape" %in% names(params)))
params <- c(shape=1, params)
## weibull requires a default shape argument.
## =====================================================
## Choose the function that will generate the random sample
rfun <- switch(distn,
weibull = rweibull,
normal = rnorm,
logistic= rlogis)
## Call rfun(). Use of do.call() makes it possible to give
## the argument list as a list of named values.
## Pass shape argument (or NULL), plus n = # of numbers
do.call("rfun", args=params)
}
simulate.distribution()
## [1] 0.7738 0.6918 0.5466 1.5960 0.1976 1.4604 1.4008 0.6528 2.8480 0.2019
plot(density(simulate.distribution("normal", params=list(n=100))))

plot(density(simulate.distribution("weibull",
params=list(n=100, scale=0.5))))

mean.call <- call("mean", x=rnorm(5))
eval(mean.call)
## [1] 0.3112
eval(mean.call)
## [1] 0.3112
mean.call
## mean(x = c(1.42770546085013, -0.200464045575479, 1.27272152961708,
## -0.660698809687278, -0.283365884082554))
## ss 14.11.3: Expressions
local(a+b*x+c*x^2, envir=list(x=1:4, a=3, b=5, c=1))
## [1] 9 17 27 39
## ss 14.11.4: Environments
test <- function()as.character(sys.call())
test()
## [1] "test"
newtest <- test # Create a copy with a different name
newtest()
## [1] "newtest"
## Automatic naming of a file that holds function output
hcopy <-
function(width=2.25, height=2.25, pointsize=8){
funtxt <- sys.call(1)
fnam <- paste(funtxt, ".pdf", sep="")
print(paste("Output is to the file '", fnam, "'", sep=""))
pdf(file=fnam, width=width, height=height, pointsize=
pointsize)
}
fig1 <- function(){
hcopy() # Call with default arguments
curve(sin, -pi, 2*pi)
dev.off()
}
## ss 14.11.5: Function environments, and lazy evaluation
## Lazy evaluation
lazyfoo <- function(x=4, y = x^2)y
lazyfoo()
## [1] 16
lazyfoo(x=3)
## [1] 9
# lazyfoo(y=x^3) # We specify a value for y in the function call
# x <- 9 # Now, in the parent environment, x=9
# lazyfoo(y=x^3)
## Example -- a function that identifies objects added during asession
dsetnames <- objects()
additions <- function(objnames = dsetnames){
newnames <- objects(envir=.GlobalEnv)
existing <- newnames %in% objnames
newnames[!existing]
}
additions(dsetnames)
## [1] "additions" "dsetnames"
## Sec 14.12: *Creation of R Packages
system.file("misc/ViewTemps.RData", package="DAAG")
## [1] "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/DAAG/misc/ViewTemps.RData"
## package.skeleton(name = "ViewTemps")
## Namespaces
## Sec 14.13: Document Preparation --- {Sweave()} and {xtable()}
## Sweave("sec1-1.Rnw", keep.source=TRUE)
# actually, Sweave("sec1-1") is sufficient
# The argument keep.source=TRUE preserves the code layout
# and ensures that comments are retained.
R.home(component="share/texmf/Sweave.sty")
## [1] "/Library/Frameworks/R.framework/Resources/share/texmf/Sweave.sty"
## Sec 14.14: Further Reading
citation()
##
## To cite R in publications use:
##
## R Core Team (2014). R: A language and environment for
## statistical computing. R Foundation for Statistical Computing,
## Vienna, Austria. URL http://www.R-project.org/.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {R: A Language and Environment for Statistical Computing},
## author = {{R Core Team}},
## organization = {R Foundation for Statistical Computing},
## address = {Vienna, Austria},
## year = {2014},
## url = {http://www.R-project.org/},
## }
##
## We have invested a lot of time and effort in creating R, please
## cite it when using it for data analysis. See also
## 'citation("pkgname")' for citing R packages.
citation("DAAG")
##
## To cite package 'DAAG' in publications use:
##
## John H. Maindonald and W. John Braun (2014). DAAG: Data Analysis
## And Graphics data and functions. R package version 1.20.
## http://CRAN.R-project.org/package=DAAG
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {DAAG: Data Analysis And Graphics data and functions},
## author = {John H. Maindonald and W. John Braun},
## year = {2014},
## note = {R package version 1.20},
## url = {http://CRAN.R-project.org/package=DAAG},
## }
##
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
## Vignettes
vignette() # All vignettes in all installed packages
## vignette(package="grid")
## vignette("viewports") # Equivalent to vignette(topic="viewports")
## Use of split() and sapply(): data frame science (DAAG)
with(science, sapply(split(school, PrivPub),
function(x)length(unique(x))))
## private public
## 12 29
### Thanks to Markus Hegland (ANU), who wrote the initial version
##1 Generate the data
cat("generate data \n")
## generate data
n <- 800 # length of noise vector
m <- 100 # length of signal vector
xsignal <- runif(m)
sig <- 0.01
enoise <- rnorm(m)*sig
ysignal <- xsignal**2+enoise
maxys <- max(ysignal)
minys <- min(ysignal)
x <- c(runif(n), xsignal)
y <- c(runif(n)*(maxys-minys)+minys, ysignal)
# random permutation of the data vectors
iperm <- sample(seq(x))
x <- x[iperm]
y <- y[iperm]
# normalize the data, i.e., scale x & y values to
# lie between 0 & 1
xn <- (x - min(x))/(max(x) - min(x))
yn <- (y - min(y))/(max(y) - min(y))
##1 End
##2 determine number of neighbors within
# a distance <= h = 1/sqrt(length(xn))
nx <- length(xn)
# determine distance matrix
d <- sqrt( (matrix(xn, nx, nx) - t(matrix(xn, nx, nx)) )**2 +
(matrix(yn, nx, nx) - t(matrix(yn, nx, nx)) )**2 )
h <- 1/sqrt(nx)
nnear <- apply(d <= h, 1, sum)
##2 End
##3 Plot data, with reconstructed signal overlaid.
cat("produce plots \n")
## produce plots
# identify the points which have many such neighbors
ns <- 8
plot(x,y)
points(x[nnear > ns], y[nnear > ns], col="red", pch=16)

##3 End
n <- 10000; system.time(sd(rnorm(n)))
## user system elapsed
## 0.001 0.000 0.001
library(plyr)
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:reshape':
##
## rename, round_any
##
## The following object is masked from 'package:DAAG':
##
## ozone
aaply(.data=UCBAdmissions, .margins=1:2, .fun=sum)
## Gender
## Admit Male Female
## Admitted 1198 557
## Rejected 1493 1278
adply(.data=UCBAdmissions, .margins=1:3, .fun=identity)
## Admit Gender Dept V1
## 1 Admitted Male A 512
## 2 Rejected Male A 313
## 3 Admitted Female A 89
## 4 Rejected Female A 19
## 5 Admitted Male B 353
## 6 Rejected Male B 207
## 7 Admitted Female B 17
## 8 Rejected Female B 8
## 9 Admitted Male C 120
## 10 Rejected Male C 205
## 11 Admitted Female C 202
## 12 Rejected Female C 391
## 13 Admitted Male D 138
## 14 Rejected Male D 279
## 15 Admitted Female D 131
## 16 Rejected Female D 244
## 17 Admitted Male E 53
## 18 Rejected Male E 138
## 19 Admitted Female E 94
## 20 Rejected Female E 299
## 21 Admitted Male F 22
## 22 Rejected Male F 351
## 23 Admitted Female F 24
## 24 Rejected Female F 317
library(DAAG)
daply(.data=tinting, .variables=c("sex","agegp"), .fun=length)
## agegp
## sex younger older
## f 9 9
## m 9 9
## References
## References for further reading
## References
## References for further reading