advanced.R

## Chapter 14: The R System -- Additional Topics 

## Sec 14.1: Graphical User Interfaces to R 
##                    Installing the R Commander  
## install.packages("Rcmdr", dependencies=TRUE) 

## ss 14.1.1: The R Commander's interface -- a guide to getting started 
## library(Rcmdr) 

## ss 14.1.2: The {rattle} GUI 
## library(rattle) 
## rattle() 

## ss 14.1.3: The Creation of Simple GUIs -- the {fgui} Package 
library(fgui)

## Loading required package: tools

library(DAAG); library(lattice)

## Loading required package: lattice

## Create function that does the smoothing 
fitsmooth <- 
  function(form=mdbRain^(1/3) ~ Year, df=bomregions, myspan=0.75) 
    print(xyplot(form, data=df, type=c("p","smooth"), span=myspan)) 
## Create function that sets up a GUI interface to fitsmooth() 
callsmooth <- function()gui(func=fitsmooth,  
    argSlider=list(myspan=c(0.1,1.5,0.025)),  # start,stop,stepsize 
    output=NULL, callback=guiExec)   
## callsmooth() 
## Click on and/or move the slider to display the graph. 


## Sec 14.2: Working Directories, Workspaces and the Search List 
## ss 14.2.1: *The search path 
search()

##  [1] ".GlobalEnv"        "package:DAAG"      "package:lattice"  
##  [4] "package:fgui"      "package:tools"     "package:stats"    
##  [7] "package:graphics"  "package:grDevices" "package:utils"    
## [10] "package:datasets"  "package:methods"   "Autoloads"        
## [13] "package:base"

## ss 14.2.2: Workspace management 
xy <- matrix(rnorm(60000), nrow=600) 
xy.rowrange <- apply(xy, 1, range) 
save(xy, xy.rowrange, file="xy.RData") 
rm(xy, xy.rowrange) 

rm(list = ls(all=TRUE)) 

##          Changing the Working Directory and/or Workspace 
## ss 14.2.3: Utility functions 
# dir()                # List files in the working directory 
# file.choose()        # Choose a file interactively 
# sessionInfo()        # Print version numbers for R and for 
#                      # attached packages 
# system.file()        # Show path, by default to 'package="base"' 
#                      # Try, e.g.: system.file(package="DAAG") 
# R.home()             # Give the path to the R home directory 
# .Library             # Path to the default library. 
# Sys.getenv()         # Show settings of environment variables 
# object.size()        # Show size of R object 

dir()

##  [1] "advanced.html"     "advanced.R"        "advanced.spin.R"  
##  [4] "advanced.spin.Rmd" "bostonc.txt"       "cps3.RData"       
##  [7] "eda.html"          "eda.R"             "exploit.html"     
## [10] "exploit.R"         "fossilcopy.pdf"    "fossilfuel.pdf"   
## [13] "fuel.csv"          "fuel.txt"          "fuel2.txt"        
## [16] "glm.html"          "glm.R"             "graphics.html"    
## [19] "graphics.R"        "hillracesDB"       "inference.html"   
## [22] "inference.R"       "intro.html"        "intro.R"          
## [25] "mlm-new.html"      "mlm-new.R"         "mlm.html"         
## [28] "mlm.R"             "models.html"       "models.R"         
## [31] "multilr.html"      "multilr.R"         "mva.html"         
## [34] "mva.R"             "oneBadRow.txt"     "reg1.html"        
## [37] "reg1.R"            "regWscores.html"   "regWscores.R"     
## [40] "scan-demo.txt"     "treebased.html"    "treebased.R"      
## [43] "ts.html"           "ts.R"              "xy.RData"

Sys.getenv("R_HOME")

## [1] "/Library/Frameworks/R.framework/Resources"

normalizePath(Sys.getenv("R_HOME"))

## [1] "/Library/Frameworks/R.framework/Versions/3.1/Resources"

system.file("misc/ViewTemps.RData", package="DAAG")

## [1] "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/DAAG/misc/ViewTemps.RData"

## Sec 14.3: R System Configuration 
## ss 14.3.1: The R Windows installation directory tree 
## ss 14.3.2: The Library directories 
.libPaths("D:/R-2.8.1/library") 

.First <- function().libPaths("D:/R-2.8.1/library") 

## ss 14.3.3: The startup mechanism 

## Sec 14.4: Data Input and Output 
library(DAAG) 
datafile("oneBadRow")

## 
## Data written to file: oneBadRow.txt

datafile("scan-demo")

## 
## Data written to file: scan-demo.txt

## ss 14.4.1: Input of data 
##       Issues for data input with {read.table()} and variants 
## read.table("quote.txt") 
## read.table("quote.txt", quote="'") 

##                   Tracking errors in input data 
nfields <- count.fields("oneBadRow.txt") 
nfields            # Number of fields, for each row

## [1] 3 4 3 3 3 3 3

## Now identify rows where the number of fields seems anomalous 
(1:length(nfields))[nfields == 4]

## [1] 2

##      One character string for each input row -- {readLines()} 
readLines("oneBadRow.txt", n=3)    # First 3 lines

## [1] "10 9 17  # First of 7 lines" "11 13 1 6"                  
## [3] "9 14 16"

readLines("oneBadRow.txt", n=-1)   # All lines

## [1] "10 9 17  # First of 7 lines" "11 13 1 6"                  
## [3] "9 14 16"                     "12 15 14"                   
## [5] "8 15 15"                     "9 13 12"                    
## [7] "7 14 18"

##                     Input of fixed format data 
##                  Input of large rectangular files 
scan("oneBadRow.txt", skip = 1, quiet= TRUE)

##  [1] 11 13  1  6  9 14 16 12 15 14  8 15 15  9 13 12  7 14 18

data.frame(scan("scan-demo.txt", skip = 1, quiet= TRUE, 
           what=list(ab="", col2=1, col3=1)))

##   ab col2 col3
## 1  a    2    3
## 2  b   11   13
## 3  c    9    7

##            Example -- input of the Boston housing data 
## readLines("bostonc.txt", n=11)[10:11] 

## strsplit(readLines("bostonc.txt", n=11)[10:11], split="\t") 

datafile('bostonc')

## 
## Data written to file: bostonc.txt

boston <- scan("bostonc.txt", n=-1, sep="\t", skip=10, 
               what=c(list(1,""), as.list(rep(1,19)))) 
colnams <- scan("bostonc.txt", skip=9, n=21, what="") 
boston <- data.frame(boston) 
names(boston) <- colnams 

boston <- read.table("bostonc.txt", sep="\t", skip=9,  
                     comment.char="", header=TRUE) 

## ss 14.4.2: Data output 
##                       Output of data frames 
write.table(fossilfuel, file="fuel.txt") 

##               Redirection of screen output to a file 
fossilfuel         # Below, this will be written to the file

##   year carbon
## 1 1800      8
## 2 1850     54
## 3 1900    534
## 4 1950   1630
## 5 2000   6611

## sink("fuel2.txt") 
## fossilfuel         # NB: No output on screen 
## sink() 

##                   Output to a file using {cat()} 
## ss 14.4.3: Database connections 
library(DAAG); library(DAAG) 
library(RSQLite)

## Loading required package: DBI

driveLite <- dbDriver("SQLite") 
con <- dbConnect(driveLite, dbname="hillracesDB") 
dbWriteTable(con, "hills2000", hills2000, overwrite=TRUE)

## [1] TRUE

dbWriteTable(con, "nihills", nihills, overwrite=TRUE)

## [1] TRUE

dbListTables(con)

## [1] "hills2000" "nihills"

## Obtain rows 11 to 20 from the newly created nihills table 
dbGetQuery(con, "select * from nihills limit 10 offset 10")

##                  row_names dist climb   time  timef
## 1            Monument Race  4.0  1000 0.4717 0.5947
## 2  Loughshannagh Horseshoe  4.3  1700 0.6469 0.8822
## 3                    Rocky  4.0  1300 0.5231 0.6653
## 4         Meelbeg Meelmore  3.5  1800 0.4544 0.6086
## 5            Donard Forest  4.5  1400 0.5186 0.6433
## 6            Slieve Donard  5.5  2790 0.9483 1.2086
## 7     Flagstaff to Carling 11.0  3000 1.4569 2.0344
## 8          Slieve Bearnagh  4.0  2690 0.6878 0.7992
## 9             Seven Sevens 18.9  8775 3.9028 5.9856
## 10         Lurig Challenge  4.0  1000 0.4347 0.5756

dbDisconnect(con)

## [1] TRUE

## Sec 14.5: Functions and operators -- Some Further Details 
##            Issues for the writing and use of functions 
## ss 14.5.1: Function arguments 
##                       The {args()} function 
args(write.table)  # Version 2.10.0 of R

## function (x, file = "", append = FALSE, quote = TRUE, sep = " ", 
##     eol = "\n", na = "NA", dec = ".", row.names = TRUE, col.names = TRUE, 
##     qmethod = c("escape", "double"), fileEncoding = "") 
## NULL

##                         The {...} argument 
x <- c(1,5,7); z <- c(4,9,10,NA); u <- 1:10 
# Now do calculations that use x, z and u 
rm(x, z, u) 

## ss 14.5.2: Character string and vector functions 
substring("abracadabra",3, 8) # Extract characters 3 to 8

## [1] "racada"

nchar("abracadabra")          # Count the number of characters

## [1] 11

strsplit("abracadabra", "r")  # Split wherever "r" appears

## [[1]]
## [1] "ab"     "acadab" "a"

strsplit("abcd", split="")    # Split into separate characters

## [[1]]
## [1] "a" "b" "c" "d"

paste("ab","c","d", sep="")   # Join together

## [1] "abcd"

paste(c("a","b","c"), collapse="")  # Join vector elements

## [1] "abc"

## ss 14.5.3: Anonymous functions 
ssfun <- function(x)sum(x^2) 
sapply(elastic1, ssfun)   # elastic1 is from the DAAG package

##  stretch distance 
##    16240   275748

sapply(elastic1, function(x)sum(x^2))

##  stretch distance 
##    16240   275748

growthfun <- function(x)(x[9] - x[1])/x[1] 
sapply(austpop[, -c(1,10)], growthfun)

##     NSW     Vic     Qld      SA      WA     Tas      NT     ACT 
##   2.295   2.268   3.980   2.364   4.876   1.456  36.400 102.333

sapply(austpop[, -c(1,10)], function(x)(x[9] - x[1])/x[1])

##     NSW     Vic     Qld      SA      WA     Tas      NT     ACT 
##   2.295   2.268   3.980   2.364   4.876   1.456  36.400 102.333

## ss 14.5.4: Functions for working with dates (and times) 
# Electricity Billing Dates 
dd <- as.Date(c("2003-08-24","2003-11-23","2004-02-22","2004-05-03")) 
diff(dd)

## Time differences in days
## [1] 91 91 71

as.Date("1/1/1960", format="%d/%m/%Y")

## [1] "1960-01-01"

as.Date("1:12:1960",format="%d:%m:%Y")

## [1] "1960-12-01"

as.Date("1960-12-1") - as.Date("1960-1-1")

## Time difference of 335 days

as.Date("31/12/1960","%d/%m/%Y")

## [1] "1960-12-31"

julian(as.Date("1/1/1970","%d/%m/%Y"))

## [1] 0
## attr(,"origin")
## [1] "1970-01-01"

julian(as.Date("1/1/2000","%d/%m/%Y"))

## [1] 10957
## attr(,"origin")
## [1] "1970-01-01"

dec1 <- as.Date("2004-12-1") 
format(dec1, format="%b %d %Y")

## [1] "Dec 01 2004"

format(dec1, format="%a %b %d %Y")

## [1] "Wed Dec 01 2004"

## Labeling of graph: data frame jobs (DAAG) 
startofmonth <- seq(from=as.Date("1Jan1995", format="%d%b%Y"),  
                    by="1 month", length=24) 
atdates <- seq(from=as.Date("1Jan1995", format="%d%b%Y"),  
               by="6 month", length=4) 
datelabs <- format(atdates, "%b%y") 
xyplot(BC+Alberta ~ startofmonth, data=jobs, outer=TRUE, 
       scales=list(x=list(at=atdates, labels=datelabs)), 
       auto.key=list(columns=2, between=1))

plot of chunk unnamed-chunk-1

dd <- as.Date(c("2003-08-24","2003-11-23","2004-02-22","2004-05-03")) 
weekdays(dd)

## [1] "Sunday" "Sunday" "Sunday" "Monday"

months(dd)

## [1] "August"   "November" "February" "May"

quarters(dd)

## [1] "Q3" "Q4" "Q1" "Q2"

## ss 14.5.5: Creating Groups 
library(MASS)

## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:DAAG':
## 
##     hills

catBP <- cut(Pima.tr2$bp, breaks=4) 
table(catBP, exclude=NULL)

## catBP
## (37.9,57]   (57,76]   (76,95]  (95,114]      <NA> 
##        22       170        87         8        13

## ss 14.5.6: Logical operators 
c(TRUE, TRUE, FALSE) & c(FALSE, TRUE, FALSE)

## [1] FALSE  TRUE FALSE

c(TRUE, TRUE, FALSE) && c(FALSE, TRUE, FALSE)

## [1] FALSE

##                  A vector form of {if} statement 
##                      Operators are functions 
"+"(2, 3)

## [1] 5

## Sec 14.6: Factors 
fac <- factor(c("c", "b", "a")) 
for (i in fac) print(i)

## [1] "c"
## [1] "b"
## [1] "a"

##                          Ordered factors 
stress.level <- rep(c("low","medium","high"), 2) 
ordf.stress <- ordered(stress.level, levels=c("low", "medium", 
"high")) 
ordf.stress

## [1] low    medium high   low    medium high  
## Levels: low < medium < high

ordf.stress < "medium"

## [1]  TRUE FALSE FALSE  TRUE FALSE FALSE

ordf.stress >= "medium"

## [1] FALSE  TRUE  TRUE FALSE  TRUE  TRUE

##                          Factor contrasts 
contr.treatment(3)

##   2 3
## 1 0 0
## 2 1 0
## 3 0 1

contr.sum(3)

##   [,1] [,2]
## 1    1    0
## 2    0    1
## 3   -1   -1

cities <- factor(c("Melbourne","Sydney","Adelaide")) 
contr.sum(cities)

##           [,1] [,2]
## Melbourne    1    0
## Sydney       0    1
## Adelaide    -1   -1

##      *Tests for main effects in the presence of interactions? 

## Sec 14.7: Missing Values 
x <- c(1, 6, 2, NA) 
is.na(x)           # TRUE for NAs, otherwise FALSE

## [1] FALSE FALSE FALSE  TRUE

x == NA            # All elements are set to NA

## [1] NA NA NA NA

NA == NA

## [1] NA

x <- c(1, NA, 6, 2, 10) 
x > 4 # The second element will be NA

## [1] FALSE    NA  TRUE FALSE  TRUE

x[x>4] # NB: This generates a vector of length 3

## [1] NA  6 10

## x[x > 4] <- c(101, 102) 

x[!is.na(x) & x > 4] <- c(101, 102) 
x

## [1]   1  NA 101   2 102

##      Counting and identifying { NA}s -- the use of {table()} 
with(nswdemo, table(trt, re74>0, useNA="ifany"))

##    
## trt FALSE TRUE <NA>
##   0   195   65  165
##   1   131   54  112

fac_re74 <- with(nswdemo, factor(re74>0, exclude=NULL)) 
levels(fac_re74)

## [1] "FALSE" "TRUE"  NA

levels(fac_re74) <- c("0", "gt0", "<NA>") 
with(nswdemo, table(trt, fac_re74))

##    fac_re74
## trt   0 gt0 <NA>
##   0 195  65  165
##   1 131  54  112

##                         The removal of NAs 
##                    {NA}s in modeling functions 
options()$na.action # Version 2.10.0, following startup

## [1] "na.omit"

##             Sorting and ordering, where there are NAs 
x <- c(1, 20,  2, NA, 22) 
order(x)           # By default, na.last=TRUE

## [1] 1 3 2 5 4

x[order(x)]

## [1]  1  2 20 22 NA

sort(x)            # By default na.last=NA

## [1]  1  2 20 22

sort(x, na.last=TRUE)

## [1]  1  2 20 22 NA

## Sec 14.8: * Matrices and Arrays 
xx <- matrix(1:6, ncol=3)  # Equivalently, enter 
xx

##      [,1] [,2] [,3]
## [1,]    1    3    5
## [2,]    2    4    6

dim(xx)

## [1] 2 3

## Use as.vector() 
x <- as.vector(xx) 
## Alternatively, directly remove the dimension attribute 
x <- xx 
dim(x) <- NULL 

##                   The extraction of submatrices 
x34 <- matrix(1:12, ncol=4) 
x34

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12

x34[2:3, c(1,4)]     # Extract rows 2 & 3 and columns 1 & 4

##      [,1] [,2]
## [1,]    2   11
## [2,]    3   12

x34[-2, ]            # Extract all rows except the second

##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    3    6    9   12

x34[-2, -3]          # Omit row 2 and column 3

##      [,1] [,2] [,3]
## [1,]    1    4   10
## [2,]    3    6   12

x34[2, ]                # The dimension attribute is dropped

## [1]  2  5  8 11

x34[2, , drop=FALSE]    # Retain the dimension attribute

##      [,1] [,2] [,3] [,4]
## [1,]    2    5    8   11

##         Conversion of data frames and tables into matrices 
## ss 14.8.1: Matrix arithmetic 
## Set up example matrices G, H and B 
G <- matrix(1:12, nrow=4); H <- matrix(112:101, nrow=4); B <- 1:3 
G + H             # Element-wise addition (X & Y both n by m)

##      [,1] [,2] [,3]
## [1,]  113  113  113
## [2,]  113  113  113
## [3,]  113  113  113
## [4,]  113  113  113

G * H             # Element-wise multiplication

##      [,1] [,2] [,3]
## [1,]  112  540  936
## [2,]  222  642 1030
## [3,]  330  742 1122
## [4,]  436  840 1212

G %*% B           # Matrix multiplication (X is n by k; B is k by m)

##      [,1]
## [1,]   38
## [2,]   44
## [3,]   50
## [4,]   56

## Set up example matrices X (square, full rank) and Y 
X <- matrix(c(1,1,1, -1,0,1, 2,4,2), nrow=3) 
Y <- matrix(1:6, nrow=3) 
solve(X, Y)       # Solve XB = Y (X must be square)

##      [,1] [,2]
## [1,]    2    5
## [2,]    1    1
## [3,]    0    0

##                      Computational Efficiency 
xy <- matrix(rnorm(1500000),ncol=50) 
dim(xy)

## [1] 30000    50

system.time(xy+1)  # user and system are processor times

##    user  system elapsed 
##   0.002   0.001   0.002

xy.df <- data.frame(xy) 
system.time(xy.df+1)

##    user  system elapsed 
##   0.092   0.005   0.098

## ss 14.8.2: Outer products 
outer(1:4, 1:10)

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]    1    2    3    4    5    6    7    8    9    10
## [2,]    2    4    6    8   10   12   14   16   18    20
## [3,]    3    6    9   12   15   18   21   24   27    30
## [4,]    4    8   12   16   20   24   28   32   36    40

rbgshades <- outer(c("red","blue","green"), c("",paste(1:4)), 
                   function(x,y)paste(x,y, sep="")) 
rbgshades          # Display the matrix

##      [,1]    [,2]     [,3]     [,4]     [,5]    
## [1,] "red"   "red1"   "red2"   "red3"   "red4"  
## [2,] "blue"  "blue1"  "blue2"  "blue3"  "blue4" 
## [3,] "green" "green1" "green2" "green3" "green4"

plot(rep(0:4, rep(3,5)), rep(1:3, 5), col=rbgshades, pch=15, cex=8)

plot of chunk unnamed-chunk-1

## ss 14.8.3: Arrays 
x <- 1:24 
dim(x) <- c(2, 12); print(x)

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,]    1    3    5    7    9   11   13   15   17    19    21    23
## [2,]    2    4    6    8   10   12   14   16   18    20    22    24

dim(x) <- c(3, 4, 2); print(x)

## , , 1
## 
##      [,1] [,2] [,3] [,4]
## [1,]    1    4    7   10
## [2,]    2    5    8   11
## [3,]    3    6    9   12
## 
## , , 2
## 
##      [,1] [,2] [,3] [,4]
## [1,]   13   16   19   22
## [2,]   14   17   20   23
## [3,]   15   18   21   24

## Sec 14.9: Manipulations with Lists, Data Frames, Matrices and Time Series 
## ss 14.9.1: Lists -- an extension of the notion of ``vector'' 
zz <- list("Shireen", "Peter", c("Luke","Amelia","Ted"), c(8,4,0)) 
zz[c(3,1)]

## [[1]]
## [1] "Luke"   "Amelia" "Ted"   
## 
## [[2]]
## [1] "Shireen"

 zz[c(3,1)]

## [[1]]
## [1] "Luke"   "Amelia" "Ted"   
## 
## [[2]]
## [1] "Shireen"

## Return list whose only element is the vector c("Luke","Amelia") 
zz[3]

## [[1]]
## [1] "Luke"   "Amelia" "Ted"

## Return the vector c("Luke","Amelia","Ted") 
zz[[3]]

## [1] "Luke"   "Amelia" "Ted"

duo <- list(family="Braun", names=c("Matthew","Phillip"),  
            ages=c(14,9)) 
duo[["names"]]      # Alternatively, specify duo$names

## [1] "Matthew" "Phillip"

##                  The dual identity of data frames 
xyz <- data.frame(x=1:4, y=11:14, z=I(letters[1:4])) 

xyz[1, ]            # Returns a data frame, i.e., a list)

##   x  y z
## 1 1 11 a

xyz[1, , drop=TRUE]

## $x
## [1] 1
## 
## $y
## [1] 11
## 
## $z
## [1] "a"

unlist(xyz[1, ])    # Returns, here, a vector of atomic mode

##    x    y    z 
##  "1" "11"  "a"

unlist(xyz[1, , drop=TRUE])

##    x    y    z 
##  "1" "11"  "a"

## ss 14.9.2: Changing the shape of data frames (or matrices) 
##                        Melting and casting 
library(reshape) 
Jobs <- melt(jobs, measure.vars=names(jobs)[1:6],  
             variable_name="Region", id.vars="Date") 
head(Jobs, 3)

##    Date Region value
## 1 95.00     BC  1752
## 2 95.08     BC  1737
## 3 95.17     BC  1765

jobsBack <- cast(Jobs, Date ~ Region) 
head(jobsBack,3)

##    Date   BC Alberta Prairies Ontario Quebec Atlantic
## 1 95.00 1752    1366      982    5239   3196      947
## 2 95.08 1737    1369      981    5233   3205      946
## 3 95.17 1765    1380      984    5212   3191      954

## ss 14.9.3: * Merging data frames -- {merge()} 
new.Cars93 <- merge(x=Cars93, y=Cars93.summary[, "abbrev", drop=F], 
                    by.x="Type", by.y="row.names") 

## ss 14.9.4: Joining data frames, matrices and vectors -- {cbind()} 
##          Conversion of tables and arrays into data frames 
## ss 14.9.5: The {apply} family of functions 
##                      The {tapply()} function 
## Compare tapply() with aggregate(): data frame kiwishade (DAAG) 
with(kiwishade, tapply(yield, INDEX=list(block, shade), FUN=mean))

##         none Aug2Dec Dec2Feb Feb2May
## east   99.03   105.6   88.92   95.88
## north 104.03   103.6   93.70   92.03
## west   97.56   100.5   87.15   90.41

with(kiwishade, aggregate(yield, by=list(block, shade), FUN=mean))

##    Group.1 Group.2      x
## 1     east    none  99.03
## 2    north    none 104.03
## 3     west    none  97.56
## 4     east Aug2Dec 105.56
## 5    north Aug2Dec 103.59
## 6     west Aug2Dec 100.55
## 7     east Dec2Feb  88.92
## 8    north Dec2Feb  93.70
## 9     west Dec2Feb  87.15
## 10    east Feb2May  95.88
## 11   north Feb2May  92.03
## 12    west Feb2May  90.41

##                       The {apply()} function 
##              The functions {lapply()} and {sapply()} 
## Uses data frame rainforest (DAAG) 
sapply(rainforest[, -7], range, na.rm=TRUE)

##      dbh wood bark root rootsk branch
## [1,]   4    3    8    2    0.3      4
## [2,]  56 1530  105  135   24.0    120

## ss 14.9.6: Splitting vectors and data frames into lists -- {split()} 
with(Cars93.summary, split(No.of.cars, Max.passengers))

## $`4`
## [1] 14
## 
## $`5`
## [1] 21
## 
## $`6`
## [1] 16 11 22
## 
## $`8`
## [1] 9

## Split dataframe by Max.passengers (2nd column) 
split(Cars93.summary[, -2], Cars93.summary[, 2])

## $`4`
##        Min.passengers No.of.cars abbrev
## Sporty              2         14     Sp
## 
## $`5`
##       Min.passengers No.of.cars abbrev
## Small              4         21     Sm
## 
## $`6`
##         Min.passengers No.of.cars abbrev
## Compact              4         16      C
## Large                6         11      L
## Midsize              4         22      M
## 
## $`8`
##     Min.passengers No.of.cars abbrev
## Van              7          9      V

## ss 14.9.7: Multivariate time series 
jobts <- ts(jobs[,1:6], start=1995, frequency=12) 
colnames(jobts)

## [1] "BC"       "Alberta"  "Prairies" "Ontario"  "Quebec"   "Atlantic"

## Subseries through to the third month of 1995 
window(jobts, end=1995+2/12)

##            BC Alberta Prairies Ontario Quebec Atlantic
## Jan 1995 1752    1366      982    5239   3196      947
## Feb 1995 1737    1369      981    5233   3205      946
## Mar 1995 1765    1380      984    5212   3191      954

 # Rows are 1995+0/12, 1990+1/12, 1990+2/12 

plot(jobts, plot.type="single")     # Use one panel for all

plot of chunk unnamed-chunk-1

plot(jobts, plot.type="multiple")   # Separate panels.

plot of chunk unnamed-chunk-1

## Alternative code for Figure 2.9A; plot as time series 
plot(jobts, plot.type="single", xlim=c(1995,1997.2), lty=1:6, log="y", 
     xaxt="n", xlab="", ylab="Number of Jobs") 
## Move label positions so that labels do not overlap 
ylast <- bounce(window(jobts, 1996+11/12), d=strheight("O"), log=TRUE) 
 # bounce() is from DAAG 
text(rep(1996+11/12,6), ylast, colnames(ylast), pos=4) 
datlab <- format(seq(from=as.Date("1Jan1995", format="%d%b%Y"), by="3 month",  
                 length=8), "%b%Y") 
axis(1, at=seq(from=1995, by=0.25, length=8), datlab)

plot of chunk unnamed-chunk-1

## Sec 14.10: Classes and Methods 
##                       S3 methods and classes 
print

## function (x, ...) 
## UseMethod("print")
## <bytecode: 0x10094e078>
## <environment: namespace:base>

## ss 14.10.1: Printing and summarizing model objects 
elastic.lm <- lm(distance ~ stretch, data=elasticband) 
elastic.sum <- summary(elastic.lm) 

## ss 14.10.2: Extracting information from model objects 
names(elastic.lm)

##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"

elastic.lm$coefficients

## (Intercept)     stretch 
##     -63.571       4.554

elastic.lm[["coefficients"]]

## (Intercept)     stretch 
##     -63.571       4.554

elastic.lm[[1]]

## (Intercept)     stretch 
##     -63.571       4.554

coef(elastic.lm)

## (Intercept)     stretch 
##     -63.571       4.554

x <- 1:5 
class(x) <- "lm"   # Inappropriate assignment of class 
## print(x) 

## ss 14.10.3: S4 classes and methods 
library(lme4)

## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following object is masked from 'package:reshape':
## 
##     expand
## 
## Loading required package: Rcpp

hp.lmList <- lmList(o2 ~ wattsPerKg | id, data=humanpower1) 
slotNames(hp.lmList)

## [1] ".Data" "call"  "pool"

slot(hp.lmList, "call")

## lmList(formula = o2 ~ wattsPerKg | id, data = humanpower1)

hp.lmList@call

## lmList(formula = o2 ~ wattsPerKg | id, data = humanpower1)

## Sec 14.11: Manipulation of Language Constructs 
## ss 14.11.1: Model and graphics formulae 
plot.mtcars <- function(xvar="disp", yvar="hp"){ 
   mt.form <- paste(yvar, "~", xvar) 
   plot(formula(mt.form), data=mtcars) 
   } 
## Plot using data frame mtcars (datasets) 
names(mtcars)

##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"

plot.mtcars()

plot of chunk unnamed-chunk-1

plot.mtcars(xvar="disp", yvar="mpg")

plot of chunk unnamed-chunk-1

##         Extraction of Variable Names from Formula Objects 
all.vars(mpg ~ disp)

## [1] "mpg"  "disp"

plot.mtcars <- function(form = mpg~disp){ 
   ## Include information that allows a meaningful label 
   mtcars.info <- c(mpg= "Miles/(US) gallon",  
                    cyl= "Number of cylinders",  
                    disp= "Displacement (cu.in.)",  
                    hp= "Gross horsepower",  
                    drat= "Rear axle ratio",  
                    wt= "Weight (lb/1000)",  
                    qsec= "1/4 mile time",  
                    vs= "V/S",  
                    am= "Transmission (0 = automatic, 1 = manual)",  
                    gear= "Number of forward gears",  
                    carb= "Number of carburettors") 
   xlab <- mtcars.info[all.vars(form)[1]] 
   ylab <- mtcars.info[all.vars(form)[2]] 
   plot(form, xlab=xlab, ylab=ylab, data=mtcars) 
} 

## ss 14.11.2: The use of a list to pass arguments 
mean(rnorm(20))

## [1] 0.2687

do.call("mean", args=list(x=rnorm(20)))

## [1] 0.1845

simulate.distribution <- 
  function(distn="weibull", params=list(n=10)){  
    ## Simulates one of: weibull, gaussian, logistic 
    if(distn=="weibull" & !("shape" %in% names(params))) 
      params <- c(shape=1, params) 
    ## weibull requires a default shape argument.  
    ## =====================================================  
    ## Choose the function that will generate the random sample  
    rfun <- switch(distn,   
                   weibull = rweibull,  
                   normal = rnorm,  
                   logistic= rlogis)  
    ## Call rfun(). Use of do.call() makes it possible to give 
    ## the argument list as a list of named values.  
    ## Pass shape argument (or NULL), plus n = # of numbers 
    do.call("rfun", args=params)  
  } 

simulate.distribution()

##  [1] 0.7738 0.6918 0.5466 1.5960 0.1976 1.4604 1.4008 0.6528 2.8480 0.2019

plot(density(simulate.distribution("normal", params=list(n=100))))

plot of chunk unnamed-chunk-1

plot(density(simulate.distribution("weibull",  
             params=list(n=100, scale=0.5))))

plot of chunk unnamed-chunk-1

mean.call <- call("mean", x=rnorm(5)) 
eval(mean.call)

## [1] 0.3112

eval(mean.call)

## [1] 0.3112

mean.call

## mean(x = c(1.42770546085013, -0.200464045575479, 1.27272152961708, 
## -0.660698809687278, -0.283365884082554))

## ss 14.11.3: Expressions 
local(a+b*x+c*x^2, envir=list(x=1:4, a=3, b=5, c=1))

## [1]  9 17 27 39

## ss 14.11.4: Environments 
test <- function()as.character(sys.call())  

test()

## [1] "test"

newtest <- test    # Create a copy with a different name 
newtest()

## [1] "newtest"

##       Automatic naming of a file that holds function output 
hcopy <- 
    function(width=2.25, height=2.25, pointsize=8){ 
        funtxt <- sys.call(1) 
        fnam <- paste(funtxt, ".pdf", sep="") 
        print(paste("Output is to the file '", fnam, "'", sep="")) 
        pdf(file=fnam, width=width, height=height, pointsize= 
            pointsize) 
    } 

fig1 <- function(){ 
    hcopy()             # Call with default arguments 
    curve(sin, -pi, 2*pi) 
    dev.off() 
} 

## ss 14.11.5: Function environments, and lazy evaluation 
##                          Lazy evaluation 
lazyfoo <- function(x=4, y = x^2)y 
lazyfoo()

## [1] 16

lazyfoo(x=3)

## [1] 9

# lazyfoo(y=x^3)     # We specify a value for y in the function call 
# x <- 9             # Now, in the parent environment, x=9 
# lazyfoo(y=x^3) 

##  Example -- a function that identifies objects added during asession 
dsetnames <- objects() 

additions <- function(objnames = dsetnames){ 
    newnames <- objects(envir=.GlobalEnv) 
    existing <- newnames %in% objnames 
    newnames[!existing] 
    } 

additions(dsetnames)

## [1] "additions" "dsetnames"

## Sec 14.12: *Creation of R Packages 
system.file("misc/ViewTemps.RData", package="DAAG")

## [1] "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/DAAG/misc/ViewTemps.RData"

## package.skeleton(name = "ViewTemps") 

##                             Namespaces 

## Sec 14.13: Document Preparation --- {Sweave()} and {xtable()} 
## Sweave("sec1-1.Rnw", keep.source=TRUE)    
  # actually, Sweave("sec1-1") is sufficient 
  # The argument keep.source=TRUE preserves the code layout 
  # and ensures that comments are retained. 

R.home(component="share/texmf/Sweave.sty")

## [1] "/Library/Frameworks/R.framework/Resources/share/texmf/Sweave.sty"

## Sec 14.14: Further Reading 
citation()

## 
## To cite R in publications use:
## 
##   R Core Team (2014). R: A language and environment for
##   statistical computing. R Foundation for Statistical Computing,
##   Vienna, Austria. URL http://www.R-project.org/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {R: A Language and Environment for Statistical Computing},
##     author = {{R Core Team}},
##     organization = {R Foundation for Statistical Computing},
##     address = {Vienna, Austria},
##     year = {2014},
##     url = {http://www.R-project.org/},
##   }
## 
## We have invested a lot of time and effort in creating R, please
## cite it when using it for data analysis. See also
## 'citation("pkgname")' for citing R packages.

citation("DAAG")

## 
## To cite package 'DAAG' in publications use:
## 
##   John H. Maindonald and W. John Braun (2014). DAAG: Data Analysis
##   And Graphics data and functions. R package version 1.20.
##   http://CRAN.R-project.org/package=DAAG
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {DAAG: Data Analysis And Graphics data and functions},
##     author = {John H. Maindonald and W. John Braun},
##     year = {2014},
##     note = {R package version 1.20},
##     url = {http://CRAN.R-project.org/package=DAAG},
##   }
## 
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.

##                             Vignettes 
vignette()           # All vignettes in all installed packages 

## vignette(package="grid") 

## vignette("viewports")    # Equivalent to vignette(topic="viewports") 

## Use of split() and sapply(): data frame science (DAAG) 
with(science, sapply(split(school, PrivPub), 
                     function(x)length(unique(x))))

## private  public 
##      12      29

### Thanks to Markus Hegland (ANU), who wrote the initial version 
##1 Generate the data 
  cat("generate data \n")

## generate data

  n <- 800           # length of noise vector 
  m <- 100           # length of signal vector 
  xsignal <- runif(m) 
  sig <- 0.01 
  enoise <- rnorm(m)*sig 
  ysignal <- xsignal**2+enoise 
  maxys <- max(ysignal) 
  minys <- min(ysignal) 
  x <- c(runif(n), xsignal) 
  y <- c(runif(n)*(maxys-minys)+minys, ysignal) 
  # random permutation of the data vectors 
  iperm <- sample(seq(x)) 
  x <- x[iperm] 
  y <- y[iperm] 
  # normalize the data, i.e., scale x & y values to 
  # lie between 0 & 1 
  xn <- (x - min(x))/(max(x) - min(x)) 
  yn <- (y - min(y))/(max(y) - min(y)) 
##1 End 
 
##2 determine number of neighbors within 
# a distance  <= h = 1/sqrt(length(xn)) 
  nx <- length(xn) 
  # determine distance matrix 
  d <- sqrt( (matrix(xn, nx, nx) - t(matrix(xn, nx, nx)) )**2 + 
             (matrix(yn, nx, nx) - t(matrix(yn, nx, nx)) )**2 ) 
  h <- 1/sqrt(nx) 
  nnear <- apply(d <= h, 1, sum) 
##2 End 
 
##3 Plot data, with reconstructed signal overlaid. 
  cat("produce plots \n")

## produce plots

  # identify the points which have many such neighbors 
  ns <- 8 
  plot(x,y) 
  points(x[nnear > ns], y[nnear > ns], col="red", pch=16)

##3 End 

n <- 10000; system.time(sd(rnorm(n)))

##    user  system elapsed 
##   0.001   0.000   0.001

library(plyr)

## 
## Attaching package: 'plyr'
## 
## The following objects are masked from 'package:reshape':
## 
##     rename, round_any
## 
## The following object is masked from 'package:DAAG':
## 
##     ozone

aaply(.data=UCBAdmissions, .margins=1:2, .fun=sum)

##           Gender
## Admit      Male Female
##   Admitted 1198    557
##   Rejected 1493   1278

adply(.data=UCBAdmissions, .margins=1:3, .fun=identity)

##       Admit Gender Dept  V1
## 1  Admitted   Male    A 512
## 2  Rejected   Male    A 313
## 3  Admitted Female    A  89
## 4  Rejected Female    A  19
## 5  Admitted   Male    B 353
## 6  Rejected   Male    B 207
## 7  Admitted Female    B  17
## 8  Rejected Female    B   8
## 9  Admitted   Male    C 120
## 10 Rejected   Male    C 205
## 11 Admitted Female    C 202
## 12 Rejected Female    C 391
## 13 Admitted   Male    D 138
## 14 Rejected   Male    D 279
## 15 Admitted Female    D 131
## 16 Rejected Female    D 244
## 17 Admitted   Male    E  53
## 18 Rejected   Male    E 138
## 19 Admitted Female    E  94
## 20 Rejected Female    E 299
## 21 Admitted   Male    F  22
## 22 Rejected   Male    F 351
## 23 Admitted Female    F  24
## 24 Rejected Female    F 317

library(DAAG) 
daply(.data=tinting, .variables=c("sex","agegp"), .fun=length)

##    agegp
## sex younger older
##   f       9     9
##   m       9     9

##                             References 
##                   References for further reading 
##                             References 
##                   References for further reading

advanced.R

johnm1

Sat Sep 27 19:21:18 2014