###################################################
### chunk number 1:  eval=FALSE
###################################################
## options(SweaveHooks=list(fig=function(){par(cex.main=1.1,
##                          mar=c(4.1,4.1,2.6,2.1),
##                          mgp=c(2.25,0.5,0), tck=-0.02)}))
## graphics.off()
## x11(width=8, height=4.25)
## x11(width=8, height=8)


###################################################
### chunk number 2: extract-test eval=FALSE
###################################################
## attach("covtype.RData")
## covtrain <- covtype[1:11340,]
## covtest <- covtype[11340+(1:3780),]
## every50th <- seq(from=11340+3780+1, to=581012, by=50)
## covsample <-  covtype[every50th, ]


###################################################
### chunk number 3: save-dsets eval=FALSE
###################################################
## save(covtrain, file="covtrain.RData")
## rm(covtrain)
## save(covtest, file="covtest.RData")
## rm(covtest)
## save(covsample, file="covsample.RData")
## rm(covsample)


###################################################
### chunk number 4: attach-images eval=FALSE
###################################################
## attach("covtrain.RData")
## attach("covtest.RData")
## attach("covsample.RData")


###################################################
### chunk number 5: info-freqs eval=FALSE
###################################################
## options(digits=3)
## tab.all <- table(covtype$V55)
## tab.train <- table(covtrain$V55)
## tab.test <- table(covtest$V55)
## tab.sample <- table(covsample$V55)
## tab.all/sum(tab.all)
## tab.sample/sum(tab.sample)
## tab.train/sum(tab.train)
## tab.test/sum(tab.test)


###################################################
### chunk number 6: varying-props eval=FALSE
###################################################
## library(splines)
## runningprops <- function(df=covtrain, type=1){
##   n <- dim(df)[1]
##   propthru <- (1:n)/(n+1)
##   occurs <- as.integer(df$V55==type)
##   print(table(occurs))
##   cov.glm <- glm(occurs ~ bs(propthru,6), family=binomial)  
##   hat <- predict(cov.glm, type="response")
##   cbind(propthru, hat)
## }
## hat.train <- runningprops()
## hat.test <- runningprops(df=covtest)
## hat.sample <- runningprops(df=covsample)
## print(range(c(hat.train[,2], hat.test[,2], hat.sample[,2])))


###################################################
### chunk number 7: running-occurrence-plots eval=FALSE
###################################################
## plot(hat.train[,1], hat.train[,2], ylim=c(0,0.65))
## lines(hat.test[,1], hat.test[,2], col=2)
## lines(hat.sample[,1], hat.sample[,2], col=3)


###################################################
### chunk number 8: fit-trees eval=FALSE
###################################################
## library(rpart)
## train.rpart <- rpart(V55 ~ ., data=covtrain, cp=0.0001, method="class")
## train.rpart <- prune(train.rpart, cp=0.0048)
## trainhat.train <- xpred.rpart(train.rpart, cp=0.0048)
## testhat.train <- predict(train.rpart, newdata=covtest, type="class")
## samplehat.train <- predict(train.rpart, newdata=covsample, type="class")


###################################################
### chunk number 9: acc-fun eval=FALSE
###################################################
## errs.fun <- function(obs, predicted){
## tab <- table(obs, predicted)
## grosserr <- 1-sum(tab[row(tab)==col(tab)])/sum(tab)
## errs <- 1-tab[row(tab)==col(tab)]/apply(tab,1,sum)
## names(errs) <- paste(1:length(errs))
## print("Overall error rate (%)")
## print(round(100*grosserr,1))
## print("Error rate (%), broken down by forest cover type")
## print(round(100*errs,2))
## cat("\n")
## invisible(errs)
## }


###################################################
### chunk number 10: accuracies eval=FALSE
###################################################
## errs.fun(covtrain$V55, trainhat.train)
## errs.fun(covtest$V55, testhat.train)
## errs.fun(covsample$V55, samplehat.train)


###################################################
### chunk number 11: sample-fit eval=FALSE
###################################################
## sample.rpart <- rpart(V55 ~ ., data=covsample, cp=0.0001, method="class")
## sample.rpart <- prune(sample.rpart, cp=0.001)
## samplehat.sample <- xpred.rpart(sample.rpart, cp=0.001)
## samplehat.sample <- factor(samplehat.sample, levels=1:7)
## trainhat.sample <- predict(sample.rpart, newdata=covtrain, type="class")
## testhat.sample <- predict(sample.rpart, newdata=covtest, type="class")
## errs.fun(covtrain$V55, trainhat.sample)
## errs.fun(covtest$V55, testhat.sample)
## errs.fun(covsample$V55, samplehat.sample)


###################################################
### chunk number 12: randomForest eval=FALSE
###################################################
## library(randomForest)
## xcovtrain <- as(covtrain[,1:54], "matrix")
## ycovtrain <- covtrain[,55]
## xsampletrain <- as(covsample[,1:54], "matrix")
## ysampletrain <- covsample[,55]
## ycovtrain <- factor(covtrain[,55])
## ysampletrain <- factor(covsample[,55])
## covtrain.rf <- randomForest(x=xcovtrain, y=ycovtrain,
##                             xtest=xsampletrain, ytest=ysampletrain)
## covtrain.rf