###################################################
### chunk number 1: 
###################################################
## Register a Sweave hook named "fig"; Sweave runs it for code chunks that
## have the option fig=TRUE, so every figure gets these par() settings.
options(
  SweaveHooks = list(
    fig = function() {
      par(
        cex.main = 1.1,
        mar = c(4.1, 4.1, 2.6, 2.1),
        mgp = c(2.25, 0.5, 0),
        tck = -0.02
      )
    }
  )
)
## graphics.off()
## Open two X11 graphics windows: a wide one (8 x 4.25) and then a square
## one (8 x 8). The device opened last is the active one.
x11(width = 8, height = 4.25)
x11(width = 8, height = 8)


###################################################
### chunk number 2: Pima-rpart
###################################################
## MASS supplies the Pima.tr data frame; rpart supplies the tree code.
library(MASS)
library(rpart)
## Grow a classification tree for `type`, using every other column of
## Pima.tr as a candidate predictor.
Pima.rpart <- rpart(formula = type ~ ., data = Pima.tr, method = "class")
## Plot the complexity-parameter (cp) table of the fit.
plotcp(Pima.rpart)


###################################################
### chunk number 3: Pima-opt
###################################################
## Cut the full tree back at complexity parameter cp = 0.037 and keep the
## result under its own name, leaving Pima.rpart untouched.
Pima.rpart4 <- prune(tree = Pima.rpart, cp = 0.037)


###################################################
### chunk number 4: Pima-treeplot
###################################################
## Draw the pruned tree, then annotate it.
plot(Pima.rpart4) # NB: plot(), not plotcp()
text(Pima.rpart4) # Adds the labels to the tree drawn above


###################################################
### chunk number 5: printcp
###################################################
## Print the complexity-parameter table of the unpruned fit.
printcp(x = Pima.rpart)


###################################################
### chunk number 6: Pima-rpart
###################################################
library(rpart)
## Refit the classification tree for `type` on Pima.tr (same call as the
## earlier Pima-rpart chunk) and plot its complexity-parameter table.
Pima.rpart <- rpart(formula = type ~ ., data = Pima.tr, method = "class")
plotcp(Pima.rpart)


###################################################
### chunk number 7: Pima-accs
###################################################
## printcp() prints the cp table; its value is captured here as a matrix so
## that individual columns can be picked out by name.
errmat <- printcp(Pima.rpart)
colnames(errmat)        # Hints at what will come next
## The "rel error" and "xerror" columns are multiplied by the root node
## error to put them on an absolute scale. The root node error is taken from
## the fit itself (root-node `dev` over root-node `n`, the ratio printcp()
## reports) instead of a hard-coded 0.3327, which disagreed with the 0.34
## used for the same Pima.tr fit in the Pima-hat chunk.
root.err <- Pima.rpart$frame$dev[1] / Pima.rpart$frame$n[1]
## NB: despite the names, both vectors hold 1 - error, i.e. accuracies.
resub.err <- 1 - root.err * errmat[, "rel error"]
cv.err <- 1 - root.err * errmat[, "xerror"]


###################################################
### chunk number 8: confusion
###################################################
## Prune at cp = 0.037 and store the result as Pima.rpart4, the object used
## on the next line. (Previously the pruned tree was assigned to Pima.rpart,
## which overwrote the full tree and was never used afterwards.)
Pima.rpart4 <- prune(Pima.rpart, cp = 0.037)
## Cross-validated predictions at the same cp value.
cvhat <- xpred.rpart(Pima.rpart4, cp = 0.037)
## Rows: actual type; columns: cross-validated prediction.
tab <- table(Pima.tr$type, cvhat)
## Convert each row of counts to proportions of that row's total.
confusion <- rbind(tab[1, ] / sum(tab[1, ]), tab[2, ] / sum(tab[2, ]))
## NOTE(review): labels assume row/column order is No then Yes -- confirm
## against levels(Pima.tr$type) and the codes returned by xpred.rpart().
dimnames(confusion) <- list(ActualType = c("No", "Yes"),
                            PredictedType = c("No", "Yes"))
print(confusion)


###################################################
### chunk number 9: trPima-rpart
###################################################
## Classification tree for `type` fitted to the training data Pima.tr,
## followed by the plot and the printout of its complexity-parameter table.
trPima.rpart <- rpart(formula = type ~ ., data = Pima.tr, method = "class")
plotcp(trPima.rpart)
printcp(trPima.rpart)


###################################################
### chunk number 10: select-cp
###################################################
## Refit the training-data tree, then derive one pruning cp per table row.
trPima.rpart <- rpart(formula = type ~ ., data = Pima.tr, method = "class")
## CP column of the complexity-parameter table (printcp() also prints it).
cp.all <- printcp(trPima.rpart)[, "CP"]
n <- length(cp.all)
## Geometric mean of each CP value and the one in the row above it; the
## first row has no predecessor, so Inf is used and cp.all[1] becomes Inf.
cp.all <- sqrt(cp.all * c(Inf, head(cp.all, -1)))
## Tree size for each row: number of splits plus one.
nsize <- 1 + printcp(trPima.rpart)[, "nsplit"]


###################################################
### chunk number 11: Pima-hat
###################################################
## Cross-validated error from the cp table, rescaled by 0.34
## (presumably the root node error of the Pima.tr fit -- confirm).
tr.cverr <- 0.34 * printcp(trPima.rpart)[, "xerror"]
n <- length(cp.all)
## Error rate on the separate data set Pima.te, one entry per cp value.
te.cverr <- numeric(n)
## Work from the last cp value back to the first. Pruning is cumulative:
## each pass prunes the tree left by the previous pass.
trPima0.rpart <- trPima.rpart
for (i in rev(seq_len(n))) {
  trPima0.rpart <- prune(trPima0.rpart, cp = cp.all[i])
  hat <- predict(trPima0.rpart, newdata = Pima.te, type = "class")
  tab <- table(hat, Pima.te$type)
  ## Misclassification rate: one minus the share of counts on the diagonal.
  te.cverr[i] <- 1 - sum(diag(tab)) / sum(tab)
}


###################################################
### chunk number 12: newdata-svm eval=FALSE
###################################################
## library(e1071)
## library(MASS)
## trPima.svm <- svm(type ~ ., data=Pima.tr)
## hat <- predict(trPima.svm, newdata=Pima.te)
## tab <- table(Pima.te$type, hat)
## 1-sum(tab[row(tab)==col(tab)])/sum(tab)
## confusion.svm <- rbind(tab[1,]/sum(tab[1,]), tab[2,]/sum(tab[2,]))
## print(confusion.svm)


###################################################
### chunk number 13: Pima-rf
###################################################
library(randomForest)
## Random forest for `type` fitted to Pima.tr. Pima.te is passed in the same
## call as test data: xtest holds its predictors, ytest its `type` column.
Pima.rf <- randomForest(
  type ~ .,
  data = Pima.tr,
  xtest = Pima.te[, -8],  # drops column 8 (presumably `type` -- confirm)
  ytest = Pima.te$type
)
## Auto-print the fitted forest.
Pima.rf


###################################################
### chunk number 14: Pima-1
###################################################
## Three forests for `type` on Pima.tr: one with default settings and two
## with different class weights.
## randomForest() has no `method` argument (that belongs to rpart()); the
## former method="class" was silently absorbed by `...` and had no effect,
## so it is omitted. Classification follows from the response being a factor.
Pima.rf <- randomForest(type ~ ., data = Pima.tr)
## classwt: one weight per class; the order presumably follows
## levels(Pima.tr$type) -- confirm.
Pima.rf.4 <- randomForest(type ~ ., data = Pima.tr, classwt = c(0.6, 0.4))
Pima.rf.1 <- randomForest(type ~ ., data = Pima.tr, classwt = c(0.9, 0.1))


###################################################
### chunk number 15: rf-scatter
###################################################
## Refit with proximity=TRUE so the forest stores the proximity matrix.
Pima.rf <- randomForest(type ~ ., data = Pima.tr, proximity = TRUE)
## Called without newdata; the result is used as a list with a
## `proximity` component.
Pima.prox <- predict(Pima.rf, proximity = TRUE)
## Classical multidimensional scaling of the dissimilarities 1 - proximity:
## default number of dimensions for Pima.cmd, three for Pima.cmd3.
Pima.cmd <- cmdscale(1 - Pima.prox$proximity)
Pima.cmd3 <- cmdscale(1 - Pima.prox$proximity, k = 3)
library(lattice)
## 3-D scatterplot of the three MDS coordinates, coloured by `type`.
## The formula previously used column 2 twice, so the third coordinate was
## never plotted; it now uses columns 1, 2 and 3.
cloud(Pima.cmd3[, 1] ~ Pima.cmd3[, 2] * Pima.cmd3[, 3], groups = Pima.tr$type)


###################################################
### chunk number 16: vary-sampsize
###################################################
## sampsize gives one sample size per class of `type`.
## Baseline: 132 and 68 (presumably the class counts in Pima.tr, so the
## sample keeps the observed class proportions -- confirm). The original
## comment labelled this "Default".
randomForest(type ~ ., data = Pima.tr, sampsize = c(132, 68))
## Simulate a prior that is close to 0.8:0.2  (132:33)
randomForest(type ~ ., data = Pima.tr, sampsize = c(132, 33))
  # Notice the dramatically increased accuracy for the No's
## Simulate a prior that is close to 0.2:0.8  (17:68)
## (the comment previously said 0.1:0.9, which does not match 17:68)
randomForest(type ~ ., data = Pima.tr, sampsize = c(17, 68))
  # Notice the dramatically increased accuracy for the Yes's