################################################### ### chunk number 1: ################################################### options(SweaveHooks=list(fig=function(){par(cex.main=1.1, mar=c(4.1,4.1,2.6,2.1), mgp=c(2.25,0.5,0), tck=-0.02)})) ## graphics.off() x11(width=8, height=4.25) x11(width=8, height=8) ################################################### ### chunk number 2: au-cmdscale ################################################### load("audists.RData") aupoints <- cmdscale(audists) plot(aupoints) text(aupoints, labels=paste(rownames(aupoints))) ################################################### ### chunk number 3: au-cmd-res ################################################### audistfits <- as.matrix(dist(aupoints)) misfit <- audistfits-audists for (j in 1:9)for (i in (j+1):10){ lines(aupoints[c(i,j), 1], aupoints[c(i,j), 2], col="gray") midx <- mean(aupoints[c(i,j), 1]) midy <- mean(aupoints[c(i,j), 2]) text(midx, midy, paste(round(misfit[i,j]))) } print(round(misfit)) ################################################### ### chunk number 4: au-overlay ################################################### attach("aulatlong.RData") library(oz) oz() points(aulatlong, col="red", pch=16, cex=1.5) comparePhysical <- function(lat=aulatlong$latitude, long=aulatlong$longitude, x1=aupoints[,1], x2 = aupoints[,2]){ ## Get best fit in space of (latitude, longitude) fitlat <- predict(lm(lat ~ x1+x2)) fitlong <- predict(lm(long ~ x1+x2)) x <- as.vector(rbind(lat, fitlat, rep(NA,10))) y <- as.vector(rbind(long, fitlong, rep(NA,10))) lines(x, y, col=3, lwd=2) } comparePhysical() ################################################### ### chunk number 5: au-sammon ################################################### aupoints.sam <- sammon(audists) oz() points(aulatlong, col="red", pch=16, cex=1.5) comparePhysical(x1=aupoints.sam$points[,1], x2 = aupoints.sam$points[,2]) ################################################### ### chunk number 6: au-mds ################################################### oz() points(aulatlong, col="red", pch=16, cex=1.5) aupoints.mds <- isoMDS(audists, as.matrix(aulatlong)) comparePhysical(x1=aupoints.mds$points[,1], x2 = aupoints.mds$points[,2]) ################################################### ### chunk number 7: diabetes-distances ################################################### library(cluster) load("diabetes.RData") data(diabetes) diadist <- daisy(diabetes[, -1], stand=TRUE) ## Examine distribution of distances plot(density(diadist, from=0)) ################################################### ### chunk number 8: read-primates ################################################### library(ape) webpage <- "http://evolution.genetics.washington.edu/book/primates.dna" primates.dna <- read.dna(url(webpage)) ################################################### ### chunk number 9: primates-dist ################################################### primates.dist <- dist.dna(primates.dna, model="F84") ################################################### ### chunk number 10: primates-2d ################################################### primates.cmd <- cmdscale(primates.dist) eqscplot(primates.cmd) rtleft <- c(4,2,4,2)[unclass(cut(primates.cmd[,1], breaks=4))] text(primates.cmd[,1], primates.cmd[,2], row.names(primates.cmd), pos=rtleft) ################################################### ### chunk number 11: check-dist ################################################### d <- dist(primates.cmd) sum((d-primates.dist)^2)/sum(primates.dist^2) ################################################### ### chunk number 12: primates-3d ################################################### primates.cmd <- cmdscale(primates.dist, k=3) cloud(primates.cmd[,3] ~ primates.cmd[,1]*primates.cmd[,2]) d <- dist(primates.cmd) sum((d-primates.dist)^2)/sum(primates.dist^2) ################################################### ### chunk number 13: sammon-primates ################################################### primates.sam <- sammon(primates.dist, k=3) eqscplot(primates.sam$points) rtleft <- c(4,2,4,2)[unclass(cut(primates.sam$points[,1], breaks=4))] text(primates.sam$points[,1], primates.sam$points[,2], row.names(primates.sam$points), pos=rtleft) ################################################### ### chunk number 14: mds-primates ################################################### primates.mds <- isoMDS(primates.dist, primates.cmd, k=3) eqscplot(primates.mds$points) rtleft <- c(4,2,4,2)[unclass(cut(primates.mds$points[,1], breaks=4))] text(primates.mds$points[,1], primates.mds$points[,2], row.names(primates.mds$points), pos=rtleft) ################################################### ### chunk number 15: diptera-data ################################################### ## Diptera diptera <- read.csv("Dipt.csv", comment="",na.strings=c(NA,"")) dim(diptera) ## Now observe that the names all start with "#" species <- as.character(diptera[,1]) table(substring(species, 1, 1)) ## Now strip off the initial "#" species <- substring(species,2) genus <- as.character(diptera[,2]) ## Now store columns 5 to 114 of diptera as a distance object. ## The distance matrix stores, in vector form, the elements of the ## matrix that are below the diagonal. dipdist <- as.dist(diptera[1:110,5:114]) attributes(dipdist)$Labels <- species length(dipdist) ## The length reflects the number of elements in ## the lower triangle, i.e., below the diagonal ################################################### ### chunk number 16: zero-dists ################################################### "zerodists" <- function(distmat=dipdist, omit=TRUE){ cl <- match.call() oldatt <- attributes(distmat) lab <- attributes(distmat)$Labels lowerIJ <- function(n=8, IJ=10){ oneN <- 1:(n-1) prediags <- n*(oneN) - (oneN+2)*(oneN+1)/2 +oneN+1 j <- min(oneN[IJ<=prediags]) i <- IJ-c(0,prediags)[j]+j c(i,j) } if(!"dist"%in%class(distmat)) stop("'distmat' must be of class 'dissimilarity' or 'dist'") dlen <- length(distmat) n <- round((-1+sqrt(1+8*dlen))/2+1) zeroels <- seq(along=distmat)[distmat==0] zeros <- matrix(NA, nrow=length(zeroels), ncol=2) for(i in seq(along=zeroels)) zeros[i,] <- lowerIJ(n, zeroels[i]) omitrowcol <- zeros[,1] if(omit) {omit.els <- numeric(0) for (j in omitrowcol){ j1 <- 1:(j-1) jpat <- n*(j1-1)-j1*(j1+1)/2+j first <- n*(j-1)-j*(j+1)/2+j+1 last <- first+n-j-1 omit.els <- c(omit.els,jpat, first:last) } omit.els <- sort(omit.els) n <- n-dim(zeros)[1] distmat <- distmat[-omit.els] attributes(distmat) <- oldatt attr(distmat, "Size") <- n attr(distmat, "Labels") <- lab[-omitrowcol] attr(distmat, "call") <- match.call() print(c(n*(n-1)/2, length(distmat))) } print(data.frame(Species1=lab[zeros[,1]], zeros[,1], Species2=lab[zeros[,2]], zeros[,2])) invisible(if(omit)distmat else zeros) } udist <- zerodists(omit=TRUE) ################################################### ### chunk number 17: ord-cmd ################################################### dipt.cmd <- cmdscale(udist) genus <- sapply(strsplit(rownames(dipt.cmd), split="_", fixed=TRUE), function(x)x[1]) nam <- names(sort(table(genus), decreasing=TRUE)) genus <- factor(genus, levels=nam) plot(dipt.cmd, col=unclass(genus), pch=16) dipt.sam <- sammon(udist) plot(dipt.sam$points, col=unclass(genus), pch=16) dipt.mds <- isoMDS(udist, dipt.cmd) plot(dipt.mds$points, col=unclass(genus), pch=16) dipt.mds2 <- isoMDS(udist, dipt.sam$points) plot(dipt.mds2$points, col=unclass(genus), pch=16)