################################################### ### chunk number 1: ################################################### options(SweaveHooks=list(fig=function(){par(cex.main=1.1, mar=c(4.1,4.1,2.6,2.1), mgp=c(2.25,0.5,0), tck=-0.02)})) ## graphics.off() x11(width=8, height=4.25) x11(width=8, height=8) ################################################### ### chunk number 2: au-cmdscale ################################################### library(DAAGxtras) aupoints <- cmdscale(audists) plot(aupoints) text(aupoints, labels=paste(rownames(aupoints))) ################################################### ### chunk number 3: au-cmd-res ################################################### origDists <- as.matrix(audists) audistfits <- as.matrix(dist(aupoints)) misfit <- audistfits-origDists for (j in 1:9)for (i in (j+1):10){ lines(aupoints[c(i,j), 1], aupoints[c(i,j), 2], col="gray") midx <- mean(aupoints[c(i,j), 1]) midy <- mean(aupoints[c(i,j), 2]) text(midx, midy, paste(round(misfit[i,j]))) } colnames(misfit) <- abbreviate(colnames(misfit), 6) print(round(misfit)) ################################################### ### chunk number 4: au-overlay ################################################### if(!exists("aulatlong"))load("aulatlong.RData") library(oz) oz() points(aulatlong, col="red", pch=16, cex=1.5) comparePhysical <- function(lat=aulatlong$latitude, long=aulatlong$longitude, x1=aupoints[,1], x2 = aupoints[,2]){ ## Get best fit in space of (latitude, longitude) fitlat <- predict(lm(lat ~ x1+x2)) fitlong <- predict(lm(long ~ x1+x2)) x <- as.vector(rbind(lat, fitlat, rep(NA,10))) y <- as.vector(rbind(long, fitlong, rep(NA,10))) lines(x, y, col=3, lwd=2) } comparePhysical() ################################################### ### chunk number 5: au-sammon ################################################### library(MASS) aupoints.sam <- sammon(audists) oz() points(aulatlong, col="red", pch=16, cex=1.5) comparePhysical(x1=aupoints.sam$points[,1], x2 = aupoints.sam$points[,2]) ################################################### ### chunk number 6: au-mds ################################################### oz() points(aulatlong, col="red", pch=16, cex=1.5) aupoints.mds <- isoMDS(audists, as.matrix(aulatlong)) comparePhysical(x1=aupoints.mds$points[,1], x2 = aupoints.mds$points[,2]) ################################################### ### chunk number 7: diabetes-distances ################################################### library(cluster) library(mclust) data(diabetes) diadist <- daisy(diabetes[, -1], stand=TRUE) ## Examine distribution of distances plot(density(diadist, from=0)) ################################################### ### chunk number 8: read-primates ################################################### library(ape) webpage <- "http://evolution.genetics.washington.edu/book/primates.dna" test <- try(readLines(webpage)[1]) if (!inherits(test, "try-error")){ primates.dna <- read.dna(con <- url(webpage)) close(con) hasdata <- TRUE } else hasdata <- FALSE ################################################### ### chunk number 9: primates-dist ################################################### if(hasdata) primates.dist <- dist.dna(primates.dna, model="F84") ################################################### ### chunk number 10: primates-2d ################################################### if(hasdata){ primates.cmd <- cmdscale(primates.dist) eqscplot(primates.cmd) rtleft <- c(4,2,4,2)[unclass(cut(primates.cmd[,1], breaks=4))] text(primates.cmd[,1], primates.cmd[,2], row.names(primates.cmd), pos=rtleft) } ################################################### ### chunk number 11: check-dist ################################################### if(hasdata){ d <- dist(primates.cmd) sum((d-primates.dist)^2)/sum(primates.dist^2) } ################################################### ### chunk number 12: primates-3d ################################################### if(hasdata){ primates.cmd <- cmdscale(primates.dist, k=3) cloud(primates.cmd[,3] ~ primates.cmd[,1]*primates.cmd[,2]) d <- dist(primates.cmd) sum((d-primates.dist)^2)/sum(primates.dist^2) } ################################################### ### chunk number 13: sammon-primates ################################################### if(hasdata){ primates.sam <- sammon(primates.dist, k=3) eqscplot(primates.sam$points) rtleft <- c(4,2,4,2)[unclass(cut(primates.sam$points[,1], breaks=4))] text(primates.sam$points[,1], primates.sam$points[,2], row.names(primates.sam$points), pos=rtleft) } ################################################### ### chunk number 14: mds-primates ################################################### if(hasdata){ primates.mds <- isoMDS(primates.dist, primates.cmd, k=3) eqscplot(primates.mds$points) rtleft <- c(4,2,4,2)[unclass(cut(primates.mds$points[,1], breaks=4))] text(primates.mds$points[,1], primates.mds$points[,2], row.names(primates.mds$points), pos=rtleft) } ################################################### ### chunk number 15: diptera-data ################################################### ## Diptera webpage <- "http://www.maths.anu.edu.au/~johnm/datasets/ordination/dipt.csv" test <- try(readLines(webpage)[1]) if (!inherits(test, "try-error")){ diptera <- read.csv(webpage, comment="", na.strings=c(NA,"")) dim(diptera) ## Now observe that the names all start with "#" species <- as.character(diptera[,1]) table(substring(species, 1, 1)) ## Now strip off the initial "#" species <- substring(species,2) genus <- as.character(diptera[,2]) ## Now store columns 5 to 114 of diptera as a distance object. ## The distance matrix stores, in vector form, the elements of the ## matrix that are below the diagonal. dipdist <- as.dist(diptera[1:110,5:114]) attributes(dipdist)$Labels <- species length(dipdist) ## The length reflects the number of elements in ## the lower triangle, i.e., below the diagonal hasdata <- TRUE } else hasdata <- FALSE ################################################### ### chunk number 16: zero-dists ################################################### if(hasdata) dipdist[dipdist==0] <- 0.5*min(dipdist[dipdist>0]) ################################################### ### chunk number 17: ord-cmd eval=FALSE ################################################### ## if(hasdata){ ## dipt.cmd <- cmdscale(dipdist) ## genus <- sapply(strsplit(rownames(dipt.cmd), split="_", fixed=TRUE), ## function(x)x[1]) ## nam <- names(sort(table(genus), decreasing=TRUE)) ## genus <- factor(genus, levels=nam) ## plot(dipt.cmd, col=unclass(genus), pch=16) ## dipt.sam <- sammon(dipdist) ## plot(dipt.sam$points, col=unclass(genus), pch=16) ## dipt.mds <- isoMDS(dipdist, dipt.cmd) ## plot(dipt.mds$points, col=unclass(genus), pch=16) ## dipt.mds2 <- isoMDS(dipdist, dipt.sam$points) ## plot(dipt.mds2$points, col=unclass(genus), pch=16) ## } ################################################### ### chunk number 18: pacific-dist ################################################### library(DAAGxtras) pacific.dist <- dist(x = as.matrix(rockArt[-c(47, 54, 60, 63, 92), 28:641]), method = "binary") sum(pacific.dist==1)/length(pacific.dist) plot(density(pacific.dist, to = 1)) ## Now check that all columns have some distances that are less than 1 symmat <- as.matrix(pacific.dist) table(apply(symmat, 2, function(x) sum(x==1))) ################################################### ### chunk number 19: pacific-cmd ################################################### pacific.cmd <- cmdscale(pacific.dist) eqscplot(pacific.cmd) # pacific.sam <- sammon(pacific.dist) # eqscplot(pacific.sam)