## Chapter 13: Regression on Principal Component or Discriminant Scores 

## The following version of overlapDensity() is needed, which should
## be in DAAG_1.21

overlapDensity <-
function (x0, x1, ratio = c(0.05, 20), ratio.number = FALSE, 
          plotvalues = c("Density", "Numbers"), gpnames = c("Control", "Treatment"), 
          cutoffs = c(lower = TRUE, upper = TRUE), bw = FALSE, 
          xlab = "Score", ylab = NULL, 
          col = 1:2, lty = 1:2, lwd = c(1, 1)) 
{
    if (is.null(plotvalues)) 
        plotvalues <- ""
    if (length(plotvalues) > 1) 
        plotvalues <- plotvalues[1]
    if (plotvalues %in% c("Numbers", "Density")) 
        plotit <- TRUE else plotit <- FALSE
    ran <- range(c(x0, x1))
    if (all(cutoffs)) {
        d0 <- density(x0, from = ran[1], to = ran[2])
        d1 <- density(x1, from = ran[1], to = ran[2])
    }
    else if (cutoffs[1]) {
        d0 <- density(x0, from = ran[1])
        d1 <- density(x1, from = ran[1])
    }
    else if (cutoffs[2]) {
        d0 <- density(x0, to = ran[2])
        d1 <- density(x1, to = ran[2])
    }
    else {
        d0 <- density(x0)
        d1 <- density(x1)
    }
    n0 <- length(x0)
    n1 <- length(x1)
    f0 <- d0$y
    f1 <- d1$y
    if (plotvalues %in% "Number") {
        pf0 <- d0$y * n0
        pf1 <- d1$y * n1
    }
    else {
        pf0 <- d0$y
        pf1 <- d1$y
    }
    if (plotit) {
        xlim <- range(c(d0$x, d1$x), na.rm = TRUE)
        if (plotvalues == "Number" & is.null(ylab)) 
            ylab <- "Density x total frequency"
        else if (plotvalues == "Density" & is.null(ylab)) 
            ylab <- "Density"
        ylim <- range(c(0, pf0, pf1))
        ylim[2] <- ylim[2] + 0.1 * diff(ylim)
        plot(d1$x, pf1, xlim = xlim, xlab = xlab, xaxt = "n", 
            bty = "n", yaxs = "i", ylim = ylim, ylab = ylab, 
            main = "", type = "n")
        axis(1)
        box(bty = "L")
        lines(d0$x, pf0, col = col[1], lty = lty[1], lwd = lwd[1])
        if (bw & lty[2] > 1) 
            lines(d1$x, pf1, col = col[2], lty = 1)
        lines(d1$x, f1, col = col[2], lty = lty[2], lwd = lwd[2])
        xpos <- par()$usr[2]
        ypos <- par()$usr[4]
        legend(xpos, ypos, lty = lty, lwd = lwd, col = col, cex = 0.8, 
            legend = gpnames, bty = "n", xjust = 1)
    }
    if (is.null(ratio)) 
        return()
    d0 <- density(x0, from = xlim[1], to = xlim[2])
    d1 <- density(x1, from = xlim[1], to = xlim[2])
    x01 <- d0$x
    f0 <- d0$y
    f1 <- d1$y
    if (ratio.number) {
        cf0 <- f0 * n0
        cf1 <- f1 * n1
    }
    else {
        cf0 <- f0
        cf1 <- f1
    }
    cf0[cf0 < 0] <- 0
    cf1[cf1 < 0] <- 0
    fmin <- pmin(cf0, cf1)
    fmax <- max(fmin)
    subs <- match(fmax, fmin)
    xmid <- x01[subs]
    flow <- ratio[1]
    fhi <- ratio[2]
    lochoose <- x01 < xmid & (cf0 <= flow * cf1 | cf1 <= cf0 * 
        flow)
    if (any(lochoose)) 
        xlim[1] <- max(x01[lochoose])
    else xlim[1] <- min(x01)
    hichoose <- x01 > xmid & (cf0 >= fhi * cf1 | cf1 >= cf0 * 
        fhi)
    if (any(hichoose)) 
        xlim[2] <- min(x01[hichoose])
    else xlim[2] <- max(x01)
    if (plotit) {
        if(ratio.number)midlab <- "ratio: number" else midlab <- "ratio: p.d."
        axis(3, at = xlim, labels = paste(signif(xlim, 4)), mgp = c(3, 
            0.5, 0), col = "gray45", line = 1, cex.axis = 0.8)
        xlim3 <- c(xlim[1], mean(xlim), xlim[2])
        axis(3, at = xlim3, line = 1, labels = c(paste("1:", round(1/ratio[1]), 
            sep = ""), midlab, paste(round(ratio[2]), ":1", sep = "")), 
            mgp = c(-3, -1, 0), col = "gray", cex.axis = 0.7, 
            lty = 0)
    }
    xlim
}

## Sec 13.1: Principal Component Scores in Regression 
## Principal components: data frame socsupport (DAAG) 
library(DAAG)
## Loading required package: lattice
## 
## Attaching package: 'DAAG'
## 
## The following object is masked _by_ '.GlobalEnv':
## 
##     overlapDensity
ss.pr1 <- princomp(as.matrix(na.omit(socsupport[, 9:19])), cor=TRUE) 
pairs(ss.pr1$scores[, 1:3]) 

plot of chunk unnamed-chunk-2

sort(-ss.pr1$scores[,1], decr=TRUE)[1:10]   # Note the outlier 
##    25    47    48    58    13    34    53    61    23    87 
## 4.062 3.796 3.603 3.536 3.417 3.332 3.259 3.165 3.026 3.018
## Alternative to pairs(), using the lattice function splom() 
splom(~ss.pr1$scores[, 1:3]) 

plot of chunk unnamed-chunk-3

not.na <- complete.cases(socsupport[, 9:19]) 
not.na[36] <- FALSE 
ss.pr <- princomp(as.matrix(socsupport[not.na, 9:19]), cor=TRUE) 

summary(ss.pr)     # Examine the contributions of the components 
## Importance of components:
##                        Comp.1 Comp.2 Comp.3  Comp.4  Comp.5  Comp.6
## Standard deviation     2.3937 1.2190 1.1366 0.84477 0.75447 0.69536
## Proportion of Variance 0.5209 0.1351 0.1174 0.06488 0.05175 0.04396
## Cumulative Proportion  0.5209 0.6560 0.7734 0.83833 0.89007 0.93403
##                         Comp.7  Comp.8  Comp.9  Comp.10  Comp.11
## Standard deviation     0.49726 0.45610 0.35952 0.295553 0.231892
## Proportion of Variance 0.02248 0.01891 0.01175 0.007941 0.004889
## Cumulative Proportion  0.95651 0.97542 0.98717 0.995111 1.000000
ss.lm <- lm(BDI[not.na] ~ ss.pr$scores[, 1:6], data=socsupport) 
summary(ss.lm)$coef 
##                           Estimate Std. Error t value  Pr(>|t|)
## (Intercept)                10.4607     0.8934 11.7088 3.489e-19
## ss.pr$scores[, 1:6]Comp.1   1.3113     0.3732  3.5134 7.229e-04
## ss.pr$scores[, 1:6]Comp.2   0.3959     0.7329  0.5402 5.905e-01
## ss.pr$scores[, 1:6]Comp.3   0.6036     0.7860  0.7679 4.447e-01
## ss.pr$scores[, 1:6]Comp.4   1.4248     1.0576  1.3473 1.816e-01
## ss.pr$scores[, 1:6]Comp.5   2.1459     1.1841  1.8122 7.362e-02
## ss.pr$scores[, 1:6]Comp.6   1.2882     1.2848  1.0027 3.190e-01
ss.pr$loadings[, 1] 
##    emotional emotionalsat     tangible  tangiblesat       affect 
##      -0.3201      -0.2979      -0.2467      -0.2887      -0.3069 
##    affectsat          psi       psisat     esupport     psupport 
##      -0.2883      -0.3628      -0.3322      -0.2886      -0.2846 
##   supsources 
##      -0.2846
## Footnote Code
## Plot first principal component score against BDI 
attach(socsupport) 
plot(BDI[not.na] ~ ss.pr$scores[ ,1], col=as.numeric(gender[not.na]),  
     pch=as.numeric(gender[not.na]), xlab ="1st principal component",  
     ylab="BDI") 
topleft <- par()$usr[c(1,4)] 
legend(topleft[1], topleft[2], col=1:2, pch=1:2, legend=levels(gender))