``````## Chapter 4: A Review of Inference Concepts

## Sec 4.1: Basic Concepts of Estimation
## ss 4.1.1: Population parameters and sample statistics
## ss 4.1.2: Sampling distributions
##               Reliance on the central limit theorem
## ss 4.1.3: Assessing accuracy -- the standard error
## Footnote Code
## Calculate heated-ambient; take heated & ambient from columns of pair65
library(DAAG)``````
``## Loading required package: lattice``
``````test <- with(pair65, heated-ambient)
c(mean = mean(test), SD = sd(test), SEM = sd(test)/sqrt(length(test))) ``````
``````##  mean    SD   SEM
## 6.333 6.103 2.034``````
``````## ss 4.1.4: The standard error for the difference of means
## Footnote Code
## Heated vs ambient; unpaired elastic band data
heated <- c(254, 252, 239, 240, 250, 256, 267, 249, 259, 269)
ambient <- c(233, 252, 237, 246, 255, 244, 248, 242, 217, 257, 254)
v1 <- var(heated)           # 10 numbers; 10-1 = 9 d.f.
v2 <- var(ambient)          # 11 numbers; 11-1 = 10 d.f.
v <- (9*v1 + 10*v2)/(9+10)  # Pooled estimate of variance
# Estimate SED; variances may not be equal
c(sem1 = sqrt(v1/10), sem2 = sqrt(v2/11), sed = sqrt(v1/10 + v2/11)) ``````
``````##  sem1  sem2   sed
## 3.138 3.538 4.729``````
``````# Estimate SED; use pooled estimate
c(sd = sqrt(v), sed = sqrt(v1/10 + v2/11)) ``````
``````##     sd    sed
## 10.915  4.729``````
``````## ss 4.1.5: \kern-4pt* The standard error of the median
## Footnote Code
## median and SD for length, by species: data frame cuckoos (DAAG)
wren <- split(cuckoos\$length, cuckoos\$species)\$wren
median(wren) ``````
``## [1] 21``
``````n <- length(wren)
sqrt(pi/2)*sd(wren)/sqrt(n)  # this SE computation assumes normality ``````
``## [1] 0.2441``
``````## ss 4.1.6: The sampling distribution of the \$t\$ statistic

## Sec 4.2: Confidence Intervals and Tests of Hypotheses
##               Calculations with the \$t\$-distribution
#  Plus or minus 1.96SE normal distribution limits, e.g.
pnorm(1.96) - pnorm(-1.96) ``````
``## [1] 0.95``
``````#  Plus or minus 2.31SE t distribution (8 df) limits, e.g.
pt(2.31, 8) - pt(-2.31,8)  # 2.31 SEs either side ``````
``## [1] 0.9503``
``qnorm(0.975)          # normal distribution ``
``## [1] 1.96``
``qt(0.975, 8)          # t-distribution with 8 d.f. ``
``## [1] 2.306``
``````##                Confidence intervals of 95\% or 99\%
## Footnote Code
## Detailed calculations; 95% CI for mean of heated-ambient
pair65.diff <- with(pair65, heated-ambient)
pair65.n <- length(pair65.diff)
pair65.se <- sd(pair65.diff)/sqrt(pair65.n)
mean(pair65.diff) + qt(c(.025,.975),8)*pair65.se  ``````
``## [1]  1.642 11.025``
``````## 95% CI for mean of heated-ambient: data frame pair65 (DAAG)
with(pair65, t.test(heated, ambient, paired=TRUE,
conf.level=0.95)\$conf.int) ``````
``````## [1]  1.642 11.025
## attr(,"conf.level")
## [1] 0.95``````
``````##                        Tests of hypotheses
1-pt(6.33/2.03, 8)   # Equals pt(-6.33/2.03, 8) ``````
``## [1] 0.007133``
``````##                     What is a small \$p\$-value?
##          \$t\$-distribution versus the normal distribution
##            How good is the normal theory approximation?
## ss 4.2.1: A summary of one- and two-sample calculations
## Footnote Code
## t-test and confidence interval calculations
heated <- c(254, 252, 239, 240, 250, 256, 267, 249, 259, 269)
ambient <- c(233, 252, 237, 246, 255, 244, 248, 242, 217, 257, 254)
t.test(heated, ambient, var.equal=TRUE) ``````
``````##
##  Two Sample t-test
##
## data:  heated and ambient
## t = 1.973, df = 19, p-value = 0.06322
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5723 19.3905
## sample estimates:
## mean of x mean of y
##     253.5     244.1``````
``````##                      When is pairing helpful?
## Footnote Code
## heated vs ambient: pair65 (DAAG); and cross vs self: mignonette (DAAG)
par(mfrow=c(1,2), pty="s")
plot(heated ~ ambient, data=pair65); abline(0, 1)    # left panel
with(pair65, abline(mean(heated-ambient), 1, lty=2))
plot(cross ~ self, data=mignonette); abline(0, 1)    # right panel
with(mignonette, abline(mean(cross-self), 1, lty=2)) ``````

``````par(mfrow = c(1,1), pty="m")

##            What if the standard deviations are unequal?
##                  Different ways to report results
## Footnote Code
## Different ways to report results: calculations
pair65.diff <- with(pair65, heated-ambient)
n <- length(pair65.diff)
av <- mean(pair65.diff); sd <- sqrt(var(pair65.diff)); se <- sd/sqrt(n)
print(c(mean=av, SED=se, "mean/SED"=av/se))   # Items 1 and 2 ``````
``````##     mean      SED mean/SED
##    6.333    2.034    3.113``````
``t.test(pair65.diff)                           # Items 3 and 4 ``
``````##
##  One Sample t-test
##
## data:  pair65.diff
## t = 3.113, df = 8, p-value = 0.01438
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##   1.642 11.025
## sample estimates:
## mean of x
##     6.333``````
``````## ss 4.2.2: Confidence intervals and tests for proportions
## ss 4.2.3: Confidence intervals for the correlation
## ss 4.2.4: Confidence intervals versus hypothesis tests
##                Guidelines for significance testing

## Sec 4.3: Contingency Tables
## Footnote Code
## Compare number with a high school qualification, between `untreated' rows
## from data frame psid3 and `treated' rows from nswdemo
library(DAAG)   # Data are from DAAG
nswpsid3 <- rbind(psid3, subset(nswdemo, trt==1))
table(nswpsid3\$trt, nswpsid3\$nodeg) ``````
``````##
##       0   1
##   0  63  65
##   1  80 217``````
``````  # PSID3 males are coded 0; NSW male trainees are coded 1.

# To agree with hand calculation below, specify correct=FALSE
chisq.test(with(nswpsid3, table(trt, nodeg)), correct=FALSE) ``````
``````##
##  Pearson's Chi-squared test
##
## data:  with(nswpsid3, table(trt, nodeg))
## X-squared = 19.89, df = 1, p-value = 8.189e-06``````
``````##               The mechanics of the chi-squared test
##        An example where a chi-squared test may not be valid
## Footnote Code
## Engine man data
engineman <- matrix(c(5,3,17,85), 2,2)
chisq.test(engineman) ``````
``## Warning: Chi-squared approximation may be incorrect``
``````##
##  Pearson's Chi-squared test with Yates' continuity correction
##
## data:  engineman
## X-squared = 7.086, df = 1, p-value = 0.00777``````
``````## ss 4.3.1: Rare and endangered plant species
## Footnote Code
## Enter the data thus:
rareplants <- matrix(c(37,190,94,23,59,23,10,141,28,15,58,16), ncol=3,
byrow=TRUE, dimnames=list(c("CC","CR","RC","RR"), c("D","W","WD")))

(x2 <- chisq.test(rareplants)) ``````
``````##
##  Pearson's Chi-squared test
##
## data:  rareplants
## X-squared = 34.99, df = 6, p-value = 4.336e-06``````
``````## Footnote Code
## Expected number of species, by habitat (calculate x2 as above)
x2E <- stack(data.frame(t(x2\$expected)))
habitat <- rep(c(1,2,3), 4)
plot(x2E\$values ~ habitat, axes=FALSE, xlim=c(0.5, 3.5), pch=16,
xlab="habitat", ylab="Expected Number of Species")
text(x2E\$values ~ habitat, labels=x2E\$ind, pos=rep(c(4,4,2,2),3))
axis(1, at=seq(1,3), labels=c("D", "W", "WD"))
axis(2); box() ``````

``````##  Examination of departures from a consistent overall row pattern
x2 <- chisq.test(rareplants)
## Standardized residuals
residuals(x2) ``````
``````##          D        W      WD
## CC -0.3693 -1.19598  2.2634
## CR  2.8275 -1.06657 -0.2753
## RC -2.5466  2.36753 -2.0990
## RR  1.2416  0.07224 -1.0227``````
``x2\$expected ``
``````##        D      W    WD
## CC 39.32 207.22 74.47
## CR 12.86  67.78 24.36
## RC 21.92 115.55 41.53
## RR 10.90  57.45 20.65``````
``````## ss 4.3.2: Additional notes
##                       Interpretation issues
##                        Modeling approaches

## Sec 4.4: One-Way Unstructured Comparisons
tomato <-
data.frame(weight=
c(1.5, 1.9, 1.3, 1.5, 2.4, 1.5,   # water
1.5, 1.2, 1.2, 2.1, 2.9, 1.6,   # Nutrient
1.9, 1.6, 0.8, 1.15, 0.9, 1.6), # Nutrient+24D
trt = rep(c("water", "Nutrient", "Nutrient+24D"),
c(6, 6, 6)))
## Make water the first level of trt.  It will then appear as
## the initial level in the graphs. In aov or lm calculations,
## it will be the baseline or reference level.
tomato\$trt <- relevel(tomato\$trt, ref="water")

stripplot(trt~weight, aspect=0.6, data=tomato) ``````

``````##                The tomato data -- a visual summary
## Do analysis of variance calculations
tomato.aov <- aov(weight ~ trt, data=tomato)
## Summarize results graphically
oneway.plot(obj=tomato.aov) ``````

``## [1] 1 1 1 1``
``````##                   The analysis of variance table
anova(tomato.aov) ``````
``````## Analysis of Variance Table
##
## Response: weight
##           Df Sum Sq Mean Sq F value Pr(>F)
## trt        2   0.63   0.314     1.2   0.33
## Residuals 15   3.91   0.261``````
``````## ss 4.4.1: Multiple comparisons
##              *Microarray data -- severe multiplicity
## ss 4.4.2: Data with a two-way structure, i.e.\ two factors
## Footnote Code
## Simplified version of code
library(lattice)
## Panel A
dotplot(trt ~ ShootDryMass, data=rice, aspect=1,
panel=function(x,y,...){panel.dotplot(x, y, pch=1, col="gray40")
panel.average(x, y, type="p", col="black",
pch=3, cex=1.25)},
xlab="Shoot dry mass (g)") ``````

``````## Panel B
with(rice, interaction.plot(fert, variety, ShootDryMass,
xlab="Level of first factor")) ``````

``````## ss 4.4.3: Presentation issues

## Sec 4.5: Response Curves
## Footnote Code
## Data frame modelcars (DAAG)
plot(distance.traveled ~ starting.point, data=modelcars,
xlim=c(0,12.5), xaxs="i", xlab = "Distance up ramp (cm)",
ylab="Distance traveled (cm)")   ``````

``````## Sec 4.6: Data with a Nested Variation Structure
## ss 4.6.1: Degrees of freedom considerations
## ss 4.6.2: General multi-way analysis of variance designs

## Sec 4.7: Resampling Methods for Standard Errors, Tests and Confidence Intervals
## ss 4.7.1: The one-sample permutation test
## ss 4.7.2: The two-sample permutation test
## Footnote Code
## Draw one curve only; permutation distribution of difference in means
x1 <- two65\$ambient; x2 <- two65\$heated; x <- c(x1, x2)
n1 <- length(x1); n2 <- length(x2); n <- n1+n2
dbar <- mean(x2) - mean(x1)
z <- numeric(2000)
for(i in 1:2000){
mn <- sample(n, n2, replace=FALSE)
dbardash <- mean(x[mn]) - mean(x[-mn])
z[i]<- dbardash
}
plot(density(z), yaxs="i")
abline(v = dbar)
abline(v = -dbar, lty=2) ``````