``````## Chapter 3: Statistical Models

## Sec 3.1: Statistical Models
## ss 3.1.1: Incorporation of a error or noise component
## Footnote Code
## Plot depression vs weight: data frame roller (DAAG)
library(DAAG)``````
``## Loading required package: lattice``
``````par(pty="s")
plot(depression ~ weight, data = roller, xlim=c(0,1.04*max(weight)),
ylim=c(0,1.04*max(depression)),
xaxs="i", yaxs="i",   # "i"=inner: Fit axes exactly to the limits
xlab = "Weight of roller (t)", ylab = "Depression(mm)", pch = 16)
abline(0, 2.25)            # A slope of 2.25 looks about right ``````

``````par(pty="m")

##                       Predicting with models
##                        Which model is best?
## ss 3.1.2: Fitting models -- the model formula
## Fit line - by default, this fits intercept & slope.
## requires data frame roller (DAAG)
roller.lm <- lm(depression ~ weight, data=roller)
## Compare with the code used to plot the data
par(pty="s")
plot(depression ~ weight, data=roller)
## Add the fitted line to the plot
abline(roller.lm)``````

``````par(pty="m")

## Footnote Code
## For a model that omits the intercept term, specify
lm(depression ~ -1 + weight, data=roller)   ``````
``````##
## Call:
## lm(formula = depression ~ -1 + weight, data = roller)
##
## Coefficients:
## weight
##   2.39``````
``````##             Fitted values, residuals and coefficients
round(fitted(roller.lm), 1) ``````
``````##    1    2    3    4    5    6    7    8    9   10
##  3.0  6.2  6.7 10.7 12.0 14.2 15.0 18.2 24.0 31.0``````
``round(resid(roller.lm), 1) ``
``````##    1    2    3    4    5    6    7    8    9   10
## -1.0 -5.2 -1.7 -5.7  8.0  5.8  8.0 -8.2  6.0 -6.0``````
``coef(roller.lm) ``
``````## (Intercept)      weight
##      -2.087       2.667``````
``````##                           Model objects
names(roller.lm)     # Get names of list elements ``````
``````##  [1] "coefficients"  "residuals"     "effects"       "rank"
##  [5] "fitted.values" "assign"        "qr"            "df.residual"
##  [9] "xlevels"       "call"          "terms"         "model"``````
``roller.lm\$coef ``
``````## (Intercept)      weight
##      -2.087       2.667``````
``````##              Summary information about model objects
summary(roller.lm) ``````
``````##
## Call:
## lm(formula = depression ~ weight, data = roller)
##
## Residuals:
##    Min     1Q Median     3Q    Max
##  -8.18  -5.58  -1.35   5.92   8.02
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    -2.09       4.75   -0.44   0.6723
## weight          2.67       0.70    3.81   0.0052
##
## Residual standard error: 6.74 on 8 degrees of freedom
## Multiple R-squared:  0.644,  Adjusted R-squared:   0.6
## F-statistic: 14.5 on 1 and 8 DF,  p-value: 0.00518``````
``````## Sec 3.2: Distributions: Models for the Random Component
## ss 3.2.1: Discrete distributions -- models for counts
##                       Bernoulli distribution
##                       Binomial distribution
## Footnote Code
## To get the labeling (0, 1, 2) as in the text, specify:
probs <- dbinom(0:2, size=2, prob=0.5)
names(probs) <- 0:2
probs ``````
``````##    0    1    2
## 0.25 0.50 0.25``````
``pbinom(q=2, size=4, prob=0.5) ``
``## [1] 0.6875``
``pbinom(q=4, size=50, prob=0.2) ``
``## [1] 0.0185``
``qbinom(p = 0.70, size = 4, prob = 0.5) ``
``## [1] 3``
``````##                   Means and standard deviations
##                        Poisson distribution
## ss 3.2.2: Continuous distributions
##                        Normal distribution
## Footnote Code
## Plot the normal density, in the range -3.25 to 3.25
z <- pretty(c(-3.25,3.25), 30) # Find ~30 equally spaced points
ht <- dnorm(z)                 # By default: mean=0, standard deviation=1
plot(z, ht, type="l", xlab="Normal deviate", ylab="Ordinate", yaxs="i")
polygon(c(z[z <= 1.0], 1.0), c(dnorm(z[z <= 1.0]), 0), col="grey") ``````

`````` # Around 84.1% of the total area is to the left of the vertical line.

## Plot the normal density, in the range -3 to 3
z <- pretty(c(-3,3), 30)   # Find ~30 equally spaced points
ht <- dnorm(z)             # By default: mean=0, standard deviation=1
plot(z, ht, type="l", xlab="Normal deviate", ylab="Density", yaxs="i") ``````

`````` # yaxs="i" locates the axes at the limits of the data

pnorm(1.0)         # by default, mean=0 and SD=1 ``````
``## [1] 0.8413``
``````## Footnote Code
pnorm(0)             # .5    (exactly half the area is to the left of the mean) ``````
``## [1] 0.5``
``pnorm(-1.96)         # .025 ``
``## [1] 0.025``
``pnorm(1.96)          # .975 ``
``## [1] 0.975``
``pnorm(1.96, mean=2)  # .484  (a normal distribution with mean 2 and SD 1) ``
``## [1] 0.484``
``pnorm(1.96, sd=2)    # .836  (sd = standard deviation) ``
``## [1] 0.8365``
``qnorm(.9)          # 90th percentile; mean=0 and SD=1 ``
``## [1] 1.282``
``````## Footnote Code
qnorm(0.841)         # 1.0 ``````
``## [1] 0.9986``
``qnorm(0.5)           # 0 ``
``## [1] 0``
``qnorm(0.975)         # 1.96 ``
``## [1] 1.96``
``qnorm(c(.1,.2,.3))   # -1.282 -0.842 -0.524  (10th, 20th and 30th percentiles) ``
``## [1] -1.2816 -0.8416 -0.5244``
``qnorm(.1, mean=100, sd=10)  # 87.2 (10th percentile, mean=100, SD=10) ``
``## [1] 87.18``
``````##                   Other continuous distributions
##              Different ways to describe distributions

## Sec 3.3: Simulation of Random Numbers and Random Samples
set.seed(23286)  # Use to reproduce the sample below
rbinom(10, size=1, p=.5) ``````
``##  [1] 0 0 0 0 1 0 1 0 1 1``
``````##                Sampling from discrete distributions
rbinom(10, size=1, p=.5)   # 10 Bernoulli trials, prob=0.5 ``````
``##  [1] 1 1 1 0 0 1 1 1 1 0``
``rbinom(25, size=4, prob=0.5)   ``
``##  [1] 3 2 1 2 1 1 2 1 4 2 2 2 1 2 3 2 4 2 2 3 2 2 2 2 3``
``````set.seed(9388)
rpois(20, 3) ``````
``##  [1] 3 3 4 1 2 2 3 1 1 4 3 0 1 1 3 1 0 4 5 2``
``````## ss 3.3.1: Sampling from the normal and other continuous distributions
options(digits=2)  # Suggest number of digits to display
rnorm(10)          # 10 random values from the normal distribution ``````
``##  [1] -1.01  0.59 -0.74 -0.85 -0.42  1.87  2.18  0.01  0.64  1.92``
``````## Footnote Code
## The following gives a rough equivalent of the figure:
set.seed (21)        # Use to reproduce the data in the figure
par(mfrow=c(2,3))
x <- pretty(c(6.5,13.5), 40)
for(i in 1:5){
y <- rnorm(50, mean=10, sd=1)
hist(y, prob=TRUE, xlim=c(6.5,13.5), ylim=c(0,0.5), main="")
lines(x, dnorm(x,10,1))
}
par(mfrow=c(1,1)) ``````

``runif(n = 20, min=0, max=1) # 20 numbers, uniform distn on (0, 1) ``
``````##  [1] 0.16 0.33 0.61 0.34 0.95 0.24 0.93 0.49 0.59 0.38 0.21 0.85 0.53 0.46
## [15] 0.95 0.47 0.12 0.72 0.97 0.53``````
``rexp(n=10, rate=3)          # 10 numbers, exponential, mean 1/3. ``
``##  [1] 0.440 0.153 0.094 0.054 0.324 0.024 0.045 0.311 0.576 0.149``
``````## Exercises at the end of this chapter explore further possibilities.

## ss 3.3.2: Simulation of regression data
options(digits=3)
n <- 8;  x <- seq(1,n); sigma <- 2.5; b0 <- 2;  b1 <- 3
error <- rnorm(n, sd=sigma)
y <- b0 + b1*x + error

t(data.frame(x,y)) ``````
``````##   [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## x 1.00  2.0  3.0  4.0  5.0  6.0  7.0  8.0
## y 8.14  5.3 11.3 14.3 21.2 19.1 22.4 23.1``````
``````roller.lm <- lm(depression ~ weight, data=roller)
roller.sim <- simulate(roller.lm, nsim=20)  # 20 simulations

matplot(roller\$weight, roller.sim, pch=1, ylim=range(roller\$depression))
points(roller, pch=16) ``````

``````## ss 3.3.3: Simulation of the sampling distribution of the mean
## Function to generate n sample values; skew population
sampvals <- function(n)exp(rnorm(n, mean = 0.5, sd = 0.3))
## Means across rows of a dimension nsamp x sampsize matrix of
## sample values gives nsamp means of samples of size sampsize.
samplingDist <- function(sampsize=3, nsamp=1000, FUN=mean)
apply(matrix(sampvals(sampsize*nsamp), ncol=sampsize), 1, FUN)
size <- c(3,10,30)
## Simulate means of samples of 3, 9 and 30; place in dataframe
df <- data.frame(y3=samplingDist(sampsize=size[1]),
y9=samplingDist(sampsize=size[2]),
y30=samplingDist(sampsize=size[3]))

## Simulate source population (sampsize=1)
y <- samplingDist(sampsize=1)
densityplot(~y3+y9+y30, data = df, outer=TRUE, layout = c(3,1),
plot.points = FALSE, panel = function(x, ...) {
panel.densityplot(x, ..., col = "black")
panel.densityplot(y, col = "gray40", lty = 2, ...)
})                                                      ``````