This script reproduces all of the analysis and graphs for the MANOVA of the Wine
data in the paper and also includes other analyses not described there. It is set up as an R script that can be “compiled” to HTML, Word, or PDF using knitr::knit()
. This is most convenient within R Studio via the File -> Compile Notebook
option.
library(heplots)
library(car)
library(candisc)
data(Wine, package="candisc")
Wine.mod <- lm(as.matrix(Wine[, -1]) ~ Cultivar, data=Wine)
Anova(Wine.mod)
##
## Type II MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## Cultivar 2 1.71 73.2 26 328 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
heplot(Wine.mod, fill=TRUE, fill.alpha=.1)
#pairs(Wine.mod, fill=TRUE, fill.alpha=.1)
pairs(Wine.mod, fill=TRUE, fill.alpha=.1, variables=1:5)
There is no simple way to get a compact summary of the univariate F-tests from a MLM. This code extracts the result of summary()
and makes them into a data frame.
ss <- summary(Wine.mod)
UniStats <- as.data.frame(matrix(0, nrow=length(ss), 5))
for (i in 1:length(ss)) {
UniStats[i,1] <- ss[[i]]$r.squared
f <- ss[[i]]$fstatistic
UniStats[i,2:4] <- f
UniStats[i,5] <- pf(f[1], f[2], f[3], lower.tail=FALSE)
}
rownames(UniStats) <- sub("Response ", "", names(ss))
UniStats$stars <- c(gtools:::stars.pval(UniStats[,5]))
UniStats[,5] <- format.pval(UniStats[,5], eps=0.001)
colnames(UniStats) <- c("R^2", "F", "df1", "df2", "Pr (>F)", "")
UniStats
## R^2 F df1 df2 Pr (>F)
## Alcohol 0.6069 135.08 2 175 <0.001 ***
## MalicAcid 0.2969 36.94 2 175 <0.001 ***
## Ash 0.1321 13.31 2 175 <0.001 ***
## AlcAsh 0.2902 35.77 2 175 <0.001 ***
## Mg 0.1244 12.43 2 175 <0.001 ***
## Phenols 0.5172 93.73 2 175 <0.001 ***
## Flav 0.7278 233.93 2 175 <0.001 ***
## NonFlavPhenols 0.2396 27.58 2 175 <0.001 ***
## Proa 0.2570 30.27 2 175 <0.001 ***
## Color 0.5797 120.66 2 175 <0.001 ***
## Hue 0.5366 101.32 2 175 <0.001 ***
## OD 0.6847 189.97 2 175 <0.001 ***
## Proline 0.7038 207.92 2 175 <0.001 ***
Wine.can <- candisc(Wine.mod)
Wine.can
##
## Canonical Discriminant Analysis for Cultivar:
##
## CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.901 9.08 4.95 68.7 68.7
## 2 0.805 4.13 4.95 31.3 100.0
##
## Test of H0: The canonical correlations in the
## current row and all that follow are zero
##
## LR test stat approx F num Df den Df Pr(> F)
## 1 0.0193 539 4 348 <2e-16 ***
## 2 0.1950 722 1 175 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(Wine.can, ellipse=TRUE, var.lwd=2)
heplot(Wine.can, var.lwd=2)
## Vector scale factor set to 20.73
# need to use extremely small p-value to preserve resolution
heplot(Wine.can, var.lwd=2, alpha=0.00001, fill=TRUE, fill.alpha=.1, var.col="black")
## Vector scale factor set to 10.32
The HE pairs plot is confusing with so many responses, but can be simplified by rearranging the variables in the order derived from the canonical discriminant plot.
struc <- Wine.can$structure
angles <- ifelse( struc[,1] > 0,
atan(struc[,2]/struc[,1]),
atan(struc[,2]/struc[,1]) + pi)
#angles[order(angles)]
colnames(Wine[,-1])[order(angles)]
## [1] "AlcAsh" "NonFlavPhenols" "MalicAcid" "Color"
## [5] "Ash" "Alcohol" "Mg" "Proline"
## [9] "Phenols" "Flav" "Proa" "OD"
## [13] "Hue"
This computation is now done by the function varOrder()
in the candisc package.
(ord <- varOrder(Wine.mod))
## [1] 4 8 2 10 3 1 5 13 6 7 9 12 11
varOrder(Wine.mod, names=TRUE)
## [1] "AlcAsh" "NonFlavPhenols" "MalicAcid" "Color"
## [5] "Ash" "Alcohol" "Mg" "Proline"
## [9] "Phenols" "Flav" "Proa" "OD"
## [13] "Hue"
ord <- varOrder(Wine.mod)
pairs(Wine.mod, variables=ord, fill=TRUE, fill.alpha=.1, var.cex=1.5)
wine.boxm <- boxM(Wine.mod)
wine.boxm
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: Y
## Chi-Sq (approx.) = 680, df = 180, p-value <2e-16
More details are given by the summary
method
summary(wine.boxm)
## Summary for Box's M-test of Equality of Covariance Matrices
##
## Chi-Sq: 684.2
## df: 182
## p-value: <2e-16
##
## log of Covariance determinants:
## barolo grignolino barbera pooled
## -10.902 -2.443 -11.055 -3.189
##
## Eigenvalues:
## barolo grignolino barbera pooled
## 1 4.907e+04 2.479e+04 1.325e+04 2.972e+04
## 2 1.083e+02 2.101e+02 1.146e+02 1.731e+02
## 3 6.199e+00 1.143e+01 6.052e+00 7.951e+00
## 4 9.330e-01 1.130e+00 4.307e+00 2.258e+00
## 5 4.120e-01 9.590e-01 1.092e+00 8.197e-01
## 6 1.817e-01 6.396e-01 2.362e-01 4.260e-01
## 7 1.118e-01 2.543e-01 1.279e-01 2.295e-01
## 8 9.973e-02 1.815e-01 8.230e-02 1.451e-01
## 9 7.915e-02 1.156e-01 3.305e-02 1.117e-01
## 10 3.052e-02 5.565e-02 2.380e-02 6.357e-02
## 11 2.034e-02 3.260e-02 9.232e-03 3.446e-02
## 12 6.755e-03 2.952e-02 6.518e-03 1.962e-02
## 13 2.164e-03 7.369e-03 3.108e-03 7.987e-03
##
## Statistics based on eigenvalues:
## barolo grignolino barbera pooled
## product 1.842e-05 8.688e-02 1.580e-05 4.119e-02
## sum 4.919e+04 2.501e+04 1.338e+04 2.990e+04
## precision 1.364e-03 4.168e-03 1.468e-03 4.077e-03
## max 4.907e+04 2.479e+04 1.325e+04 2.972e+04
plot(wine.boxm, bias.adj=FALSE)
eigs <- summary(wine.boxm, quiet=TRUE)$eigs
large eigenvalues don’t differ that much; small one do!
op <- par(mfrow = c(1,2), mar=c(5,4,1,1)+.1)
matplot(1:6, log(eigs)[1:6,], type='b',
lwd=c(rep(1,3), 3), pch=c(1:3, 'p'),
lty=c(rep(3,3), 1),
cex=1.2, cex.lab=1.25,
xlab="Dimension", ylab="log Eigenvalue")
matplot(7:13, log(eigs)[7:13,], type='b',
lwd=c(rep(1,3), 3), pch=c(1:3, 'p'),
lty=c(rep(3,3), 1),
cex=1.2, cex.lab=1.25,
xlab="Dimension", ylab="log Eigenvalue")
par(op)
op <- par(mfrow=c(2,2), mar=c(5,4,1,1))
plot(wine.boxm, which="product", gplabel="Cultivar")
plot(wine.boxm, which="sum", gplabel="Cultivar")
plot(wine.boxm, which="precision", gplabel="Cultivar")
plot(wine.boxm, which="max", gplabel="Cultivar")
par(op)
bartlettTests(Wine[,-1], Wine$Cultivar)
## Bartlett's Tests for Homogeneity of Variance
##
## Chisq df Pr(>Chisq)
## Alcohol 1.60 2 0.44959
## MalicAcid 12.23 2 0.00221 **
## Ash 16.60 2 0.00025 ***
## AlcAsh 9.74 2 0.00766 **
## Mg 17.45 2 0.00016 ***
## Phenols 17.61 2 0.00015 ***
## Flav 44.59 2 2.1e-10 ***
## NonFlavPhenols 21.34 2 2.3e-05 ***
## Proa 12.54 2 0.00189 **
## Color 51.57 2 6.3e-12 ***
## Hue 27.06 2 1.3e-06 ***
## OD 19.80 2 5.0e-05 ***
## Proline 21.56 2 2.1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTests(Wine[,-1], Wine$Cultivar)
## Levene's Tests for Homogeneity of Variance (center = median)
##
## df1 df2 F value Pr(>F)
## Alcohol 2 175 0.60 0.55005
## MalicAcid 2 175 6.36 0.00216 **
## Ash 2 175 4.21 0.01635 *
## AlcAsh 2 175 2.94 0.05568 .
## Mg 2 175 0.66 0.51972
## Phenols 2 175 8.56 0.00028 ***
## Flav 2 175 11.43 2.2e-05 ***
## NonFlavPhenols 2 175 10.41 5.3e-05 ***
## Proa 2 175 2.94 0.05530 .
## Color 2 175 31.26 2.5e-12 ***
## Hue 2 175 11.85 1.5e-05 ***
## OD 2 175 10.71 4.1e-05 ***
## Proline 2 175 8.51 0.00030 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
winedev <- abs( colDevs(Wine[,-1], Wine$Cultivar, median) )
winedev <- data.frame( winedev, Cultivar=Wine$Cultivar)
winedev.mod <-lm( as.matrix(winedev[,1:13]) ~ Cultivar, data=winedev)
Anova(winedev.mod)
##
## Type II MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## Cultivar 2 0.792 8.28 26 328 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# variables that do not show any effects
pairs(winedev.mod, variables=1:5, fill=TRUE, fill.alpha=.1)
# variables that do show effects
pairs(winedev.mod, variables=c(7,10:12), fill=TRUE, fill.alpha=.1)
winedev.can <- candisc(winedev.mod)
winedev.can
##
## Canonical Discriminant Analysis for Cultivar:
##
## CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.528 1.12 0.757 75.6 75.6
## 2 0.265 0.36 0.757 24.4 100.0
##
## Test of H0: The canonical correlations in the
## current row and all that follow are zero
##
## LR test stat approx F num Df den Df Pr(> F)
## 1 0.347 60.6 4 348 < 2e-16 ***
## 2 0.735 63.0 1 175 2.4e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(winedev.can, which=1)
plot(winedev.can, ellipse=TRUE)