Wine data examples for EqCov paper

This script reproduces all of the analysis and graphs for the MANOVA of the Wine data in the paper and also includes other analyses not described there. It is set up as an R script that can be “compiled” to HTML, Word, or PDF using knitr::knit(). This is most convenient within R Studio via the File -> Compile Notebook option.

Load packages and the data

library(heplots)
library(car)
library(candisc)
data(Wine, package="candisc")

Fit the MANOVA model

Wine.mod <- lm(as.matrix(Wine[, -1]) ~ Cultivar, data=Wine)
Anova(Wine.mod)

## 
## Type II MANOVA Tests: Pillai test statistic
##          Df test stat approx F num Df den Df Pr(>F)    
## Cultivar  2      1.71     73.2     26    328 <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HE plots

heplot(Wine.mod, fill=TRUE, fill.alpha=.1)

#pairs(Wine.mod, fill=TRUE, fill.alpha=.1)
pairs(Wine.mod, fill=TRUE, fill.alpha=.1, variables=1:5)

univariate F tests

There is no simple way to get a compact summary of the univariate F-tests from a MLM. This code extracts the result of summary() and makes them into a data frame.

ss <- summary(Wine.mod)
UniStats <- as.data.frame(matrix(0, nrow=length(ss), 5))
for (i in 1:length(ss)) {
    UniStats[i,1] <- ss[[i]]$r.squared
    f <- ss[[i]]$fstatistic
    UniStats[i,2:4] <- f
    UniStats[i,5] <- pf(f[1], f[2], f[3], lower.tail=FALSE)
}

rownames(UniStats) <- sub("Response ", "", names(ss))
UniStats$stars <- c(gtools:::stars.pval(UniStats[,5]))
UniStats[,5] <- format.pval(UniStats[,5], eps=0.001)
colnames(UniStats) <- c("R^2", "F", "df1", "df2", "Pr (>F)", "")
UniStats

##                   R^2      F df1 df2 Pr (>F)    
## Alcohol        0.6069 135.08   2 175  <0.001 ***
## MalicAcid      0.2969  36.94   2 175  <0.001 ***
## Ash            0.1321  13.31   2 175  <0.001 ***
## AlcAsh         0.2902  35.77   2 175  <0.001 ***
## Mg             0.1244  12.43   2 175  <0.001 ***
## Phenols        0.5172  93.73   2 175  <0.001 ***
## Flav           0.7278 233.93   2 175  <0.001 ***
## NonFlavPhenols 0.2396  27.58   2 175  <0.001 ***
## Proa           0.2570  30.27   2 175  <0.001 ***
## Color          0.5797 120.66   2 175  <0.001 ***
## Hue            0.5366 101.32   2 175  <0.001 ***
## OD             0.6847 189.97   2 175  <0.001 ***
## Proline        0.7038 207.92   2 175  <0.001 ***

view in canonical space

Wine.can <- candisc(Wine.mod)
Wine.can

## 
## Canonical Discriminant Analysis for Cultivar:
## 
##   CanRsq Eigenvalue Difference Percent Cumulative
## 1  0.901       9.08       4.95    68.7       68.7
## 2  0.805       4.13       4.95    31.3      100.0
## 
## Test of H0: The canonical correlations in the 
## current row and all that follow are zero
## 
##   LR test stat approx F num Df den Df Pr(> F)    
## 1       0.0193      539      4    348  <2e-16 ***
## 2       0.1950      722      1    175  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(Wine.can, ellipse=TRUE, var.lwd=2)

heplot(Wine.can, var.lwd=2)

## Vector scale factor set to  20.73

# need to use extremely small p-value to preserve resolution
heplot(Wine.can, var.lwd=2, alpha=0.00001, fill=TRUE, fill.alpha=.1, var.col="black")

## Vector scale factor set to  10.32

effect ordered pairs.mlm plot

The HE pairs plot is confusing with so many responses, but can be simplified by rearranging the variables in the order derived from the canonical discriminant plot.

struc <- Wine.can$structure
angles <- ifelse( struc[,1] > 0, 
                  atan(struc[,2]/struc[,1]), 
                  atan(struc[,2]/struc[,1]) + pi)
#angles[order(angles)]
colnames(Wine[,-1])[order(angles)]

##  [1] "AlcAsh"         "NonFlavPhenols" "MalicAcid"      "Color"         
##  [5] "Ash"            "Alcohol"        "Mg"             "Proline"       
##  [9] "Phenols"        "Flav"           "Proa"           "OD"            
## [13] "Hue"

This computation is now done by the function varOrder() in the candisc package.

(ord <- varOrder(Wine.mod))

##  [1]  4  8  2 10  3  1  5 13  6  7  9 12 11

varOrder(Wine.mod, names=TRUE)

##  [1] "AlcAsh"         "NonFlavPhenols" "MalicAcid"      "Color"         
##  [5] "Ash"            "Alcohol"        "Mg"             "Proline"       
##  [9] "Phenols"        "Flav"           "Proa"           "OD"            
## [13] "Hue"

ord <- varOrder(Wine.mod)
pairs(Wine.mod, variables=ord, fill=TRUE, fill.alpha=.1, var.cex=1.5)

Box’s M test

wine.boxm <- boxM(Wine.mod)
wine.boxm

## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 680, df = 180, p-value <2e-16

More details are given by the summary method

summary(wine.boxm)

## Summary for Box's M-test of Equality of Covariance Matrices
## 
## Chi-Sq:   684.2 
## df:   182 
## p-value: <2e-16 
## 
## log of Covariance determinants:
##     barolo grignolino    barbera     pooled 
##    -10.902     -2.443    -11.055     -3.189 
## 
## Eigenvalues:
##       barolo grignolino   barbera    pooled
## 1  4.907e+04  2.479e+04 1.325e+04 2.972e+04
## 2  1.083e+02  2.101e+02 1.146e+02 1.731e+02
## 3  6.199e+00  1.143e+01 6.052e+00 7.951e+00
## 4  9.330e-01  1.130e+00 4.307e+00 2.258e+00
## 5  4.120e-01  9.590e-01 1.092e+00 8.197e-01
## 6  1.817e-01  6.396e-01 2.362e-01 4.260e-01
## 7  1.118e-01  2.543e-01 1.279e-01 2.295e-01
## 8  9.973e-02  1.815e-01 8.230e-02 1.451e-01
## 9  7.915e-02  1.156e-01 3.305e-02 1.117e-01
## 10 3.052e-02  5.565e-02 2.380e-02 6.357e-02
## 11 2.034e-02  3.260e-02 9.232e-03 3.446e-02
## 12 6.755e-03  2.952e-02 6.518e-03 1.962e-02
## 13 2.164e-03  7.369e-03 3.108e-03 7.987e-03
## 
## Statistics based on eigenvalues:
##              barolo grignolino   barbera    pooled
## product   1.842e-05  8.688e-02 1.580e-05 4.119e-02
## sum       4.919e+04  2.501e+04 1.338e+04 2.990e+04
## precision 1.364e-03  4.168e-03 1.468e-03 4.077e-03
## max       4.907e+04  2.479e+04 1.325e+04 2.972e+04

plot(wine.boxm, bias.adj=FALSE)

Examine eigenvalue distributions

eigs <- summary(wine.boxm, quiet=TRUE)$eigs

large eigenvalues don’t differ that much; small one do!

op <- par(mfrow = c(1,2), mar=c(5,4,1,1)+.1)
matplot(1:6, log(eigs)[1:6,], type='b', 
    lwd=c(rep(1,3), 3), pch=c(1:3, 'p'),
    lty=c(rep(3,3), 1), 
    cex=1.2, cex.lab=1.25,
    xlab="Dimension", ylab="log Eigenvalue")
matplot(7:13, log(eigs)[7:13,], type='b', 
    lwd=c(rep(1,3), 3), pch=c(1:3, 'p'),
    lty=c(rep(3,3), 1), 
    cex=1.2, cex.lab=1.25,
    xlab="Dimension", ylab="log Eigenvalue")

par(op)

Plot other eigenvalue measures

 op <- par(mfrow=c(2,2), mar=c(5,4,1,1))
 plot(wine.boxm, which="product", gplabel="Cultivar")
 plot(wine.boxm, which="sum", gplabel="Cultivar")
 plot(wine.boxm, which="precision", gplabel="Cultivar")
 plot(wine.boxm, which="max", gplabel="Cultivar")

 par(op)

univariate Bartlett tests

bartlettTests(Wine[,-1], Wine$Cultivar)

## Bartlett's Tests for Homogeneity of Variance  
## 
##                Chisq df Pr(>Chisq)    
## Alcohol         1.60  2    0.44959    
## MalicAcid      12.23  2    0.00221 ** 
## Ash            16.60  2    0.00025 ***
## AlcAsh          9.74  2    0.00766 ** 
## Mg             17.45  2    0.00016 ***
## Phenols        17.61  2    0.00015 ***
## Flav           44.59  2    2.1e-10 ***
## NonFlavPhenols 21.34  2    2.3e-05 ***
## Proa           12.54  2    0.00189 ** 
## Color          51.57  2    6.3e-12 ***
## Hue            27.06  2    1.3e-06 ***
## OD             19.80  2    5.0e-05 ***
## Proline        21.56  2    2.1e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

univariate Levene tests

leveneTests(Wine[,-1], Wine$Cultivar)

## Levene's Tests for Homogeneity of Variance (center = median)
## 
##                df1 df2 F value  Pr(>F)    
## Alcohol          2 175    0.60 0.55005    
## MalicAcid        2 175    6.36 0.00216 ** 
## Ash              2 175    4.21 0.01635 *  
## AlcAsh           2 175    2.94 0.05568 .  
## Mg               2 175    0.66 0.51972    
## Phenols          2 175    8.56 0.00028 ***
## Flav             2 175   11.43 2.2e-05 ***
## NonFlavPhenols   2 175   10.41 5.3e-05 ***
## Proa             2 175    2.94 0.05530 .  
## Color            2 175   31.26 2.5e-12 ***
## Hue              2 175   11.85 1.5e-05 ***
## OD               2 175   10.71 4.1e-05 ***
## Proline          2 175    8.51 0.00030 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

multivariate analog of Levene’s test

winedev <- abs( colDevs(Wine[,-1], Wine$Cultivar, median) )
winedev <- data.frame( winedev, Cultivar=Wine$Cultivar)
winedev.mod <-lm( as.matrix(winedev[,1:13]) ~ Cultivar, data=winedev)
Anova(winedev.mod)

## 
## Type II MANOVA Tests: Pillai test statistic
##          Df test stat approx F num Df den Df Pr(>F)    
## Cultivar  2     0.792     8.28     26    328 <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# variables that do not show any effects
pairs(winedev.mod, variables=1:5, fill=TRUE, fill.alpha=.1)

# variables that do show effects
pairs(winedev.mod, variables=c(7,10:12), fill=TRUE, fill.alpha=.1)

Canonical view

winedev.can <- candisc(winedev.mod)
winedev.can

## 
## Canonical Discriminant Analysis for Cultivar:
## 
##   CanRsq Eigenvalue Difference Percent Cumulative
## 1  0.528       1.12      0.757    75.6       75.6
## 2  0.265       0.36      0.757    24.4      100.0
## 
## Test of H0: The canonical correlations in the 
## current row and all that follow are zero
## 
##   LR test stat approx F num Df den Df Pr(> F)    
## 1        0.347     60.6      4    348 < 2e-16 ***
## 2        0.735     63.0      1    175 2.4e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(winedev.can, which=1)

plot(winedev.can, ellipse=TRUE)