This script reproduces all of the analysis and graphs for the MANOVA of the Iris data in the paper and also includes other analyses not described there. It is set up as an R script that can be “compiled” to HTML, Word, or PDF using knitr::knit(). This is most convenient within R Studio via the File -> Compile Notebook option.

Load packages and the data

library(heplots)
library(car)    # actually, loaded by heplots
data(iris)

Initial scatterplots and data ellipses

op <- par(mfcol=c(1,2), mar=c(5,4,1,1)+.1)
scatterplot(Sepal.Width ~ Sepal.Length | Species, data=iris,
        ellipse=TRUE, levels=0.68, smoother=NULL, reg.line=FALSE, grid=FALSE,
        legend.coords=list(x=7, y=4.4), col=c("red", "blue", "black"))  

scatterplot(Sepal.Width ~ Sepal.Length | Species, data=iris,
        ellipse=TRUE, levels=0.68, smoother=NULL, grid=FALSE,
        reg.line=FALSE, cex=0, 
        legend.plot=FALSE, col=c("red", "blue", "black"))

par(op)

Using the covEllipse function

Uncentered and centered, first two variables

covEllipses(iris[,1:4], iris$Species, 
    fill=c(rep(FALSE,3), TRUE))

covEllipses(iris[,1:4], iris$Species, center=TRUE, 
    fill=c(rep(FALSE,3), TRUE), fill.alpha=.1, label.pos=c(1:3,0))

All pairs when more than two are specified

covEllipses(iris[,1:4], iris$Species, 
    fill=c(rep(FALSE,3), TRUE), variables=1:4, 
    fill.alpha=.1)

covEllipses(iris[,1:4], iris$Species, center=TRUE,
    fill=c(rep(FALSE,3), TRUE), variables=1:4, 
    label.pos=c(1:3,0), fill.alpha=.1)

view in PCA space

NB: scale.=FALSE by default

iris.pca <- prcomp(iris[,1:4])
summary(iris.pca)
## Importance of components:
##                          PC1    PC2    PC3     PC4
## Standard deviation     2.056 0.4926 0.2797 0.15439
## Proportion of Variance 0.925 0.0531 0.0171 0.00521
## Cumulative Proportion  0.925 0.9777 0.9948 1.00000
op <- par(mfcol=c(1,2), mar=c(5,4,1,1)+.1)
covEllipses(iris.pca$x, iris$Species, 
    fill=c(rep(FALSE,3), TRUE), 
    label.pos=1:4, fill.alpha=.1, asp=1)

covEllipses(iris.pca$x, iris$Species, 
    fill=c(rep(FALSE,3), TRUE), center=TRUE,
    label.pos=1:4, fill.alpha=.1, asp=1)

par(op)

# all variables
covEllipses(iris.pca$x, iris$Species, 
    fill=c(rep(FALSE,3), TRUE), variables=1:4, 
    label.pos=1:4, fill.alpha=.1)

covEllipses(iris.pca$x, iris$Species, center=TRUE,  
    fill=c(rep(FALSE,3), TRUE), variables=1:4, 
    label.pos=1:4, fill.alpha=.1)

# Plot the last two, PC 3,4
covEllipses(iris.pca$x, iris$Species, 
    fill=c(rep(FALSE,3), TRUE), variables=3:4, 
    label.pos=c(1:3,0), fill.alpha=.1, asp=1)

covEllipses(iris.pca$x, iris$Species, 
    fill=c(rep(FALSE,3), TRUE), center=TRUE, variables=3:4, 
    label.pos=c(1:3,0), fill.alpha=.1, asp=1)

compare classical and robust covariance estimates

covEllipses(iris[,1:4], iris$Species)
covEllipses(iris[,1:4], iris$Species, fill=TRUE, method="mve", add=TRUE, labels="")

Box’s M test

iris.boxm <- boxM(iris[, 1:4], iris[, "Species"])
iris.boxm
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  iris[, 1:4]
## Chi-Sq (approx.) = 140, df = 20, p-value <2e-16

covEllipses has a method for "boxm" objects

covEllipses(iris.boxm, fill=c(rep(FALSE,3), TRUE) )

covEllipses(iris.boxm, fill=c(rep(FALSE,3), TRUE), center=TRUE, label.pos=1:4 )

Boxplots of means, using car::Boxplot

op <- par(mfrow=c(1, 4), mar=c(5,4,1,1))
for (response in names(iris)[1:4]){
    Boxplot(iris[, response] ~ Species, data=iris,
            ylab=response, axes=FALSE, col=c("red", "blue", "gray"))
    box()
    axis(2)
    axis(1, at=1:3, labels=c("Setosa", "Vers.", "Virginca"))
    }

par(op)

models & plots

iris.mod <- lm(as.matrix(iris[, 1:4]) ~ Species, data=iris)
Anova(iris.mod)
## 
## Type II MANOVA Tests: Pillai test statistic
##         Df test stat approx F num Df den Df Pr(>F)    
## Species  2      1.19     53.5      8    290 <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
iris.boxm <- boxM(iris.mod)
iris.boxm
## 
##  Box's M-test for Homogeneity of Covariance Matrices
## 
## data:  Y
## Chi-Sq (approx.) = 140, df = 20, p-value <2e-16

multivariate Levene test

irisdev <- abs( colDevs(iris[,1:4], iris$Species, median) )
irisdev.mod <- lm( irisdev ~ iris$Species)
Anova(irisdev.mod)
## 
## Type II MANOVA Tests: Pillai test statistic
##              Df test stat approx F num Df den Df  Pr(>F)    
## iris$Species  2     0.394     8.89      8    290 6.7e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

robust MLM

irisdev.rmod <- robmlm( irisdev ~ iris$Species)
Anova(irisdev.rmod)
## 
## Type II MANOVA Tests: Pillai test statistic
##              Df test stat approx F num Df den Df  Pr(>F)    
## iris$Species  2     0.408     9.28      8    290 2.2e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairs(irisdev.rmod, variables=1:4, fill=TRUE, fill.alpha=.1)

covariance ellipses of absolute deviations

covEllipses(irisdev, group=iris$Species,
    variables=1:4,
    fill=c(rep(FALSE,3), TRUE), fill.alpha=0.1, label.pos=c(1:3,0))

pairs(irisdev.mod, variables=1:4, fill=TRUE, fill.alpha=.1)

Canonical views

library(candisc)
irisdev.can <- candisc(irisdev.mod)
irisdev.can
## 
## Canonical Discriminant Analysis for iris$Species:
## 
##   CanRsq Eigenvalue Difference Percent Cumulative
## 1  0.381     0.6154      0.602    97.9       97.9
## 2  0.013     0.0132      0.602     2.1      100.0
## 
## Test of H0: The canonical correlations in the 
## current row and all that follow are zero
## 
##   LR test stat approx F num Df den Df Pr(> F)    
## 1        0.611    20.39      4    292 7.9e-15 ***
## 2        0.987     1.94      1    147    0.17    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(irisdev.can, which=1)

plot(irisdev.can, ellipse=TRUE)