This script reproduces all of the analysis and graphs for the MANOVA of the Iris
data in the paper and also includes other analyses not described there. It is set up as an R script that can be “compiled” to HTML, Word, or PDF using knitr::knit()
. This is most convenient within R Studio via the File -> Compile Notebook
option.
library(heplots)
library(car) # actually, loaded by heplots
data(iris)
op <- par(mfcol=c(1,2), mar=c(5,4,1,1)+.1)
scatterplot(Sepal.Width ~ Sepal.Length | Species, data=iris,
ellipse=TRUE, levels=0.68, smoother=NULL, reg.line=FALSE, grid=FALSE,
legend.coords=list(x=7, y=4.4), col=c("red", "blue", "black"))
scatterplot(Sepal.Width ~ Sepal.Length | Species, data=iris,
ellipse=TRUE, levels=0.68, smoother=NULL, grid=FALSE,
reg.line=FALSE, cex=0,
legend.plot=FALSE, col=c("red", "blue", "black"))
par(op)
Uncentered and centered, first two variables
covEllipses(iris[,1:4], iris$Species,
fill=c(rep(FALSE,3), TRUE))
covEllipses(iris[,1:4], iris$Species, center=TRUE,
fill=c(rep(FALSE,3), TRUE), fill.alpha=.1, label.pos=c(1:3,0))
All pairs when more than two are specified
covEllipses(iris[,1:4], iris$Species,
fill=c(rep(FALSE,3), TRUE), variables=1:4,
fill.alpha=.1)
covEllipses(iris[,1:4], iris$Species, center=TRUE,
fill=c(rep(FALSE,3), TRUE), variables=1:4,
label.pos=c(1:3,0), fill.alpha=.1)
NB: scale.=FALSE by default
iris.pca <- prcomp(iris[,1:4])
summary(iris.pca)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 2.056 0.4926 0.2797 0.15439
## Proportion of Variance 0.925 0.0531 0.0171 0.00521
## Cumulative Proportion 0.925 0.9777 0.9948 1.00000
op <- par(mfcol=c(1,2), mar=c(5,4,1,1)+.1)
covEllipses(iris.pca$x, iris$Species,
fill=c(rep(FALSE,3), TRUE),
label.pos=1:4, fill.alpha=.1, asp=1)
covEllipses(iris.pca$x, iris$Species,
fill=c(rep(FALSE,3), TRUE), center=TRUE,
label.pos=1:4, fill.alpha=.1, asp=1)
par(op)
# all variables
covEllipses(iris.pca$x, iris$Species,
fill=c(rep(FALSE,3), TRUE), variables=1:4,
label.pos=1:4, fill.alpha=.1)
covEllipses(iris.pca$x, iris$Species, center=TRUE,
fill=c(rep(FALSE,3), TRUE), variables=1:4,
label.pos=1:4, fill.alpha=.1)
# Plot the last two, PC 3,4
covEllipses(iris.pca$x, iris$Species,
fill=c(rep(FALSE,3), TRUE), variables=3:4,
label.pos=c(1:3,0), fill.alpha=.1, asp=1)
covEllipses(iris.pca$x, iris$Species,
fill=c(rep(FALSE,3), TRUE), center=TRUE, variables=3:4,
label.pos=c(1:3,0), fill.alpha=.1, asp=1)
covEllipses(iris[,1:4], iris$Species)
covEllipses(iris[,1:4], iris$Species, fill=TRUE, method="mve", add=TRUE, labels="")
Box’s M test
iris.boxm <- boxM(iris[, 1:4], iris[, "Species"])
iris.boxm
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: iris[, 1:4]
## Chi-Sq (approx.) = 140, df = 20, p-value <2e-16
covEllipses has a method for "boxm"
objects
covEllipses(iris.boxm, fill=c(rep(FALSE,3), TRUE) )
covEllipses(iris.boxm, fill=c(rep(FALSE,3), TRUE), center=TRUE, label.pos=1:4 )
Boxplots of means, using car::Boxplot
op <- par(mfrow=c(1, 4), mar=c(5,4,1,1))
for (response in names(iris)[1:4]){
Boxplot(iris[, response] ~ Species, data=iris,
ylab=response, axes=FALSE, col=c("red", "blue", "gray"))
box()
axis(2)
axis(1, at=1:3, labels=c("Setosa", "Vers.", "Virginca"))
}
par(op)
iris.mod <- lm(as.matrix(iris[, 1:4]) ~ Species, data=iris)
Anova(iris.mod)
##
## Type II MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## Species 2 1.19 53.5 8 290 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
iris.boxm <- boxM(iris.mod)
iris.boxm
##
## Box's M-test for Homogeneity of Covariance Matrices
##
## data: Y
## Chi-Sq (approx.) = 140, df = 20, p-value <2e-16
irisdev <- abs( colDevs(iris[,1:4], iris$Species, median) )
irisdev.mod <- lm( irisdev ~ iris$Species)
Anova(irisdev.mod)
##
## Type II MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## iris$Species 2 0.394 8.89 8 290 6.7e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
irisdev.rmod <- robmlm( irisdev ~ iris$Species)
Anova(irisdev.rmod)
##
## Type II MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## iris$Species 2 0.408 9.28 8 290 2.2e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairs(irisdev.rmod, variables=1:4, fill=TRUE, fill.alpha=.1)
covEllipses(irisdev, group=iris$Species,
variables=1:4,
fill=c(rep(FALSE,3), TRUE), fill.alpha=0.1, label.pos=c(1:3,0))
pairs(irisdev.mod, variables=1:4, fill=TRUE, fill.alpha=.1)
Canonical views
library(candisc)
irisdev.can <- candisc(irisdev.mod)
irisdev.can
##
## Canonical Discriminant Analysis for iris$Species:
##
## CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.381 0.6154 0.602 97.9 97.9
## 2 0.013 0.0132 0.602 2.1 100.0
##
## Test of H0: The canonical correlations in the
## current row and all that follow are zero
##
## LR test stat approx F num Df den Df Pr(> F)
## 1 0.611 20.39 4 292 7.9e-15 ***
## 2 0.987 1.94 1 147 0.17
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(irisdev.can, which=1)
plot(irisdev.can, ellipse=TRUE)