#hypothesis testing and confidence region for multivariate linear model
library(car)
####---------------------------------------------------------------------------------------------
#### T7-6 data, exercise 7.25
# Amitriptyline data: two responses (TCAD, drug) regressed on gender and
# three ECG / blood-pressure covariates.
ami <- read.table(
  file = "datafiles/T7-6.dat",
  col.names = c("TCAD", "drug", "gender", "antidepressant", "PR", "dBP", "QRS")
)
ami
ami$gender <- as.factor(ami$gender)
# Rescale the two responses so the sums of squares stay on a readable scale.
ami$TCAD <- ami$TCAD / 1000
ami$drug <- ami$drug / 1000
# Multivariate linear model with both responses on the left-hand side.
# (car is already attached at the top of the file; no need to re-load it.)
fit.lm <- lm(cbind(TCAD, drug) ~ gender + antidepressant + PR + dBP + QRS, data = ami)
fit.manova <- Manova(fit.lm)
fit.manova
# Hypothesis matrix for H0: PR = dBP = QRS = 0 on both responses.
# matrix() fills column-wise, so the three rows pick out coefficients 4-6
# (intercept, gender, antidepressant occupy positions 1-3).
C <- matrix(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1), nrow = 3)
newfit <- linearHypothesis(model = fit.lm, hypothesis.matrix = C)
print(newfit)
# Reduced model: drop the three covariates entirely.
fit1.lm <- update(fit.lm, . ~ . - PR - dBP - QRS)
fit1.lm
fit1.manova <- Manova(fit1.lm)
summary(fit1.manova)
# Reduced model augmented with a gender:antidepressant interaction.
fit2.lm <- update(fit.lm, . ~ . - PR - dBP - QRS + gender:antidepressant)
fit2.manova <- Manova(fit2.lm)
summary(Manova(fit2.lm))
# Compare nested models with the multivariate Wilks lambda test.
anova(fit.lm, fit1.lm, test = "Wilks")
anova(fit2.lm, fit1.lm, test = "Wilks")
####---------------------------------------------------------------------------------------------
#### iris data
names(iris)
# Shorten the variable names: sepal length/width, petal length/width, species.
names(iris) <- c("SL", "SW", "PL", "PW", "SPP")
# Fit a multivariate one-way ANOVA model to the iris data.
mod.iris <- lm(cbind(SL, SW, PL, PW) ~ SPP, data = iris)
mod.iris
# Use the Anova function in the car package to test the null hypothesis that
# the four response means are identical across the three species of irises.
manova.iris <- Anova(mod.iris)
summary(manova.iris)
# Because there is only one term (beyond the regression constant) on the
# right-hand side of the model, the "type-II" test produced by default by
# Anova is the same as the sequential ("type-I") test produced by the
# standard R anova function (output not shown):
anova.iris <- anova(mod.iris)
anova.iris
# linearHypothesis can test more specific hypotheses about the parameters.
# With the default treatment contrasts: (1) setosa vs. the average of
# versicolor and virginica, and (2) versicolor vs. virginica.
linearHypothesis(mod.iris, "0.5*SPPversicolor + 0.5*SPPvirginica")
linearHypothesis(mod.iris, "SPPversicolor = SPPvirginica")
# An alternative, equivalent, and more direct approach: fit the model with
# custom contrasts for the three species, then test each contrast.
contrasts(iris$SPP) # default (treatment) contrasts
C <- matrix(c(1, -0.5, -0.5, 0, 1, -1), 3, 2)
colnames(C) <- c("S:VV", "V:V")
# Label the contrast rows by the factor LEVELS. (unique() returns values in
# order of appearance, which is not guaranteed to match the level order that
# contrasts() uses; levels() is the correct mapping.)
rownames(C) <- levels(iris$SPP)
contrasts(iris$SPP) <- C
contrasts(iris$SPP)
# Refit under the new contrasts; the coefficients are now the two contrasts.
mod.iris.2 <- update(mod.iris)
coef(mod.iris.2)
linearHypothesis(mod.iris.2, c(0, 1, 0)) # setosa vs. versicolor & virginica
linearHypothesis(mod.iris.2, c(0, 0, 1)) # versicolor vs. virginica
# Hypothesis-error (HE) plots (Friendly, 2007; Fox et al., 2009) use ellipses
# to represent hypothesis and error sums-of-squares-and-products matrices.
# They are implemented in 2D and 3D in the heplots package for R.
# http://socserv.socsci.mcmaster.ca/jfox/heplots-dsc-paper.pdf
# The heplots package provides informative 2D and 3D visualizations of
# multivariate hypothesis tests and "Anova.mlm" objects based on the equation
# LB = C. These plots show direct visual representations of the H and E
# matrices as (possibly degenerate) ellipses and ellipsoids.
# With the default significance scaling, the H ellipsoid extends outside the
# E ellipsoid if and only if the corresponding multivariate hypothesis test is
# rejected by Roy's maximum-root test at the given alpha level. See Friendly
# (2007) and Fox et al. (2009) for details, and Friendly (2010) for analogous
# plots for repeated-measures designs.
# Install heplots only when it is missing, so re-running the script does not
# trigger a fresh download every time.
if (!requireNamespace("heplots", quietly = TRUE)) {
  install.packages("heplots")
}
library(heplots)
hyp <- list("V:V" = "SPPV:V", "S:VV" = "SPPS:VV")
heplot(mod.iris.2, hypotheses = hyp, fill = c(TRUE, FALSE), col = c("red", "blue"))
# For the hypothesis LBP = C, a response-transformation matrix P computes
# linear combinations of the responses, either via the imatrix argument to
# Anova (a list of matrices) or the P argument to linearHypothesis (a matrix).
# Trivial illustration: a univariate ANOVA for the first response (sepal
# length), extracted from the multivariate linear model for all four responses.
Anova(mod.iris, imatrix = list(Sepal.Length = matrix(c(1, 0, 0, 0))))
linearHypothesis(mod.iris, c("SPPversicolor = 0", "SPPvirginica = 0"),
                 P = matrix(c(1, 0, 0, 0))) # equivalent
# Here P is a single column picking out the first response. Verify that the
# same F-test comes out of a univariate ANOVA for sepal length:
Anova(lm(SL ~ SPP, data = iris))
####---------------------------------------------------------------------------------------------
#### Rohwer data, http://socserv.socsci.mcmaster.ca/jfox/heplots-dsc-paper.pdf
# Data from a study by Rohwer (given in Timm, 1975: Ex. 4.3, 4.7, and 4.23) on
# kindergarten children, designed to determine how well a set of
# paired-associate (PA) tasks predicted performance on the Peabody Picture
# Vocabulary Test (PPVT), a student achievement test (SAT), and the Raven
# Progressive Matrices test (Raven). The PA tasks varied in how the stimuli
# were presented: named (n), still (s), named still (ns), named action (na),
# and sentence still (ss). Two groups were tested: n = 37 children from a low
# socioeconomic status (SES) school, and n = 32 high-SES children from an
# upper-class, white residential school.
Rohwer
# MANCOVA: three responses, the SES group factor, and five PA covariates.
rohwer.mod <- lm(cbind(SAT, PPVT, Raven) ~ SES + n + s + ns + na + ss, data = Rohwer)
Anova(rohwer.mod)
# This model is of interest because, although the multivariate tests for two
# of the covariates (ns and na) are highly significant, univariate multiple
# regression tests for the separate responses [from summary(rohwer.mod)] are
# relatively weak. Test the 5-df hypothesis that all covariates have null
# effects for all responses as a single linear hypothesis (suppressing display
# of the error and hypothesis SSP matrices).
summary(rohwer.mod)
# Rows 3:7 of the 7x7 identity select the five covariate coefficients
# (intercept and SES occupy rows 1-2).
Regr <- linearHypothesis(rohwer.mod, diag(7)[3:7, ])
print(Regr, digits = 5, SSP = FALSE)
# The MANCOVA model rohwer.mod assumes homogeneity of slopes for the
# predictors, and the test of SES relies on this assumption. Test it by adding
# interactions of SES with each of the covariates:
rohwer.mod2 <- lm(cbind(SAT, PPVT, Raven) ~ SES * (n + s + ns + na + ss), data = Rohwer)
Anova(rohwer.mod2)
# The separate SES: terms give only weak evidence of unequal slopes. The
# evidence for heterogeneity is stronger when the interaction terms are tested
# collectively with linearHypothesis:
(coefs <- rownames(coef(rohwer.mod2)))
# grep(value = TRUE) returns the matching names directly.
print(linearHypothesis(rohwer.mod2, grep(":", coefs, value = TRUE)), SSP = FALSE)
# HE plots for the Rohwer models.
# Figure 1: 2D HE plot of the homogeneous-slopes model.
colors <- c("red", "blue", rep("black", 5), "darkgrey")
# The joint "Regr" hypothesis (all five covariates) is reused in every plot.
regr.hyp <- list("Regr" = c("n", "s", "ns", "na", "ss"))
heplot(rohwer.mod, col = colors, hypotheses = regr.hyp)
# Figures 2 and 3: all pairwise HE panels, and the 3D HE plot.
pairs(rohwer.mod, col = colors, hypotheses = regr.hyp)
heplot3d(rohwer.mod, col = colors, hypotheses = regr.hyp)
# The predicted values for all three responses are positively correlated, and
# the hypothesized effects of the covariates span the full three dimensions of
# the responses. The high-SES group is higher on all responses than the
# low-SES group.
# Pairs plot for the heterogeneous-regression model rohwer.mod2. The ellipses
# labeled "Slopes" show the covariation of all terms for slope differences
# between the high- and low-SES groups.
# Figure 4
pairs(rohwer.mod2,
      col = c(colors, "brown"),
      terms = c("SES", "n", "s", "ns", "na", "ss"),
      hypotheses = c(regr.hyp, list("Slopes" = coefs[grep(":", coefs)])))
# Comparing Figures 2 and 4 for the homogeneous- and heterogeneous-slopes
# models: most covariates have ellipses of similar size and orientation,
# reflecting similar evidence against the null hypotheses in the baseline
# high-SES group and for both groups in the common-slopes model; so too does
# the effect of SES, with the high-SES group performing better on all
# measures. The error covariation is noticeably smaller in some panels of
# Figure 4 (those for SAT and PPVT), reflecting additional variation accounted
# for by differences in slopes.
# Residual diagnostics for the fitted MANCOVA model.
# Use the fitted()/residuals() accessors: the original $fit / $res relied on
# partial matching of the fitted.values / residuals list components, a known
# footgun. Hoist them out of the loops since they are loop-invariant.
fit.vals <- fitted(rohwer.mod)
res.vals <- residuals(rohwer.mod)
old.par <- par(mfrow = c(1, 3))
# Residuals vs. fitted values, one panel per response.
for (i in seq_len(ncol(res.vals))) {
  plot(fit.vals[, i], res.vals[, i])
}
# Normal Q-Q plots of the residuals, one per response.
for (i in seq_len(ncol(res.vals))) {
  qqnorm(res.vals[, i])
  qqline(res.vals[, i])
}
# Restore the previous graphics settings.
par(old.par)
# Durbin-Watson test for autocorrelation in each marginal (per-response) model.
library(lmtest)
dwtest(SAT ~ SES + n + s + ns + na + ss, data = Rohwer)
dwtest(PPVT ~ SES + n + s + ns + na + ss, data = Rohwer)
dwtest(Raven ~ SES + n + s + ns + na + ss, data = Rohwer)