# Hypothesis testing and confidence regions for the multivariate linear model

library(car)

#### ---------------------------------------------------------------------------
#### T7-6 data, exercise 7.25

# NOTE(review): relative path — assumes the working directory contains
# "datafiles/"; confirm before running.
ami <- read.table(
  file = "datafiles/T7-6.dat",
  col.names = c("TCAD", "drug", "gender", "antidepressant", "PR", "dBP", "QRS")
)
ami

ami$gender <- as.factor(ami$gender)
# Rescale the two response variables to more convenient units
ami$TCAD <- ami$TCAD / 1000
ami$drug <- ami$drug / 1000

# Multivariate regression: bivariate response (TCAD, drug) on the predictors
fit.lm <- lm(cbind(TCAD, drug) ~ gender + antidepressant + PR + dBP + QRS,
             data = ami)
fit.manova <- Manova(fit.lm)
fit.manova

# Hypothesis matrix L for H0: the PR, dBP and QRS coefficient rows are zero.
# Filled by column, the 3 x 6 matrix selects coefficient rows 4-6
# (intercept, gender, antidepressant, PR, dBP, QRS).
C <- matrix(c(0, 0, 0,  0, 0, 0,  0, 0, 0,
              1, 0, 0,  0, 1, 0,  0, 0, 1),
            nrow = 3)
newfit <- linearHypothesis(model = fit.lm, hypothesis.matrix = C)
print(newfit)

# Reduced model without the vital-sign covariates
fit1.lm <- update(fit.lm, . ~ . - PR - dBP - QRS)
fit1.lm
fit1.manova <- Manova(fit1.lm)
summary(fit1.manova)

# Reduced model plus a gender-by-antidepressant interaction
fit2.lm <- update(fit.lm, . ~ . - PR - dBP - QRS + gender:antidepressant)
fit2.manova <- Manova(fit2.lm)
summary(Manova(fit2.lm))

# Model comparisons via Wilks' lambda
anova(fit.lm, fit1.lm, test = "Wilks")
anova(fit2.lm, fit1.lm, test = "Wilks")

#### ---------------------------------------------------------------------------
#### iris data

names(iris)
names(iris) <- c("SL", "SW", "PL", "PW", "SPP")

# Fit a multivariate one-way ANOVA model to the iris data
mod.iris <- lm(cbind(SL, SW, PL, PW) ~ SPP, data = iris)
mod.iris

# Test the null hypothesis that the four response means are identical
# across the three species of irises
manova.iris <- Anova(mod.iris)
summary(manova.iris)

# With only one term (beyond the regression constant) on the right-hand side,
# the default "type-II" test from Anova() coincides with the sequential
# ("type-I") test produced by the standard anova() function:
anova.iris <- anova(mod.iris)
anova.iris

# linearHypothesis() can test more specific hypotheses about the model
# parameters (continued below).
# More specific hypotheses about the parameters of the multivariate linear
# model: test for differences between setosa and the average of versicolor
# and virginica, and between versicolor and virginica.
linearHypothesis(mod.iris, "0.5*SPPversicolor + 0.5*SPPvirginica")
linearHypothesis(mod.iris, "SPPversicolor = SPPvirginica")

# An alternative, equivalent and more direct approach: fit the model with
# custom contrasts for the three species, then test each contrast.
contrasts(iris$SPP)  # default (dummy) contrasts

# Column 1: setosa vs. mean of versicolor & virginica; column 2: versicolor
# vs. virginica (matrix filled by column).
C <- matrix(c(1, -0.5, -0.5,
              0,  1.0, -1.0), 3, 2)
colnames(C) <- c("S:VV", "V:V")
# levels() guarantees the rows match the factor-level order
# (unique() only happens to agree for the iris data ordering)
rownames(C) <- levels(iris$SPP)
contrasts(iris$SPP) <- C
contrasts(iris$SPP)

# Refit with the new contrasts
mod.iris.2 <- update(mod.iris)
coef(mod.iris.2)
linearHypothesis(mod.iris.2, c(0, 1, 0))  # setosa vs. versicolor & virginica
linearHypothesis(mod.iris.2, c(0, 0, 1))  # versicolor vs. virginica

# Hypothesis-error (HE) plots (Friendly, 2007; Fox et al., 2009) use ellipses
# to represent the hypothesis and error sums-of-squares-and-products matrices;
# implemented in 2D and 3D in the heplots package.
# http://socserv.socsci.mcmaster.ca/jfox/heplots-dsc-paper.pdf
# heplots visualizes multivariate hypothesis tests and "Anova.mlm" objects
# based on the equation LB = C: the H and E matrices are drawn as (possibly
# degenerate) ellipses and ellipsoids. With the default significance scaling,
# the H ellipsoid extends outside the E ellipsoid if and only if the
# corresponding multivariate hypothesis is rejected by Roy's maximum root
# test at the given alpha level. See Friendly (2007) and Fox et al. (2009)
# for details, and Friendly (2010) for repeated-measures analogues.
# Install heplots only when missing — an unconditional install.packages()
# in a script re-downloads on every run.
if (!requireNamespace("heplots", quietly = TRUE)) {
  install.packages("heplots")
}
library(heplots)

# HE plot of the two custom contrasts
hyp <- list("V:V" = "SPPV:V", "S:VV" = "SPPS:VV")
heplot(mod.iris.2, hypotheses = hyp, fill = c(TRUE, FALSE),
       col = c("red", "blue"))

# For the hypothesis LBP = C, a response-transformation matrix P computes
# linear combinations of the responses: supply it via the imatrix argument
# of Anova() (a list of matrices) or the P argument of linearHypothesis()
# (a matrix). Trivial illustration: a univariate ANOVA for the first
# response (sepal length) extracted from the four-response model.
Anova(mod.iris, imatrix = list(Sepal.Length = matrix(c(1, 0, 0, 0))))
linearHypothesis(mod.iris, c("SPPversicolor = 0", "SPPvirginica = 0"),
                 P = matrix(c(1, 0, 0, 0)))  # equivalent

# Here P is a single column picking out the first response. Verify that a
# univariate ANOVA for SL gives the same F-test:
Anova(lm(SL ~ SPP, data = iris))

#### ---------------------------------------------------------------------------
#### Rohwer data, http://socserv.socsci.mcmaster.ca/jfox/heplots-dsc-paper.pdf
# Data from a study by Rohwer (Timm, 1975: Ex. 4.3, 4.7, 4.23) on
# kindergarten children: how well five paired-associate (PA) tasks —
# named (n), still (s), named still (ns), named action (na), and
# sentence still (ss) — predict performance on the Peabody Picture
# Vocabulary Test (PPVT), a student achievement test (SAT), and the Raven
# Progressive Matrices (Raven). Two groups were tested: n = 37 children
# from a low socioeconomic-status (SES) school and n = 32 high-SES children
# from an upper-class residential school.
# Rohwer data ships with the heplots package
Rohwer

# MANCOVA: three responses regressed on SES plus the five PA covariates
rohwer.mod <- lm(cbind(SAT, PPVT, Raven) ~ SES + n + s + ns + na + ss,
                 data = Rohwer)
Anova(rohwer.mod)

# Although the multivariate tests for two covariates (ns and na) are highly
# significant, the univariate multiple-regression tests for the separate
# responses [from summary(rohwer.mod)] are relatively weak. Test the 5-df
# hypothesis that all covariates have null effects for all responses as a
# linear hypothesis (suppressing display of the error and hypothesis SSP
# matrices):
summary(rohwer.mod)
Regr <- linearHypothesis(rohwer.mod, diag(7)[3:7, ])
print(Regr, digits = 5, SSP = FALSE)

# The MANCOVA model rohwer.mod assumes homogeneity of slopes for the
# predictors, and the SES test relies on that assumption. Test it by adding
# interactions of SES with each covariate:
rohwer.mod2 <- lm(cbind(SAT, PPVT, Raven) ~ SES * (n + s + ns + na + ss),
                  data = Rohwer)
Anova(rohwer.mod2)

# The separate SES: interaction terms give only weak evidence of unequal
# slopes; the evidence for heterogeneity is stronger when those terms are
# tested collectively with linearHypothesis():
(coefs <- rownames(coef(rohwer.mod2)))
print(linearHypothesis(rohwer.mod2, coefs[grep(":", coefs)]), SSP = FALSE)

# HE plots
# Figure 1
colors <- c("red", "blue", rep("black", 5), "darkgrey")
heplot(rohwer.mod, col = colors,
       hypotheses = list("Regr" = c("n", "s", "ns", "na", "ss")))
# Figures 2 and 3
pairs(rohwer.mod, col = colors,
      hypotheses = list("Regr" = c("n", "s", "ns", "na", "ss")))
heplot3d(rohwer.mod, col = colors,
         hypotheses = list("Regr" = c("n", "s", "ns", "na", "ss")))
# The predicted values for all three responses are positively correlated,
# and the hypothesized covariate effects span the full three dimensions of
# the responses. The high-SES group is higher on all responses than the
# low-SES group.
# Pairs plot for the heterogeneous-regression model rohwer.mod2. The ellipses
# labeled "Slopes" show the covariation of all slope-difference terms between
# the high- and low-SES groups.
# Figure 4
pairs(rohwer.mod2, col = c(colors, "brown"),
      terms = c("SES", "n", "s", "ns", "na", "ss"),
      hypotheses = list("Regr" = c("n", "s", "ns", "na", "ss"),
                        "Slopes" = coefs[grep(":", coefs)]))
# Comparing Figures 2 and 4 for the homogeneous and heterogeneous-slopes
# models: most covariates have ellipses of similar size and orientation,
# reflecting similar evidence against the null hypotheses in the baseline
# high-SES group and for both groups under common slopes; so too for SES,
# with the high-SES group performing better on all measures. The error
# covariation is noticeably smaller in some panels of Figure 4 (SAT and
# PPVT), reflecting additional variation accounted for by slope differences.

# Residuals vs. fitted, one panel per response.
# Use the fitted()/residuals() accessors instead of the garbled partial
# matches rohwer.mod$fit / rohwer.mod$res, and restore par() afterwards.
op <- par(mfrow = c(1, 3))
for (i in seq_len(3)) {
  plot(fitted(rohwer.mod)[, i], residuals(rohwer.mod)[, i])
}
# Normal Q-Q plots of the residuals, one per response
for (i in seq_len(3)) {
  qqnorm(residuals(rohwer.mod)[, i])
  qqline(residuals(rohwer.mod)[, i])
}
par(op)

# Durbin-Watson tests for the marginal (univariate) models
library(lmtest)
dwtest(SAT ~ SES + n + s + ns + na + ss, data = Rohwer)
dwtest(PPVT ~ SES + n + s + ns + na + ss, data = Rohwer)
dwtest(Raven ~ SES + n + s + ns + na + ss, data = Rohwer)