## ------------------------------------------------------------------------
## One-sample Hotelling's T2 test for pairwise differences
## ------------------------------------------------------------------------
## Example 6.1: two measurements (BOD, SS) taken by two labs on the
## same samples; columns are lab-1 BOD/SS and lab-2 BOD/SS.
edat <- read.table(
  file = "datafiles/T6-1.dat", header = FALSE,
  col.names = c("bod1", "ss1", "bod2", "ss2")
)
edat

## Paired comparison for univariate normal data (BOD only).
x <- edat[, 1]
y <- edat[, 3]
t.test(x, y, paired = TRUE)

## Paired comparisons for multivariate normal data.
xbar <- colMeans(edat)
xbar
xvar <- var(edat)
xvar

# Apply the contrasts: rows give (bod1 - bod2) and (ss1 - ss2).
Cstar <- matrix(
  c(1, 0, -1, 0,
    0, 1, 0, -1),
  2, 4, byrow = TRUE
)
Cstar
Cxbar <- Cstar %*% xbar
Cxvar <- Cstar %*% xvar %*% t(Cstar)

# Compute the Hotelling T^2 statistic and its F-based p-value.
p <- nrow(Cxvar)
n <- nrow(edat)
nullmean <- rep(0, p)
d <- Cxbar - nullmean
t2 <- n * t(d) %*% solve(Cxvar) %*% d
t2mod <- (n - p) * t2 / (p * (n - 1))  # scaled T^2 ~ F(p, n - p) under H0
pval <- 1 - pf(t2mod, p, n - p)
cat("Hotelling T-squared statistic", fill = TRUE)
t2
cat("p-value", fill = TRUE)
pval

# Simultaneous (T^2-based) confidence intervals for the contrast means.
alpha <- 0.05
wd <- sqrt(((n - 1) * p / (n - p)) * qf(1 - alpha, p, n - p)) *
  sqrt(diag(Cxvar / n))
Cis <- cbind(d - wd, d + wd)
cat("95% simultaneous confidence interval", "\n")
Cis
# Both intervals contain 0.

# Bonferroni simultaneous confidence intervals.
wd.b <- qt(1 - alpha / (2 * p), n - 1) * sqrt(diag(Cxvar / n))
Cis.b <- cbind(d - wd.b, d + wd.b)
cat("95% Bonferroni simultaneous confidence interval", "\n")
Cis.b
# Both intervals contain 0.

# Alternative: apply Hotelling's T^2 test from the ICSNP package to the
# matrix of contrast scores directly.
library(ICSNP)
HotellingsT2(as.matrix(edat) %*% t(Cstar), mu = c(0, 0))
# We should reject H0. Yet both component-wise simultaneous confidence
# intervals contain 0, so they fail to pick out the significant
# differences on their own.
# We randomly pick a number of projection vectors a and count how often
# the projected simultaneous CI excludes 0.
set.seed(123)
idx <- 0
for (i in 1:10000) {
  a <- rnorm(2)
  la <- a %*% d
  # Loop-local names (wd.a, ci.a): the original overwrote the wd/Cis
  # computed earlier, which broke the rectangle plotting further below
  # (Cis became a 1x2 matrix, so Cis[2, 1] was out of bounds).
  wd.a <- sqrt(((n - 1) * p / (n - p)) * qf(1 - alpha, p, n - p)) *
    sqrt((t(a) %*% Cxvar %*% a) * (1 / n))
  ci.a <- cbind(la - wd.a, la + wd.a)
  if (min(ci.a) > 0 | max(ci.a) < 0) idx <- idx + 1
}
idx / 10000
# About 27% of the linear projections' CIs do not contain 0; this
# indicates that the mean difference is different from zero.

# Check visually the confidence region and intervals.
# Ds holds the paired differences (bod1 - bod2, ss1 - ss2); it was
# never defined in the original script, so plot(Ds, ...) failed.
Ds <- as.matrix(edat) %*% t(Cstar)
plot(Ds, xlim = c(-30, 50), ylim = c(-20, 50), pch = 19,
     xlab = "Diff x1", ylab = "Diff x2")
mu <- colMeans(Ds)
sigma <- cov(Ds)
n <- nrow(Ds)

# 95% confidence ellipse for the mean: axes from the eigendecomposition
# of the sample covariance, radius from the F quantile (p = 2 here).
es <- eigen(sigma)
e1 <- es$vec %*% diag(sqrt(es$val))
r1 <- sqrt((n - 1) * 2 * qf(1 - alpha, 2, n - 2) / (n * (n - 2)))
theta <- seq(0, 2 * pi, len = 250)
v1 <- cbind(r1 * cos(theta), r1 * sin(theta))
pts <- t(mu - (e1 %*% t(v1)))
lines(pts, col = 2, lwd = 2)
abline(h = 0)
abline(v = 0)

# Simultaneous confidence intervals drawn as a rectangle.
segments(Cis[1, 1], Cis[2, 1], Cis[1, 2], Cis[2, 1])
segments(Cis[1, 1], Cis[2, 2], Cis[1, 2], Cis[2, 2])
segments(Cis[1, 1], Cis[2, 1], Cis[1, 1], Cis[2, 2])
segments(Cis[1, 2], Cis[2, 1], Cis[1, 2], Cis[2, 2])

# Bonferroni simultaneous confidence intervals drawn as a rectangle.
segments(Cis.b[1, 1], Cis.b[2, 1], Cis.b[1, 2], Cis.b[2, 1], lty = 2, lwd = 2)
segments(Cis.b[1, 1], Cis.b[2, 2], Cis.b[1, 2], Cis.b[2, 2], lty = 2, lwd = 2)
segments(Cis.b[1, 1], Cis.b[2, 1], Cis.b[1, 1], Cis.b[2, 2], lty = 2, lwd = 2)
segments(Cis.b[1, 2], Cis.b[2, 1], Cis.b[1, 2], Cis.b[2, 2], lty = 2, lwd = 2)
legend("topright",
       legend = c("Conf.Reg", "Simu.CI", "Bonf.Conf.CI"),
       lty = c(1, 1, 2), col = c(2, 1, 1), lwd = 2)

## ------------------------------------------------------------------------
## One-sample Hotelling's T2 test for repeated measures
## ------------------------------------------------------------------------
## Example 6.2: four repeated measurements x1..x4 per subject.
dogdat <- read.table("datafiles/T6-2.dat", header = FALSE,
                     col.names = c("x1", "x2", "x3", "x4"))
dogdat

# Compute sample mean vector and sample covariance matrix.
xbar <- colMeans(dogdat)
xbar
xvar <- var(dogdat)
xvar

# Apply the contrasts (three contrasts among the four conditions).
Cm <- matrix(
  c(-1, -1, 1, 1,
    1, -1, 1, -1,
    1, -1, -1, 1),
  3, 4, byrow = TRUE
)
Cm
Cxbar <- Cm %*% xbar
Cxvar <- Cm %*% xvar %*% t(Cm)

# Compute the Hotelling T^2 statistic for H0: C mu = 0.
p <- nrow(Cxvar)
n <- nrow(dogdat)
nullmean <- c(0, 0, 0)
d <- Cxbar - nullmean
t2 <- n * t(d) %*% solve(Cxvar) %*% d
t2mod <- (n - p) * t2 / (p * (n - 1))  # scaled T^2 ~ F(p, n - p) under H0
pval <- 1 - pf(t2mod, p, n - p)
cat("Hotelling T-squared statistic", fill = TRUE)
t2
cat("p-value", fill = TRUE)
pval

# Alternative using package ICSNP.
X <- as.matrix(dogdat) %*% t(Cm)  # remember the need for using t(C)
HotellingsT2(X)

# Simultaneous (T^2-based) confidence intervals for the contrast means.
alpha <- 0.05
wd <- sqrt(((n - 1) * p / (n - p)) * qf(1 - alpha, p, n - p)) *
  sqrt(diag(Cxvar / n))
Cis <- cbind(d - wd, d + wd)
cat("95% simultaneous confidence interval", "\n")
Cis

# Bonferroni simultaneous confidence intervals.
wd.b <- qt(1 - alpha / (2 * p), n - 1) * sqrt(diag(Cxvar / n))
Cis.b <- cbind(d - wd.b, d + wd.b)
cat("95% Bonferroni simultaneous confidence interval", "\n")
Cis.b

## ------------------------------------------------------------------------
## Two-sample Hotelling's T2 test
## ------------------------------------------------------------------------
# Data on (1) different rolling temperatures (1 and 2) and their effect
# on (2) yield point and (3) ultimate strength of steel.
steel <- read.table(file = "datafiles/steel.dat", header = FALSE,
                    col.names = c("temperature", "yield", "strength"))
steel

# Energy test for multivariate normality within each temperature group.
# NOTE(review): recent versions of the energy package require the number
# of bootstrap replicates, e.g. mvnorm.etest(x, R = 999) -- confirm the
# installed version before running.
library(energy)
mvnorm.etest(x = steel[steel$temperature == 1, -1])
mvnorm.etest(x = steel[steel$temperature == 2, -1])

# Check for homogeneity of the variance-covariance matrices.
cov(steel[steel$temperature == 1, -1])
cov(steel[steel$temperature == 2, -1])
# Under the rule of thumb established, it would be OK to assume
# homogeneity of variances and covariances.
# Alternatively, we perform Box's M test for equality of the two
# covariance matrices.
source("BoxMTest.R")
BoxMTest(steel[, -1], factor(steel[, 1]), alpha = 0.05)

# Now we perform the two-sample Hotelling T^2 test.
HotellingsT2(steel[steel$temperature == 1, -1],
             steel[steel$temperature == 2, -1])

# Since we reject the null, we use the simultaneous confidence intervals
# to check which components differ significantly.
n <- table(steel[, 1])  # group sample sizes n1, n2
p <- ncol(steel) - 1
xmean1 <- colMeans(steel[steel$temperature == 1, -1])
xmean2 <- colMeans(steel[steel$temperature == 2, -1])
d <- xmean1 - xmean2
S1 <- var(steel[steel$temperature == 1, -1])
S2 <- var(steel[steel$temperature == 2, -1])
# Pooled covariance matrix, weighted by the group degrees of freedom.
Sp <- ((n[1] - 1) * S1 + (n[2] - 1) * S2) / (nrow(steel) - 2)

# Simultaneous (T^2-based) confidence intervals.
alpha <- 0.05
wd <- sqrt(((n[1] + n[2] - 2) * p / (n[1] + n[2] - p - 1)) *
             qf(1 - alpha, p, n[1] + n[2] - p - 1)) *
  sqrt(diag(Sp) * sum(1 / n))
Cis <- cbind(d - wd, d + wd)
cat("95% simultaneous confidence interval", "\n")
Cis

# Bonferroni simultaneous confidence intervals.
wd.b <- qt(1 - alpha / (2 * p), n[1] + n[2] - 2) * sqrt(diag(Sp) * sum(1 / n))
Cis.b <- cbind(d - wd.b, d + wd.b)
cat("95% Bonferroni simultaneous confidence interval", "\n")
Cis.b
# Both component-wise simultaneous confidence intervals contain 0, so
# they did not find the significant differences.

# We randomly pick a number of projection vectors a and count how often
# the projected simultaneous CI excludes 0.
set.seed(123)
idx <- 0
for (i in 1:10000) {
  a <- rnorm(2)
  la <- a %*% d
  wd <- sqrt(((n[1] + n[2] - 2) * p / (n[1] + n[2] - p - 1)) *
               qf(1 - alpha, p, n[1] + n[2] - p - 1)) *
    sqrt((t(a) %*% Sp %*% a) * sum(1 / n))
  Cis <- cbind(la - wd, la + wd)
  if (min(Cis) > 0 | max(Cis) < 0) idx <- idx + 1
}
idx / 10000
# About 24% of the linear projections' CIs do not contain 0; this
# indicates that the two mean vectors are different.