stocks<-read.table("datafiles/T8-4.dat") colnames(stocks)<-c("JP Morgan","Citibank","Wells Fargo","Royal Dutch Shell","Exxon Mobil") # plot standardized data on pairwise scatter plots stockss <- scale(stocks, center = T, scale = T) # Checking adequacy of factor analysis pairs(stockss, labels = c("JP Morgan","Citibank","Wells Fargo","Royal Dutch Shell","Exxon Mobil"), panel = function(x, y) { panel.smooth(x, y) abline(lsfit(x, y), lty = 2)}) Bartlett.sphericity.test(stocks) #Bartlett's test of sphericity #or use cortest.bartlett(stocks) in psych package kmo(stocks) #KMO sampling adequacy criteria with MSA # perform factor analysis with one factor factanal(x = stocks, factors = 1, scores = "regression", rotation = "varimax", method = "mle") # perform factor analysis with two factors stocks.fac <- factanal(x = stocks, factors = 2, scores = "regression", rotation = "varimax", method = "mle") stocks.fac stocks.fac\$loadings # first factor is dominated by bank companies # second factor by oil companies plot(stocks.fac\$scores) # calculate the residuals: stocks.pred <- stocks.fac\$loadings%*% t(stocks.fac\$loadings) + diag(stocks.fac\$uniquenesses) residuals <- cor(stocks) - stocks.pred residuals # changing the rotation to promax from varimax # note that R does not include the option quartimax. promax(stocks.fac\$loadings, m = 2) # of course, we could also have done it directly, if we had started using # quartimax stocks.facp <- factanal(x = stockss, factors = 2, scores = "regression", rotation = "promax", method = "mle") #use screeplot to decide the number of factors stocks.pr<-prcomp(stocks) #Unlike princomp, variances are computed with the usual divisor N - 1. screeplot(stocks.pr) #install.packages("psych") library(psych) fa.parallel(stocks) #Parallel analysis suggests that the number of factors = 2 ######----------------------------------------------------------------------------- ###### FA as a dimension reduction technique #####iris data irs<-iris[,1:4] irs.fa <- fa.parallel(irs) irs.out <- factanal(irs,factors = 1, rotation = "varimax",scores="regression") #use correlation matrix as default irs.out #one factor a<-(irs.out\$loadings)%*%t(irs.out\$loadings)+diag(irs.out\$uniq) proj<-t(irs.out\$loadings)%*%solve(a)%*%t(scale(irs)) dotchart(proj,group=iris[,5],gcolor="black",color=as.numeric(iris[,5])) #or dotchart(irs.out\$scores,group=iris[,5],gcolor="black",color=as.numeric(iris[,5])) install.packages("GPArotation") library(GPArotation) #two factors irs.fa<-fa(irs,rotate="Varimax",nfactor=2,SMC=FALSE) plot(irs.fa\$scores,col=as.numeric(iris[,5])) Bartlett.sphericity.test(irs) #Bartlett's test of sphericity #or use cortest.bartlett(irs) in psych package kmo(irs) #KMO sampling adequacy criteria with MSA ####Appendix kmo <- function(x) { x <- subset(x, complete.cases(x)) # Omit missing values r <- cor(x) # Correlation matrix r2 <- r^2 # Squared correlation coefficients i <- solve(r) # Inverse matrix of correlation matrix d <- diag(i) # Diagonal elements of inverse matrix p2 <- (-i/sqrt(outer(d, d)))^2 # Squared partial correlation coefficients diag(r2) <- diag(p2) <- 0 # Delete diagonal elements KMO <- sum(r2)/(sum(r2)+sum(p2)) MSA <- colSums(r2)/(colSums(r2)+colSums(p2)) #individual measures of sampling adequacy for each item return(list(KMO=KMO, MSA=MSA)) } Bartlett.sphericity.test <- function(x) { method <- "Bartlett's test of sphericity" data.name <- deparse(substitute(x)) x <- subset(x, complete.cases(x)) # Omit missing values n <- nrow(x) p <- ncol(x) chisq <- (1-n+(2*p+5)/6)*log(det(cor(x))) df <- p*(p-1)/2 p.value <- pchisq(chisq, df, lower.tail=FALSE) names(chisq) <- "X-squared" names(df) <- "df" return(structure(list(statistic=chisq, parameter=df, p.value=p.value, method=method, data.name=data.name), class="htest")) }