# Multivariate descriptive statistics on the built-in `iris` data set.
#
# NOTE: every statement must sit on its own line. In R a `#` comments out the
# rest of the physical line, so collapsing this script onto one line silently
# disables everything after the first comment.

# Use the USTC CRAN mirror (over HTTPS) for the package installation below.
options(repos = c(CRAN = "https://mirrors.ustc.edu.cn/CRAN/"))

# ---- Measures of location and spread ----
iris.a <- iris[, 1:4]
vapply(iris.a, mean, numeric(1))
vapply(iris.a, median, numeric(1))
vapply(iris.a, IQR, numeric(1))
# summary() returns several values per column, so the result is a matrix.
sapply(iris.a, summary)

# ---- Means and variances per group ----
iris.set <- iris[iris$Species == "setosa", 1:4]
vapply(iris.set, mean, numeric(1))
vapply(iris.set, var, numeric(1))

# Print per-group means, standard deviations and sample sizes.
#
# Arguments:
#   variables      data frame or matrix of numeric variables, one row per
#                  observation.
#   groupvariable  grouping vector or factor with one entry per row of
#                  `variables`; a one-column data frame is also accepted.
#
# Prints three tables (one row per group; first column `groupvariable`,
# remaining columns named after the variables) and invisibly returns the
# sample-size table.
printMeanAndSdByGroup <- function(variables, groupvariable) {
  if (is.data.frame(groupvariable)) {
    groupvariable <- groupvariable[[1]]
  }
  variables <- as.data.frame(variables)

  # Drop rows with a missing value in any variable or in the group. This is
  # what the formula interface of aggregate() does through its default
  # na.action = na.omit, so the reported numbers stay the same.
  complete <- complete.cases(variables, groupvariable)
  variables <- variables[complete, , drop = FALSE]
  groups <- list(groupvariable = groupvariable[complete])

  # Aggregate the data frame directly instead of `as.matrix(x) ~ g`: the
  # column names of the result then come straight from `variables` rather
  # than from how the formula method labels a matrix left-hand side.
  summarise_by_group <- function(fun) {
    aggregate(variables, by = groups, FUN = fun)
  }

  means <- summarise_by_group(mean)
  print("Means:")
  print(means)

  sds <- summarise_by_group(sd)
  print("Standard deviations:")
  print(sds)

  samplesizes <- summarise_by_group(length)
  print("Sample sizes:")
  print(samplesizes)

  invisible(samplesizes)
}
printMeanAndSdByGroup(iris[, 1:4], iris[, 5])

# ---- Covariance and correlation matrices ----
cov(iris.a)
cor(iris.a)
cor.test(iris[, 1], iris[, 2])

# Report the most strongly correlated pairs of variables.
#
# Arguments:
#   mydataframe  data frame or matrix of numeric variables.
#   numtoreport  maximum number of pairs to return (default: number of
#                columns of `mydataframe`).
#
# Returns a data frame with columns First.Variable, Second.Variable and
# Correlation, sorted by decreasing absolute correlation. Each unordered pair
# appears once; a variable is never paired with itself.
mosthighlycorrelated <- function(mydataframe,
                                 numtoreport = ncol(mydataframe)) {
  cormatrix <- cor(mydataframe)

  # Flatten the matrix (column-major) into one row per cell.
  fm <- as.data.frame(as.table(cormatrix))
  names(fm) <- c("First.Variable", "Second.Variable", "Correlation")

  # Keep only the strict upper triangle. Zeroing the diagonal and lower
  # triangle instead would leave placeholder rows in the table, which get
  # reported as 0-correlation "pairs" whenever numtoreport exceeds the
  # number of real pairs (e.g. the default on a two-column input).
  fm <- fm[as.vector(upper.tri(cormatrix)), , drop = FALSE]

  head(fm[order(abs(fm$Correlation), decreasing = TRUE), ], n = numtoreport)
}
mosthighlycorrelated(iris.a)
mosthighlycorrelated(iris.a, 2)

# ---- Shape: skewness and kurtosis ----
# Install `moments` only when it is missing rather than on every run.
if (!requireNamespace("moments", quietly = TRUE)) {
  install.packages("moments")
}
library(moments)
vapply(iris.a, skewness, numeric(1))
vapply(iris.a, kurtosis, numeric(1))

# ---- Standardize variables ----
# scale() centers and scales each column; the checks below should show
# column means of (numerically) zero and column variances of one.
stand.iris <- scale(iris.a)
apply(stand.iris, 2, mean)
apply(stand.iris, 2, var)

# ---- Distances between observations ----
dist.iris <- dist(iris.a)
dist.man.iris <- dist(iris.a, method = "manhattan")
# Expand the `dist` object to a full square matrix and show it as a heat map.
dst <- as.matrix(dist.iris)
n <- nrow(dst)
image(1:n, 1:n, dst)