############################################################ ## screening, pre-selection of variants fscreen = function(data, mm, maxsize) { n = nrow(data) m = ncol(data) colnames(data) = S = 1:m hatP = matrix(colSums(data) / n, 1, m) PP = crossprod(hatP) jointP <- crossprod(data) / n scorematrix <- (jointP + 0.01) / (PP + 0.01) upper.tri(scorematrix) -> upper.tri_sz scorematrix[upper.tri_sz] = -1 diag(scorematrix) = -1 scorematrix[!upper.tri_sz] -> sz_down.tri scoremax = sort(sz_down.tri[sz_down.tri > 0])[1:maxsize] ##########shangzhen中前面maxsize个小? locas_L = vector("list", length = maxsize) for (i in 1:maxsize) { locas_L[[i]] <- which(scorematrix == scoremax[i]) } locas <- unlist(locas_L) locas <- unique(locas)[1:maxsize] for (i in 1:maxsize) { S1 = c(locas[i] %/% m + 1 * (locas[i] %% m != 0), locas[i] %% m + m * (locas[i] %% m == 0)) data1 = data[(data[, S1[1]] + data[, S1[2]]) > 0, ] data2 = data[(data[, S1[1]] + data[, S1[2]]) == 0, ] n1 = nrow(data1) n2 = nrow(data2) hatP1 = colSums(data1) / n1 hatP2 = colSums(data2) / n2 b = ((hatP1 + 1) / (hatP2 + 1))[-S1] if (i == 1) { score = min(b) SS = sort(c(S1, (1:m)[-S1][which(b <= (sort(b)[(mm - 2)]))][1:(mm - 2)])) } if (min(b) < score) { score = min(b) SS = sort(c(S1, (1:m)[-S1][which(b <= (sort(b)[(mm - 2)]))][1:(mm - 2)])) } } return(SS) }# end of fscreen ############################################################ # LD funLD = function (data, k, C1, C2) { SS = as.numeric(colnames(data)) n = nrow(data) m = ncol(data) colnames(data) = S = 1:m hatP = matrix(colSums(data)/n, 1, m) PP <- crossprod(hatP) PP2 <- crossprod(1 - hatP) jointP <- crossprod(data)/n stat = sqrt(n) * (PP - jointP)/sqrt(PP * PP2) stat[upper.tri(stat)] = 0 diag(stat) = 0 maxscore = max(stat) locas = match(maxscore, stat) S1 = NULL score = maxscore while (maxscore >= qnorm(C1^(1/((length(S1) == 0) + (length(S1)) * C2))) & length(S1) < k) { score = max(maxscore, score) if (length(S1) == 0) { S1 = c(locas%/%m + 1 * (locas%%m != 0), locas%%m + m * (locas%%m == 0)) } else S1 = c(S1, as.numeric(colnames(data[, -S1])[locas])) data1 = rowSums(data[, S1]) data1[data1 > 0] = 1 hatpnew = sum(data1)/n if (hatpnew != 1) { if (length(S1) == (m - 1)) { jointP = sum(data1 * data[, -S1])/n } else jointP = colSums(data1 * data[, -S1])/n a = sqrt(n) * (hatpnew * hatP[-S1] - jointP)/sqrt(hatpnew * (1 - hatpnew) * hatP[-S1] * (1 - hatP[-S1])) maxscore = max(a) locas = match(maxscore, a) } else maxscore = 0 } result = c(score, SS[S1]) return(result) } ############################################################ ## Main function fGAME ##Usage: # fGAME(data,k,C1,C2,maxsize,mm,permutation) ##Arguments of fGAME: #data: each row is a sample, each column is one gene (variants), entries are mutation status. #k: the maximum size of the ME set; #C1: the tuning parameter alpha; #C2: the tuning parameter xi; #maxsize: a parameter in pre-selection procedure; #mm: the gene size selected by pre-selection procedure; fGAME = function (data, k = 8, C1 = 0.95, C2 = 3, maxsize = 8, mm = 10, permutation) { data = as.matrix(data) m <- ncol(data) n = nrow(data) colnames(data) = 1:m removelocas = (1:m)[colSums(data) == 0] l_0 = length(removelocas) if (l_0 > 0) data = data[, -removelocas] if (m - l_0 > mm) result = unlist(funLD(data[, fscreen(data, mm, maxsize)], k, C1, C2)) if (m - l_0 <= mm) result = unlist(funLD(data, k, C1, C2)) score = rep(0, permutation) for (j in 1:permutation) { obsDataH0 = apply(data, 2, sample) if (m - l_0 > mm) score[j] = funLD(obsDataH0[, fscreen(obsDataH0, mm, maxsize)], k, C1, C2)[[1]] if (mm - l_0 <= mm) score[j] = funLD(obsDataH0, k, C1, C2)[[1]] } result[1]= (sum(score >= result[[1]]))/(permutation) p =result[1] me.set=result[-1] return(list(selected.me.set=me.set, p.value=p )) } ############################################################ #########Example (not run) ## mutation matrix: #data=c(0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,0,1,0,1,0,1,1,0,1,0,0,0) #data=matrix(data, nrow=20) #data ## Run the GAME function: #fGAME(data,k=8,C1=0.95,C2=3,maxsize=8,mm=10,permutation=100);