#=====================================================================================================================
# The Relative R-Squared Method with a data-dependent threshold (RRSMT)
#=====================================================================================================================
#
# RRSMT=function(p0,alpha,ToF,C,X,Z,glbl_HGNC,mlblnew)
#
# (1) Choose a "p0" value as the p-value cut-off and an "alpha" value as the upper alpha cut point of the
#     F distribution for running RRSMT.
#     e.g. p0=0.5, alpha=0.05
#
# (2) There are two options for the data format: normalized data and original data.
#     Set ToF=1 to normalize the data and ToF=0 to use the original data.
#
# (3) You need to load the data first:
#     > C=as.matrix(read.table('D:/DATA/C.xls'))
#     > X=as.matrix(read.table('D:/DATA/X.xls'))
#     > Z=as.matrix(read.table('D:/DATA/Z.xls'))
#     > glbl_HGNC=as.matrix(read.table('D:/DATA/glbl_HGNC.txt'))
#     > mlblnew=as.matrix(read.table('D:/DATA/mlbl.txt'))
#
# (4) For example, save the program in D:\.
#     If users need to keep the data or the program on another drive, they have to change the paths accordingly.
#
#     Run the code (choose (a) or (b)):
#     (a) In an R session, open the file, press "ctrl+a", then press "ctrl+r", or
#     (b) Save RRSMT.txt in D:\ and type in the R window: source("D:/RRSMT.txt")
#
#     Then use the RRSMT function to find the high-confidence targets.
# For example, RRSMT(0.5,0.05,0,C,X,Z,glbl_HGNC,mlblnew)
#======================================================================================================================
# RRSMT: for every mRNA that has at least one candidate miRNA in C, regress the
# mRNA expression profile (no intercept) on the expression profiles of its
# candidate miRNAs, keep the miRNAs whose coefficient p-value is below p0,
# refit on the kept miRNAs only, and accept the kept miRNAs as targets when the
# relative R^2 (reduced-model R^2 / full-model R^2) exceeds a data-dependent
# threshold derived from an F quantile.
#
# Arguments:
#   p0         p-value cut-off applied to each coefficient of the full model.
#   alpha      upper-tail probability of the F distribution used in the
#              threshold (the quantile is taken at 1 - alpha).
#   ToF        1 = standardize every column of X and of Z (subtract the column
#              mean, divide by the column standard deviation) before fitting;
#              any other value = use X and Z as supplied.
#   C          mRNA x miRNA matrix; an entry equal to 1 marks a candidate pair.
#   X          mRNA x tissue expression matrix.
#   Z          miRNA x tissue expression matrix.
#   glbl_HGNC  two-dimensional table of mRNA labels, indexed by row of C.
#   mlblnew    two-dimensional table of miRNA labels, indexed by column of C.
#
# Value: an unnamed list of length 2.
#   [[1]]  matrix with one row per selected pair: the miRNA label column(s)
#          followed by the mRNA label column(s).
#   [[2]]  number of selected pairs.
#======================================================================================================================
RRSMT <- function(p0, alpha, ToF, C, X, Z, glbl_HGNC, mlblnew) {
  mRNAnum <- nrow(C)    # number of mRNAs
  miRNAnum <- ncol(C)   # number of microRNAs
  tissuenum <- ncol(X)  # number of tissue/cell types

  if (ToF == 1) {
    # Column-wise standardization of both expression matrices.
    X <- sweep(sweep(X, 2, colMeans(X), "-"), 2, apply(X, 2, sd), "/")
    Z <- sweep(sweep(Z, 2, colMeans(Z), "-"), 2, apply(Z, 2, sd), "/")
  }

  # Tissue x miRNA design matrix for a set of miRNA rows; drop = FALSE keeps
  # the matrix shape even when a single miRNA is requested.
  design <- function(idx) t(Z[idx, , drop = FALSE])

  # Indicator matrix of accepted (mRNA, miRNA) pairs.
  Selectrue <- matrix(0, mRNAnum, miRNAnum)

  # Only mRNAs with at least one nonzero entry in C are examined. Iterating
  # over the indices themselves is safe when there are none.
  targeted <- which(rowSums(C) != 0)

  for (g in targeted) {
    candidates <- which(C[g, ] == 1)
    n_candidates <- length(candidates)
    if (n_candidates == 0) {
      next  # nonzero row without any entry equal to 1: nothing to test
    }

    y <- as.matrix(X[g, ])
    V1 <- design(candidates)
    fit_all <- lm(y ~ -1 + V1)
    sum_all <- summary(fit_all)
    r2_all <- sum_all$r.squared  # R^2 of the full model

    # summary() omits aliased (non-estimable) coefficients from its table, so
    # the p-values are put back at the positions of the estimable coefficients;
    # aliased candidates get NA and are therefore never kept.
    pvals <- rep(NA_real_, n_candidates)
    pvals[!is.na(coef(fit_all))] <- coef(sum_all)[, 4]

    keep <- candidates[which(pvals < p0)]
    n_keep <- length(keep)
    if (n_keep == 0) {
      next  # relative R^2 is 0 and the threshold is 0: never selected
    }

    V2 <- design(keep)
    r2_keep <- summary(lm(y ~ -1 + V2))$r.squared  # R^2 of the reduced model

    if (n_keep == n_candidates) {
      # Nothing was dropped: sentinel threshold that always accepts.
      threshold <- -100
    } else {
      # Data-dependent threshold for the relative R^2.
      df1 <- n_candidates - n_keep
      df2 <- tissuenum - n_candidates - 1
      threshold <- 1 - qf(1 - alpha, df1, df2) * df1 / df2 * (1 / r2_all - 1)
      if (is.na(threshold)) {
        stop("RRSMT: threshold is undefined for mRNA row ", g, " (",
             n_candidates, " candidate miRNAs, ", tissuenum, " tissues)",
             call. = FALSE)
      }
    }

    rr <- r2_keep / r2_all  # relative R^2
    if (is.nan(rr)) {
      rr <- 0
    }

    if (rr > threshold) {
      Selectrue[g, keep] <- 1
    }
  }

  # Number of high-confidence targets selected.
  numtotal <- sum(Selectrue)

  # Row index = mRNA, column index = miRNA of every accepted pair.
  hits <- which(Selectrue != 0, arr.ind = TRUE)
  mRNA_idx <- hits[, 1]
  miRNA_idx <- hits[, 2]

  # drop = FALSE keeps one row per pair even for a single hit or for label
  # tables with several columns.
  pairs <- cbind(
    as.matrix(mlblnew[miRNA_idx, , drop = FALSE]),
    as.matrix(glbl_HGNC[mRNA_idx, , drop = FALSE])
  )

  # The miRNAs with their high-confidence targets, and the number of pairs.
  list(pairs, numtotal)
}