#===================================================================================================================== # The Relative R-Squared Method (RRSM) #===================================================================================================================== # # RRSM=function(p0,s,ToF,C,X,Z,glbl_HGNC,mlbl) # (1) Choose a "p0" value and a "s" value as thresholds for p-value and the relative R^2 value, # respectively, for runing RRSM. # eg. p0=0.5 , s=0.99 # # (2) There are two options for the data format, normalized data and original data. # Set ToF=1 for nomalize data and ToF=0 for original data # # (3) You need to upload the data first # > C=as.matrix(read.table('D:/DATA/C.xls')) # > X=as.matrix(read.table('D:/DATA/X.xls')) # > Z=as.matrix(read.table('D:/DATA/Z.xls')) # > glbl_HGNC=as.matrix(read.table('D:/DATA/glbl_HGNC.txt')) # > mlbl=as.matrix(read.table('D:/DATA/mlbl.txt')) # # # (4) For example, save program in D:\. # If users need to save the data in another drive, they have to change the pathway. # # Run the codes:(choose (a) or (b)) # (a)On a R program, open the file, press "ctrl+a", then press "ctrl+r" or # (b)Save the RRSM.txt in D:\ and on the R window type: source("D:/RRSM.txt") # # Then use the RRSM function to find the high-confidence targets. # For example, RRSM(0.5,0.99,0,C,X,Z,glbl_HGNC,mlbl) #====================================================================================================================== RRSM=function(p0,s,ToF,C,X,Z,glbl_HGNC,mlbl) { mRNAnum=dim(C)[1] # Number of mRNAs miRNAnum=dim(C)[2] # Number of microRNAs tissuenum=dim(X)[2]# Number of tissue/cell types rowsumC=rowSums(C) nonzero_mRNA=which(rowsumC!=0) # mRNA with targeted nonzero_mRNA=as.matrix(nonzero_mRNA) nonzero_mRNAnum=dim(nonzero_mRNA)[1] rr=numeric(nonzero_mRNAnum) # save relative R^2 value rsquare1=numeric(nonzero_mRNAnum) # save original R^2 value rsquare2=numeric(nonzero_mRNAnum) # After regressing the mRNA on the selected miRNAs by 1st criterion p-value, save the R^2value if(ToF==1) { L1=matrix(1,mRNAnum,1); Xmean=colMeans(X); # column mean of mRNA*tissues expression (1*tissuenum) Xstd=apply(X,2,sd); # column standard deviation of mRNA*tissues expression (1*tissuenum) Xnormalize=(X-L1%*%Xmean)/(L1%*%Xstd); # normalize mRNA*tissues expression (mRNAnum*tissuenum) L2=matrix(1,miRNAnum,1); Zmean=colMeans(Z); # column mean of miRNA*tissues expression (1*tissuenum) Zstd=apply(Z,2,sd); # column standard deviation of miRNA*tissues expression (1*tissuenum) Znormalize=(Z-L2%*%Zmean)/(L2%*%Zstd); # normalize miRNA*tissues expression (miRNAnum*tissuenum) X=Xnormalize Z=Znormalize } else { X=X Z=Z } Selectrue=matrix(0,mRNAnum,miRNAnum) for(i in 1:nonzero_mRNAnum) # just for loop nonzero_mRNAnum(890), not all mRNA number(16063) { INDEPEND=NULL # save temporary miRNA across tissues corresponding their mRNA miRvalue=NULL # note corresponding numbers of miRNAs count1=0;count2=0; for (k in 1:miRNAnum) { if (C[nonzero_mRNA[i],k]==1) { INDEPEND=rbind(INDEPEND,Z[k,]) miRvalue=rbind(miRvalue,k) count1=count1+1 } else INDEPEND=INDEPEND; } YY=as.matrix(X[nonzero_mRNA[i],]) V=t(INDEPEND) S=t(V)%*%V b=solve(S)%*%t(V)%*%YY; # using least square error method to estimate the regression parameters YYbar=mean(YY) #SST=(YY-YYbar*I)'*(YY-YYbar*I) SST=t(YY)%*%YY # total sum of squares (No intercept here) SSE1=t(YY-V%*%b)%*%(YY-V%*%b) # residual sum of squares rsquare1[i]=1-SSE1/SST; # solve the R^2 value inn=2*(1-pnorm(abs(b/(diag(solve(S)))^(1/2)))) # calculate each b's p-value rlabel=inn < p0 # label which b's p-value < threshold rlabel=as.matrix(as.numeric(rlabel)) INDEPEND2=NULL # After applying the measurement of p-value, save temporary miRNA across tissues corresponding their mRNA. miRvalue2=NULL # After applying the measurement of p-value, note corresponding numbers of miRNAs. if(sum(rlabel)==0) { rsquare2[i]=0 }else{ miRvalue=miRvalue*rlabel for (j in 1:count1) { if (miRvalue[j]!=0) { count2=count2+1 miRvalue2[count2]=miRvalue[j]; INDEPEND2=rbind(INDEPEND2,Z[miRvalue2[count2],]) } } V2=t(INDEPEND2) S2=t(V2)%*%V2 b2=solve(S2)%*%t(V2)%*%YY SSE2=t(YY-V2%*%b2)%*%(YY-V2%*%b2) rsquare2[i]=1-SSE2/SST # After applying the measurement of p-value, solve the R^2 value } rr[i]=rsquare2[i]/rsquare1[i] # relative R^2 value if(rr[i]=='NaN') rr[i]=0 if(rr[i]< s) { Selectrue=Selectrue; }else{ for(h in 1:count1) { if(rlabel[h]==1) { Selectrue[nonzero_mRNA[i], miRvalue[h]]=1 }else{ Selectrue=Selectrue } } } } numtotal=sum(Selectrue) # Sum all elements of Selectrue to show how many high-confidence targets being selected?(mRNAnum*miRNAnum) result=which(Selectrue!=0,arr.ind=TRUE) r=result[,1] # record which mRNAs with nonzero value c=result[,2] # record which miRNAs with nonzero value #cbind(c,r) # combine result return(list(cbind(mlbl[c,],as.matrix(glbl_HGNC[r,])),numtotal)) # return the miRNAs and their high-confidence targets }