#=====================================================================================================================
#                                	The Relative R-Squared Method (RRSM)                                                                                                             
#=====================================================================================================================
#                                                                                                                        					 
# RRSM=function(p0,s,ToF,C,X,Z,glbl_HGNC,mlbl)                                                                                                 					  
# (1) Choose a "p0" value and a "s" value as thresholds for p-value and the relative R^2 value,                          					 
#     respectively, for runing RRSM.                                                                                     					   
#     eg. p0=0.5 , s=0.99  
#                                                                                              					  
# (2) There are two options for the data format, normalized data and original data.                                      					  
#     Set ToF=1 for nomalize data and ToF=0 for original data    
#
# (3) You need to upload the data first                	                                            
# >    C=as.matrix(read.table('D:/DATA/C.xls'))  
# >    X=as.matrix(read.table('D:/DATA/X.xls'))     
# >    Z=as.matrix(read.table('D:/DATA/Z.xls'))     
# >    glbl_HGNC=as.matrix(read.table('D:/DATA/glbl_HGNC.txt'))  
# >    mlbl=as.matrix(read.table('D:/DATA/mlbl.txt'))    
#	
#                                                        					  
# (4) For example, save program in D:\.                                                                                     					
#     If users need to save the data in another drive, they have to change the pathway.                                  					 
#                                                                                                                        					  
#     Run the codes:(choose (a) or (b))                                                                                  					   
#        (a)On a R program, open the file, press "ctrl+a", then press  "ctrl+r" or                                       					  
#        (b)Save the RRSM.txt in D:\ and on the R window type: source("D:/RRSM.txt")                                 					  
#                                                                                                             	 					  
#      Then use the RRSM function to find the high-confidence targets. 							                                     
#      For example, RRSM(0.5,0.99,0,C,X,Z,glbl_HGNC,mlbl)                                                        					                    
#======================================================================================================================

RRSM=function(p0,s,ToF,C,X,Z,glbl_HGNC,mlbl)
{


   mRNAnum=dim(C)[1]  # Number of mRNAs 
   miRNAnum=dim(C)[2] # Number of microRNAs 
   tissuenum=dim(X)[2]# Number of tissue/cell types 

   rowsumC=rowSums(C)
   nonzero_mRNA=which(rowsumC!=0)      # mRNA with targeted  
   nonzero_mRNA=as.matrix(nonzero_mRNA)
   nonzero_mRNAnum=dim(nonzero_mRNA)[1]

   rr=numeric(nonzero_mRNAnum)         #  save  relative R^2 value
   rsquare1=numeric(nonzero_mRNAnum)   #  save original R^2 value
   rsquare2=numeric(nonzero_mRNAnum)   #  After regressing the mRNA on the selected miRNAs by 1st criterion p-value, save the R^2value 

   if(ToF==1)
   {
       L1=matrix(1,mRNAnum,1);
       Xmean=colMeans(X);                     # column mean of mRNA*tissues expression  (1*tissuenum) 
       Xstd=apply(X,2,sd);                    # column standard deviation of mRNA*tissues expression (1*tissuenum)
       Xnormalize=(X-L1%*%Xmean)/(L1%*%Xstd); # normalize mRNA*tissues expression  (mRNAnum*tissuenum)
       
       L2=matrix(1,miRNAnum,1);
       Zmean=colMeans(Z);                     # column mean of miRNA*tissues expression (1*tissuenum)
       Zstd=apply(Z,2,sd);                    # column standard deviation of miRNA*tissues expression (1*tissuenum) 
       Znormalize=(Z-L2%*%Zmean)/(L2%*%Zstd); # normalize miRNA*tissues expression (miRNAnum*tissuenum)
       
       X=Xnormalize
       Z=Znormalize
    }
    else
    {
       X=X
       Z=Z
    }

    Selectrue=matrix(0,mRNAnum,miRNAnum)

    for(i in 1:nonzero_mRNAnum) # just for loop nonzero_mRNAnum(890), not all mRNA number(16063)
    {
       INDEPEND=NULL # save temporary miRNA across tissues corresponding their mRNA   
       miRvalue=NULL # note corresponding numbers of miRNAs   
       count1=0;count2=0; 
    
       for (k in 1:miRNAnum)
       {
            if (C[nonzero_mRNA[i],k]==1)
            {
                INDEPEND=rbind(INDEPEND,Z[k,])
                miRvalue=rbind(miRvalue,k)
                count1=count1+1
            }
            else
                INDEPEND=INDEPEND; 
       }

       YY=as.matrix(X[nonzero_mRNA[i],]) 
       V=t(INDEPEND)
       S=t(V)%*%V
       b=solve(S)%*%t(V)%*%YY; # using least square error method to estimate the regression parameters
    
       YYbar=mean(YY)
    
       #SST=(YY-YYbar*I)'*(YY-YYbar*I)
       SST=t(YY)%*%YY                # total sum of squares (No intercept here) 
       SSE1=t(YY-V%*%b)%*%(YY-V%*%b) # residual sum of squares
    
       rsquare1[i]=1-SSE1/SST;       # solve the R^2 value
      
       inn=2*(1-pnorm(abs(b/(diag(solve(S)))^(1/2))))      # calculate each b's p-value
       rlabel=inn < p0                                     # label which b's p-value < threshold
       rlabel=as.matrix(as.numeric(rlabel))

       INDEPEND2=NULL   # After applying the measurement of p-value, save temporary miRNA across tissues corresponding their mRNA.  
       miRvalue2=NULL   # After applying the measurement of p-value, note corresponding numbers of miRNAs.
    
       if(sum(rlabel)==0)
       {
           rsquare2[i]=0
       }else{
        
           miRvalue=miRvalue*rlabel
           for (j in 1:count1)
           {           
               if (miRvalue[j]!=0)
               {
                   count2=count2+1
                   miRvalue2[count2]=miRvalue[j];
                   INDEPEND2=rbind(INDEPEND2,Z[miRvalue2[count2],]) 
                   
               }           
           }

           V2=t(INDEPEND2)
           S2=t(V2)%*%V2
           b2=solve(S2)%*%t(V2)%*%YY
           SSE2=t(YY-V2%*%b2)%*%(YY-V2%*%b2)
        
           rsquare2[i]=1-SSE2/SST # After applying the measurement of p-value, solve the R^2 value
        
       }

       rr[i]=rsquare2[i]/rsquare1[i] # relative R^2 value
       if(rr[i]=='NaN')
           rr[i]=0
       if(rr[i]< s)
       {    
           Selectrue=Selectrue;
       }else{
        
           for(h in 1:count1)
           {
               if(rlabel[h]==1)
               {
                  Selectrue[nonzero_mRNA[i], miRvalue[h]]=1
               }else{
                  Selectrue=Selectrue
               }
           }
       }
   }
   numtotal=sum(Selectrue)                  # Sum all elements of Selectrue to show how many high-confidence targets being selected?(mRNAnum*miRNAnum)
   result=which(Selectrue!=0,arr.ind=TRUE)
   r=result[,1]                             # record which mRNAs with nonzero value
   c=result[,2]                             # record which miRNAs with nonzero value
   #cbind(c,r)                              # combine result
   return(list(cbind(mlbl[c,],as.matrix(glbl_HGNC[r,])),numtotal)) # return the miRNAs and their high-confidence targets

}