#=====================================================================================================================
#                           The Relative R-Squared Method with a data-dependent threshold (RRSMT)                               
#=====================================================================================================================
#                                                                                                                        					 
# RRSMT=function(p0,alpha,ToF,C,X,Z,glbl_HGNC,mlblnew)                                                                                                 					                                                                                                                                                        
# (1) Choose a p-value cutoff "p0" and an upper-tail probability "alpha" of the F distribution
#     to be used when running RRSMT.
#     e.g. p0=0.5, alpha=0.05
#                                                                                              					  
# (2) There are two options for the data format, normalized data and original data.                                      					  
#     Set ToF=1 to have X and Z normalized (column-wise) before the analysis, or ToF=0 to use the original data as supplied.
#
# (3) You need to load the data first
# >    C=as.matrix(read.table('D:/DATA/C.xls'))  
# >    X=as.matrix(read.table('D:/DATA/X.xls'))     
# >    Z=as.matrix(read.table('D:/DATA/Z.xls'))     
# >    glbl_HGNC=as.matrix(read.table('D:/DATA/glbl_HGNC.txt'))  
# >    mlblnew=as.matrix(read.table('D:/DATA/mlbl.txt'))
#	
#                                                        					  
# (4) For example, save program in D:\.                                                                                     					
#     If users need to save the data in another drive, they have to change the pathway.                                  					 
#                                                                                                                        					  
#     Run the codes:(choose (a) or (b))                                                                                  					   
#        (a) In R, open the file, press "ctrl+a", then press "ctrl+r", or
#        (b) Save RRSMT.txt in D:\ and in the R window type: source("D:/RRSMT.txt")
#                                                                                                             	 					  
#      Then use the RRSMT function to find the high-confidence targets.
#      For example, RRSMT(0.5,0.05,0,C,X,Z,glbl_HGNC,mlblnew)
#======================================================================================================================

   

# RRSMT: relative R-squared method with a data-dependent threshold.
#
# For every mRNA (row of C) whose row sum is non-zero, the mRNA expression
# profile is regressed (without intercept) on the profiles of all miRNAs
# flagged with 1 in that row. miRNAs whose coefficient p-value is below p0
# are kept and the regression is refitted on them alone. The kept miRNAs
# are reported when the ratio (reduced R^2) / (full R^2) exceeds a
# threshold derived from an F quantile.
#
# Arguments:
#   p0         p-value cut-off applied to the coefficients of the full fit.
#   alpha      upper-tail probability; qf(1 - alpha, ...) enters the
#              threshold.
#   ToF        1 = centre and scale every column of X and Z first;
#              any other value = use X and Z as supplied.
#   C          mRNA x miRNA matrix; entries equal to 1 mark candidate pairs.
#   X          mRNA x tissue expression matrix (rows aligned with C's rows).
#   Z          miRNA x tissue expression matrix (rows aligned with C's
#              columns).
#   glbl_HGNC  matrix of mRNA labels, indexed by row of C.
#   mlblnew    matrix of miRNA labels, indexed by column of C.
#
# Returns an unnamed list of length two:
#   [[1]] character/label matrix with one row per selected pair: the miRNA
#         label column(s) followed by the mRNA label column(s);
#   [[2]] the number of selected pairs.
RRSMT <- function(p0, alpha, ToF, C, X, Z, glbl_HGNC, mlblnew)
{
  mRNAnum <- dim(C)[1]    # number of mRNAs
  miRNAnum <- dim(C)[2]   # number of microRNAs
  tissuenum <- dim(X)[2]  # number of tissue/cell types

  if (ToF == 1) {
    # Column-wise standardisation: (value - column mean) / column sd.
    standardize <- function(M) {
      centered <- sweep(M, 2, colMeans(M), "-")
      sweep(centered, 2, apply(M, 2, sd), "/")
    }
    X <- standardize(X)
    Z <- standardize(Z)
  }

  # Only mRNAs with a non-zero row sum in C are analysed.
  targeted <- which(rowSums(C) != 0)

  # Indicator matrix of the selected (mRNA, miRNA) pairs.
  Selectrue <- matrix(0, mRNAnum, miRNAnum)

  # Iterating over the index vector itself is a no-op when it is empty
  # (a 1:n loop would run with i = 1, 0 and fail).
  for (gene_row in targeted) {
    candidates <- which(C[gene_row, ] == 1)
    n_all <- length(candidates)

    y <- as.matrix(X[gene_row, ])
    # drop = FALSE keeps a matrix even for a single candidate miRNA.
    V1 <- t(Z[candidates, , drop = FALSE])

    full_fit <- summary(lm(y ~ -1 + V1))
    r2_full <- full_fit$r.squared

    # summary.lm() omits aliased coefficients from its table, so the
    # p-values are put back at the positions of the estimable coefficients
    # to stay aligned with `candidates`.
    pvals <- rep(NA_real_, n_all)
    pvals[!full_fit$aliased] <- coef(full_fit)[, 4]

    kept <- candidates[which(pvals < p0)]
    n_kept <- length(kept)

    if (n_kept == 0) {
      r2_reduced <- 0
    } else {
      V2 <- t(Z[kept, , drop = FALSE])
      r2_reduced <- summary(lm(y ~ -1 + V2))$r.squared
    }

    # Data-dependent threshold for the relative R^2.
    if (n_kept == 0) {
      threshold <- 0
    } else if (n_kept == n_all) {
      # Nothing was dropped: a threshold the ratio always exceeds.
      threshold <- -100
    } else {
      dropped <- n_all - n_kept
      resid_df <- tissuenum - n_all - 1
      threshold <- if (resid_df > 0) {
        1 - qf(1 - alpha, dropped, resid_df) * dropped / resid_df *
          (1 / r2_full - 1)
      } else {
        # The F quantile is undefined without positive denominator degrees
        # of freedom; such an mRNA is conservatively left unselected
        # instead of aborting the whole run.
        NA_real_
      }
    }

    rel_r2 <- r2_reduced / r2_full  # relative R^2
    if (is.nan(rel_r2)) {
      rel_r2 <- 0
    }

    # isTRUE() treats an undefined comparison as "not selected".
    if (isTRUE(rel_r2 > threshold)) {
      Selectrue[gene_row, kept] <- 1
    }
  }

  numtotal <- sum(Selectrue)  # number of high-confidence pairs
  hits <- which(Selectrue != 0, arr.ind = TRUE)
  mrna_rows <- hits[, 1]
  mirna_cols <- hits[, 2]

  # drop = FALSE keeps one row per pair even when there is a single hit
  # or the label matrices have several columns.
  targets <- cbind(
    as.matrix(mlblnew[mirna_cols, , drop = FALSE]),
    as.matrix(glbl_HGNC[mrna_rows, , drop = FALSE])
  )

  list(targets, numtotal)
}