Download test data to your local folder.
step 0 creat project space
library(SINBA)
library(openxlsx)
library(ggplot2)
test.dir<-"path to your local testing data folder"
current_date<-format(Sys.time(),"%Y-%m-%d")
project_main_dir <- test.dir
project_name <- 'SINBA_TALL.test'
## SINBA.par is very essential in the analysis!!
if(exists('SINBA.par')==TRUE) rm(SINBA.par)
SINBA.par <- SINBA.analysis.dir.create(project_main_dir=project_main_dir, project_name=project_name,tf.network.file=sprintf("%s/TALL_TFnet.txt",test.dir),sig.network.file=sprintf("%s/TALL_SIGnet.txt",test.dir))
step 1 network-load and network-combine
tf.network <- get.single.network(SINBA.par$tf.network.file)
sig.network<-get.single.network(SINBA.par$sig.network.file)
SINBA.par$sinba.network<-get.combined.network(network1 = tf.network,network2 = sig.network)
SINBA.saveRData(SINBA.par = SINBA.par,step = "network-load")
step 2 exp_load, get activity and get ftest
SINBA.loadRData(SINBA.par = SINBA.par,step = "network-load")
eset<-readRDS(sprintf("%s/Das_TALL.eset.Rds",test.dir))
SINBA.par$cal.eset<-eset
#load drug database and test if these genes have phase4 drugs
seed_genes<-c("BCL2","LCK")
SINBA::drugdb.preload()
seed_drug1<-selectSP_driver_byDB(genes = seed_genes,select_type="drug_phase",database="Drugbank_V5.1",select_phase="approved")
seed_drug2<-selectSP_driver_byDB(genes = seed_genes,select_type="drug_phase",database="ChEMBL",select_phase="4")
sinba_id<-combineNet2target_list(target_list=SINBA.par$sinba.network$target_list,seed_name=seed_genes,partner_name=NULL,return_type="SINBA_originalID")
SINBA.par$SINBA_ID<-sinba_id
#S-P driver-pair activity
sinba_ac_mat<-cal.SINBA.Activity(SINBA_originalID=sinba_id$originalID, target_list=SINBA.par$sinba.network$target_list, cal_mat=Biobase::exprs(SINBA.par$cal.eset), es.method = 'weightedmean',std=TRUE)
SINBA.par$SINBA_AC.eset<-generate.SINBA.eset(exp_mat = sinba_ac_mat,phenotype_info = Biobase::pData(SINBA.par$cal.eset))
#single driver activity
single_ac_mat<-cal.SINBA.Activity(target_list=SINBA.par$sinba.network$target_list, cal_mat=Biobase::exprs(SINBA.par$cal.eset), es.method = 'weightedmean',std=TRUE)
SINBA.par$single_AC.eset<-generate.SINBA.eset(exp_mat = single_ac_mat,phenotype_info = Biobase::pData(SINBA.par$cal.eset))
#Seed and Partner regulon target overlap testing
N<-length(unique(c(SINBA.par$sinba.network$network_dat$source,SINBA.par$sinba.network$network_dat$target)))
SINBA.ftest<-seed2partners_ftest(SINBA_originalID=SINBA.par$SINBA_ID$originalID,target_list=SINBA.par$sinba.network$target_list,N=N,padjust_method="BH")
SINBA.par$SINBA.ftest<-SINBA.ftest
SINBA.saveRData(SINBA.par = SINBA.par,step = "get_ac")
step 3 differential activity and differential expression analysis
SINBA.loadRData(SINBA.par = SINBA.par,step = "get_ac")
phe_info <- Biobase::pData(SINBA.par$cal.eset)
SINBA.par$DA <- list()
SINBA.par$DE<-list()
SINBA.par$SINBA.DA<-list()
# get compared sample names
all_subgroup <- unique(phe_info$group) ## for example need to get subgroup-specific drivers
comps<-rbind(c("Sensitive","Resistant"))
for(i in 1:dim(comps)[1]){
comp_name <- sprintf('%s.Vs.%s',comps[i,1],comps[i,2]) ## each comparison must give a name !!!
G0 <- rownames(phe_info)[which(phe_info$group==comps[i,2])] # get sample list for G0
G1 <- rownames(phe_info)[which(phe_info$group==comps[i,1])] # get sample list for G1
# get DA/DE
DA_single_driver_limma <- get.SINBA.DE.2G(eset=SINBA.par$single_AC.eset,G1=G1,G0=G0,G1_name=comps[i,1],G0_name=comps[i,2],verbose=TRUE,do.log2=F,test.use="limma",random_effect=NULL)
DA_driver_limma <- get.SINBA.DE.2G(eset=SINBA.par$SINBA_AC.eset,G1=G1,G0=G0,G1_name=comps[i,1],G0_name=comps[i,2],verbose=TRUE,do.log2=F,test.use="limma",random_effect=NULL)
DE_limma <- get.SINBA.DE.2G(eset=SINBA.par$cal.eset,G1=G1,G0=G0,G1_name=comps[i,1],G0_name=comps[i,2],verbose=TRUE,do.log2=F,test.use="limma",random_effect=NULL)
# save to SINBA.par
SINBA.par$DA[[comp_name]] <- DA_single_driver_limma
SINBA.par$SINBA.DA[[comp_name]]<-DA_driver_limma
SINBA.par$DE[[comp_name]]<-DE_limma
}
SINBA.saveRData(SINBA.par = SINBA.par,step = "get_DA")
step 4 (optional) ROCtest and check druggable top partner genes
SINBA.loadRData(SINBA.par = SINBA.par,step = "get_DA")
#ROCtest
pair.AUC<-list()
single.AUC<-list()
for(i in 1:dim(comps)[1]){
comp_name <- sprintf('%s.Vs.%s',comps[i,1],comps[i,2]) ## each comparison must give a name !!!
G0 <- rownames(phe_info)[which(phe_info$group==comps[i,2])] # get sample list for G0
G1 <- rownames(phe_info)[which(phe_info$group==comps[i,1])] # get sample list for G1
pair.AUC[[comp_name]]<-ROCTest(eset=SINBA.par$SINBA_AC.eset,G0=G0,G1=G1)
single.AUC[[comp_name]]<-ROCTest(eset=SINBA.par$single_AC.eset,G0=G0,G1=G1)
}
use_comp<-names(pair.AUC)
auc_tab<-generate.AUCTable(pair.AUC = pair.AUC,single.AUC = single.AUC,use_comp = use_comp)
auc_tab<-auc_tab[order(auc_tab$delta_AUCpower.Sensitive.Vs.Resistant,decreasing = T),]
SINBA.par$AUC_tab<-auc_tab
write.xlsx(SINBA.par$AUC_tab,file=sprintf("%s/AUC_tab_%s.xlsx",SINBA.par$out.dir.DATA,Sys.Date()))
#check druggable partners
use_auc_tab<-auc_tab[auc_tab$SINBA.seed=="LCK"&auc_tab$AUC.Sensitive.Vs.Resistant.partner>0.5,]
use_auc_tab<-use_auc_tab[order(use_auc_tab$AUC.Sensitive.Vs.Resistant.partner,decreasing = T),]
top_partner<-unique(use_auc_tab$SINBA.partner[1:150])
partner_drug1<-selectSP_driver_byDB(genes = top_partner,select_type="drug_phase",database="ChEMBL",select_phase=ChEMBL_drugphase_levels())
partner_drug2<-selectSP_driver_byDB(genes = top_partner,select_type="drug_phase",database="Repurposing_HUB",select_phase=Repurposing_HUB_drugphase_levels())
partner_drug3<-selectSP_driver_byDB(genes = top_partner,select_type="drug_phase",database="Drugbank_V5.1",select_phase=Drugbank_drugphase_levels())
step 5 generate master table
SINBA.loadRData(SINBA.par = SINBA.par,step = "get_DA")
use_comp<-names(SINBA.par$DE)
ms_tab<-generate.masterTable.SINBA(use_comp =use_comp,DA.SINBA = SINBA.par$SINBA.DA,DA=SINBA.par$DA,DE=SINBA.par$DE,ftest = SINBA.par$SINBA.ftest,z_col='Z-statistics',display_col=c('logFC','P.Value'),deltaZ_method = "max")
SINBA.par$ms_tab<-ms_tab
write.xlsx(SINBA.par$ms_tab,file=sprintf("%s/ms_tab_max_%s.xlsx",SINBA.par$out.dir.DATA,Sys.Date()))
SINBA.saveRData(SINBA.par = SINBA.par,step = "ms_tab")
step 6 draw plots
SINBA.loadRData(SINBA.par = SINBA.par,step = "ms_tab")
plot.dir<-SINBA.par$out.dir.PLOT
#1.box plot
use_sinba_id<-"LCK:HDAC8"
seed.p<-signif(ms_tab$P.Value.Sensitive.Vs.Resistant_DA.seed[ms_tab$originalID==use_sinba_id],digit=3)
sinba.p<-signif(ms_tab$P.Value.Sensitive.Vs.Resistant_SINBA.DA[ms_tab$originalID==use_sinba_id],digit=3)
partner.p<-signif(ms_tab$P.Value.Sensitive.Vs.Resistant_DA.partner[ms_tab$originalID==use_sinba_id],digit=3)
g<-draw.box_SINBA(ac_mat_sinba = Biobase::exprs(SINBA.par$SINBA_AC.eset),
ac_mat_single=Biobase::exprs(SINBA.par$single_AC.eset),
SINBA_originalID=use_sinba_id,
phenotype_info=Biobase::pData(SINBA.par$SINBA_AC.eset),
group_col="group",
group_levels=c("Sensitive","Resistant"),
group_colors=c("red","blue"),
main = sprintf("%s, p.SINBA:%s; p.seed:%s; p.partner:%s",use_sinba_id,sinba.p,seed.p,partner.p))
ggsave(g,filename = sprintf("%s/box_%s_%s.pdf",plot.dir,use_sinba_id,Sys.Date()),width = 7,height = 3.5)
#2.NetBID GSEA
ms_tab<-SINBA.par$ms_tab
comp_name<-names(SINBA.par$DA)
partner_driver<-c("HDAC8","METTL15","PTPN4","NOTCH3","PIK3R3")
seed_driver<-"LCK"
DE <- SINBA.par$DE[[comp_name]]
driver_DA_Z <- SINBA.par$DA[[comp_name]][,'Z-statistics']
names(driver_DA_Z) <- rownames(SINBA.par$DA[[comp_name]])
driver_DE_Z <- SINBA.par$DE[[comp_name]][,'Z-statistics']
names(driver_DE_Z) <- rownames(SINBA.par$DE[[comp_name]])
tmp<-SINBA.par$SINBA.DA[[comp_name]]
DA_Z_merge <-tmp[grepl("^LCK",tmp$ID),'Z-statistics']
names(DA_Z_merge) <- rownames(tmp[grepl("^LCK",tmp$ID),])
diff_Z<-ms_tab$delta_Z.Sensitive.Vs.Resistant_SINBA[which(ms_tab$SINBA.seed=="LCK")]
names(diff_Z)<-ms_tab$originalID[which(ms_tab$SINBA.seed=="LCK")]
merge_target <- combineNet2target_list(target_list=SINBA.par$sinba.network$target_list,seed_name=seed_driver,partner_name=partner_driver,return_type="SINBA_target_list")
profile_col <- 'Z-statistics'
draw.NetBID_SINBA(DE=DE,name_col = "ID",profile_col=profile_col,
profile_trend='neg2pos',
seed_driver=seed_driver,
partner_driver_list=partner_driver,
seed_driver_label=seed_driver,
partner_driver_label=partner_driver,
driver_DA_Z=driver_DA_Z,
driver_DE_Z=driver_DE_Z,
target_list_merge=merge_target,
target_list=SINBA.par$sinba.network$target_list,
DA_Z_merge=DA_Z_merge,
diff_Z = diff_Z,
target_nrow=2,target_col='RdBu',target_col_type='PN',
left_annotation="Sensitive",right_annotation="Resistant",main="LCK combination Drivers",
profile_sig_thre=0,Z_sig_thre=1.64,pdf_file=sprintf("%s/netbid_GSEA_%s_%s.pdf",plot.dir,"LCK",Sys.Date()))
#3.ROC plot
draw.ROC_SINBA(ac_mat_sinba=Biobase::exprs(SINBA.par$SINBA_AC.eset),
ac_mat_single=Biobase::exprs(SINBA.par$single_AC.eset),
SINBA_originalID="LCK:HDAC8",
phenotype_info=Biobase::pData(SINBA.par$SINBA_AC.eset),
group_col="group",
G1_name="Sensitive",
G0_name="Resistant",
main="LCK:HDAC4",
pdf_file=sprintf("%s/ROC_%s_%s.pdf",plot.dir,"LCK_HDAC8",Sys.Date()))
#4. GSEA plot
lck_fyn<-combineNet2target_list(target_list=SINBA.par$sinba.network$target_list,seed_name="LCK",partner_name="FYN",return_type="SINBA_target_list")
genes_SINBA<-lck_fyn[[1]]$target
genes_seed<-SINBA.par$sinba.network$target_list$LCK$target
genes_partner<-SINBA.par$sinba.network$target_list$FYN$target
dir_SINBA<-sign(lck_fyn[[1]]$spearman)
dir_seed<-sign(SINBA.par$sinba.network$target_list$LCK$spearman)
dir_partner<-sign(SINBA.par$sinba.network$target_list$FYN$spearman)
rank_profile <- SINBA.par$DE[[comp_name]]$`Z-statistics`
names(rank_profile)<-SINBA.par$DE[[comp_name]]$ID
rank_profile<-rank_profile[order(-rank_profile)]
draw.GSEA_SINBA(rank_profile=rank_profile,use_genes_SINBA=genes_SINBA,use_genes_seed=genes_seed,use_genes_partner=genes_partner,use_direction=TRUE, use_direction_SINBA=dir_SINBA,use_direction_seed=dir_seed,use_direction_partner=dir_partner,main="LCK_FYN",pdf_file=sprintf("%s/GSEA_LCK_FYN.pdf",plot.dir),annotation=NULL,annotation_cex=1.2,left_annotation="Sensitive",right_annotation="Resistant")
draw.GSEA_SINBA(rank_profile=rank_profile,use_genes_SINBA=genes_SINBA,use_genes_seed=genes_seed,use_genes_partner=genes_partner,use_direction=FALSE, use_direction_SINBA=NULL,use_direction_seed=NULL,use_direction_partner=NULL,main="LCK_FYN",pdf_file=sprintf("%s/GSEA_LCK_FYN_v2.pdf",plot.dir),annotation=NULL,annotation_cex=1.2,left_annotation="Sensitive",right_annotation="Resistant")