T-ALL project


Download test data to your local folder.

step 0 creat project space

library(SINBA)
library(openxlsx)
library(ggplot2)

test.dir<-"path to your local testing data folder"

current_date<-format(Sys.time(),"%Y-%m-%d")
project_main_dir <- test.dir
project_name <-  'SINBA_TALL.test'
## SINBA.par is very essential in the analysis!!
if(exists('SINBA.par')==TRUE) rm(SINBA.par)
SINBA.par  <- SINBA.analysis.dir.create(project_main_dir=project_main_dir, project_name=project_name,tf.network.file=sprintf("%s/TALL_TFnet.txt",test.dir),sig.network.file=sprintf("%s/TALL_SIGnet.txt",test.dir))

step 1 network-load and network-combine

tf.network  <- get.single.network(SINBA.par$tf.network.file)
sig.network<-get.single.network(SINBA.par$sig.network.file)

SINBA.par$sinba.network<-get.combined.network(network1 = tf.network,network2 = sig.network)

SINBA.saveRData(SINBA.par = SINBA.par,step = "network-load")

step 2 exp_load, get activity and get ftest

SINBA.loadRData(SINBA.par = SINBA.par,step = "network-load")

eset<-readRDS(sprintf("%s/Das_TALL.eset.Rds",test.dir))
SINBA.par$cal.eset<-eset

#load drug database and test if these genes have phase4 drugs
seed_genes<-c("BCL2","LCK")
SINBA::drugdb.preload()
seed_drug1<-selectSP_driver_byDB(genes = seed_genes,select_type="drug_phase",database="Drugbank_V5.1",select_phase="approved")

seed_drug2<-selectSP_driver_byDB(genes = seed_genes,select_type="drug_phase",database="ChEMBL",select_phase="4")

sinba_id<-combineNet2target_list(target_list=SINBA.par$sinba.network$target_list,seed_name=seed_genes,partner_name=NULL,return_type="SINBA_originalID")

SINBA.par$SINBA_ID<-sinba_id

#S-P driver-pair activity
sinba_ac_mat<-cal.SINBA.Activity(SINBA_originalID=sinba_id$originalID, target_list=SINBA.par$sinba.network$target_list, cal_mat=Biobase::exprs(SINBA.par$cal.eset), es.method = 'weightedmean',std=TRUE)

SINBA.par$SINBA_AC.eset<-generate.SINBA.eset(exp_mat = sinba_ac_mat,phenotype_info = Biobase::pData(SINBA.par$cal.eset))

#single driver activity
single_ac_mat<-cal.SINBA.Activity(target_list=SINBA.par$sinba.network$target_list, cal_mat=Biobase::exprs(SINBA.par$cal.eset), es.method = 'weightedmean',std=TRUE)

SINBA.par$single_AC.eset<-generate.SINBA.eset(exp_mat = single_ac_mat,phenotype_info = Biobase::pData(SINBA.par$cal.eset))

#Seed and Partner regulon target overlap testing
N<-length(unique(c(SINBA.par$sinba.network$network_dat$source,SINBA.par$sinba.network$network_dat$target)))

SINBA.ftest<-seed2partners_ftest(SINBA_originalID=SINBA.par$SINBA_ID$originalID,target_list=SINBA.par$sinba.network$target_list,N=N,padjust_method="BH")
SINBA.par$SINBA.ftest<-SINBA.ftest

SINBA.saveRData(SINBA.par = SINBA.par,step = "get_ac")

step 3 differential activity and differential expression analysis

SINBA.loadRData(SINBA.par = SINBA.par,step = "get_ac")

phe_info <- Biobase::pData(SINBA.par$cal.eset)
SINBA.par$DA <- list()
SINBA.par$DE<-list()
SINBA.par$SINBA.DA<-list()
# get compared sample names
all_subgroup <- unique(phe_info$group) ## for example need to get subgroup-specific drivers
comps<-rbind(c("Sensitive","Resistant"))
for(i in 1:dim(comps)[1]){
  comp_name <- sprintf('%s.Vs.%s',comps[i,1],comps[i,2]) ## each comparison must give a name !!!
  G0  <- rownames(phe_info)[which(phe_info$group==comps[i,2])] # get sample list for G0
  G1  <- rownames(phe_info)[which(phe_info$group==comps[i,1])] # get sample list for G1
  # get DA/DE
  DA_single_driver_limma <- get.SINBA.DE.2G(eset=SINBA.par$single_AC.eset,G1=G1,G0=G0,G1_name=comps[i,1],G0_name=comps[i,2],verbose=TRUE,do.log2=F,test.use="limma",random_effect=NULL)

  DA_driver_limma <- get.SINBA.DE.2G(eset=SINBA.par$SINBA_AC.eset,G1=G1,G0=G0,G1_name=comps[i,1],G0_name=comps[i,2],verbose=TRUE,do.log2=F,test.use="limma",random_effect=NULL)

  DE_limma <-  get.SINBA.DE.2G(eset=SINBA.par$cal.eset,G1=G1,G0=G0,G1_name=comps[i,1],G0_name=comps[i,2],verbose=TRUE,do.log2=F,test.use="limma",random_effect=NULL)
  # save to SINBA.par
  SINBA.par$DA[[comp_name]] <- DA_single_driver_limma
  SINBA.par$SINBA.DA[[comp_name]]<-DA_driver_limma
  SINBA.par$DE[[comp_name]]<-DE_limma
}

SINBA.saveRData(SINBA.par = SINBA.par,step = "get_DA")

step 4 (optional) ROCtest and check druggable top partner genes

SINBA.loadRData(SINBA.par = SINBA.par,step = "get_DA")
#ROCtest
pair.AUC<-list()
single.AUC<-list()
for(i in 1:dim(comps)[1]){
  comp_name <- sprintf('%s.Vs.%s',comps[i,1],comps[i,2]) ## each comparison must give a name !!!
  G0  <- rownames(phe_info)[which(phe_info$group==comps[i,2])] # get sample list for G0
  G1  <- rownames(phe_info)[which(phe_info$group==comps[i,1])] # get sample list for G1

  pair.AUC[[comp_name]]<-ROCTest(eset=SINBA.par$SINBA_AC.eset,G0=G0,G1=G1)
  single.AUC[[comp_name]]<-ROCTest(eset=SINBA.par$single_AC.eset,G0=G0,G1=G1)
}

use_comp<-names(pair.AUC)
auc_tab<-generate.AUCTable(pair.AUC = pair.AUC,single.AUC = single.AUC,use_comp = use_comp)
auc_tab<-auc_tab[order(auc_tab$delta_AUCpower.Sensitive.Vs.Resistant,decreasing = T),]

SINBA.par$AUC_tab<-auc_tab
write.xlsx(SINBA.par$AUC_tab,file=sprintf("%s/AUC_tab_%s.xlsx",SINBA.par$out.dir.DATA,Sys.Date()))

#check druggable partners
use_auc_tab<-auc_tab[auc_tab$SINBA.seed=="LCK"&auc_tab$AUC.Sensitive.Vs.Resistant.partner>0.5,]
use_auc_tab<-use_auc_tab[order(use_auc_tab$AUC.Sensitive.Vs.Resistant.partner,decreasing = T),]
top_partner<-unique(use_auc_tab$SINBA.partner[1:150])

partner_drug1<-selectSP_driver_byDB(genes = top_partner,select_type="drug_phase",database="ChEMBL",select_phase=ChEMBL_drugphase_levels())

partner_drug2<-selectSP_driver_byDB(genes = top_partner,select_type="drug_phase",database="Repurposing_HUB",select_phase=Repurposing_HUB_drugphase_levels())

partner_drug3<-selectSP_driver_byDB(genes = top_partner,select_type="drug_phase",database="Drugbank_V5.1",select_phase=Drugbank_drugphase_levels())

step 5 generate master table

SINBA.loadRData(SINBA.par = SINBA.par,step = "get_DA")
use_comp<-names(SINBA.par$DE)
ms_tab<-generate.masterTable.SINBA(use_comp =use_comp,DA.SINBA = SINBA.par$SINBA.DA,DA=SINBA.par$DA,DE=SINBA.par$DE,ftest = SINBA.par$SINBA.ftest,z_col='Z-statistics',display_col=c('logFC','P.Value'),deltaZ_method = "max")
SINBA.par$ms_tab<-ms_tab
write.xlsx(SINBA.par$ms_tab,file=sprintf("%s/ms_tab_max_%s.xlsx",SINBA.par$out.dir.DATA,Sys.Date()))

SINBA.saveRData(SINBA.par = SINBA.par,step = "ms_tab")

step 6 draw plots

SINBA.loadRData(SINBA.par = SINBA.par,step = "ms_tab")
plot.dir<-SINBA.par$out.dir.PLOT
#1.box plot
use_sinba_id<-"LCK:HDAC8"
seed.p<-signif(ms_tab$P.Value.Sensitive.Vs.Resistant_DA.seed[ms_tab$originalID==use_sinba_id],digit=3)
sinba.p<-signif(ms_tab$P.Value.Sensitive.Vs.Resistant_SINBA.DA[ms_tab$originalID==use_sinba_id],digit=3)
partner.p<-signif(ms_tab$P.Value.Sensitive.Vs.Resistant_DA.partner[ms_tab$originalID==use_sinba_id],digit=3)

g<-draw.box_SINBA(ac_mat_sinba = Biobase::exprs(SINBA.par$SINBA_AC.eset),
                  ac_mat_single=Biobase::exprs(SINBA.par$single_AC.eset),
                  SINBA_originalID=use_sinba_id,
                  phenotype_info=Biobase::pData(SINBA.par$SINBA_AC.eset),
                  group_col="group",
                  group_levels=c("Sensitive","Resistant"),
                  group_colors=c("red","blue"),
                  main = sprintf("%s, p.SINBA:%s; p.seed:%s; p.partner:%s",use_sinba_id,sinba.p,seed.p,partner.p))
ggsave(g,filename = sprintf("%s/box_%s_%s.pdf",plot.dir,use_sinba_id,Sys.Date()),width = 7,height = 3.5)

#2.NetBID GSEA
ms_tab<-SINBA.par$ms_tab
comp_name<-names(SINBA.par$DA)
partner_driver<-c("HDAC8","METTL15","PTPN4","NOTCH3","PIK3R3")
seed_driver<-"LCK"

DE <- SINBA.par$DE[[comp_name]]

driver_DA_Z <- SINBA.par$DA[[comp_name]][,'Z-statistics']
names(driver_DA_Z) <- rownames(SINBA.par$DA[[comp_name]])
driver_DE_Z <- SINBA.par$DE[[comp_name]][,'Z-statistics']
names(driver_DE_Z) <- rownames(SINBA.par$DE[[comp_name]])

tmp<-SINBA.par$SINBA.DA[[comp_name]]
DA_Z_merge <-tmp[grepl("^LCK",tmp$ID),'Z-statistics']
names(DA_Z_merge) <- rownames(tmp[grepl("^LCK",tmp$ID),])

diff_Z<-ms_tab$delta_Z.Sensitive.Vs.Resistant_SINBA[which(ms_tab$SINBA.seed=="LCK")]
names(diff_Z)<-ms_tab$originalID[which(ms_tab$SINBA.seed=="LCK")]

merge_target <- combineNet2target_list(target_list=SINBA.par$sinba.network$target_list,seed_name=seed_driver,partner_name=partner_driver,return_type="SINBA_target_list")

profile_col <- 'Z-statistics'


draw.NetBID_SINBA(DE=DE,name_col = "ID",profile_col=profile_col,
                  profile_trend='neg2pos',
                  seed_driver=seed_driver,
                  partner_driver_list=partner_driver,
                  seed_driver_label=seed_driver,
                  partner_driver_label=partner_driver,
                  driver_DA_Z=driver_DA_Z,
                  driver_DE_Z=driver_DE_Z,
                  target_list_merge=merge_target,
                  target_list=SINBA.par$sinba.network$target_list,
                  DA_Z_merge=DA_Z_merge,
                  diff_Z = diff_Z,
                  target_nrow=2,target_col='RdBu',target_col_type='PN',
                  left_annotation="Sensitive",right_annotation="Resistant",main="LCK combination Drivers",
                  profile_sig_thre=0,Z_sig_thre=1.64,pdf_file=sprintf("%s/netbid_GSEA_%s_%s.pdf",plot.dir,"LCK",Sys.Date()))


#3.ROC plot
draw.ROC_SINBA(ac_mat_sinba=Biobase::exprs(SINBA.par$SINBA_AC.eset),
               ac_mat_single=Biobase::exprs(SINBA.par$single_AC.eset),
               SINBA_originalID="LCK:HDAC8",
               phenotype_info=Biobase::pData(SINBA.par$SINBA_AC.eset),
               group_col="group",
               G1_name="Sensitive",
               G0_name="Resistant",
               main="LCK:HDAC4",
               pdf_file=sprintf("%s/ROC_%s_%s.pdf",plot.dir,"LCK_HDAC8",Sys.Date()))

#4. GSEA plot
lck_fyn<-combineNet2target_list(target_list=SINBA.par$sinba.network$target_list,seed_name="LCK",partner_name="FYN",return_type="SINBA_target_list")

genes_SINBA<-lck_fyn[[1]]$target
genes_seed<-SINBA.par$sinba.network$target_list$LCK$target
genes_partner<-SINBA.par$sinba.network$target_list$FYN$target
dir_SINBA<-sign(lck_fyn[[1]]$spearman)
dir_seed<-sign(SINBA.par$sinba.network$target_list$LCK$spearman)
dir_partner<-sign(SINBA.par$sinba.network$target_list$FYN$spearman)
rank_profile <- SINBA.par$DE[[comp_name]]$`Z-statistics`
names(rank_profile)<-SINBA.par$DE[[comp_name]]$ID

rank_profile<-rank_profile[order(-rank_profile)]
draw.GSEA_SINBA(rank_profile=rank_profile,use_genes_SINBA=genes_SINBA,use_genes_seed=genes_seed,use_genes_partner=genes_partner,use_direction=TRUE, use_direction_SINBA=dir_SINBA,use_direction_seed=dir_seed,use_direction_partner=dir_partner,main="LCK_FYN",pdf_file=sprintf("%s/GSEA_LCK_FYN.pdf",plot.dir),annotation=NULL,annotation_cex=1.2,left_annotation="Sensitive",right_annotation="Resistant")

draw.GSEA_SINBA(rank_profile=rank_profile,use_genes_SINBA=genes_SINBA,use_genes_seed=genes_seed,use_genes_partner=genes_partner,use_direction=FALSE, use_direction_SINBA=NULL,use_direction_seed=NULL,use_direction_partner=NULL,main="LCK_FYN",pdf_file=sprintf("%s/GSEA_LCK_FYN_v2.pdf",plot.dir),annotation=NULL,annotation_cex=1.2,left_annotation="Sensitive",right_annotation="Resistant")