Link Search Menu Expand Document

Tutorial-4 Running DR analysis for large data

Table of contents

  1. Perform pairwise comparisons on Gene level by jobs in parallel

Perform pairwise comparisons on Gene level by jobs in parallel

For whole genome libraries, it may cost sometime to run these pairwise comparisons sequentially. So we recommend to generate one R script for each comparisons and running them in parallel.

Define dependencies, function path (if package is not installed) , and input files.

library(ScreenBEAM2)

input.file<-"your_normalized_eset.tsv"
input.file<-normalizePath(input.file)
dir.tmp<-"DR_hpc/" # directory to store individual results
load([your_stored_eset])

# define your comparisons
d<-openxlsx::read.xlsx("your_CRISPR_comparisons.xlsx")
compare.pairs<-t(d) 

# turn off filtering 
for(i in 1:dim(compare.pairs)[2]){
  
  case.groupname<-compare.pairs[1,i]
  control.groupname<-compare.pairs[2,i]
  
  compare.name<-paste0(case.groupname,".vs.",control.groupname)

  case.sample.id<-which(eset$group==compare.pairs[1,i])
  control.sample.id<-which(eset$group==compare.pairs[2,i])
  
  control.samples<-eset$group[control.sample.id]
  case.samples<-eset$group[case.sample.id]
  
  inputs<-paste0("control.samples ='",control.samples ,"',case.samples = '", case.samples, 
              "',control.groupname='", control.groupname,"',case.groupname='",case.groupname,"'")

  other.args<-"data.type = 'NGS',gene.columnId=2, do.normalization=FALSE, filterLowCount=FALSE,
              count.cutoff=0, rna.size = 6, sample.rna.time = 100, pooling = 'partial', method='Bayesian'"
  
  fileR<-paste0('DR_hpc/',compare.name,".R")
  sink(fileR)
  cat('rm(list=ls())\n',sep='')
  cat('setwd(\'',normalizePath(dir.tmp),'\')\n',sep='')
  cat("source('dependencies.R')\n",sep='')
  cat('source(\'',func.path,'\')\n',sep='')
  cat('source(\'',func.util.path,'\')\n',sep='')
  cat('de.gene.list<-list()\n',sep='')
  cat('print(\'',compare.name,'\')\n',sep='')
  cat('de<-ScreenBEAM.gene.level(\'',input.file,'\',',inputs, ',', other.args,')\n',sep='')
  cat('names(de)[1]<-"geneID"\n',sep='')
  cat('de.gene.list[[\'',compare.name,'\']]<-de', '\n',sep='')
  cat('save(de.gene.list, file=\'DR.gene.c',i,'\')\n',sep = '')
  sink()
  
  compare.name<-gsub(".","_",compare.name,fixed = TRUE)
  
  file.sh<-file.path(dir.tmp,paste(compare.name,'sh',sep='.'))
  project.name<-compare.name
  memory<-paste('#BSUB -R \"rusage[mem=8000]\"')
  project<-paste('#BSUB -P ',project.name,'')
  
  sh.header<-paste(
    '#!/bin/bash',
    project,
    memory,
    '#BSUB -q standard',
    sep='\n')
  
  sink(file.sh)
  cat(sh.header,'\n', sep = '')
  cat('Rscript', normalizePath(fileR), sep=' ')
  sink()
  
  cmd<-paste('bsub < ',normalizePath(file.sh),'\n',sep='')
  system(cmd)
  system(paste('sleep ',1,'\n'))
  cat(cmd)
}