Tutorial-4 Running DR analysis for large data

Perform pairwise comparisons on Gene level by jobs in parallel

Perform pairwise comparisons on Gene level by jobs in parallel

For whole-genome libraries, running these pairwise comparisons sequentially can take a long time. We therefore recommend generating one R script per comparison and running the scripts in parallel.

Define the dependencies, the function paths (if the package is not installed), and the input files.

library(ScreenBEAM2)

# If the ScreenBEAM2 package is not installed, point these two variables at
# the function source files instead; the job generator below reads them.
# func.path<-"path/to/ScreenBEAM2_functions.R"
# func.util.path<-"path/to/ScreenBEAM2_utils.R"

# Normalized count table handed to every generated job. An absolute path is
# required because each job changes its working directory before running.
input.file<-"your_normalized_eset.tsv"
# Fail immediately if the file is missing rather than inside the cluster jobs.
input.file<-normalizePath(input.file, mustWork = TRUE)

# Directory that receives the generated scripts and the individual results.
dir.tmp<-"DR_hpc/"
dir.create(dir.tmp, showWarnings = FALSE, recursive = TRUE)

# Placeholder: replace with the path to your stored eset.
# NOTE(review): the file is expected to provide the `eset` object used below.
load("your_stored_eset.RData")

# define your comparisons
d<-openxlsx::read.xlsx("your_CRISPR_comparisons.xlsx")
# After transposing, each column is one comparison:
# row 1 = case group name, row 2 = control group name.
compare.pairs<-t(d)

# For every comparison (one column of `compare.pairs`: row 1 = case group,
# row 2 = control group) write a stand-alone R script plus an LSF submission
# script into `dir.tmp`, then submit the job with `bsub`.

# Make sure the output directory exists before anything is written into it.
dir.create(dir.tmp, showWarnings = FALSE, recursive = TRUE)

# Render a character vector as the text of an R literal, e.g. c('a','b'), so
# that groups with several samples produce one syntactically valid argument.
as.r.vector <- function(x) {
  paste0("c(", paste0("'", x, "'", collapse = ","), ")")
}

# Arguments shared by every comparison. Low-count filtering is turned off
# here (filterLowCount=FALSE, count.cutoff=0).
other.args <- paste(
  c("data.type = 'NGS'", "gene.columnId=2", "do.normalization=FALSE",
    "filterLowCount=FALSE", "count.cutoff=0", "rna.size = 6",
    "sample.rna.time = 100", "pooling = 'partial'", "method='Bayesian'"),
  collapse = ", ")

for (i in seq_len(ncol(compare.pairs))) {

  case.groupname <- compare.pairs[1, i]
  control.groupname <- compare.pairs[2, i]

  compare.name <- paste0(case.groupname, ".vs.", control.groupname)

  case.sample.id <- which(eset$group == case.groupname)
  control.sample.id <- which(eset$group == control.groupname)

  # A comparison without samples on either side cannot run; skip it instead
  # of submitting a job that is guaranteed to fail.
  if (length(case.sample.id) == 0 || length(control.sample.id) == 0) {
    warning("Skipping ", compare.name, ": no samples found for one of the groups",
            call. = FALSE)
    next
  }

  # NOTE(review): these are the group labels of the selected samples, exactly
  # as in the original code. If ScreenBEAM.gene.level() expects the sample
  # column names of the input file, take them from the eset's sample names
  # instead -- confirm against the ScreenBEAM2 documentation.
  control.samples <- eset$group[control.sample.id]
  case.samples <- eset$group[case.sample.id]

  inputs <- paste0("control.samples=", as.r.vector(control.samples),
                   ",case.samples=", as.r.vector(case.samples),
                   ",control.groupname='", control.groupname,
                   "',case.groupname='", case.groupname, "'")

  # --- R script for this comparison ---------------------------------------
  fileR <- file.path(dir.tmp, paste0(compare.name, ".R"))
  script.lines <- c(
    "rm(list=ls())",
    paste0("setwd('", normalizePath(dir.tmp), "')"),
    "source('dependencies.R')",
    # Source files are only needed when the package is not installed, so the
    # lines are emitted only if the corresponding path variable is defined.
    if (exists("func.path")) paste0("source('", func.path, "')"),
    if (exists("func.util.path")) paste0("source('", func.util.path, "')"),
    "de.gene.list<-list()",
    paste0("print('", compare.name, "')"),
    paste0("de<-ScreenBEAM.gene.level('", input.file, "',", inputs, ",",
           other.args, ")"),
    "names(de)[1]<-\"geneID\"",
    paste0("de.gene.list[['", compare.name, "']]<-de"),
    paste0("save(de.gene.list, file='DR.gene.c", i, "')")
  )
  # writeLines() opens and closes the file itself, so an error cannot leave
  # console output redirected the way an unfinished sink() would.
  writeLines(script.lines, fileR)

  # --- LSF submission script ----------------------------------------------
  # Dots are replaced for the job/project name and the .sh file name only;
  # the R script above keeps the dotted name.
  compare.name <- gsub(".", "_", compare.name, fixed = TRUE)

  file.sh <- file.path(dir.tmp, paste(compare.name, "sh", sep = "."))
  project.name <- compare.name

  sh.lines <- c(
    "#!/bin/bash",
    paste("#BSUB -P", project.name),
    "#BSUB -R \"rusage[mem=8000]\"",
    "#BSUB -q standard",
    # Quote the path so directories containing spaces survive the shell.
    paste("Rscript", shQuote(normalizePath(fileR)))
  )
  writeLines(sh.lines, file.sh)

  # --- Submit ---------------------------------------------------------------
  cmd <- paste("bsub <", shQuote(normalizePath(file.sh)))
  status <- system(cmd)
  if (status != 0) {
    warning("Submission failed for ", compare.name, " (exit status ", status, ")",
            call. = FALSE)
  }
  # Short pause between submissions.
  Sys.sleep(1)
  cat(cmd, "\n", sep = "")
}

Tutorial-4 Running DR analysis for large data

Table of contents

Perform pairwise comparisons on Gene level by jobs in parallel