-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_snakemake.sh
44 lines (38 loc) · 2.25 KB
/
run_snakemake.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/bin/bash
##### 0. HOW TO RUN
# 1. Verify that the CONTAINER path in section 1 below is valid.
# 2. Check the requested resources (partitions, number of tasks, memory, time,
#    etc.) in sections 3 and 4.
# 3. Save and exit.
# 4. Run this shell script from the command line with the following command:
#      bash run_snakemake.sh
##### 1. USER INPUTS
# Path to the container image. It is executed directly as a command, with the
# snakemake command line passed as its argument(s), so it must be a single path
# to an executable file.
CONTAINER=/oak/stanford/groups/wjg/bliu/containers/atac.sif
if [[ ! -x "${CONTAINER}" ]]; then
  echo "ERROR: container '${CONTAINER}' does not exist or is not executable" >&2
  exit 1
fi
##### 2. PREP
# Unlock the working directory, then do a dry run to check the workflow code
# (you should see a lot of green text displayed). Stop here if this step fails,
# so that no cluster jobs are submitted for a workflow that cannot run.
if ! "${CONTAINER}" "snakemake --unlock -s Snakefile.py; snakemake -ns Snakefile.py"; then
  echo "ERROR: snakemake unlock / dry run failed; no jobs were submitted" >&2
  exit 1
fi
##### 3. SINGLES
# For computation-intensive tasks that require no info from other samples in the
# group (e.g. alignment, peak calling): split the meta file into single-sample
# files (header row + one sample row) and submit one snakemake job per sample to
# slurm. The job IDs are stored in .tmp/tmp_joblist.txt so that the group
# analysis only starts after all of these jobs have completed.
# NOTE: the single/group split analysis was implemented because we had
# difficulties calling sbatch from within the container. This also enables
# greater portability to non-slurm computing clusters in the future (e.g. google
# cloud, aws).

# The metadata path is the first quoted string on the "METADATA_FILE = " line of
# snakeATAC_config.py.
META=$(grep "METADATA_FILE = " snakeATAC_config.py | tr "'\"" "\n" | sed -n "2p")
if [[ -z "${META}" || ! -r "${META}" ]]; then
  echo "ERROR: cannot read metadata file '${META}' (METADATA_FILE in snakeATAC_config.py)" >&2
  exit 1
fi

# Start from a clean scratch directory and an empty job list.
rm -rf -- .tmp
mkdir -- .tmp
mkdir -p log
JOBLIST=.tmp/tmp_joblist.txt
: > "${JOBLIST}"

# Count the rows once, outside the loop. awk's NR also counts a final row that
# has no trailing newline, which `wc -l` would miss.
NROWS=$(awk 'END { print NR }' "${META}")
if (( NROWS < 2 )); then
  echo "ERROR: '${META}' contains no sample rows below the header" >&2
  exit 1
fi

for ((NUM = 2; NUM <= NROWS; NUM++)); do
  # The sample name is the first tab-separated column of the row.
  name=$(sed -n "${NUM}p" "${META}" | cut -f1)
  # Skip rows without a sample name (e.g. a blank trailing line): a job for such
  # a row could not succeed and would block the afterok-dependent group job.
  if [[ -z "${name//[[:space:]]/}" ]]; then
    continue
  fi

  # Single-sample meta file: header row plus the row of this sample.
  METAPATH=.tmp/tmp_meta_$((NUM - 1)).txt
  sed -n "1p;${NUM}p" "${META}" > "${METAPATH}"

  SNAKE_CMD="snakemake --rerun-incomplete --nolock -T -p -j 10 -s Snakefile.py --config RULE_GROUP='single' META=${METAPATH}"
  # sbatch is wrapped in another bash because (per the original author) sbatch
  # exits the shell immediately after job submission. The arguments are handed
  # over as positional parameters ("$@") instead of being spliced into the
  # command string, so sample names are never re-parsed by the inner shell.
  if ! bash -c 'sbatch "$@"' sbatch_wrapper \
      --parsable -p sfgf,wjg,biochem -n 8 -t 24:00:00 \
      --job-name="${name}" -o "./log/slurm-%j-${name}.out" \
      --mem-per-cpu 64g --wrap "${CONTAINER} ${SNAKE_CMD}" \
      >> "${JOBLIST}"; then
    # Abort: continuing would let the group analysis start without waiting for
    # this sample.
    echo "ERROR: sbatch submission failed for sample '${name}' (row ${NUM} of ${META})" >&2
    exit 1
  fi
done

if [[ ! -s "${JOBLIST}" ]]; then
  echo "ERROR: no single-sample jobs were submitted from '${META}'" >&2
  exit 1
fi
echo "Submitted single analysis jobs:"
cat "${JOBLIST}"
##### 4. GROUP
# After all single-sample jobs have completed successfully, run the group
# analysis tasks as one more slurm job that depends on them.
JOBLIST=.tmp/tmp_joblist.txt
# Build the dependency list from the recorded job IDs:
#  - `sbatch --parsable` prints "jobid" or "jobid;cluster"; keep only the ID;
#  - drop blank lines;
#  - join with ':' because sbatch documents the list as afterok:id[:id...],
#    while a ',' starts a new dependency clause that needs its own type.
DEPENDENCY=$(cut -d';' -f1 "${JOBLIST}" | sed '/^[[:space:]]*$/d' | paste -s -d ':' -)
if [[ -z "${DEPENDENCY}" ]]; then
  echo "ERROR: no job IDs found in ${JOBLIST}; not submitting the group analysis" >&2
  exit 1
fi
SNAKE_CMD_GROUP="snakemake -T -p -j 10 -s Snakefile.py --config RULE_GROUP='group' META=${META}"
# NOTE(review): inside --wrap the command is enclosed in single quotes, and the
# quotes around 'group' close and reopen that quoting, so the container receives
# the whole snakemake command as ONE argument with RULE_GROUP=group. This is
# kept as in the original; it relies on META containing no quote characters.
sbatch --dependency="afterok:${DEPENDENCY}" -p sfgf,wjg,biochem -n 8 -t 24:00:00 -o ./log/slurm-%j-group.out --job-name=snake_group --mem-per-cpu 64g --wrap "${CONTAINER} '${SNAKE_CMD_GROUP}'"