From 4d5b8bcdd4fc98fc3c064f7f6d619cdf54b48e9c Mon Sep 17 00:00:00 2001
From: yeising <161250010+yeising@users.noreply.github.com>
Date: Tue, 25 Jun 2024 18:37:24 +0200
Subject: [PATCH] feat: map qc (#38)

* feat: added qualimap rule + dependencies, moved sam_stats from Snakefile to qc.smk

* fix: add rule all input for qualimap, linting

* feat: added map_qc compression, moved sam_stats from Snakefile to qc.smk

* fix: renamed rule in profile/config.yaml to align with rule name in qc.smk
---
NOTE(review): line structure restored from a whitespace-collapsed copy of this
patch; blank lines and indentation were re-derived from the hunk line counts,
YAML indentation widths are assumed -- confirm against the repository.
NOTE(review): rule map_qc names its input `sorted_bam`; the qualimap bamqc
wrapper presumably reads `snakemake.input.bam` -- TODO confirm against the
wrapper documentation before relying on this rule.

 workflow/Snakefile           | 24 ++++-----------
 workflow/envs/env.yml        |  3 +-
 workflow/profile/config.yaml | 10 +++++++
 workflow/rules/qc.smk        | 57 +++++++++++++++++++++++++++++++++++-
 4 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 806781e..ac46d82 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -42,6 +42,7 @@ rule all:
         de_heatmap="de_analysis/heatmap.svg",
         lfc_analysis="de_analysis/lfc_analysis.csv",
         samstats=expand("QC/samstats/{sample}.txt", sample=samples["sample"]),
+        map_qc=expand("QC/qualimap/{sample}.tar.gz", sample=samples["sample"]),
 
 
 rule genome_to_transcriptome:
@@ -100,13 +101,13 @@ rule map_reads:
         """
 
 
-rule sam_sort:
+rule sam_view:
     input:
         sam=rules.map_reads.output,
     output:
         "sorted_alignments/{sample}.bam",
     log:
-        "logs/samtools/samsort_{sample}.log",
+        "logs/samtools/samview_{sample}.log",
     conda:
         "envs/env.yml"
     shell:
@@ -115,7 +116,7 @@ rule sam_sort:
 
 rule sam_index:
     input:
-        sbam=rules.sam_sort.output,
+        sbam=rules.sam_view.output,
     output:
         ibam="sorted_alignments/{sample}_index.bam",
     log:
@@ -129,24 +130,9 @@ rule sam_index:
         """
 
 
-rule sam_stats:
-    input:
-        bam=rules.sam_sort.output,
-    output:
-        "QC/samstats/{sample}.txt",
-    log:
-        "logs/samtools/samstats_{sample}.log",
-    conda:
-        "envs/env.yml"
-    shell:
-        """
-        samtools stats -@ {resources.cpus_per_task} {input.bam} > {output} 2> {log}
-        """
-
-
 rule count_reads:
     input:
-        bam=rules.sam_sort.output,
+        bam=rules.sam_view.output,
         trs=rules.genome_to_transcriptome.output,
     output:
         tsv="counts/{sample}_salmon/quant.sf",
diff --git a/workflow/envs/env.yml b/workflow/envs/env.yml
index 4e962ce..ad28446 100644
--- a/workflow/envs/env.yml
+++ b/workflow/envs/env.yml
@@ -28,4 +28,5 @@ dependencies:
   - anndata>=0.8.0
   - ensureconda
   - gffread>=0.12.7
-
+  - qualimap>=2.3
+  - snakemake-wrapper-utils>=0.6.2
diff --git a/workflow/profile/config.yaml b/workflow/profile/config.yaml
index 56d4d33..c54be69 100644
--- a/workflow/profile/config.yaml
+++ b/workflow/profile/config.yaml
@@ -29,7 +29,17 @@ set-resources:
     mem_mb_per_cpu: 1800
     runtime: "2h"
 
+  map_qc:
+    cpus_per_task: 8
+    mem_mb_per_cpu: 1800
+    runtime: "1h"
+
   sam_sort:
+    cpus_per_task: 4
+    mem_mb_per_cpu: 1800
+    runtime: "2h"
+
+  sam_view:
     cpus_per_task: 1
     mem_mb_per_cpu: 1800
     runtime: "1h"
diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index 4a0c62a..5aa5978 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -4,12 +4,13 @@ import os
 localrules:
     compress_nplot,
     compress_nplot_all,
+    compress_map_qc,
 
 
 configfile: "config/config.yml"
 
 
-inputdir = config["inputdir"] #"/lustre/project/m2_zdvhpc/transcriptome_data/"
+inputdir = config["inputdir"]  # "/lustre/project/m2_zdvhpc/transcriptome_data/"
 
 
 # QC and metadata with NanoPlot
@@ -108,3 +109,57 @@ else:
             None
         shell:
             "tar zcvf {output} {input} &> {log}"
+
+
+rule sam_sort:
+    input:
+        sam="alignments/{sample}.sam",
+    output:
+        "sorted_alignments/{sample}_sorted.bam",
+    log:
+        "logs/samtools/samsort_{sample}.log",
+    conda:
+        "../envs/env.yml"
+    shell:
+        "samtools sort -@ {resources.cpus_per_task} {input.sam} -o {output} -O bam &> {log}"
+
+
+rule map_qc:
+    input:
+        sorted_bam=rules.sam_sort.output,
+    output:
+        directory("QC/qualimap/{sample}"),
+    log:
+        "logs/qualimap/{sample}.log",
+    conda:
+        "../envs/env.yml"
+    wrapper:
+        "v3.12.1/bio/qualimap/bamqc"
+
+
+rule compress_map_qc:
+    input:
+        map_qc=rules.map_qc.output,
+    output:
+        "QC/qualimap/{sample}.tar.gz",
+    log:
+        "logs/qualimap/compress_{sample}.log",
+    conda:
+        None
+    shell:
+        "tar zcvf {output} {input} &> {log}"
+
+
+rule sam_stats:
+    input:
+        bam="sorted_alignments/{sample}.bam",
+    output:
+        "QC/samstats/{sample}.txt",
+    log:
+        "logs/samtools/samstats_{sample}.log",
+    conda:
+        "../envs/env.yml"
+    shell:
+        """
+        samtools stats -@ {resources.cpus_per_task} {input.bam} > {output} 2> {log}
+        """