Skip to content

Commit

Permalink
fix: read quantification (#22)
Browse files Browse the repository at this point in the history
* fix: start salmon in --ont mode

* fix: samtools creates BAM files without sorting for salmon

* feat: create transcriptome from genome.fa and annotation.gff

* fix: resources and linting

* fix: removed obsolete comment
  • Loading branch information
yeising authored Jun 19, 2024
1 parent 3237e2c commit 71422a1
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 25 deletions.
25 changes: 12 additions & 13 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ repo: "/~https://github.com/snakemake-workflows/transriptome-differential-expressi

## Workflow-specific Parameters:

# Transcriptome fasta (absolute path)
transcriptome: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.fa"
# Genome fasta (absolute path)
genome: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.fa"
# Annotation GFF/GTF (absolute path)
annotation: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.gff"
# these samples ought to contain all samples comprising of the
Expand All @@ -23,7 +23,7 @@ annotation: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.
max_cpus: 40

# Optional: NanoPlot QC using a summary file from the sequencer. If this file is not supplied, put the parameter to "None". It will then do an independent QC run per FASTQ input.
summary: None #"/lustre/project/m2_zdvhpc/transcriptome_data/seqencing_summary/sequencing_summary.txt"
summary: None # "/lustre/project/m2_zdvhpc/transcriptome_data/seqencing_summary/sequencing_summary.txt"

# Minimap2 indexing options
minimap_index_opts: ""
Expand Down Expand Up @@ -56,27 +56,26 @@ min_feature_expr: 3
# The (log2) log fold change under the null hypothesis. (default: 0).
lfc_null: 0.1
#
# The alternative hypothesis for computing wald p-values. By default,
# the normal Wald test assesses deviation of the estimated log fold
# change from the null hypothesis, as given by lfc_null.
# One of ["greaterAbs", "lessAbs", "greater", "less"] or None.
# The alternative hypothesis corresponds to what the user wants to
# The alternative hypothesis for computing wald p-values. By default,
# the normal Wald test assesses deviation of the estimated log fold
# change from the null hypothesis, as given by lfc_null.
# One of ["greaterAbs", "lessAbs", "greater", "less"] or None.
# The alternative hypothesis corresponds to what the user wants to
# find rather than the null hypothesis. (default: None).
alt_hypothesis: "greaterAbs"
#
# The marker size in points**2 (typographic points are 1/72 in.).
# Default is rcParams['lines.markersize'] ** 2.# minimum count to
# The marker size in points**2 (typographic points are 1/72 in.).
# Default is rcParams['lines.markersize'] ** 2.# minimum count to
# be considered for subsequent analysis
point_width: 20
#
#
mincount: 10
#
# in addition to the full heatmap, plot the top number of different
# in addition to the full heatmap, plot the top number of different
# values, ranked by the top ratio between the two traits
threshold_plot: 10
#
# the heatmap color map
# see https://seaborn.pydata.org/tutorial/color_palettes.htm for an overview
colormap: "flare"

colormap: "flare"
35 changes: 26 additions & 9 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ min_version("8.10.7")


localrules:
genome_to_transcriptome,
merge_counts,
write_coldata,
write_de_params,
Expand Down Expand Up @@ -43,9 +44,25 @@ rule all:
samstats=expand("QC/samstats/{sample}.txt", sample=samples["sample"]),


rule genome_to_transcriptome:
input:
genome=config["genome"],
annotation=config["annotation"],
output:
transcriptome="transcriptome/transcriptome.fa",
log:
"logs/gffread.log",
conda:
"envs/env.yml"
shell:
"""
gffread -t {resources.cpus_per_task} -w {output} -g {input.genome} {input.annotation} &> {log}
"""


rule build_minimap_index: ## build minimap2 index
input:
genome=config["transcriptome"],
transcriptome=rules.genome_to_transcriptome.output,
output:
index="index/transcriptome_index.mmi",
params:
Expand All @@ -56,7 +73,7 @@ rule build_minimap_index: ## build minimap2 index
"envs/env.yml"
shell:
"""
minimap2 -t {resources.cpus_per_task} {params.opts} -d {output.index} {input.genome} 2> {log}
minimap2 -t {resources.cpus_per_task} {params.opts} -ax map-ont -d {output.index} {input.transcriptome} 2> {log}
"""


Expand All @@ -68,7 +85,7 @@ rule map_reads:
samples["sample"][wildcards.sample]
),
output:
"alignments/{sample}.bam",
"alignments/{sample}.sam",
log:
"logs/minimap2/mapping_{sample}.log",
params:
Expand All @@ -93,7 +110,7 @@ rule sam_sort:
conda:
"envs/env.yml"
shell:
"samtools sort -@ {resources.cpus_per_task} {input.sam} -o {output} -O bam &> {log}"
"samtools view -@ {resources.cpus_per_task} -bS {input.sam} > {output} 2> {log}"


rule sam_index:
Expand All @@ -114,7 +131,7 @@ rule sam_index:

rule sam_stats:
input:
bam=rules.map_reads.output,
bam=rules.sam_sort.output,
output:
"QC/samstats/{sample}.txt",
log:
Expand All @@ -129,8 +146,8 @@ rule sam_stats:

rule count_reads:
input:
bam=rules.sam_index.output.ibam,
trs=config["transcriptome"],
bam=rules.sam_sort.output,
trs=rules.genome_to_transcriptome.output,
output:
tsv="counts/{sample}_salmon/quant.sf",
params:
Expand All @@ -142,8 +159,8 @@ rule count_reads:
"envs/env.yml"
shell:
"""
salmon --no-version-check quant -p {resources.cpus_per_task} \
-t {input.trs} -l {params.libtype} -a {input.bam} -o {params.tsv_dir} &> {log}
salmon --no-version-check quant --ont -p {resources.cpus_per_task} \
-t {input.trs} -l {params.libtype} -a {input.bam} -o {params.tsv_dir} 2> {log}
"""


Expand Down
1 change: 1 addition & 0 deletions workflow/envs/env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ dependencies:
- pydeseq2=0.4.4
- anndata>=0.8.0
- ensureconda
- gffread>=0.12.7

11 changes: 8 additions & 3 deletions workflow/profile/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ default-resources:
slurm_partition: "smp"

set-resources:
genome_to_transcriptome:
cpus_per_task: 1
mem_mb_per_cpu: 1800
runtime: "2h"

build_minimap_index:
cpus_per_task: 4
mem_mb_per_cpu: 3600
Expand All @@ -15,17 +20,17 @@ set-resources:
slurm_partition: "parallel"

plot_samples:
cpus_per_task: 1
cpus_per_task: 4
mem_mb_per_cpu: 1800
runtime: "5h"
runtime: "3h"

plot_all_samples:
cpus_per_task: 8
mem_mb_per_cpu: 1800
runtime: "2h"

sam_sort:
cpus_per_task: 8
cpus_per_task: 1
mem_mb_per_cpu: 1800
runtime: "1h"

Expand Down

0 comments on commit 71422a1

Please sign in to comment.