Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: read quantification #22

Merged
merged 5 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ repo: "/~https://github.com/snakemake-workflows/transriptome-differential-expressi

## Workflow-specific Parameters:

# Transcriptome fasta (absolute path)
transcriptome: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.fa"
# Genome fasta (absolute path)
genome: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.fa"
# Annotation GFF/GTF (absolute path)
annotation: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.gff"
# these samples ought to contain all samples comprising of the
Expand All @@ -23,7 +23,7 @@ annotation: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.
max_cpus: 40

# Optional: NanoPlot QC using a summary file from the sequencer. If this file is not supplied, put the parameter to "None". It will then do an independent QC run per FASTQ input.
summary: None #"/lustre/project/m2_zdvhpc/transcriptome_data/seqencing_summary/sequencing_summary.txt"
summary: None # "/lustre/project/m2_zdvhpc/transcriptome_data/seqencing_summary/sequencing_summary.txt"

# Minimap2 indexing options
minimap_index_opts: ""
Expand Down Expand Up @@ -56,27 +56,26 @@ min_feature_expr: 3
# The (log2) log fold change under the null hypothesis. (default: 0).
lfc_null: 0.1
#
# The alternative hypothesis for computing wald p-values. By default,
# the normal Wald test assesses deviation of the estimated log fold
# change from the null hypothesis, as given by lfc_null.
# One of ["greaterAbs", "lessAbs", "greater", "less"] or None.
# The alternative hypothesis corresponds to what the user wants to
# The alternative hypothesis for computing wald p-values. By default,
# the normal Wald test assesses deviation of the estimated log fold
# change from the null hypothesis, as given by lfc_null.
# One of ["greaterAbs", "lessAbs", "greater", "less"] or None.
# The alternative hypothesis corresponds to what the user wants to
# find rather than the null hypothesis. (default: None).
alt_hypothesis: "greaterAbs"
#
# The marker size in points**2 (typographic points are 1/72 in.).
# Default is rcParams['lines.markersize'] ** 2.# minimum count to
# The marker size in points**2 (typographic points are 1/72 in.).
# Default is rcParams['lines.markersize'] ** 2.# minimum count to
# be considered for subsequent analysis
point_width: 20
#
#
mincount: 10
#
# in addition to the full heatmap, plot the top number of different
# in addition to the full heatmap, plot the top number of different
# values, ranked by the top ratio between the two traits
threshold_plot: 10
#
# the heatmap color map
# see https://seaborn.pydata.org/tutorial/color_palettes.htm for an overview
colormap: "flare"

colormap: "flare"
35 changes: 26 additions & 9 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ min_version("8.10.7")


localrules:
genome_to_transcriptome,
merge_counts,
write_coldata,
write_de_params,
Expand Down Expand Up @@ -43,9 +44,25 @@ rule all:
samstats=expand("QC/samstats/{sample}.txt", sample=samples["sample"]),


rule genome_to_transcriptome:
input:
genome=config["genome"],
annotation=config["annotation"],
output:
transcriptome="transcriptome/transcriptome.fa",
log:
"logs/gffread.log",
conda:
"envs/env.yml"
shell:
"""
gffread -t {resources.cpus_per_task} -w {output} -g {input.genome} {input.annotation} &> {log}
"""


rule build_minimap_index: ## build minimap2 index
input:
genome=config["transcriptome"],
transcriptome=rules.genome_to_transcriptome.output,
output:
index="index/transcriptome_index.mmi",
params:
Expand All @@ -56,7 +73,7 @@ rule build_minimap_index: ## build minimap2 index
"envs/env.yml"
shell:
"""
minimap2 -t {resources.cpus_per_task} {params.opts} -d {output.index} {input.genome} 2> {log}
minimap2 -t {resources.cpus_per_task} {params.opts} -ax map-ont -d {output.index} {input.transcriptome} 2> {log}
"""


Expand All @@ -68,7 +85,7 @@ rule map_reads:
samples["sample"][wildcards.sample]
),
output:
"alignments/{sample}.bam",
"alignments/{sample}.sam",
log:
"logs/minimap2/mapping_{sample}.log",
params:
Expand All @@ -93,7 +110,7 @@ rule sam_sort:
conda:
"envs/env.yml"
shell:
"samtools sort -@ {resources.cpus_per_task} {input.sam} -o {output} -O bam &> {log}"
"samtools view -@ {resources.cpus_per_task} -bS {input.sam} > {output} 2> {log}"


rule sam_index:
Expand All @@ -114,7 +131,7 @@ rule sam_index:

rule sam_stats:
input:
bam=rules.map_reads.output,
bam=rules.sam_sort.output,
output:
"QC/samstats/{sample}.txt",
log:
Expand All @@ -129,8 +146,8 @@ rule sam_stats:

rule count_reads:
input:
bam=rules.sam_index.output.ibam,
trs=config["transcriptome"],
bam=rules.sam_sort.output, # use unsorted .bam files; minimap2 does not create .bam -> new input needed; also filtering may be necessary (cutadapt)
trs=rules.genome_to_transcriptome.output,
output:
tsv="counts/{sample}_salmon/quant.sf",
params:
Expand All @@ -142,8 +159,8 @@ rule count_reads:
"envs/env.yml"
shell:
"""
salmon --no-version-check quant -p {resources.cpus_per_task} \
-t {input.trs} -l {params.libtype} -a {input.bam} -o {params.tsv_dir} &> {log}
salmon --no-version-check quant --ont -p {resources.cpus_per_task} \
-t {input.trs} -l {params.libtype} -a {input.bam} -o {params.tsv_dir} 2> {log}
"""


Expand Down
1 change: 1 addition & 0 deletions workflow/envs/env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ dependencies:
- pydeseq2=0.4.4
- anndata>=0.8.0
- ensureconda
- gffread>=0.12.7

11 changes: 8 additions & 3 deletions workflow/profile/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ default-resources:
slurm_partition: "smp"

set-resources:
genome_to_transcriptome:
cpus_per_task: 1
mem_mb_per_cpu: 1800
runtime: "2h"

build_minimap_index:
cpus_per_task: 4
mem_mb_per_cpu: 3600
Expand All @@ -15,17 +20,17 @@ set-resources:
slurm_partition: "parallel"

plot_samples:
cpus_per_task: 1
cpus_per_task: 4
mem_mb_per_cpu: 1800
runtime: "5h"
runtime: "3h"

plot_all_samples:
cpus_per_task: 8
mem_mb_per_cpu: 1800
runtime: "2h"

sam_sort:
cpus_per_task: 8
cpus_per_task: 1
mem_mb_per_cpu: 1800
runtime: "1h"

Expand Down
Loading