diff --git a/config/config.yml b/config/config.yml index 97d917b..8e1b3cf 100644 --- a/config/config.yml +++ b/config/config.yml @@ -13,8 +13,8 @@ repo: "/~https://github.com/snakemake-workflows/transriptome-differential-expressi ## Workflow-specific Parameters: -# Transcriptome fasta (absolute path) -transcriptome: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.fa" +# Genome fasta (absolute path) +genome: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.fa" # Annotation GFF/GTF (absolute path) annotation: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325.4_PGI_CHIRRI_v4_genomic.gff" # these samples ought to contain all samples comprising of the @@ -23,7 +23,7 @@ annotation: "/lustre/miifs01/project/m2_zdvhpc/transcriptome_data/GCA_917627325. max_cpus: 40 # Optional: NanoPlot QC using a summary file from the sequencer. If this file is not supplied, put the parameter to "None". It will then do an independent QC run per FASTQ input. -summary: None #"/lustre/project/m2_zdvhpc/transcriptome_data/seqencing_summary/sequencing_summary.txt" +summary: None # "/lustre/project/m2_zdvhpc/transcriptome_data/seqencing_summary/sequencing_summary.txt" # Minimap2 indexing options minimap_index_opts: "" @@ -56,27 +56,26 @@ min_feature_expr: 3 # The (log2) log fold change under the null hypothesis. (default: 0). lfc_null: 0.1 # -# The alternative hypothesis for computing wald p-values. By default, -# the normal Wald test assesses deviation of the estimated log fold -# change from the null hypothesis, as given by lfc_null. -# One of ["greaterAbs", "lessAbs", "greater", "less"] or None. -# The alternative hypothesis corresponds to what the user wants to +# The alternative hypothesis for computing wald p-values. By default, +# the normal Wald test assesses deviation of the estimated log fold +# change from the null hypothesis, as given by lfc_null. +# One of ["greaterAbs", "lessAbs", "greater", "less"] or None. +# The alternative hypothesis corresponds to what the user wants to # find rather than the null hypothesis. (default: None). alt_hypothesis: "greaterAbs" # -# The marker size in points**2 (typographic points are 1/72 in.). -# Default is rcParams['lines.markersize'] ** 2.# minimum count to +# The marker size in points**2 (typographic points are 1/72 in.). +# Default is rcParams['lines.markersize'] ** 2.# minimum count to # be considered for subsequent analysis point_width: 20 # # mincount: 10 # -# in addition to the full heatmap, plot the top number of different +# in addition to the full heatmap, plot the top number of different # values, ranked by the top ratio between the two traits threshold_plot: 10 # # the heatmap color map # see https://seaborn.pydata.org/tutorial/color_palettes.htm for an overview -colormap: "flare" - +colormap: "flare" diff --git a/workflow/Snakefile b/workflow/Snakefile index b78bef4..806781e 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -9,6 +9,7 @@ min_version("8.10.7") localrules: + genome_to_transcriptome, merge_counts, write_coldata, write_de_params, @@ -43,9 +44,25 @@ rule all: samstats=expand("QC/samstats/{sample}.txt", sample=samples["sample"]), +rule genome_to_transcriptome: + input: + genome=config["genome"], + annotation=config["annotation"], + output: + transcriptome="transcriptome/transcriptome.fa", + log: + "logs/gffread.log", + conda: + "envs/env.yml" + shell: + """ + gffread -t {resources.cpus_per_task} -w {output} -g {input.genome} {input.annotation} &> {log} + """ + + rule build_minimap_index: ## build minimap2 index input: - genome=config["transcriptome"], + transcriptome=rules.genome_to_transcriptome.output, output: index="index/transcriptome_index.mmi", params: @@ -56,7 +73,7 @@ rule build_minimap_index: ## build minimap2 index "envs/env.yml" shell: """ - minimap2 -t {resources.cpus_per_task} {params.opts} -d {output.index} {input.genome} 2> {log} + minimap2 -t {resources.cpus_per_task} {params.opts} -ax map-ont -d {output.index} {input.transcriptome} 2> {log} """ @@ -68,7 +85,7 @@ rule map_reads: samples["sample"][wildcards.sample] ), output: - "alignments/{sample}.bam", + "alignments/{sample}.sam", log: "logs/minimap2/mapping_{sample}.log", params: @@ -93,7 +110,7 @@ rule sam_sort: conda: "envs/env.yml" shell: - "samtools sort -@ {resources.cpus_per_task} {input.sam} -o {output} -O bam &> {log}" + "samtools view -@ {resources.cpus_per_task} -bS {input.sam} > {output} 2> {log}" rule sam_index: @@ -114,7 +131,7 @@ rule sam_index: rule sam_stats: input: - bam=rules.map_reads.output, + bam=rules.sam_sort.output, output: "QC/samstats/{sample}.txt", log: @@ -129,8 +146,8 @@ rule sam_stats: rule count_reads: input: - bam=rules.sam_index.output.ibam, - trs=config["transcriptome"], + bam=rules.sam_sort.output, + trs=rules.genome_to_transcriptome.output, output: tsv="counts/{sample}_salmon/quant.sf", params: @@ -142,8 +159,8 @@ rule count_reads: "envs/env.yml" shell: """ - salmon --no-version-check quant -p {resources.cpus_per_task} \ - -t {input.trs} -l {params.libtype} -a {input.bam} -o {params.tsv_dir} &> {log} + salmon --no-version-check quant --ont -p {resources.cpus_per_task} \ + -t {input.trs} -l {params.libtype} -a {input.bam} -o {params.tsv_dir} 2> {log} """ diff --git a/workflow/envs/env.yml b/workflow/envs/env.yml index 44dd4ff..4e962ce 100644 --- a/workflow/envs/env.yml +++ b/workflow/envs/env.yml @@ -27,4 +27,5 @@ dependencies: - pydeseq2=0.4.4 - anndata>=0.8.0 - ensureconda + - gffread>=0.12.7 diff --git a/workflow/profile/config.yaml b/workflow/profile/config.yaml index 75b0ac5..56d4d33 100644 --- a/workflow/profile/config.yaml +++ b/workflow/profile/config.yaml @@ -3,6 +3,11 @@ default-resources: slurm_partition: "smp" set-resources: + genome_to_transcriptome: + cpus_per_task: 1 + mem_mb_per_cpu: 1800 + runtime: "2h" + build_minimap_index: cpus_per_task: 4 mem_mb_per_cpu: 3600 @@ -15,9 +20,9 @@ set-resources: slurm_partition: "parallel" plot_samples: - cpus_per_task: 1 + cpus_per_task: 4 mem_mb_per_cpu: 1800 - runtime: "5h" + runtime: "3h" plot_all_samples: cpus_per_task: 8 @@ -25,7 +30,7 @@ set-resources: runtime: "2h" sam_sort: - cpus_per_task: 8 + cpus_per_task: 1 mem_mb_per_cpu: 1800 runtime: "1h"