everything everywhere all at once, sigh
sorelfitzgibbon committed Nov 1, 2023
1 parent f960817 commit a46299e
Showing 12 changed files with 165 additions and 106 deletions.
20 changes: 15 additions & 5 deletions config/methods.config
@@ -93,6 +93,14 @@ methods {
}
}

sanitize_string = { raw ->
if (![String, GString].any{ raw in it }) {
throw new Exception("Input to sanitize is either empty or not a string! Provide a non-empty string.")
}
def disallowed_characters = /[^a-zA-Z\d\/_.-]/
return raw.replaceAll(disallowed_characters, '').replace('_', '-')
}
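As a quick illustration of what this new helper does (a standalone Groovy sketch, not part of the commit, with the type check omitted and a made-up SM tag): characters outside a-z, A-Z, 0-9, /, ., _ and - are dropped, then underscores become hyphens.

    def sanitize = { raw ->
        def disallowed_characters = /[^a-zA-Z\d\/_.-]/
        return raw.replaceAll(disallowed_characters, '').replace('_', '-')
    }
    // Hypothetical SM tag: the space and '#' are stripped, '_' becomes '-'
    assert sanitize('TWGSAMIN000001_T001 S01#F') == 'TWGSAMIN000001-T001S01F'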

get_ids_from_bams = {
params.samples_to_process = [] as Set
params.input.each { k, v ->
@@ -103,7 +111,8 @@
if (sm_tags.size() > 1) {
throw new Exception("${bam_path} contains multiple samples! Please run pipeline with single sample BAMs.")
}
params.samples_to_process.add(['id': sm_tags[0], 'path': bam_path, 'sample_type': k])
sm_tag = methods.sanitize_string(sm_tags[0])
params.samples_to_process.add(['orig-id': sm_tags[0], 'id': sm_tag, 'path': bam_path, 'sample_type': k])
}
}
}
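With this change each record keeps both the raw SM tag and its sanitized form. A hypothetical entry in params.samples_to_process (all values made up) would look like:

    def sample_record = [
        'orig-id'    : 'TWGSAMIN000001_T001',                // SM tag as read from the BAM header
        'id'         : 'TWGSAMIN000001-T001',                // after methods.sanitize_string
        'path'       : '/hot/data/TWGSAMIN000001_T001.bam',
        'sample_type': 'tumor'
    ]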
@@ -116,6 +125,7 @@ methods {

if (!params?['input']?['normal']?['BAM'] ) {
params.tumor_only_mode = true
params.input['normal'] = [BAM: "${params.work_dir}/NO_FILE.bam"]
} else {
if ( params.input['tumor'].size() > 1 ) {
params.multi_tumor_sample = true
@@ -126,16 +136,16 @@
}

if (params.multi_tumor_sample || params.multi_normal_sample) {
params.sample_name = params.patient_id
params.sample_id = params.patient_id
} else {
def tumorSample = params.samples_to_process.find { it['sample_type'] == 'tumor' }
if (tumorSample) {
params.sample_name = tumorSample['id']
params.sample_id = tumorSample['id']
params.tumor_id = params.sample_id
} else {
throw new Exception("Warning: No sample with sample_type 'tumor' found in ${params.samples_to_process}")
}
if (params.tumor_only_mode) {
params.input['normal']['BAM'] = "${params.work_dir}/NO_FILE.bam"
params.normal_id = 'Empty_id'
} else {
params.normal_id = params.samples_to_process.find { it['sample_type'] == 'normal' }['id']
@@ -181,7 +191,7 @@ methods {
set_output_directory = {
def tz = TimeZone.getTimeZone("UTC")
def date = new Date().format("yyyyMMdd'T'HHmmss'Z'", tz)
params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample_name}"
params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample_id}"
params.log_output_dir = "${params.output_dir_base}/log-${manifest.name}-${manifest.version}-${date}"
}
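For reference, a small Groovy sketch of the paths set_output_directory now builds, using hypothetical values for the output directory, manifest and sanitized sample ID:

    def manifest = [name: 'pipeline-call-sSNV', version: '8.0.0']                 // assumed manifest values
    def params   = [output_dir: '/hot/output', sample_id: 'TWGSAMIN000001-T001']  // assumed params
    def date     = '20231101T000000Z'
    def output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample_id}"
    def log_output_dir  = "${output_dir_base}/log-${manifest.name}-${manifest.version}-${date}"
    assert output_dir_base == '/hot/output/pipeline-call-sSNV-8.0.0/TWGSAMIN000001-T001'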

2 changes: 0 additions & 2 deletions input/call-sSNV-template-overwrite.yaml
@@ -3,10 +3,8 @@ patient_id: 'patient_id'
# For multi samples, list the BAMs under the corresponding state (normal or tumor).
input:
normal:
- id: normal_sample_id
BAM: /path/to/normal.bam

Check failure on line 6 in input/call-sSNV-template-overwrite.yaml (GitHub Actions / CICD-base): 6:7 [indentation] wrong indentation: expected 4 but found 6
tumor:
- id: tumor_sample_id
BAM: /path/to/tumor.bam

Check failure on line 8 in input/call-sSNV-template-overwrite.yaml (GitHub Actions / CICD-base): 8:7 [indentation] wrong indentation: expected 4 but found 6
contamination_table: /path/to/contamination.table
# Add extra parameters to overwrite parameters in template.config
3 changes: 0 additions & 3 deletions input/example-test-multi-sample.yaml
@@ -3,12 +3,9 @@ patient_id: 'TWGSAMIN000001'
# For multi samples, list the BAMs under the corresponding state (normal or tumor).
input:
normal:
- id: 'TWGSAMIN000001-N001-S01-F'
BAM: /hot/resource/SMC-HET/normal/bams/A-mini/n2/output/HG002.N-n2.bam

Check failure on line 6 in input/example-test-multi-sample.yaml (GitHub Actions / CICD-base): 6:7 [indentation] wrong indentation: expected 4 but found 6
tumor:
- id: 'TWGSAMIN000001-T001-S01-F'
BAM: /hot/resource/SMC-HET/tumours/A-mini/bams/n2/output/S2.T-n2.bam

Check failure on line 8 in input/example-test-multi-sample.yaml (GitHub Actions / CICD-base): 8:7 [indentation] wrong indentation: expected 4 but found 6
contamination_table: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/input/data/A-mini/S2.T-n2_getpileupsummaries_calculatecontamination.table
- id: 'TWGSAMIN000001-T002-S02-F'
BAM: /hot/resource/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam

Check failure on line 10 in input/example-test-multi-sample.yaml (GitHub Actions / CICD-base): 10:7 [key-duplicates] duplication of key "BAM" in mapping
contamination_table: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/input/data/A-mini/S2.T-n1_getpileupsummaries_calculatecontamination.table

Check failure on line 11 in input/example-test-multi-sample.yaml (GitHub Actions / CICD-base): 11:7 [key-duplicates] duplication of key "contamination_table" in mapping
1 change: 0 additions & 1 deletion input/example-test-tumor-only.yaml
@@ -2,6 +2,5 @@
patient_id: 'TWGSAMIN000001'
input:
tumor:
- id: 'TWGSAMIN000001-T001-S01-F'
BAM: /hot/resource/SMC-HET/tumours/A-mini/bams/n2/output/S2.T-n2.bam

Check failure on line 5 in input/example-test-tumor-only.yaml (GitHub Actions / CICD-base): 5:7 [indentation] wrong indentation: expected 4 but found 6
contamination_table: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/input/data/A-mini/S2.T-n1_getpileupsummaries_calculatecontamination.table
2 changes: 0 additions & 2 deletions input/example-test.yaml
@@ -3,9 +3,7 @@ patient_id: 'TWGSAMIN000001'
# For multi samples, just list all the bams under the normal or tumor.
input:
normal:
- id: 'TWGSAMIN000001-N001-S01-F'
BAM: /hot/resource/SMC-HET/normal/bams/A-mini/n2/output/HG002.N-n2.bam

Check failure on line 6 in input/example-test.yaml (GitHub Actions / CICD-base): 6:7 [indentation] wrong indentation: expected 4 but found 6
tumor:
- id: 'TWGSAMIN000001-T001-S01-F'
BAM: /hot/resource/SMC-HET/tumours/A-mini/bams/n2/output/S2.T-n2.bam

Check failure on line 8 in input/example-test.yaml (GitHub Actions / CICD-base): 8:7 [indentation] wrong indentation: expected 4 but found 6
contamination_table: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/input/data/A-mini/S2.T-n1_getpileupsummaries_calculatecontamination.table
64 changes: 26 additions & 38 deletions main.nf
@@ -11,17 +11,6 @@ include { run_validate_PipeVal } from './external/pipeline-Nextflow-module/modul
params.reference_index = "${params.reference}.fai"
params.reference_dict = "${file(params.reference).parent / file(params.reference).baseName}.dict"

params.sample_id = sanitize_string(params.sample_name)
params.output_base = params.output_dir_base.replace(params.sample_name, params.sample_id)
params.log_output = params.log_output_dir.replace(params.sample_name, params.sample_id)

if (params.single_NT_paired) {
def tumorSample = params.samples_to_process.find { it['sample_type'] == 'tumor' }
params.tumor_id = sanitize_string(tumorSample['id'])
def normalSample = params.samples_to_process.find { it['sample_type'] == 'normal' }
params.normal_id = sanitize_string(normalSample['id'])
}

log.info """\
------------------------------------
C A L L - S S N V P I P E L I N E
@@ -46,8 +35,8 @@ log.info """\
intersect_regions: ${params.intersect_regions}
- output:
output_dir: ${params.output_base}
log_output_dir: ${params.log_output}
output_dir: ${params.output_dir_base}
log_output_dir: ${params.log_output_dir}
- option:
ucla_cds: ${params.ucla_cds}
@@ -60,50 +49,49 @@ log.info """\
tumor_only_mode: ${params.tumor_only_mode}
"""

if (params.max_cpus < 16 || params.max_memory < 30) {
if (params.algorithm.contains('muse') || params.algorithm.contains('mutect2')) {
error """\
------------------------------------
ERROR: Insufficient resources: ${params.max_cpus} CPUs and ${params.max_memory} of memory.
------------------------------------
To run Mutect2 or MuSE. this pipeline requires at least 16 CPUs and 32 GB of memory.
"""
}
}

//if (params.max_cpus < 16 || params.max_memory < 30) {
// if (params.algorithm.contains('muse') || params.algorithm.contains('mutect2')) {
// error """\
// ------------------------------------
// ERROR: Insufficient resources: ${params.max_cpus} CPUs and ${params.max_memory} of memory.
// ------------------------------------
// To run Mutect2 or MuSE. this pipeline requires at least 16 CPUs and 32 GB of memory.
// """
// }
// }

include { somaticsniper } from './module/somaticsniper' addParams(
workflow_output_dir: "${params.output_base}/SomaticSniper-${params.somaticsniper_version}",
workflow_log_output_dir: "${params.log_output}/process-log/SomaticSniper-${params.somaticsniper_version}",
workflow_output_dir: "${params.output_dir_base}/SomaticSniper-${params.somaticsniper_version}",
workflow_log_output_dir: "${params.log_output_dir}/process-log/SomaticSniper-${params.somaticsniper_version}",
output_filename: generate_standard_filename("SomaticSniper-${params.somaticsniper_version}",
params.dataset_id,
params.sample_id,
[:]))
include { strelka2 } from './module/strelka2' addParams(
workflow_output_dir: "${params.output_base}/Strelka2-${params.strelka2_version}",
workflow_log_output_dir: "${params.log_output}/process-log/Strelka2-${params.strelka2_version}",
workflow_output_dir: "${params.output_dir_base}/Strelka2-${params.strelka2_version}",
workflow_log_output_dir: "${params.log_output_dir}/process-log/Strelka2-${params.strelka2_version}",
output_filename: generate_standard_filename("Strelka2-${params.strelka2_version}",
params.dataset_id,
params.sample_id,
[:]))
include { mutect2 } from './module/mutect2' addParams(
workflow_output_dir: "${params.output_base}/Mutect2-${params.GATK_version}",
workflow_log_output_dir: "${params.log_output}/process-log/Mutect2-${params.GATK_version}",
workflow_output_dir: "${params.output_dir_base}/Mutect2-${params.GATK_version}",
workflow_log_output_dir: "${params.log_output_dir}/process-log/Mutect2-${params.GATK_version}",
output_filename: generate_standard_filename("Mutect2-${params.GATK_version}",
params.dataset_id,
params.sample_id,
[:]))
include { muse } from './module/muse' addParams(
workflow_output_dir: "${params.output_base}/MuSE-${params.MuSE_version}",
workflow_log_output_dir: "${params.log_output}/process-log/MuSE-${params.MuSE_version}",
workflow_output_dir: "${params.output_dir_base}/MuSE-${params.MuSE_version}",
workflow_log_output_dir: "${params.log_output_dir}/process-log/MuSE-${params.MuSE_version}",
output_filename: generate_standard_filename("MuSE-${params.MuSE_version}",
params.dataset_id,
params.sample_id,
[:]))

include { intersect } from './module/intersect' addParams(
workflow_output_dir: "${params.output_base}/Intersect-BCFtools-${params.BCFtools_version}",
workflow_log_output_dir: "${params.log_output}/process-log/Intersect-BCFtools-${params.BCFtools_version}",
workflow_output_dir: "${params.output_dir_base}/Intersect-BCFtools-${params.BCFtools_version}",
workflow_log_output_dir: "${params.log_output_dir}/process-log/Intersect-BCFtools-${params.BCFtools_version}",
output_filename: generate_standard_filename("BCFtools-${params.BCFtools_version}",
params.dataset_id,
params.sample_id,
@@ -171,7 +159,7 @@ workflow {
run_validate_PipeVal(file_to_validate)
run_validate_PipeVal.out.validation_result.collectFile(
name: 'input_validation.txt', newLine: true,
storeDir: "${params.output_base}/validation"
storeDir: "${params.output_dir_base}/validation"
)

// Set empty channels so any unused tools don't cause failure at intersect step
@@ -190,7 +178,7 @@ workflow {
tumor_input.tumor_bam,
tumor_input.tumor_index,
normal_input.normal_bam,
normal_input.normal_index,
normal_input.normal_index
)
somaticsniper.out.gzvcf.set { somaticsniper_gzvcf_ch }
somaticsniper.out.idx.set { somaticsniper_idx_ch }
@@ -200,7 +188,7 @@ workflow {
tumor_input.tumor_bam,
tumor_input.tumor_index,
normal_input.normal_bam,
normal_input.normal_index,
normal_input.normal_index
)
strelka2.out.gzvcf.set { strelka2_gzvcf_ch }
strelka2.out.idx.set { strelka2_idx_ch }
@@ -210,7 +198,7 @@ workflow {
tumor_input.tumor_bam,
tumor_input.tumor_index,
normal_input.normal_bam,
normal_input.normal_index,
normal_input.normal_index
)
muse.out.gzvcf.set { muse_gzvcf_ch }
muse.out.idx.set { muse_idx_ch }
8 changes: 5 additions & 3 deletions module/common.nf
@@ -72,8 +72,8 @@ process rename_samples_BCFtools {
saveAs: { "${task.process.split(':')[-1]}-${var_type}/log${file(it).getName()}" }

input:
val normal_id
val tumor_id
val normal_id
val tumor_id
tuple val(var_type), path(vcf)

output:
@@ -86,7 +86,9 @@ process rename_samples_BCFtools {
set -euo pipefail
echo -e 'NORMAL\t${normal_id}' > ${params.output_filename}_samples.txt
echo -e 'TUMOR\t${tumor_id}' >> ${params.output_filename}_samples.txt
bcftools reheader -s ${params.output_filename}_samples.txt --output ${params.output_filename}_${var_type}.vcf.gz ${vcf}
bcftools reheader -s ${params.output_filename}_samples.txt \
--output ${params.output_filename}_${var_type}.vcf.gz \
${vcf}
"""
}
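The sample map written by this script is a two-column, tab-separated file that bcftools reheader -s uses to rename the caller's generic NORMAL/TUMOR VCF columns to the real sample IDs. A Groovy sketch of its contents, with made-up IDs:

    def normal_id = 'TWGSAMIN000001-N001'   // hypothetical
    def tumor_id  = 'TWGSAMIN000001-T001'   // hypothetical
    def sample_map = "NORMAL\t${normal_id}\nTUMOR\t${tumor_id}\n"
    assert sample_map.readLines() == ['NORMAL\tTWGSAMIN000001-N001', 'TUMOR\tTWGSAMIN000001-T001']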

72 changes: 41 additions & 31 deletions module/mutect2-processes.nf
@@ -54,35 +54,6 @@ process run_SplitIntervals_GATK {
"""
}


process run_GetSampleName_Mutect2 {
container params.docker_image_GATK
publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}",
mode: "copy",
pattern: "*.txt",
enabled: params.save_intermediate_files
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" }
input:
path bam

output:
env sample_name, emit: name_ch
path "sampleName.txt"
path ".command.*"

script:
"""
set -euo pipefail
gatk GetSampleName -I $bam -O sampleName.txt
sample_name=`cat sampleName.txt`
"""
}

process call_sSNV_Mutect2 {
container params.docker_image_GATK

@@ -265,7 +236,7 @@

process split_VCF_BCFtools {
container params.docker_image_BCFtools
publishDir path: "${params.workflow_output_dir}/output",
publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}",
mode: "copy",
pattern: "*.vcf.gz"
publishDir path: "${params.workflow_log_output_dir}",
@@ -284,6 +255,45 @@ process split_VCF_BCFtools {
script:
"""
set -euo pipefail
bcftools view --types $var_type --output-type z --output ${params.output_filename}_${var_type.replace('snps', 'SNV').replace('indels', 'Indel').replace('mnps', 'MNV')}.vcf.gz ${vcf}
bcftools view \
--types $var_type \
--output-type z \
--output ${params.output_filename}_${var_type.replace('snps', 'SNV').replace('indels', 'Indel').replace('mnps', 'MNV')}-split.vcf.gz \
${vcf}
"""
}
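The chained replace() calls only translate the bcftools --types value into the filename suffix used for the split VCFs; a quick check in plain Groovy:

    ['snps': 'SNV', 'indels': 'Indel', 'mnps': 'MNV'].each { var_type, expected ->
        def suffix = var_type.replace('snps', 'SNV').replace('indels', 'Indel').replace('mnps', 'MNV')
        assert suffix == expected
    }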

process rename_samples_Mutect2_BCFtools {
container params.docker_image_BCFtools
publishDir path: "${params.workflow_output_dir}/output",
mode: "copy",
pattern: "*.vcf.gz*"
publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}",
mode: "copy",
pattern: "*_samples.txt",
enabled: params.save_intermediate_files
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
saveAs: { "${task.process.split(':')[-1]}-${var_type}/log${file(it).getName()}" }

input:
tuple val(old_normal_id), val(old_tumor_id)
tuple val(new_normal_id), val(new_tumor_id)
tuple val(var_type), path(vcf)

output:
tuple val(var_type), path("*.vcf.gz"), emit: gzvcf
path ".command.*"
path "*_samples.txt"

script:
"""
set -euo pipefail
echo -e '${old_normal_id}\t${new_normal_id}' > ${params.output_filename}_samples.txt
echo -e '${old_tumor_id}\t${new_tumor_id}' >> ${params.output_filename}_samples.txt
bcftools reheader -s ${params.output_filename}_samples.txt \
--output ${params.output_filename}_${var_type.replace('snps', 'SNV').replace('indels', 'Indel').replace('mnps', 'MNV')}.vcf.gz \
${vcf}
"""
}
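This new process presumably handles the Mutect2 case, where the VCF columns are already named after the original BAM SM tags rather than NORMAL/TUMOR, so the map goes from the original tags to the sanitized IDs. A Groovy sketch with hypothetical IDs:

    def old_new = [
        'TWGSAMIN000001_N001': 'TWGSAMIN000001-N001',   // original SM tag -> sanitized id (made up)
        'TWGSAMIN000001_T001': 'TWGSAMIN000001-T001'
    ]
    def sample_map = old_new.collect { old_id, new_id -> "${old_id}\t${new_id}" }.join('\n')
    assert sample_map.readLines().size() == 2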
