Skip to content

Commit

Permalink
Merge pull request #339 from jonasscheid/global-fdr
Browse files Browse the repository at this point in the history
  • Loading branch information
jonasscheid authored Nov 26, 2024
2 parents 0861ea1 + 2c36477 commit 07cdcbb
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 72 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v2.6.1dev - [date]
## v2.7.0dev - [date]

### `Added`

- Added `PYOPENMS_CHROMATOGRAMEXTRACTOR` extracting MS1 Chromatograms and visualize them in multiQC report [#329](/~https://github.com/nf-core/mhcquant/pull/329)
- Added `OPENMS_IDMASSACCURACY` and `DATAMASH_HISTOGRAM` to compute fragment mass errors and visualizte them in multiQC report [#332](/~https://github.com/nf-core/mhcquant/pull/332)
- Added global fdr evaluation in new local subworkflow `RESCORE` [#338](/~https://github.com/nf-core/mhcquant/pull/338)

### `Fixed`

Expand Down
69 changes: 67 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,17 @@ process {
]
}

withName: 'OPENMS_IDMERGER*' {
withName: 'OPENMS_IDMERGER|OPENMS_IDMERGER_QUANT' {
ext.args = [
"-annotate_file_origin true",
"-merge_proteins_add_PSMs"
].join(' ').trim()
publishDir = [
enabled: false
]
}
withName: 'OPENMS_IDMERGER_GLOBAL' {
label = 'process_high_memory'
ext.args = [
"-annotate_file_origin true",
"-merge_proteins_add_PSMs"
Expand All @@ -106,6 +116,36 @@ process {
]
}

withName: 'OPENMS_IDFILTER_Q_VALUE_GLOBAL' {
label = 'process_high_memory'
ext.prefix = {"${meta.id}_pout_filtered"}
ext.args = [
"-remove_decoys",
"-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'",
"-delete_unreferenced_peptide_hits",
(params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold
].join(' ').trim()
publishDir = [
enabled: false
]
}

withName: 'OPENMS_IDFILTER_GLOBAL' {
label = 'process_high_memory'
ext.prefix = {"${meta.id}_pout_filtered"}
ext.args = [
"-remove_decoys",
"-precursor:length '${params.peptide_min_length}:${params.peptide_max_length}'",
"-delete_unreferenced_peptide_hits",
(params.fdr_threshold == '0.01') ? "-score:pep 0.05" : "-score:pep " + params.fdr_threshold
].join(' ').trim()
publishDir = [
path: {"${params.outdir}/intermediate_results/rescoring"},
mode: params.publish_dir_mode,
pattern: '*.idXML'
]
}

withName: 'OPENMS_IDFILTER_QUANT' {
ext.prefix = {"${meta.spectra}_fdr_filtered"}
ext.args = "-best:spectrum_per_peptide 'sequence+charge+modification'"
Expand Down Expand Up @@ -149,7 +189,7 @@ process {

process {
if (params.rescoring_engine == 'mokapot') {
withName: 'NFCORE_MHCQUANT:MHCQUANT:OPENMS_IDSCORESWITCHER' {
withName: 'NFCORE_MHCQUANT:MHCQUANT:RESCORE:OPENMS_IDSCORESWITCHER' {
ext.prefix = {"${meta.id}"}
ext.args = [
"-new_score q-value",
Expand Down Expand Up @@ -293,6 +333,22 @@ process {
]
}

withName: 'OPENMS_PERCOLATORADAPTER_GLOBAL' {
label = 'process_high'
ext.args = [
"-seed 4711",
"-trainFDR 0.05",
"-testFDR 0.05",
"-enzyme no_enzyme",
"-subset_max_train ${params.subset_max_train}",
"-post_processing_tdc",
(params.fdr_level != 'psm_level_fdrs') ? "-" + params.fdr_level : ""
].join(' ').trim()
publishDir = [
enabled: false
]
}

withName: 'OPENMS_PSMFEATUREEXTRACTOR' {
publishDir = [
path: {"${params.outdir}/intermediate_results/rescoring"},
Expand Down Expand Up @@ -339,6 +395,15 @@ process {
]
}

withName: 'OPENMS_TEXTEXPORTER_GLOBAL' {
label = 'process_high_memory'
publishDir = [
path: {"${params.outdir}/global_fdr"},
mode: params.publish_dir_mode,
pattern: '*.tsv'
]
}

withName: 'OPENMS_IDCONFLICTRESOLVER' {
publishDir = [
path: {"${params.outdir}/intermediate_results/features"},
Expand Down
2 changes: 2 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ This folder contains the intermediate results from various steps of the MHCquant
- `{Sample}_{Condition}_pout.idXML`: Unfiltered percolator output.
- `{Sample}_{Condition}_pout_filtered.idXML`: FDR-filtered percolator output.

- `global_fdr`: Contains global FDR-filtered list of all runs in a `tsv` file

- `features`: Holds information of quantified features in `featureXML` files as a result of the [FeatureFinderIdentification](https://openms.de/doxygen/release/3.0.0/html/TOPP_FeatureFinderIdentification.html) in the quantification mode.

- `ion_annotations`
Expand Down
54 changes: 28 additions & 26 deletions modules/local/openms_percolatoradapter.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process OPENMS_PERCOLATORADAPTER {
tag "$meta.id"
label 'process_low'
label 'process_medium'

conda "bioconda::openms-thirdparty=3.1.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
Expand All @@ -18,31 +18,33 @@ process OPENMS_PERCOLATORADAPTER {
task.ext.when == null || task.ext.when

script:
def prefix = task.ext.prefix ?: "${meta.id}_pout"
def args = task.ext.args ?: ''

"""
OMP_NUM_THREADS=$task.cpus \\
PercolatorAdapter -in $merged_with_features \\
-out ${prefix}.idXML \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
openms-thirdparty: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//')
END_VERSIONS
"""
def prefix = task.ext.prefix ?: "${meta.id}_pout"
def args = task.ext.args ?: ''
"""
PercolatorAdapter \\
-in $merged_with_features \\
-out ${prefix}.idXML \\
-threads $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
PercolatorAdapter: \$(PercolatorAdapter 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1)
percolator: \$(percolator -h 2>&1 | grep -E '^Percolator version(.*)' | sed 's/Percolator version //g')
END_VERSIONS
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}_pout"
def args = task.ext.args ?: ''

"""
touch ${prefix}.idXML
cat <<-END_VERSIONS > versions.yml
"${task.process}":
openms-thirdparty: \$(echo \$(FileInfo --help 2>&1) | sed 's/^.*Version: //; s/-.*\$//' | sed 's/ -*//; s/ .*\$//')
END_VERSIONS
"""
def prefix = task.ext.prefix ?: "${meta.id}_pout"
def args = task.ext.args ?: ''

"""
touch ${prefix}.idXML
cat <<-END_VERSIONS > versions.yml
"${task.process}":
PercolatorAdapter: \$(PercolatorAdapter 2>&1 | grep -E '^Version(.*)' | sed 's/Version: //g' | cut -d ' ' -f 1)
percolator: \$(percolator -h 2>&1 | grep -E '^Percolator version(.*)' | sed 's/Percolator version //g')
END_VERSIONS
"""
}
3 changes: 2 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,11 @@ params {
ms2pip_model_dir = null
deeplc_calibration_set_size = 0.15

// Percolator settings
// Rescoring engine settings
fdr_threshold = 0.01
fdr_level = 'peptide_level_fdrs'
subset_max_train = 0
global_fdr = false

// IDfilter settings
peptide_min_length = 8
Expand Down
7 changes: 7 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,13 @@
"default": 0.01,
"description": "Specify the false discovery rate threshold at which peptide hits should be selected."
},
"global_fdr": {
"type": "boolean",
"default": false,
"fa_icon": "fas fa-less-than",
"description": "Compute global FDR and backfilter sample-specific FDRs"
},

"subset_max_train": {
"type": "integer",
"hidden": true,
Expand Down
89 changes: 89 additions & 0 deletions subworkflows/local/rescore.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Prepares the raw or compressed data holding spectra information for the subsequent database search.
*/

//
// MODULE: Loaded from modules/local/
//

include { MS2RESCORE } from '../../modules/local/ms2rescore'
include { OPENMS_PSMFEATUREEXTRACTOR } from '../../modules/local/openms_psmfeatureextractor'
include { OPENMS_PERCOLATORADAPTER;
OPENMS_PERCOLATORADAPTER as OPENMS_PERCOLATORADAPTER_GLOBAL } from '../../modules/local/openms_percolatoradapter'
include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_GLOBAL } from '../../modules/local/openms_textexporter'
//
// MODULE: Installed directly from nf-core/modules
//

include { OPENMS_IDMERGER as OPENMS_IDMERGER_GLOBAL } from '../../modules/nf-core/openms/idmerger/main'
include { OPENMS_IDSCORESWITCHER } from '../../modules/nf-core/openms/idscoreswitcher/main.nf'
include { OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE;
OPENMS_IDFILTER as OPENMS_IDFILTER_Q_VALUE_GLOBAL;
OPENMS_IDFILTER as OPENMS_IDFILTER_GLOBAL } from '../../modules/nf-core/openms/idfilter/main'

workflow RESCORE {

take:
ch_merged_runs

main:
ch_versions = Channel.empty()

// Compute features via ms2rescore
MS2RESCORE(ch_merged_runs)
ch_versions = ch_versions.mix(MS2RESCORE.out.versions)

if (params.rescoring_engine == 'mokapot') {
log.warn "The rescoring engine is set to mokapot. This rescoring engine currently only supports psm-level-fdr via ms2rescore."
if (params.global_fdr) {
log.warn "Global FDR is currently not supported by mokapot. The global_fdr parameter will be ignored."
}
// Switch comet e-value to mokapot q-value
OPENMS_IDSCORESWITCHER(MS2RESCORE.out.idxml)
ch_versions = ch_versions.mix(OPENMS_IDSCORESWITCHER.out.versions)
ch_rescored_runs = OPENMS_IDSCORESWITCHER.out.idxml

// Filter by mokapot q-value
OPENMS_IDFILTER_Q_VALUE(ch_rescored_runs.map {group_meta, idxml -> [group_meta, idxml, []]})
ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions)
ch_filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.filtered

} else {
// Extract PSM features for Percolator
OPENMS_PSMFEATUREEXTRACTOR(MS2RESCORE.out.idxml.join(MS2RESCORE.out.feature_names))
ch_versions = ch_versions.mix(OPENMS_PSMFEATUREEXTRACTOR.out.versions)

// Run Percolator with local FDR
OPENMS_PERCOLATORADAPTER(OPENMS_PSMFEATUREEXTRACTOR.out.idxml)
ch_versions = ch_versions.mix(OPENMS_PERCOLATORADAPTER.out.versions)
ch_pout = OPENMS_PERCOLATORADAPTER.out.idxml

if (params.global_fdr) {
// Merge all samples into one group
OPENMS_IDMERGER_GLOBAL(OPENMS_PSMFEATUREEXTRACTOR.out.idxml.map {group_meta, idxml -> [[id:'global'], idxml] }.groupTuple())
// Run Percolator with global FDR
OPENMS_PERCOLATORADAPTER_GLOBAL(OPENMS_IDMERGER_GLOBAL.out.idxml)
ch_rescored_runs = OPENMS_PERCOLATORADAPTER_GLOBAL.out.idxml
// Filter by global percolator q-value
OPENMS_IDFILTER_Q_VALUE_GLOBAL(ch_rescored_runs.map {id, idxml -> [id, idxml, []]})
ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE_GLOBAL.out.versions)
// Backfilter sample_condition runs according to global FDR
OPENMS_IDFILTER_GLOBAL(ch_pout.combine(OPENMS_IDFILTER_Q_VALUE_GLOBAL.out.filtered.map{ it[1] }))
ch_filter_q_value = OPENMS_IDFILTER_GLOBAL.out.filtered
// Save globally merged runs in tsv
OPENMS_TEXTEXPORTER_GLOBAL(OPENMS_PERCOLATORADAPTER_GLOBAL.out.idxml)

} else {
ch_rescored_runs = ch_pout
// Filter by percolator q-value
OPENMS_IDFILTER_Q_VALUE(ch_rescored_runs.map {group_meta, idxml -> [group_meta, idxml, []]})
ch_versions = ch_versions.mix(OPENMS_IDFILTER_Q_VALUE.out.versions)
ch_filter_q_value = OPENMS_IDFILTER_Q_VALUE.out.filtered
}
}

emit:
rescored_runs = ch_rescored_runs
fdr_filtered = ch_filter_q_value
versions = ch_versions
}
Loading

0 comments on commit 07cdcbb

Please sign in to comment.