From c8904845ad2d0be12c7544781d8fe65397c791af Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 14 Feb 2024 13:33:14 -0800 Subject: [PATCH 1/7] Add working code and test --- run-nextflow-tests/Dockerfile | 33 + run-nextflow-tests/betterconfig.groovy | 216 +++++++ run-nextflow-tests/configtest.py | 265 ++++++++ run-nextflow-tests/entry.py | 27 + run-nextflow-tests/pom.xml | 24 + run-nextflow-tests/recalibrate-bam.json | 811 ++++++++++++++++++++++++ run-nextflow-tests/utils.py | 182 ++++++ 7 files changed, 1558 insertions(+) create mode 100644 run-nextflow-tests/Dockerfile create mode 100644 run-nextflow-tests/betterconfig.groovy create mode 100644 run-nextflow-tests/configtest.py create mode 100755 run-nextflow-tests/entry.py create mode 100644 run-nextflow-tests/pom.xml create mode 100644 run-nextflow-tests/recalibrate-bam.json create mode 100644 run-nextflow-tests/utils.py diff --git a/run-nextflow-tests/Dockerfile b/run-nextflow-tests/Dockerfile new file mode 100644 index 0000000..ee8c5e7 --- /dev/null +++ b/run-nextflow-tests/Dockerfile @@ -0,0 +1,33 @@ +ARG MAVEN_VERSION=3-amazoncorretto-8 +ARG NEXTFLOW_VERSION=23.10.0 + +# Download dependencies using Maven +FROM maven:${MAVEN_VERSION} AS builder +COPY pom.xml /pom.xml +RUN mvn --batch-mode dependency:copy-dependencies -DoutputDirectory=/bljars + +FROM nextflow/nextflow:${NEXTFLOW_VERSION} + +COPY --from=builder /bljars /bljars + +ARG NEXTFLOW_VERSION +# This should be fixed for a given version +ARG NEXTFLOW_MD5=acbb51bf66024671292c890f7d60ca8b +ENV NXF_LAUNCHER=/.nextflow/tmp/launcher/nextflow-one_${NEXTFLOW_VERSION}/buildkitsandbox +ENV NXF_DISABLE_CHECK_LATEST=true + +# Modify the Nextflow launcher script to: +# 1. Append the new jars to the classpath +# 2. 
Replace the Nextflow entrypoint with groovy +RUN BL_JARS=$(find /bljars/ -not -name 'groovy-3*' -type f -printf ":%p") && \ + sed \ + -i \ + -e "s|\" \"nextflow.cli.Launcher\"|$BL_JARS\" \"groovy.ui.GroovyMain\"|" \ + ${NXF_LAUNCHER}/classpath-${NEXTFLOW_MD5} + +# Copy in the `nextflow config`-like groovy script +COPY betterconfig.groovy /usr/local/bltests/ +WORKDIR /mnt/pipeline + +ENTRYPOINT ["nextflow"] +CMD ["/usr/local/bltests/betterconfig.groovy"] diff --git a/run-nextflow-tests/betterconfig.groovy b/run-nextflow-tests/betterconfig.groovy new file mode 100644 index 0000000..7f0c1e7 --- /dev/null +++ b/run-nextflow-tests/betterconfig.groovy @@ -0,0 +1,216 @@ +import java.nio.file.Paths + +import groovy.json.JsonSlurper +import groovy.lang.Closure +import groovy.lang.ProxyMetaClass +import groovy.util.ConfigObject + +import nextflow.cli.CliOptions +import nextflow.cli.CmdRun +import nextflow.config.ConfigBuilder +import nextflow.plugin.Plugins +import nextflow.util.ConfigHelper + +// Adapted from +// https://blog.mrhaki.com/2009/11/groovy-goodness-intercept-methods-with.html +class UserInterceptor implements Interceptor { + // This class intercepts every method call on ConfigObjects. If the method + // name is in the list of mocked methods, the original method is not called + // and a static value is returned instead. This class cannot mock static + // methods. 
+ + boolean invokeMethod = true + Map mocks + + UserInterceptor(String mock_file) { + def jsonSlurper = new JsonSlurper() + + this.mocks = jsonSlurper.parse(new File(mock_file)) + assert this.mocks instanceof Map + } + + boolean doInvoke() { + invokeMethod + } + + Object beforeInvoke(Object obj, String name, Object[] args) { + if (mocks.containsKey(name)) { + invokeMethod = false + return mocks[name] + } + + } + + Object afterInvoke(Object obj, String name, Object[] args, Object result) { + if (!invokeMethod) { + invokeMethod = true + } + + result + } +} + +class NeedsTaskException extends Exception { + NeedsTaskException(String message) { + super(message) + } +} + +// Adapted from +// https://blog.mrhaki.com/2009/11/groovy-goodness-intercept-methods-with.html +class TaskInterceptor implements Interceptor { + // This class is specifically intended to mock closures with a string + // representing their contents. + boolean invokeMethod = true + String current_process = null + int current_attempt = 1 + boolean allow_getting_task = false + boolean do_representation = false + def represented_methods = ["check_limits", "retry_updater"] + + boolean doInvoke() { + invokeMethod + } + + Object beforeInvoke(Object obj, String name, Object[] args) { + if (name == "get" && args[0] == "task") { + if (!allow_getting_task) { + throw new NeedsTaskException("Problem!") + } + + obj.task.process = current_process + obj.task.cpus = '$task.cpus' + + if (do_representation) { + obj.task.attempt = '$task.attempt' + } else { + obj.task.attempt = current_attempt + } + } + + if (do_representation && represented_methods.contains(name) ) { + invokeMethod = false + return "$name(${args.join(', ')})" + } + } + + Object afterInvoke(Object obj, String name, Object[] args, Object result) { + if (!invokeMethod) { + invokeMethod = true + } + + result + } +} + +void walk(interceptor, root, config_obj) { + config_obj.each { key, value -> + if (root == "process") { + interceptor.current_process = key + } 
+ + if (value instanceof Closure) { + try { + try { + config_obj[key] = value.call() + } catch (NeedsTaskException e) { + // Okay, see what resources it demands on the first three + // attempts + interceptor.allow_getting_task = true + config_obj[key] = [:] + + // Add the representation value + interceptor.do_representation = true + try { + config_obj[key]['closure'] = value.call() + } catch (Exception) { + // This is probably an attempt to evaluate + // method(1 * task.attempt) - the argument is evaluated + // with a static method (java.lang.Integer.multiply), + // and I can't figure out a way around that + config_obj[key]['closure'] = "closure()" + } + interceptor.do_representation = false + + // Add the results from attempts 1-3 + interceptor.current_attempt = 1 + config_obj[key][1] = value.call() + interceptor.current_attempt = 2 + config_obj[key][2] = value.call() + interceptor.current_attempt = 3 + config_obj[key][3] = value.call() + + interceptor.allow_getting_task = false + } + } catch (Exception e) { + System.out.println("Problem while expanding closure $root.$key") + throw e + } + } else if (value instanceof ConfigObject) { + walk(interceptor, "$root.$key", value) + } + + if (root == "process") { + interceptor.current_process = null + } + } +} + +// This method is a mix of +// https://github.com/nextflow-io/nextflow/blob/7caffef977e0fa16177b0e7838e2b2b114c223b6/modules/nextflow/src/main/groovy/nextflow/cli/CmdConfig.groovy#L71-L114 +// and +// https://github.com/nextflow-io/nextflow/blob/5e2ce9ed82ccbc70ec24a83e04f24b8d45855a78/modules/nextflow/src/main/groovy/nextflow/config/ConfigBuilder.groovy#L901-L906 +void print_configuration() { + // I don't know if this is necessary, but it seems harmless to leave in-place + Plugins.init() + + // This is the equivalent of '-c <filename>'. The config file itself is + // generated on-the-fly to mock out the System.* calls before including the + // true config files. 
+ + def launcher_options = new CliOptions() + launcher_options.userConfig = [System.getenv("BL_CONFIG_FILE")] + + // This is the equivalent of '-params-file <filename>' + def cmdRun = new CmdRun() + cmdRun.paramsFile = System.getenv("BL_PARAMS_FILE") + + // This is the equivalent of '--param1=value1 --param2=value2' + def jsonSlurper = new JsonSlurper() + def cli_config = jsonSlurper.parse(new File(System.getenv("BL_CLI_PARAMS_FILE"))) + assert cli_config instanceof Map + cli_config.each { key, value -> + cmdRun.params."${key}" = value + } + + def builder = new ConfigBuilder() + .setShowClosures(false) + .showMissingVariables(true) + .setOptions(launcher_options) + .setCmdRun(cmdRun) + // Without this, both baseDir and projectDir would be incorrect + .setBaseDir(Paths.get(System.getenv("BL_PIPELINE_DIR"))) + + // Build the configuration with an interceptor to mock out user-defined + // functions + def proxy = ProxyMetaClass.getInstance(ConfigObject) + proxy.interceptor = new UserInterceptor(System.getenv("BL_MOCKS_FILE")) + + def config + + proxy.use { + config = builder.buildConfigObject() + } + + // Attempt to expand all of the remaining closures under process with some + // fancy mocking of `task`. + def interceptor = new TaskInterceptor() + proxy.interceptor = interceptor + // Walk the config and resolve all of the closures + proxy.use { + walk(interceptor, "process", config.process) + } + + System.out << ConfigHelper.toPropertiesString(config, false) +} + +print_configuration() diff --git a/run-nextflow-tests/configtest.py b/run-nextflow-tests/configtest.py new file mode 100644 index 0000000..105fd85 --- /dev/null +++ b/run-nextflow-tests/configtest.py @@ -0,0 +1,265 @@ +""" +The class representation of a Nextflow configuration test. 
+""" +import dataclasses +import itertools +import json +import re +import subprocess +import tempfile +import textwrap + +from contextlib import ExitStack +from pathlib import Path + +from utils import build_image, parse_config, diff_json + + +@dataclasses.dataclass +class ConfigTest: + "A class representing a single Nextflow configuration test." + # pylint: disable=too-many-instance-attributes + config: list[str] + params_file: str + cpus: int + memory_gb: float + + empty_files: list[str] + mapped_files: dict[str, str] + nf_params: dict[str, str] + envvars: dict[str, str] + mocks: dict + + dated_fields: list[str] + + expected_result: dict + + @classmethod + def from_file(cls, filepath: Path): + "Load a ConfigTest from a file." + with filepath.open(mode="rb") as infile: + data = json.load(infile) + + return cls(**data) + + def check_results(self, pipeline_dir: Path) -> bool: + "Run the test against the given pipeline directory." + raise NotImplementedError() + + def to_file(self, filepath): + "Serialize a ConfigTest to a file." + with filepath.open(mode="w") as outfile: + json.dump( + dataclasses.asdict(self), + outfile, + indent=2, + sort_keys=False + ) + + +class NextflowConfigTest(ConfigTest): + "A subclass." + SENTINEL = "=========SENTINEL_OUTPUT==========" + CONTAINER_DIR = Path("/mnt/bl_tests") + + @classmethod + def from_file(cls, filepath: Path): + "Load a ConfigTest from a file." + result = super().from_file(filepath) + result.filepath = filepath + return result + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.filepath = None + + def _run_test(self, pipeline_dir: Path): + "Get the resolved config of this pipeline." 
+ # pylint: disable=too-many-locals + with ExitStack() as stack: + # Make a temporary directory on the host to hold all of the + # scaffolding files for this test + tempdir = stack.enter_context(tempfile.TemporaryDirectory()) + + # Make a wrapper config file that will mock out the system calls + # before including the real config file(s) + config_file = Path(tempdir, "docker_test.config") + with config_file.open(mode="w", encoding="utf-8") as outfile: + outfile.write(textwrap.dedent("""\ + import nextflow.util.SysHelper + import nextflow.util.MemoryUnit + import static org.mockito.Mockito.* + import org.mockito.MockedStatic + + """)) + + outfile.write(textwrap.dedent(f"""\ + try (MockedStatic dummyhelper = mockStatic( + SysHelper.class, + CALLS_REAL_METHODS)) {{ + dummyhelper + .when(SysHelper::getAvailCpus) + .thenReturn({self.cpus}); + dummyhelper + .when(SysHelper::getAvailMemory) + .thenReturn(MemoryUnit.of("{self.memory_gb}GB")); + """)) + + for configfile in self.config: + outfile.write( + f' includeConfig "{pipeline_dir / configfile}"\n' + ) + + # The config files can print arbitrary text to stdout; include + # this sentinel value so that we only parse the result of + # printing the configuration + outfile.write(f'}}\n\nSystem.out.println("{self.SENTINEL}")\n') + + # Write the Nextflow command-line parameters to a JSON file + cli_params_file = Path(tempdir, "cli_params.json") + cli_params_file.write_text( + json.dumps(self.nf_params), + encoding="utf-8" + ) + + # Write the mocked methods and results to a JSON file + mocks_file = Path(tempdir, "test_mocks.json") + mocks_file.write_text( + json.dumps(self.mocks), + encoding="utf-8" + ) + + # Generate a list of volume-mount arguments + mounts = [ + (pipeline_dir, pipeline_dir), + (tempdir, self.CONTAINER_DIR), + ] + + for empty_file in self.empty_files: + mounts.append([ + stack.enter_context(tempfile.NamedTemporaryFile()).name, + empty_file + ]) + + mount_args = [] + + for hpath, cpath in 
itertools.chain(mounts, self.mapped_files): + mount_args.extend( + ["--volume", f"{pipeline_dir / hpath}:{cpath}"] + ) + + # Generate a list of environment variable arguments + envvars = { + **self.envvars, + "BL_PIPELINE_DIR": pipeline_dir, + "BL_CONFIG_FILE": self.CONTAINER_DIR / config_file.name, + "BL_MOCKS_FILE": self.CONTAINER_DIR / mocks_file.name, + "BL_CLI_PARAMS_FILE": + self.CONTAINER_DIR / cli_params_file.name, + } + + if self.params_file: + envvars["BL_PARAMS_FILE"] = pipeline_dir / self.params_file + + envvar_args = [] + for key, value in envvars.items(): + envvar_args.extend(["--env", f"{key}={value}"]) + + container_id = None + + try: + # Launch the docker container in the background and immediately + # capture the container ID (so that we can clean up afterwards) + container_id = subprocess.run( + [ + "docker", + "run", + "--detach", + *mount_args, + *envvar_args, + build_image(), + ], + capture_output=True, + check=True, + ).stdout.decode("utf-8").strip() + + process = subprocess.run( + ["docker", "attach", container_id], + capture_output=True, + check=True + ) + config_output = process.stdout.decode("utf-8") + + except subprocess.CalledProcessError as err: + print(err.cmd) + print(err.stdout.decode("utf-8")) + print(err.stderr.decode("utf-8")) + raise + + finally: + if container_id is not None: + subprocess.run( + ["docker", "stop", container_id], + capture_output=True, + check=False, + ) + subprocess.run( + ["docker", "rm", container_id], + capture_output=True, + check=False + ) + + config_text = config_output.rsplit(self.SENTINEL, maxsplit=1)[-1] + + try: + return parse_config(config_text) + except Exception: + print(config_output) + raise + + def check_results(self, pipeline_dir: Path) -> bool: + "Compare the results." 
+ result = self._run_test(pipeline_dir) + + # These are namespaces defined in the common submodules + boring_keys = { + 'csv_parser', + 'custom_schema_types', + 'methods', + 'retry', + 'schema', + 'bam_parser', + 'json_extractor', + } + + for key in boring_keys: + result.pop(key, None) + + differences = diff_json(self.expected_result, result) + + # Filter out any differences resulting from dates + date_re = re.compile(r"\d{8}T\d{6}Z") + for index, (jsonpath, original, updated) in \ + reversed(list(enumerate(differences))): + if re.sub(r"^\.+", "", jsonpath) in self.dated_fields: + if date_re.sub("", original) == date_re.sub("", updated): + differences.pop(index) + + if differences: + for key, original, updated in differences: + print(key) + print(original) + print(updated) + print("------") + + if self.filepath: + outpath = self.filepath.with_stem(self.filepath.stem + "-out") + print("Saving updated file to", outpath) + dataclasses.replace( + self, + expected_result=result + ).to_file(outpath) + + return False + + return True diff --git a/run-nextflow-tests/entry.py b/run-nextflow-tests/entry.py new file mode 100755 index 0000000..fc783d1 --- /dev/null +++ b/run-nextflow-tests/entry.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +"Quick entrypoint for this tool." +import argparse +import sys +from pathlib import Path + +from configtest import NextflowConfigTest + + +def run_recalibrate_bam_test(pipeline: Path): + "Run the bundled pipeline-recalibrate-BAM test." 
+ testobj = NextflowConfigTest.from_file( + Path(__file__).resolve().parent / "recalibrate-bam.json" + ) + + if testobj.check_results(pipeline): + print("No changes!") + else: + sys.exit(1) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("recalibrate_bam_path") + + args = parser.parse_args() + run_recalibrate_bam_test(Path(args.recalibrate_bam_path)) diff --git a/run-nextflow-tests/pom.xml b/run-nextflow-tests/pom.xml new file mode 100644 index 0000000..5509548 --- /dev/null +++ b/run-nextflow-tests/pom.xml @@ -0,0 +1,24 @@ + + + 4.0.0 + boutros.lab + nextflow-config-validator + 1 + + + org.codehaus.groovy + groovy-test + 3.0.19 + + + com.github.stefanbirkner + system-rules + 1.19.0 + + + org.mockito + mockito-core + 5.10.0 + + + diff --git a/run-nextflow-tests/recalibrate-bam.json b/run-nextflow-tests/recalibrate-bam.json new file mode 100644 index 0000000..ef5e98d --- /dev/null +++ b/run-nextflow-tests/recalibrate-bam.json @@ -0,0 +1,811 @@ +{ + "config": [ + "test/nftest.config" + ], + "params_file": "test/single.yaml", + "cpus": 16, + "memory_gb": 31, + "empty_files": [ + "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta", + "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz", + "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz", + "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz", + "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz", + "/hot/resource/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam" + ], + "mapped_files": [], + "nf_params": { + "output_dir": "/tmp/outputs" + }, + "envvars": { + "SLURM_JOB_ID": "851543" + }, + "mocks": { + "parse_bam_header": { + "read_group": [ + { + "SM": "4915723" + } + ] + } + }, + "dated_fields": [ + "params.log_output_dir", + "report.file", + "timeline.file", + "trace.file" + ], 
+ "expected_result": { + "docker": { + "all_group_ids": "$(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "enabled": true, + "runOptions": "-u $(id -u):$(id -g) $(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "uid_and_gid": "-u $(id -u):$(id -g)" + }, + "manifest": { + "author": "Yash Patel", + "description": "Nextflow pipeline to perform Indel Realignment and Base Quality Score Recalibration", + "name": "recalibrate-BAM", + "version": "1.0.0-rc.4" + }, + "params": { + "aligner": "BWA-MEM2-2.2.1", + "blcds_registered_dataset": false, + "bundle_contest_hapmap_3p3_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz", + "bundle_known_indels_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz", + "bundle_mills_and_1000g_gold_standard_indels_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz", + "bundle_v0_dbsnp138_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz", + "cache_intermediate_pipeline_steps": false, + "dataset_id": "A-mini", + "docker_container_registry": "ghcr.io/uclahs-cds", + "docker_image_gatk": "broadinstitute/gatk:4.2.4.1", + "docker_image_gatk3": "ghcr.io/uclahs-cds/call-gsnp:GATK-3.7.0", + "docker_image_picard": "ghcr.io/uclahs-cds/picard:2.26.10", + "docker_image_pipeval": "ghcr.io/uclahs-cds/pipeval:4.0.0-rc.2", + "docker_image_samtools": "ghcr.io/uclahs-cds/samtools:1.17", + "gatk3_version": "GATK-3.7.0", + "gatk_command_mem_diff": "0", + "gatk_ir_compression": "1", + "gatk_version": "4.2.4.1", + "input": { + "BAM": { + "tumor": [ + "/hot/resource/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam" + ] + }, + "recalibration_table": [ + "/scratch/851543/NO_FILE.grp" + ] + }, + "intervals": "", + "is_DOC_run": false, + "is_emit_original_quals": true, + "is_targeted": false, + 
"log_output_dir": "/tmp/outputs/recalibrate-BAM-1.0.0-rc.4/TWGSAMIN000001/log-recalibrate-BAM-1.0.0-rc.4-20240214T213139Z", + "max_cpus": "16", + "max_memory": "31 GB", + "metapipeline_delete_input_bams": false, + "metapipeline_states_to_delete": [ + "normal", + "tumor" + ], + "min_cpus": "1", + "min_memory": "1 MB", + "output_dir": "/tmp/outputs", + "output_dir_base": "/tmp/outputs/recalibrate-BAM-1.0.0-rc.4/TWGSAMIN000001/GATK-4.2.4.1", + "parallelize_by_chromosome": true, + "patient_id": "TWGSAMIN000001", + "picard_version": "2.26.10", + "pipeval_version": "4.0.0-rc.2", + "proc_resource_params": { + "deduplicate_records_SAMtools": { + "cpus": "2", + "memory": "27.9 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "extract_GenomeIntervals": { + "cpus": "1", + "memory": "1 GB" + }, + "remove_intermediate_files": { + "cpus": "1", + "memory": "1 GB" + }, + "remove_merged_BAM": { + "cpus": "1", + "memory": "1 GB" + }, + "remove_unmerged_BAMs": { + "cpus": "1", + "memory": "1 GB" + }, + "run_ApplyBQSR_GATK": { + "cpus": "1", + "memory": "2 GB", + "retry_strategy": { + "memory": { + "operand": "4", + "strategy": "exponential" + } + } + }, + "run_BaseRecalibrator_GATK": { + "cpus": "1", + "memory": "27.9 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_CalculateContamination_GATK": { + "cpus": "1", + "memory": "14 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_DepthOfCoverage_GATK": { + "cpus": "1", + "memory": "14 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_GetPileupSummaries_GATK": { + "cpus": "1", + "memory": "14 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_IndelRealigner_GATK": { + "cpus": "2", + "memory": "4 GB", + "retry_strategy": { + "memory": { + "operand": "4", + 
"strategy": "exponential" + } + } + }, + "run_MergeSamFiles_Picard": { + "cpus": "2", + "memory": "27.9 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_RealignerTargetCreator_GATK": { + "cpus": "2", + "memory": "4 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_SplitIntervals_GATK": { + "cpus": "1", + "memory": "1 GB" + }, + "run_index_SAMtools": { + "cpus": "1", + "memory": "2 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "reference_fasta": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta", + "samples_to_process": [ + { + "id": "4915723", + "path": "/hot/resource/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam", + "sample_type": "tumor" + } + ], + "samtools_version": "1.17", + "save_intermediate_files": false, + "scatter_count": "50", + "split_intervals_extra_args": "", + "ucla_cds": true, + "use_recal_tables": false, + "work_dir": "/scratch/851543" + }, + "params_schema": { + "aligner": { + "help": "Aligner used to align input BAMs. 
Provided as -", + "required": true, + "type": "AlignerTool" + }, + "base_resource_update": { + "elements": { + "cpus": { + "help": "List of CPU updates", + "required": false, + "type": "ResourceUpdateList" + }, + "memory": { + "help": "List of memory updates", + "required": false, + "type": "ResourceUpdateList" + } + }, + "help": "User-defined modifications for adjusting base resource allocations for processes", + "required": false, + "type": "ResourceUpdateNamespace" + }, + "bundle_contest_hapmap_3p3_vcf_gz": { + "help": "Absolute path to ConEst HapMap 3p3 VCF", + "mode": "r", + "required": true, + "type": "Path" + }, + "bundle_known_indels_vcf_gz": { + "help": "Absolute path to known INDELs VCF", + "mode": "r", + "required": true, + "type": "Path" + }, + "bundle_mills_and_1000g_gold_standard_indels_vcf_gz": { + "help": "Absolute path to Mills and 1000g gold standard INDELs VCF", + "mode": "r", + "required": true, + "type": "Path" + }, + "bundle_v0_dbsnp138_vcf_gz": { + "help": "Absolute path to v0 dbSNP 138 VCF", + "mode": "r", + "required": true, + "type": "Path" + }, + "dataset_id": { + "help": "Dataset ID", + "required": true, + "type": "String" + }, + "gatk_ir_compression": { + "choices": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], + "default": "1", + "help": "", + "required": false, + "type": "Integer" + }, + "input": { + "elements": { + "BAM": { + "elements": { + "normal": { + "help": "Input normal BAMs", + "required": false, + "type": "BAMEntryList" + }, + "tumor": { + "help": "Input tumor BAMs", + "required": false, + "type": "BAMEntryList" + } + }, + "help": "Input BAMs for calling", + "required": true, + "type": "InputBAMNamespace" + }, + "recalibration_table": { + "allow_empty": false, + "help": "List of any available recalibration tables", + "required": false, + "type": "RecalibrationTableList" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "intervals": { + "allow_empty": true, 
+ "help": "Target intervals to process for DNA panel/targeted sequencing samples; leave empty for WGS", + "required": true, + "type": "String" + }, + "is_DOC_run": { + "default": false, + "help": "Whether to run the DepthOfCoverage process, which is very time-consuming for large BAMs", + "required": true, + "type": "Bool" + }, + "is_emit_original_quals": { + "default": true, + "help": "Whether to emit original quality scores after recalibration", + "required": true, + "type": "Bool" + }, + "metapipeline_delete_input_bams": { + "default": false, + "help": "Whether to delete the input BAMs", + "required": true, + "type": "Bool" + }, + "metapipeline_final_output_dir": { + "help": "Directory containing final outputs to check before input deletion", + "required": false, + "type": "String" + }, + "metapipeline_states_to_delete": { + "choice": [ + "normal", + "tumor" + ], + "default": [ + "normal", + "tumor" + ], + "help": "List of states for which to delete input BAMs", + "required": true, + "type": "List" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "patient_id": { + "help": "Patient ID", + "required": true, + "type": "String" + }, + "reference_fasta": { + "help": "Absolute path to reference genome fasta", + "mode": "r", + "required": true, + "type": "Path" + }, + "save_intermediate_files": { + "default": false, + "help": "Whether to save intermediate files", + "required": true, + "type": "Bool" + }, + "scatter_count": { + "default": "50", + "help": "How many intervals to divide the genome into for parallelization", + "required": true, + "type": "Integer" + }, + "split_intervals_extra_args": { + "allow_empty": true, + "help": "Extra arguments for interval splitting", + "required": false, + "type": "String" + } + }, + "proc_name_keys": [ + "withName:run_validate_PipeVal", + "withName:extract_GenomeIntervals", + "withName:run_SplitIntervals_GATK", + 
"withName:run_RealignerTargetCreator_GATK", + "withName:run_IndelRealigner_GATK", + "withName:run_BaseRecalibrator_GATK", + "withName:run_ApplyBQSR_GATK", + "withName:run_MergeSamFiles_Picard", + "withName:deduplicate_records_SAMtools", + "withName:run_index_SAMtools", + "withName:run_GetPileupSummaries_GATK", + "withName:run_CalculateContamination_GATK", + "withName:run_DepthOfCoverage_GATK", + "withName:remove_intermediate_files", + "withName:remove_unmerged_BAMs", + "withName:remove_merged_BAM" + ], + "proc_names": "[Ljava.lang.String;@7cf166db", + "process": { + "cache": false, + "containerOptions": { + "1": "--cpu-shares 1024 --cpus $task.cpus", + "2": "--cpu-shares 1024 --cpus $task.cpus", + "3": "--cpu-shares 1024 --cpus $task.cpus", + "closure": "--cpu-shares 1024 --cpus $task.cpus" + }, + "cpus": { + "1": "1", + "2": "2", + "3": "3", + "closure": "closure()" + }, + "errorStrategy": { + "1": "terminate", + "2": "terminate", + "3": "terminate", + "closure": "terminate" + }, + "executor": "local", + "maxRetries": "1", + "memory": "31 GB", + "withLabel:process_high": { + "cpus": { + "1": "12", + "2": "12", + "3": "12", + "closure": "retry_updater(12, add, 0, $task.attempt, cpus)" + }, + "memory": { + "1": "31 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "retry_updater(84 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withLabel:process_low": { + "cpus": { + "1": "2", + "2": "2", + "3": "2", + "closure": "retry_updater(2, add, 0, $task.attempt, cpus)" + }, + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "12 GB", + "closure": "retry_updater(3 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withLabel:process_medium": { + "cpus": { + "1": "6", + "2": "6", + "3": "6", + "closure": "retry_updater(6, add, 0, $task.attempt, cpus)" + }, + "memory": { + "1": "31 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "retry_updater(42 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:deduplicate_records_SAMtools": { + "cpus": "2", + 
"memory": { + "1": "27.9 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "retry_updater(27.9 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:extract_GenomeIntervals": { + "cpus": "1", + "memory": "1 GB" + }, + "withName:remove_intermediate_files": { + "cpus": "1", + "memory": "1 GB" + }, + "withName:remove_merged_BAM": { + "cpus": "1", + "memory": "1 GB" + }, + "withName:remove_unmerged_BAMs": { + "cpus": "1", + "memory": "1 GB" + }, + "withName:run_ApplyBQSR_GATK": { + "cpus": "1", + "memory": { + "1": "2 GB", + "2": "8 GB", + "3": "31 GB", + "closure": "retry_updater(2 GB, exponential, 4, $task.attempt, memory)" + } + }, + "withName:run_BaseRecalibrator_GATK": { + "cpus": "1", + "memory": { + "1": "27.9 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "retry_updater(27.9 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_CalculateContamination_GATK": { + "cpus": "1", + "memory": { + "1": "14 GB", + "2": "27.9 GB", + "3": "31 GB", + "closure": "retry_updater(14 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_DepthOfCoverage_GATK": { + "cpus": "1", + "memory": { + "1": "14 GB", + "2": "27.9 GB", + "3": "31 GB", + "closure": "retry_updater(14 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_GetPileupSummaries_GATK": { + "cpus": "1", + "memory": { + "1": "14 GB", + "2": "27.9 GB", + "3": "31 GB", + "closure": "retry_updater(14 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_IndelRealigner_GATK": { + "cpus": "2", + "memory": { + "1": "4 GB", + "2": "16 GB", + "3": "31 GB", + "closure": "retry_updater(4 GB, exponential, 4, $task.attempt, memory)" + } + }, + "withName:run_MergeSamFiles_Picard": { + "cpus": "2", + "memory": { + "1": "27.9 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "retry_updater(27.9 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_RealignerTargetCreator_GATK": { + "cpus": "2", + "memory": { + "1": "4 GB", + "2": "8 GB", + "3": 
"16 GB", + "closure": "retry_updater(4 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_SplitIntervals_GATK": { + "cpus": "1", + "memory": "1 GB" + }, + "withName:run_index_SAMtools": { + "cpus": "1", + "memory": { + "1": "2 GB", + "2": "4 GB", + "3": "8 GB", + "closure": "retry_updater(2 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "report": { + "enabled": true, + "file": "/tmp/outputs/recalibrate-BAM-1.0.0-rc.4/TWGSAMIN000001/log-recalibrate-BAM-1.0.0-rc.4-20240214T213139Z/nextflow-log/report.html" + }, + "timeline": { + "enabled": true, + "file": "/tmp/outputs/recalibrate-BAM-1.0.0-rc.4/TWGSAMIN000001/log-recalibrate-BAM-1.0.0-rc.4-20240214T213139Z/nextflow-log/timeline.html" + }, + "trace": { + "enabled": true, + "file": "/tmp/outputs/recalibrate-BAM-1.0.0-rc.4/TWGSAMIN000001/log-recalibrate-BAM-1.0.0-rc.4-20240214T213139Z/nextflow-log/trace.txt" + }, + "workDir": "/scratch/851543", + "yaml": { + "aligner": { + "help": "Aligner used to align input BAMs. 
Provided as -", + "required": true, + "type": "AlignerTool" + }, + "base_resource_update": { + "elements": { + "cpus": { + "help": "List of CPU updates", + "required": false, + "type": "ResourceUpdateList" + }, + "memory": { + "help": "List of memory updates", + "required": false, + "type": "ResourceUpdateList" + } + }, + "help": "User-defined modifications for adjusting base resource allocations for processes", + "required": false, + "type": "ResourceUpdateNamespace" + }, + "bundle_contest_hapmap_3p3_vcf_gz": { + "help": "Absolute path to ConEst HapMap 3p3 VCF", + "mode": "r", + "required": true, + "type": "Path" + }, + "bundle_known_indels_vcf_gz": { + "help": "Absolute path to known INDELs VCF", + "mode": "r", + "required": true, + "type": "Path" + }, + "bundle_mills_and_1000g_gold_standard_indels_vcf_gz": { + "help": "Absolute path to Mills and 1000g gold standard INDELs VCF", + "mode": "r", + "required": true, + "type": "Path" + }, + "bundle_v0_dbsnp138_vcf_gz": { + "help": "Absolute path to v0 dbSNP 138 VCF", + "mode": "r", + "required": true, + "type": "Path" + }, + "dataset_id": { + "help": "Dataset ID", + "required": true, + "type": "String" + }, + "gatk_ir_compression": { + "choices": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], + "default": "1", + "help": "", + "required": false, + "type": "Integer" + }, + "input": { + "elements": { + "BAM": { + "elements": { + "normal": { + "help": "Input normal BAMs", + "required": false, + "type": "BAMEntryList" + }, + "tumor": { + "help": "Input tumor BAMs", + "required": false, + "type": "BAMEntryList" + } + }, + "help": "Input BAMs for calling", + "required": true, + "type": "InputBAMNamespace" + }, + "recalibration_table": { + "allow_empty": false, + "help": "List of any available recalibration tables", + "required": false, + "type": "RecalibrationTableList" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "intervals": { + "allow_empty": true, 
+ "help": "Target intervals to process for DNA panel/targeted sequencing samples; leave empty for WGS", + "required": true, + "type": "String" + }, + "is_DOC_run": { + "default": false, + "help": "Whether to run the DepthOfCoverage process, which is very time-consuming for large BAMs", + "required": true, + "type": "Bool" + }, + "is_emit_original_quals": { + "default": true, + "help": "Whether to emit original quality scores after recalibration", + "required": true, + "type": "Bool" + }, + "metapipeline_delete_input_bams": { + "default": false, + "help": "Whether to delete the input BAMs", + "required": true, + "type": "Bool" + }, + "metapipeline_final_output_dir": { + "help": "Directory containing final outputs to check before input deletion", + "required": false, + "type": "String" + }, + "metapipeline_states_to_delete": { + "choice": [ + "normal", + "tumor" + ], + "default": [ + "normal", + "tumor" + ], + "help": "List of states for which to delete input BAMs", + "required": true, + "type": "List" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "patient_id": { + "help": "Patient ID", + "required": true, + "type": "String" + }, + "reference_fasta": { + "help": "Absolute path to reference genome fasta", + "mode": "r", + "required": true, + "type": "Path" + }, + "save_intermediate_files": { + "default": false, + "help": "Whether to save intermediate files", + "required": true, + "type": "Bool" + }, + "scatter_count": { + "default": "50", + "help": "How many intervals to divide the genome into for parallelization", + "required": true, + "type": "Integer" + }, + "split_intervals_extra_args": { + "allow_empty": true, + "help": "Extra arguments for interval splitting", + "required": false, + "type": "String" + } + } + } +} \ No newline at end of file diff --git a/run-nextflow-tests/utils.py b/run-nextflow-tests/utils.py new file mode 100644 index 0000000..5079b0f --- /dev/null +++ 
b/run-nextflow-tests/utils.py @@ -0,0 +1,182 @@ +""" +Utility methods. +""" +import collections.abc +import subprocess +import itertools +import re +import json + +from pathlib import Path + + +ESCAPE_RE = re.compile(r"([^\\])\\([ =:])") +CLOSURE_RE = re.compile(r"^Script\S+_run_closure") + + +def build_image() -> str: + "Build the image and return the name." + image = "configtester" + context_dir = Path(__file__).resolve().parent + try: + subprocess.run( + ["docker", "build", context_dir, "-t", image], + capture_output=True, + check=True + ) + except subprocess.CalledProcessError as err: + print(err.stdout.decode("utf-8")) + print(err.stderr.decode("utf-8")) + raise + + return image + + +def diff_json(alpha, beta): + """ + Recursively generate differences. + + Differences are returned a list of (jsonpath, before, after) tuples. + """ + # pylint: disable=too-many-branches + results = [] + + if alpha == beta: + # They're the same - great! + pass + + elif not isinstance(alpha, type(beta)): + # Incomparable - bail out + results.append(("", alpha, beta)) + + elif isinstance(alpha, collections.abc.Mapping): + for key, value in alpha.items(): + if key in beta: + for sub_result in diff_json(value, beta[key]): + results.append(( + f".{key}{sub_result[0]}", + sub_result[1], + sub_result[2] + )) + else: + results.append((key, value, None)) + + for key, value in beta.items(): + if key not in alpha: + results.append((key, None, value)) + + elif isinstance(alpha, collections.abc.Sequence) and \ + not isinstance(alpha, str): + + for index, (alpha_val, beta_val) in \ + enumerate(itertools.zip_longest(alpha, beta)): + for sub_result in diff_json(alpha_val, beta_val): + results.append(( + f"[{index}]{sub_result[0]}", + sub_result[1], + sub_result[2] + )) + + else: + # They're not collections and they are different + results.append(("", alpha, beta)) + + return results + + +def parse_value(value_str: str): + "Parse a value." 
+ # pylint: disable=too-many-branches + try: + if CLOSURE_RE.match(value_str): + return "closure()" + except TypeError: + print(value_str) + raise + + if value_str and value_str[0] == "[" and value_str[-1] == "]": + value = [] + stack = [] + + list_str = value_str[1:-1] + + index = 0 + first_index = 0 + for index, character in enumerate(list_str): + if character == "{": + stack.append("}") + elif character == "(": + stack.append(")") + elif character in ("}", ")"): + assert stack[-1] == character + stack.pop() + + elif character == "," and not stack: + # Do not include the comma + value.append(parse_value(list_str[first_index:index])) + first_index = index + 1 + + assert not stack + + if index > first_index: + value.append(parse_value(list_str[first_index:])) + + return value + + if value_str and value_str[0] == "{" and value_str[-1] == "}": + value = {} + for token in value_str[1:-1].split(", "): + try: + token_key, token_value = token.split("\\=", maxsplit=1) + except ValueError: + print(f"The bad value is `{value_str}`") + print(f"The specific token is `{token}`") + raise + + value[parse_value(token_key)] = parse_value(token_value) + + return value + + if value_str == "true": + return True + + if value_str == "false": + return False + + return ESCAPE_RE.sub(r"\1\2", value_str.strip()) + + +def parse_config(config_str: str) -> dict: + "Parse a string of Java properties." + param_re = re.compile(r"^(?P\S+?[^\\])=(?P.*)$") + + def assign_value(closure, key, value): + if "." 
not in key: + # This needs to be def'd + if key != "json_object": + closure[key] = parse_value(value) + else: + local_key, remainder = key.split(".", maxsplit=1) + + if local_key not in closure: + closure[local_key] = {} + + assign_value(closure[local_key], remainder, value) + + config = {} + + for line in config_str.splitlines(): + line = line.strip() + if not line: + continue + + try: + key, value = param_re.match(line).groups() + except AttributeError: + print(f"The offending line is `{line}`") + raise + + assign_value(config, ESCAPE_RE.sub(r"\1\2", key), value) + + # Specifically sort the config + return json.loads(json.dumps(config, sort_keys=True)) From f8210ca71edad191f6135927837c4c76fc28644f Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Wed, 14 Feb 2024 14:38:13 -0800 Subject: [PATCH 2/7] Remove unnecessary dependencies from pom.xml --- run-nextflow-tests/pom.xml | 10 ---------- run-nextflow-tests/recalibrate-bam.json | 4 ++-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/run-nextflow-tests/pom.xml b/run-nextflow-tests/pom.xml index 5509548..5671a18 100644 --- a/run-nextflow-tests/pom.xml +++ b/run-nextflow-tests/pom.xml @@ -5,16 +5,6 @@ nextflow-config-validator 1 - - org.codehaus.groovy - groovy-test - 3.0.19 - - - com.github.stefanbirkner - system-rules - 1.19.0 - org.mockito mockito-core diff --git a/run-nextflow-tests/recalibrate-bam.json b/run-nextflow-tests/recalibrate-bam.json index ef5e98d..5837d1e 100644 --- a/run-nextflow-tests/recalibrate-bam.json +++ b/run-nextflow-tests/recalibrate-bam.json @@ -435,7 +435,7 @@ "withName:remove_unmerged_BAMs", "withName:remove_merged_BAM" ], - "proc_names": "[Ljava.lang.String;@7cf166db", + "proc_names": "[Ljava.lang.String;@55a29589", "process": { "cache": false, "containerOptions": { @@ -808,4 +808,4 @@ } } } -} \ No newline at end of file +} From cd08fa766f610388aaddc345bd47beb524977142 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 23 Feb 2024 16:52:40 -0800 Subject: 
[PATCH 3/7] Modify entry script to be generic, add call-gSV test --- run-nextflow-tests/call-gsv.json | 464 +++++++++++++++++++++++++++++++ run-nextflow-tests/entry.py | 11 +- 2 files changed, 469 insertions(+), 6 deletions(-) create mode 100644 run-nextflow-tests/call-gsv.json diff --git a/run-nextflow-tests/call-gsv.json b/run-nextflow-tests/call-gsv.json new file mode 100644 index 0000000..506a88a --- /dev/null +++ b/run-nextflow-tests/call-gsv.json @@ -0,0 +1,464 @@ +{ + "config": [ + "test/global.config", + "test/config/gsv_discovery-all-tools.config" + ], + "params_file": "test/yaml/gsv_test-std-input.yaml", + "cpus": 16, + "memory_gb": 31, + "empty_files": [], + "mapped_files": [], + "nf_params": { + "output_dir": "/tmp/outputs" + }, + "envvars": { + "SLURM_JOB_ID": "8543" + }, + "mocks": { + "check_path": "", + "parse_bam_header": { + "read_group": [ + { + "SM": "4915723" + } + ] + } + }, + "dated_fields": [ + "params.log_output_dir", + "report.file", + "timeline.file", + "trace.file", + "params.date" + ], + "expected_result": { + "docker": { + "all_group_ids": "$(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "enabled": true, + "runOptions": "-u $(id -u):$(id -g) $(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "uid_and_gid": "-u $(id -u):$(id -g)" + }, + "manifest": { + "author": "Yu Pan, Tim Sanders, Yael Berkovich, Mohammed Faizal Eeman Mootor", + "description": "A pipeline to call germline structural variants utilizing Delly and Manta", + "name": "call-gSV", + "version": "5.0.0-rc.1" + }, + "node_cpus": "16", + "node_memory_GB": "31", + "params": { + "GCNV": "gCNV", + "GSV": "gSV", + "RGCNV": "regenotyped-gCNV", + "RGSV": "regenotyped-gSV", + "bcftools_version": "1.15.1", + "blcds_registered_dataset": false, + "cache_intermediate_pipeline_steps": false, + "dataset_id": "TEST", + "dataset_registry_prefix": "/hot/data", + "date": "20240224T003911Z", + "delly_version": "1.2.6", + 
"docker_container_registry": "ghcr.io/uclahs-cds", + "docker_image_bcftools": "ghcr.io/uclahs-cds/bcftools:1.15.1", + "docker_image_delly": "ghcr.io/uclahs-cds/delly:1.2.6", + "docker_image_manta": "ghcr.io/uclahs-cds/manta:1.6.0", + "docker_image_rtgtools": "ghcr.io/uclahs-cds/rtg-tools:3.12", + "docker_image_validate": "ghcr.io/uclahs-cds/pipeval:4.0.0-rc.2", + "docker_image_vcftools": "ghcr.io/uclahs-cds/vcftools:0.1.16", + "exclusion_file": "/hot/ref/tool-specific-input/Delly/GRCh38/human.hg38.excl.tsv", + "input": { + "BAM": { + "normal": [ + "/hot/software/pipeline/pipeline-call-gSV/Nextflow/development/input/data/CPCG0196-B1-downsampled-a-partial-sorted.bam" + ] + } + }, + "log_output_dir": "/tmp/outputs/call-gSV-5.0.0-rc.1/4915723/log-call-gSV-5.0.0-rc.1-20240224T003911Z", + "manta_version": "1.6.0", + "map_qual": "20", + "mappability_map": "/hot/ref/tool-specific-input/Delly/GRCh38/Homo_sapiens.GRCh38.dna.primary_assembly.fa.r101.s501.blacklist.gz", + "max_cpus": "16", + "max_memory": "31 GB", + "output_dir": "/tmp/outputs", + "output_dir_base": "/tmp/outputs/call-gSV-5.0.0-rc.1/4915723", + "pipeval_version": "4.0.0-rc.2", + "reference_fasta": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta", + "rtgtools_version": "3.12", + "run_delly": true, + "run_discovery": true, + "run_manta": true, + "run_qc": true, + "run_regenotyping": false, + "sample": "4915723", + "sample_to_process": [ + { + "id": "4915723", + "path": "/hot/software/pipeline/pipeline-call-gSV/Nextflow/development/input/data/CPCG0196-B1-downsampled-a-partial-sorted.bam", + "sample_type": "normal" + } + ], + "save_intermediate_files": false, + "ucla_cds": true, + "variant_type": [ + "gSV", + "gCNV" + ], + "vcftools_version": "0.1.16", + "work_dir": "/scratch/8543" + }, + "params_schema": { + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "exclusion_file": { + "help": "Absolute path to an exclusion file", + "mode": "r", + 
"required": true, + "type": "Path" + }, + "input": { + "elements": { + "BAM": { + "elements": { + "normal": { + "help": "Input BAM", + "required": true, + "type": "BAMEntryList" + }, + "tumor": { + "help": "Input tumor BAM. Only to be used in exceptional use cases. Remove `normal` BAM if this is used.", + "required": false, + "type": "BAMEntryList" + } + }, + "help": "Input BAM for germline SV/CNV calling", + "required": true, + "type": "InputBAMNamespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "map_qual": { + "default": "20", + "help": "Minimum Paired-End (PE) mapping quality for Delly", + "required": true, + "type": "Integer" + }, + "mappability_map": { + "help": "Absolute path to a mappability file", + "mode": "r", + "required": true, + "type": "Path" + }, + "merged_sites_gCNV": { + "help": "Absolute path to a BCF/VCF file containing merged gCNV sites across samples. Required when `run_regenotyping = true`.", + "mode": "r", + "required": false, + "type": "Path" + }, + "merged_sites_gSV": { + "help": "Absolute path to a BCF/VCF file containing merged gSV sites across samples. 
Required when `run_regenotyping = true`.", + "mode": "r", + "required": false, + "type": "Path" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "reference_fasta": { + "help": "Absolute path to a reference FASTA file", + "mode": "r", + "required": true, + "type": "Path" + }, + "run_delly": { + "default": [ + true + ], + "help": "Run Delly", + "required": true, + "type": "Bool" + }, + "run_discovery": { + "default": [ + true + ], + "help": "Identify SVs using Delly/Manta", + "required": true, + "type": "Bool" + }, + "run_manta": { + "default": [ + true + ], + "help": "Run Manta", + "required": true, + "type": "Bool" + }, + "run_qc": { + "default": [ + true + ], + "help": "Run QC", + "required": true, + "type": "Bool" + }, + "run_regenotyping": { + "default": [ + false + ], + "help": "Regenotype SVs/CNVs using Delly", + "required": true, + "type": "Bool" + }, + "save_intermediate_files": { + "default": [ + false + ], + "help": "Save intermediate files from the pipeline", + "required": true, + "type": "Bool" + }, + "variant_type": { + "choices": [ + "gSV", + "gCNV" + ], + "default": [ + "gSV", + "gCNV" + ], + "help": "List of germline variant types to be called", + "required": true, + "type": "List" + } + }, + "process": { + "cache": false, + "cpus": { + "1": "1", + "2": "2", + "3": "3", + "closure": "closure()" + }, + "echo": true, + "errorStrategy": { + "1": "finish", + "2": "finish", + "3": "finish", + "closure": "finish" + }, + "executor": "local", + "maxRetries": "1", + "withLabel:process_high": { + "cpus": { + "1": "12", + "2": "16", + "3": "16", + "closure": "closure()" + }, + "memory": { + "1": "31 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "closure()" + } + }, + "withLabel:process_low": { + "cpus": { + "1": "2", + "2": "4", + "3": "6", + "closure": "closure()" + }, + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "closure()" + } + }, + 
"withLabel:process_medium": { + "cpus": { + "1": "6", + "2": "12", + "3": "16", + "closure": "closure()" + }, + "memory": { + "1": "31 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "closure()" + } + }, + "withName:call_gSV_Delly": { + "cpus": "1", + "memory": "20 GB" + }, + "withName:call_gSV_Manta": { + "cpus": "1", + "memory": "20 GB" + }, + "withName:run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "report": { + "enabled": true, + "file": "/tmp/outputs/call-gSV-5.0.0-rc.1/4915723/log-call-gSV-5.0.0-rc.1-20240224T003911Z/nextflow-log/report.html" + }, + "timeline": { + "enabled": true, + "file": "/tmp/outputs/call-gSV-5.0.0-rc.1/4915723/log-call-gSV-5.0.0-rc.1-20240224T003911Z/nextflow-log/timeline.html" + }, + "trace": { + "enabled": true, + "file": "/tmp/outputs/call-gSV-5.0.0-rc.1/4915723/log-call-gSV-5.0.0-rc.1-20240224T003911Z/nextflow-log/trace.txt" + }, + "tz": "sun.util.calendar.ZoneInfo[id=\"UTC\",offset=0,dstSavings=0,useDaylight=false,transitions=0,lastRule=null]", + "workDir": "/scratch/8543", + "yaml": { + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "exclusion_file": { + "help": "Absolute path to an exclusion file", + "mode": "r", + "required": true, + "type": "Path" + }, + "input": { + "elements": { + "BAM": { + "elements": { + "normal": { + "help": "Input BAM", + "required": true, + "type": "BAMEntryList" + }, + "tumor": { + "help": "Input tumor BAM. Only to be used in exceptional use cases. 
Remove `normal` BAM if this is used.", + "required": false, + "type": "BAMEntryList" + } + }, + "help": "Input BAM for germline SV/CNV calling", + "required": true, + "type": "InputBAMNamespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "map_qual": { + "default": "20", + "help": "Minimum Paired-End (PE) mapping quality for Delly", + "required": true, + "type": "Integer" + }, + "mappability_map": { + "help": "Absolute path to a mappability file", + "mode": "r", + "required": true, + "type": "Path" + }, + "merged_sites_gCNV": { + "help": "Absolute path to a BCF/VCF file containing merged gCNV sites across samples. Required when `run_regenotyping = true`.", + "mode": "r", + "required": false, + "type": "Path" + }, + "merged_sites_gSV": { + "help": "Absolute path to a BCF/VCF file containing merged gSV sites across samples. Required when `run_regenotyping = true`.", + "mode": "r", + "required": false, + "type": "Path" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "reference_fasta": { + "help": "Absolute path to a reference FASTA file", + "mode": "r", + "required": true, + "type": "Path" + }, + "run_delly": { + "default": [ + true + ], + "help": "Run Delly", + "required": true, + "type": "Bool" + }, + "run_discovery": { + "default": [ + true + ], + "help": "Identify SVs using Delly/Manta", + "required": true, + "type": "Bool" + }, + "run_manta": { + "default": [ + true + ], + "help": "Run Manta", + "required": true, + "type": "Bool" + }, + "run_qc": { + "default": [ + true + ], + "help": "Run QC", + "required": true, + "type": "Bool" + }, + "run_regenotyping": { + "default": [ + false + ], + "help": "Regenotype SVs/CNVs using Delly", + "required": true, + "type": "Bool" + }, + "save_intermediate_files": { + "default": [ + false + ], + "help": "Save intermediate files from the pipeline", + "required": true, + "type": "Bool" + }, + 
"variant_type": { + "choices": [ + "gSV", + "gCNV" + ], + "default": [ + "gSV", + "gCNV" + ], + "help": "List of germline variant types to be called", + "required": true, + "type": "List" + } + } + } +} diff --git a/run-nextflow-tests/entry.py b/run-nextflow-tests/entry.py index fc783d1..d18a4a0 100755 --- a/run-nextflow-tests/entry.py +++ b/run-nextflow-tests/entry.py @@ -7,11 +7,9 @@ from configtest import NextflowConfigTest -def run_recalibrate_bam_test(pipeline: Path): +def run_pipeline_test(pipeline: Path, test_case: Path): "Run the bundled pipeline-recalibrate-BAM test." - testobj = NextflowConfigTest.from_file( - Path(__file__).resolve().parent / "recalibrate-bam.json" - ) + testobj = NextflowConfigTest.from_file(test_case) if testobj.check_results(pipeline): print("No changes!") @@ -21,7 +19,8 @@ def run_recalibrate_bam_test(pipeline: Path): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("recalibrate_bam_path") + parser.add_argument("pipeline_path") + parser.add_argument("test_path") args = parser.parse_args() - run_recalibrate_bam_test(Path(args.recalibrate_bam_path)) + run_pipeline_test(Path(args.pipeline_path), Path(args.test_path)) From a187b4e771da5882815f167cda5bf5bda2ac7680 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 23 Feb 2024 17:05:31 -0800 Subject: [PATCH 4/7] Add multiple test files for call-gSV --- .../{call-gsv.json => call-gsv-F16.json} | 0 run-nextflow-tests/call-gsv-F32.json | 464 ++++++++++++++++++ 2 files changed, 464 insertions(+) rename run-nextflow-tests/{call-gsv.json => call-gsv-F16.json} (100%) create mode 100644 run-nextflow-tests/call-gsv-F32.json diff --git a/run-nextflow-tests/call-gsv.json b/run-nextflow-tests/call-gsv-F16.json similarity index 100% rename from run-nextflow-tests/call-gsv.json rename to run-nextflow-tests/call-gsv-F16.json diff --git a/run-nextflow-tests/call-gsv-F32.json b/run-nextflow-tests/call-gsv-F32.json new file mode 100644 index 0000000..cb6ec86 --- 
/dev/null +++ b/run-nextflow-tests/call-gsv-F32.json @@ -0,0 +1,464 @@ +{ + "config": [ + "test/global.config", + "test/config/gsv_discovery-all-tools.config" + ], + "params_file": "test/yaml/gsv_test-std-input.yaml", + "cpus": 32, + "memory_gb": 64, + "empty_files": [], + "mapped_files": [], + "nf_params": { + "output_dir": "/tmp/outputs" + }, + "envvars": { + "SLURM_JOB_ID": "8543" + }, + "mocks": { + "check_path": "", + "parse_bam_header": { + "read_group": [ + { + "SM": "4915723" + } + ] + } + }, + "dated_fields": [ + "params.log_output_dir", + "report.file", + "timeline.file", + "trace.file", + "params.date" + ], + "expected_result": { + "docker": { + "all_group_ids": "$(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "enabled": true, + "runOptions": "-u $(id -u):$(id -g) $(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "uid_and_gid": "-u $(id -u):$(id -g)" + }, + "manifest": { + "author": "Yu Pan, Tim Sanders, Yael Berkovich, Mohammed Faizal Eeman Mootor", + "description": "A pipeline to call germline structural variants utilizing Delly and Manta", + "name": "call-gSV", + "version": "5.0.0-rc.1" + }, + "node_cpus": "32", + "node_memory_GB": "64", + "params": { + "GCNV": "gCNV", + "GSV": "gSV", + "RGCNV": "regenotyped-gCNV", + "RGSV": "regenotyped-gSV", + "bcftools_version": "1.15.1", + "blcds_registered_dataset": false, + "cache_intermediate_pipeline_steps": false, + "dataset_id": "TEST", + "dataset_registry_prefix": "/hot/data", + "date": "20240224T005807Z", + "delly_version": "1.2.6", + "docker_container_registry": "ghcr.io/uclahs-cds", + "docker_image_bcftools": "ghcr.io/uclahs-cds/bcftools:1.15.1", + "docker_image_delly": "ghcr.io/uclahs-cds/delly:1.2.6", + "docker_image_manta": "ghcr.io/uclahs-cds/manta:1.6.0", + "docker_image_rtgtools": "ghcr.io/uclahs-cds/rtg-tools:3.12", + "docker_image_validate": "ghcr.io/uclahs-cds/pipeval:4.0.0-rc.2", + "docker_image_vcftools": "ghcr.io/uclahs-cds/vcftools:0.1.16", 
+ "exclusion_file": "/hot/ref/tool-specific-input/Delly/GRCh38/human.hg38.excl.tsv", + "input": { + "BAM": { + "normal": [ + "/hot/software/pipeline/pipeline-call-gSV/Nextflow/development/input/data/CPCG0196-B1-downsampled-a-partial-sorted.bam" + ] + } + }, + "log_output_dir": "/tmp/outputs/call-gSV-5.0.0-rc.1/4915723/log-call-gSV-5.0.0-rc.1-20240224T005807Z", + "manta_version": "1.6.0", + "map_qual": "20", + "mappability_map": "/hot/ref/tool-specific-input/Delly/GRCh38/Homo_sapiens.GRCh38.dna.primary_assembly.fa.r101.s501.blacklist.gz", + "max_cpus": "32", + "max_memory": "64 GB", + "output_dir": "/tmp/outputs", + "output_dir_base": "/tmp/outputs/call-gSV-5.0.0-rc.1/4915723", + "pipeval_version": "4.0.0-rc.2", + "reference_fasta": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta", + "rtgtools_version": "3.12", + "run_delly": true, + "run_discovery": true, + "run_manta": true, + "run_qc": true, + "run_regenotyping": false, + "sample": "4915723", + "sample_to_process": [ + { + "id": "4915723", + "path": "/hot/software/pipeline/pipeline-call-gSV/Nextflow/development/input/data/CPCG0196-B1-downsampled-a-partial-sorted.bam", + "sample_type": "normal" + } + ], + "save_intermediate_files": false, + "ucla_cds": true, + "variant_type": [ + "gSV", + "gCNV" + ], + "vcftools_version": "0.1.16", + "work_dir": "/scratch/8543" + }, + "params_schema": { + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "exclusion_file": { + "help": "Absolute path to an exclusion file", + "mode": "r", + "required": true, + "type": "Path" + }, + "input": { + "elements": { + "BAM": { + "elements": { + "normal": { + "help": "Input BAM", + "required": true, + "type": "BAMEntryList" + }, + "tumor": { + "help": "Input tumor BAM. Only to be used in exceptional use cases. 
Remove `normal` BAM if this is used.", + "required": false, + "type": "BAMEntryList" + } + }, + "help": "Input BAM for germline SV/CNV calling", + "required": true, + "type": "InputBAMNamespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "map_qual": { + "default": "20", + "help": "Minimum Paired-End (PE) mapping quality for Delly", + "required": true, + "type": "Integer" + }, + "mappability_map": { + "help": "Absolute path to a mappability file", + "mode": "r", + "required": true, + "type": "Path" + }, + "merged_sites_gCNV": { + "help": "Absolute path to a BCF/VCF file containing merged gCNV sites across samples. Required when `run_regenotyping = true`.", + "mode": "r", + "required": false, + "type": "Path" + }, + "merged_sites_gSV": { + "help": "Absolute path to a BCF/VCF file containing merged gSV sites across samples. Required when `run_regenotyping = true`.", + "mode": "r", + "required": false, + "type": "Path" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "reference_fasta": { + "help": "Absolute path to a reference FASTA file", + "mode": "r", + "required": true, + "type": "Path" + }, + "run_delly": { + "default": [ + true + ], + "help": "Run Delly", + "required": true, + "type": "Bool" + }, + "run_discovery": { + "default": [ + true + ], + "help": "Identify SVs using Delly/Manta", + "required": true, + "type": "Bool" + }, + "run_manta": { + "default": [ + true + ], + "help": "Run Manta", + "required": true, + "type": "Bool" + }, + "run_qc": { + "default": [ + true + ], + "help": "Run QC", + "required": true, + "type": "Bool" + }, + "run_regenotyping": { + "default": [ + false + ], + "help": "Regenotype SVs/CNVs using Delly", + "required": true, + "type": "Bool" + }, + "save_intermediate_files": { + "default": [ + false + ], + "help": "Save intermediate files from the pipeline", + "required": true, + "type": "Bool" + }, + 
"variant_type": { + "choices": [ + "gSV", + "gCNV" + ], + "default": [ + "gSV", + "gCNV" + ], + "help": "List of germline variant types to be called", + "required": true, + "type": "List" + } + }, + "process": { + "cache": false, + "cpus": { + "1": "1", + "2": "2", + "3": "3", + "closure": "closure()" + }, + "echo": true, + "errorStrategy": { + "1": "finish", + "2": "finish", + "3": "finish", + "closure": "finish" + }, + "executor": "local", + "maxRetries": "1", + "withLabel:process_high": { + "cpus": { + "1": "12", + "2": "24", + "3": "32", + "closure": "closure()" + }, + "memory": { + "1": "64 GB", + "2": "64 GB", + "3": "64 GB", + "closure": "closure()" + } + }, + "withLabel:process_low": { + "cpus": { + "1": "2", + "2": "4", + "3": "6", + "closure": "closure()" + }, + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "closure()" + } + }, + "withLabel:process_medium": { + "cpus": { + "1": "6", + "2": "12", + "3": "18", + "closure": "closure()" + }, + "memory": { + "1": "42 GB", + "2": "64 GB", + "3": "64 GB", + "closure": "closure()" + } + }, + "withName:call_gSV_Delly": { + "cpus": "1", + "memory": "30 GB" + }, + "withName:call_gSV_Manta": { + "cpus": "1", + "memory": "30 GB" + }, + "withName:run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "report": { + "enabled": true, + "file": "/tmp/outputs/call-gSV-5.0.0-rc.1/4915723/log-call-gSV-5.0.0-rc.1-20240224T003911Z/nextflow-log/report.html" + }, + "timeline": { + "enabled": true, + "file": "/tmp/outputs/call-gSV-5.0.0-rc.1/4915723/log-call-gSV-5.0.0-rc.1-20240224T003911Z/nextflow-log/timeline.html" + }, + "trace": { + "enabled": true, + "file": "/tmp/outputs/call-gSV-5.0.0-rc.1/4915723/log-call-gSV-5.0.0-rc.1-20240224T003911Z/nextflow-log/trace.txt" + }, + "tz": "sun.util.calendar.ZoneInfo[id=\"UTC\",offset=0,dstSavings=0,useDaylight=false,transitions=0,lastRule=null]", + "workDir": "/scratch/8543", + "yaml": { + "dataset_id": { + "help": "Dataset identifier", + "required": 
true, + "type": "String" + }, + "exclusion_file": { + "help": "Absolute path to an exclusion file", + "mode": "r", + "required": true, + "type": "Path" + }, + "input": { + "elements": { + "BAM": { + "elements": { + "normal": { + "help": "Input BAM", + "required": true, + "type": "BAMEntryList" + }, + "tumor": { + "help": "Input tumor BAM. Only to be used in exceptional use cases. Remove `normal` BAM if this is used.", + "required": false, + "type": "BAMEntryList" + } + }, + "help": "Input BAM for germline SV/CNV calling", + "required": true, + "type": "InputBAMNamespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "map_qual": { + "default": "20", + "help": "Minimum Paired-End (PE) mapping quality for Delly", + "required": true, + "type": "Integer" + }, + "mappability_map": { + "help": "Absolute path to a mappability file", + "mode": "r", + "required": true, + "type": "Path" + }, + "merged_sites_gCNV": { + "help": "Absolute path to a BCF/VCF file containing merged gCNV sites across samples. Required when `run_regenotyping = true`.", + "mode": "r", + "required": false, + "type": "Path" + }, + "merged_sites_gSV": { + "help": "Absolute path to a BCF/VCF file containing merged gSV sites across samples. 
Required when `run_regenotyping = true`.", + "mode": "r", + "required": false, + "type": "Path" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "reference_fasta": { + "help": "Absolute path to a reference FASTA file", + "mode": "r", + "required": true, + "type": "Path" + }, + "run_delly": { + "default": [ + true + ], + "help": "Run Delly", + "required": true, + "type": "Bool" + }, + "run_discovery": { + "default": [ + true + ], + "help": "Identify SVs using Delly/Manta", + "required": true, + "type": "Bool" + }, + "run_manta": { + "default": [ + true + ], + "help": "Run Manta", + "required": true, + "type": "Bool" + }, + "run_qc": { + "default": [ + true + ], + "help": "Run QC", + "required": true, + "type": "Bool" + }, + "run_regenotyping": { + "default": [ + false + ], + "help": "Regenotype SVs/CNVs using Delly", + "required": true, + "type": "Bool" + }, + "save_intermediate_files": { + "default": [ + false + ], + "help": "Save intermediate files from the pipeline", + "required": true, + "type": "Bool" + }, + "variant_type": { + "choices": [ + "gSV", + "gCNV" + ], + "default": [ + "gSV", + "gCNV" + ], + "help": "List of germline variant types to be called", + "required": true, + "type": "List" + } + } + } +} From 73d3bb7d42aea06f123149fd24d93cc0cc742250 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 26 Feb 2024 09:23:50 -0800 Subject: [PATCH 5/7] Add tests for pipeline-call-sSNV --- run-nextflow-tests/call-sSNV-F16.json | 822 ++++++++++++++++++++++++++ run-nextflow-tests/call-sSNV-F32.json | 822 ++++++++++++++++++++++++++ run-nextflow-tests/entry.py | 2 +- 3 files changed, 1645 insertions(+), 1 deletion(-) create mode 100644 run-nextflow-tests/call-sSNV-F16.json create mode 100644 run-nextflow-tests/call-sSNV-F32.json diff --git a/run-nextflow-tests/call-sSNV-F16.json b/run-nextflow-tests/call-sSNV-F16.json new file mode 100644 index 0000000..31bd587 --- /dev/null +++ 
b/run-nextflow-tests/call-sSNV-F16.json @@ -0,0 +1,822 @@ +{ + "config": [ + "test/global.config", + "test/config/a_mini-all-tools.config" + ], + "params_file": "test/yaml/a_mini_n2-std-input.yaml", + "cpus": 16, + "memory_gb": 31, + "empty_files": [], + "mapped_files": [], + "nf_params": { + "output_dir": "/tmp/outputs" + }, + "envvars": { + "SLURM_JOB_ID": "8543" + }, + "mocks": { + "check_path": "", + "parse_bam_header": { + "read_group": [ + { + "SM": "4915723" + } + ] + } + }, + "dated_fields": [ + "params.log_output_dir", + "report.file", + "timeline.file", + "trace.file", + "params.date" + ], + "expected_result": { + "docker": { + "all_group_ids": "$(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "enabled": true, + "runOptions": "-u $(id -u):$(id -g) $(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "uid_and_gid": "-u $(id -u):$(id -g)" + }, + "manifest": { + "author": "Yuan Zhe (Caden) Bugh, Mao Tian, Sorel Fitz-Gibbon", + "homePage": "/~https://github.com/uclahs-cds/pipeline-call-sSNV", + "mainScript": "main.nf", + "name": "call-sSNV", + "nextflowVersion": ">=20.07.1", + "version": "8.0.0" + }, + "params": { + "BCFtools_version": "1.17", + "GATK_version": "4.5.0.0", + "MuSE_version": "2.0.4", + "algorithm": [ + "somaticsniper", + "strelka2", + "mutect2", + "muse" + ], + "bam_readcount_version": "0.8.0", + "bgzip_extra_args": "", + "cache_intermediate_pipeline_steps": false, + "call_ssnv_r_version": "dev", + "dataset_id": "TWGSAMIN", + "dbSNP": "/hot/ref/database/dbSNP-155/original/GRCh38/GCF_000001405.39.gz", + "docker_container_registry": "ghcr.io/uclahs-cds", + "docker_image_BCFtools": "ghcr.io/uclahs-cds/bcftools:1.17", + "docker_image_GATK": "broadinstitute/gatk:4.5.0.0", + "docker_image_MuSE": "ghcr.io/uclahs-cds/muse:2.0.4", + "docker_image_bam_readcount": "ghcr.io/uclahs-cds/bam-readcount:0.8.0", + "docker_image_manta": "ghcr.io/uclahs-cds/manta:1.6.0", + "docker_image_r_VennDiagram": 
"ghcr.io/uclahs-cds/call-ssnv-r:dev", + "docker_image_somaticsniper": "ghcr.io/uclahs-cds/somaticsniper:1.0.5.0", + "docker_image_strelka2": "ghcr.io/uclahs-cds/strelka2:2.9.10", + "docker_image_ubuntu": "ubuntu:20.04", + "docker_image_validate_params": "ghcr.io/uclahs-cds/pipeval:4.0.0-rc.2", + "docker_image_vcf2maf": "ghcr.io/mskcc/vcf2maf/vcf2maf:v1.6.18", + "exome": false, + "filter_mutect_calls_extra_args": "", + "gatk_command_mem_diff": "500 MB", + "germline": true, + "germline_resource_gnomad_vcf": "/hot/ref/tool-specific-input/GATK/GRCh38/af-only-gnomad.hg38.vcf.gz", + "germline_resource_gnomad_vcf_index": "/hot/ref/tool-specific-input/GATK/GRCh38/af-only-gnomad.hg38.vcf.gz.tbi", + "input": { + "normal": [ + { + "BAM": "/hot/resource/SMC-HET/normal/bams/A-mini/n2/output/HG002.N-n2.bam" + } + ], + "tumor": [ + { + "BAM": "/hot/resource/SMC-HET/tumours/A-mini/bams/n2/output/S2.T-n2.bam", + "contamination_table": "/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/input/data/A-mini/S2.T-n2_getpileupsummaries_calculatecontamination.table" + } + ] + }, + "intersect_regions": "/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/Homo_sapiens_assembly38_no-decoy.bed.gz", + "intersect_regions_index": "/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/Homo_sapiens_assembly38_no-decoy.bed.gz.tbi", + "log_output_dir": "/tmp/outputs/call-sSNV-8.0.0/4915723/log-call-sSNV-8.0.0-20240226T172135Z", + "manta_version": "1.6.0", + "max_cpus": "16", + "max_memory": "31 GB", + "min_cpus": "1", + "min_memory": "1 MB", + "mutect2_extra_args": "", + "ncbi_build": "GRCh38", + "normal_id": "4915723", + "output_dir": "/tmp/outputs", + "output_dir_base": "/tmp/outputs/call-sSNV-8.0.0/4915723", + "patient_id": "TWGSAMIN000001", + "pipeval_version": "4.0.0-rc.2", + "proc_resource_params": { + "call_sIndel_Manta": { + "cpus": "6", + "memory": "6 GB", + "retry_strategy": { + "memory": { + "operand": "3 GB", + "strategy": "add" + } + } 
+ }, + "call_sSNV_MuSE": { + "cpus": "6", + "memory": "24 GB", + "retry_strategy": { + "memory": { + "operand": "8 GB", + "strategy": "add" + } + } + }, + "call_sSNV_Mutect2": { + "cpus": "1", + "memory": "3 GB", + "retry_strategy": { + "memory": { + "operand": "2 GB", + "strategy": "add" + } + } + }, + "call_sSNV_SomaticSniper": { + "cpus": "1", + "memory": "1 GB", + "retry_strategy": { + "memory": { + "operand": "3 GB", + "strategy": "add" + } + } + }, + "call_sSNV_Strelka2": { + "cpus": "6", + "ext": { + "retry_codes": [] + }, + "memory": "2 GB", + "retry_strategy": { + "memory": { + "operand": "12 GB", + "strategy": "add" + } + } + }, + "concat_VCFs_BCFtools": { + "cpus": "2", + "memory": "5 GB", + "retry_strategy": { + "memory": { + "operand": "10 GB", + "strategy": "add" + } + } + }, + "convert_BAM2Pileup_SAMtools": { + "cpus": "1", + "memory": "1 GB", + "retry_strategy": { + "memory": { + "operand": "3 GB", + "strategy": "add" + } + } + }, + "create_IndelCandidate_SAMtools": { + "cpus": "1", + "memory": "1 GB", + "retry_strategy": { + "memory": { + "operand": "3 GB", + "strategy": "add" + } + } + }, + "plot_VennDiagram_R": { + "cpus": "2", + "memory": "5 GB", + "retry_strategy": { + "memory": { + "operand": "10 GB", + "strategy": "add" + } + } + }, + "run_LearnReadOrientationModel_GATK": { + "cpus": "1", + "memory": "8 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_sump_MuSE": { + "cpus": "8", + "memory": "24 GB", + "retry_strategy": { + "memory": { + "operand": "8 GB", + "strategy": "add" + } + } + }, + "run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "reference": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta", + "sample_id": "4915723", + "samples_to_process": [ + { + "contamination_table": "null", + "id": "4915723", + "orig_id": "4915723", + "path": "/hot/resource/SMC-HET/normal/bams/A-mini/n2/output/HG002.N-n2.bam", + "sample_type": "normal" + }, + 
"{orig_id=4915723, id=4915723, path=/hot/resource/SMC-HET/tumours/A-mini/bams/n2/output/S2.T-n2.bam, contamination_table=/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/input/data/A-mini/S2.T-n2_getpileupsummaries_calculatecontamination.table, sample_type=tumor}" + ], + "save_intermediate_files": true, + "scatter_count": "50", + "single_NT_paired": true, + "somaticsniper_version": "1.0.5.0", + "split_intervals_extra_args": "", + "strelka2_version": "2.9.10", + "tabix_extra_args": "", + "tumor_id": "4915723", + "ubuntu_version": "20.04", + "ucla_cds": true, + "use_intersect_regions": true, + "vcf2maf_extra_args": "", + "vcf2maf_version": "v1.6.18", + "work_dir": "/scratch/8543" + }, + "params_schema": { + "algorithm": { + "choices": [ + "mutect2", + "somaticsniper", + "strelka2", + "muse" + ], + "default": [ + "mutect2", + "somaticsniper", + "strelka2", + "muse" + ], + "help": "List of sSNV algorithms", + "required": true, + "type": "List" + }, + "base_resource_update": { + "elements": { + "cpus": { + "help": "List of CPU updates", + "required": false, + "type": "ResourceUpdateList" + }, + "memory": { + "help": "List of memory updates", + "required": false, + "type": "ResourceUpdateList" + } + }, + "help": "User-defined modifications for adjusting base resource allocations for processes", + "required": false, + "type": "ResourceUpdateNamespace" + }, + "bgzip_extra_args": { + "allow_empty": true, + "default": "", + "help": "Additional arguments for bgzip command", + "required": false, + "type": "String" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "dbSNP": { + "help": "", + "mode": "r", + "required": true, + "type": "Path" + }, + "exome": { + "default": false, + "help": "The exome option when running manta and strelka2", + "required": false, + "type": "Bool" + }, + "filter_mutect_calls_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the FilterMutectCalls command", + 
"required": false, + "type": "String" + }, + "germline_resource_gnomad_vcf": { + "allow_empty": true, + "help": "", + "mode": "r", + "required": false, + "type": "Path" + }, + "input": { + "elements": { + "normal": { + "elements": { + "BAM": { + "help": "Absolute path to normal sample BAM files", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "Normal id/path input", + "required": false, + "type": "BAMEntryList" + }, + "tumor": { + "elements": { + "BAM": { + "help": "Absolute path to tumor sample BAM files", + "mode": "r", + "required": true, + "type": "Path" + }, + "contamination_table": { + "help": "Absolute path to contamination.tables", + "mode": "r", + "required": false, + "type": "Path" + } + }, + "help": "Tumor id/path input", + "required": true, + "type": "BAMEntryList" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "intersect_regions": { + "help": "call regions bed file used by mutect2, strelka2 and intersect", + "mode": "r", + "required": true, + "type": "Path" + }, + "mutect2_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the Mutect2 command", + "required": false, + "type": "String" + }, + "ncbi_build": { + "help": "NCBI build version, e.g. 
GRCh38", + "required": true, + "type": "String" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "patient_id": { + "help": "Patient identifier", + "required": true, + "type": "String" + }, + "reference": { + "help": "Absolute path to reference directory", + "mode": "r", + "required": true, + "type": "Path" + }, + "save_intermediate_files": { + "default": false, + "help": "The option to save the intermediate files", + "required": false, + "type": "Bool" + }, + "scatter_count": { + "help": "", + "required": true, + "type": "Integer" + }, + "split_intervals_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the SplitIntervals command", + "required": false, + "type": "String" + }, + "tabix_extra_args": { + "allow_empty": true, + "default": "", + "help": "Additional arguments for tabix command", + "required": false, + "type": "String" + }, + "vcf2maf_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the vcf2maf command", + "required": false, + "type": "String" + }, + "work_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": false, + "type": "Path" + } + }, + "proc_names": "[Ljava.lang.String;@5984742f", + "process": { + "cache": false, + "commonRetryCodes": [ + "104", + "134", + "137", + "139", + "143", + "247" + ], + "containerOptions": { + "1": "--cpu-shares 1024 --cpus $task.cpus", + "2": "--cpu-shares 1024 --cpus $task.cpus", + "3": "--cpu-shares 1024 --cpus $task.cpus", + "closure": "--cpu-shares 1024 --cpus $task.cpus" + }, + "cpus": { + "1": "1", + "2": "2", + "3": "3", + "closure": "closure()" + }, + "errorStrategy": { + "1": "terminate", + "2": "terminate", + "3": "terminate", + "closure": "terminate" + }, + "executor": "local", + "maxRetries": "1", + "withLabel:process_high": { + "cpus": { + "1": "12", + "2": "16", + "3": "16", + "closure": "closure()" + }, + "memory": { + "1": "31 GB", + "2": "31 GB", + 
"3": "31 GB", + "closure": "closure()" + } + }, + "withLabel:process_low": { + "cpus": { + "1": "2", + "2": "4", + "3": "6", + "closure": "closure()" + }, + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "closure()" + } + }, + "withLabel:process_medium": { + "cpus": { + "1": "6", + "2": "12", + "3": "16", + "closure": "closure()" + }, + "memory": { + "1": "31 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "closure()" + } + }, + "withName:call_sIndel_Manta": { + "cpus": "6", + "memory": { + "1": "6 GB", + "2": "9 GB", + "3": "12 GB", + "closure": "retry_updater(6 GB, add, 3 GB, $task.attempt, memory)" + } + }, + "withName:call_sSNV_MuSE": { + "cpus": "6", + "memory": { + "1": "24 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "retry_updater(24 GB, add, 8 GB, $task.attempt, memory)" + } + }, + "withName:call_sSNV_Mutect2": { + "cpus": "1", + "memory": { + "1": "3 GB", + "2": "5 GB", + "3": "7 GB", + "closure": "retry_updater(3 GB, add, 2 GB, $task.attempt, memory)" + } + }, + "withName:call_sSNV_SomaticSniper": { + "cpus": "1", + "memory": { + "1": "1 GB", + "2": "4 GB", + "3": "7 GB", + "closure": "retry_updater(1 GB, add, 3 GB, $task.attempt, memory)" + } + }, + "withName:call_sSNV_Strelka2": { + "cpus": "6", + "ext": { + "retry_codes": [] + }, + "memory": { + "1": "2 GB", + "2": "14 GB", + "3": "26 GB", + "closure": "retry_updater(2 GB, add, 12 GB, $task.attempt, memory)" + } + }, + "withName:concat_VCFs_BCFtools": { + "cpus": "2", + "memory": { + "1": "5 GB", + "2": "15 GB", + "3": "25 GB", + "closure": "retry_updater(5 GB, add, 10 GB, $task.attempt, memory)" + } + }, + "withName:convert_BAM2Pileup_SAMtools": { + "cpus": "1", + "memory": { + "1": "1 GB", + "2": "4 GB", + "3": "7 GB", + "closure": "retry_updater(1 GB, add, 3 GB, $task.attempt, memory)" + } + }, + "withName:create_IndelCandidate_SAMtools": { + "cpus": "1", + "memory": { + "1": "1 GB", + "2": "4 GB", + "3": "7 GB", + "closure": "retry_updater(1 GB, add, 3 GB, 
$task.attempt, memory)" + } + }, + "withName:plot_VennDiagram_R": { + "cpus": "2", + "memory": { + "1": "5 GB", + "2": "15 GB", + "3": "25 GB", + "closure": "retry_updater(5 GB, add, 10 GB, $task.attempt, memory)" + } + }, + "withName:run_LearnReadOrientationModel_GATK": { + "cpus": "1", + "memory": { + "1": "8 GB", + "2": "16 GB", + "3": "31 GB", + "closure": "retry_updater(8 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_sump_MuSE": { + "cpus": "8", + "memory": { + "1": "24 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "retry_updater(24 GB, add, 8 GB, $task.attempt, memory)" + } + }, + "withName:run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "report": { + "enabled": true, + "file": "/tmp/outputs/call-sSNV-8.0.0/4915723/log-call-sSNV-8.0.0-20240226T172135Z/nextflow-log/report.html" + }, + "sm_tag": "4915723", + "timeline": { + "enabled": true, + "file": "/tmp/outputs/call-sSNV-8.0.0/4915723/log-call-sSNV-8.0.0-20240226T172135Z/nextflow-log/timeline.html" + }, + "trace": { + "enabled": true, + "file": "/tmp/outputs/call-sSNV-8.0.0/4915723/log-call-sSNV-8.0.0-20240226T172135Z/nextflow-log/trace.txt" + }, + "valid_algorithms": [ + "somaticsniper", + "strelka2", + "mutect2", + "muse" + ], + "workDir": "/scratch/8543", + "yaml": { + "algorithm": { + "choices": [ + "mutect2", + "somaticsniper", + "strelka2", + "muse" + ], + "default": [ + "mutect2", + "somaticsniper", + "strelka2", + "muse" + ], + "help": "List of sSNV algorithms", + "required": true, + "type": "List" + }, + "base_resource_update": { + "elements": { + "cpus": { + "help": "List of CPU updates", + "required": false, + "type": "ResourceUpdateList" + }, + "memory": { + "help": "List of memory updates", + "required": false, + "type": "ResourceUpdateList" + } + }, + "help": "User-defined modifications for adjusting base resource allocations for processes", + "required": false, + "type": "ResourceUpdateNamespace" + }, + "bgzip_extra_args": { + "allow_empty": 
true, + "default": "", + "help": "Additional arguments for bgzip command", + "required": false, + "type": "String" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "dbSNP": { + "help": "", + "mode": "r", + "required": true, + "type": "Path" + }, + "exome": { + "default": false, + "help": "The exome option when running manta and strelka2", + "required": false, + "type": "Bool" + }, + "filter_mutect_calls_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the FilterMutectCalls command", + "required": false, + "type": "String" + }, + "germline_resource_gnomad_vcf": { + "allow_empty": true, + "help": "", + "mode": "r", + "required": false, + "type": "Path" + }, + "input": { + "elements": { + "normal": { + "elements": { + "BAM": { + "help": "Absolute path to normal sample BAM files", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "Normal id/path input", + "required": false, + "type": "BAMEntryList" + }, + "tumor": { + "elements": { + "BAM": { + "help": "Absolute path to tumor sample BAM files", + "mode": "r", + "required": true, + "type": "Path" + }, + "contamination_table": { + "help": "Absolute path to contamination.tables", + "mode": "r", + "required": false, + "type": "Path" + } + }, + "help": "Tumor id/path input", + "required": true, + "type": "BAMEntryList" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "intersect_regions": { + "help": "call regions bed file used by mutect2, strelka2 and intersect", + "mode": "r", + "required": true, + "type": "Path" + }, + "mutect2_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the Mutect2 command", + "required": false, + "type": "String" + }, + "ncbi_build": { + "help": "NCBI build version, e.g. 
GRCh38", + "required": true, + "type": "String" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "patient_id": { + "help": "Patient identifier", + "required": true, + "type": "String" + }, + "reference": { + "help": "Absolute path to reference directory", + "mode": "r", + "required": true, + "type": "Path" + }, + "save_intermediate_files": { + "default": false, + "help": "The option to save the intermediate files", + "required": false, + "type": "Bool" + }, + "scatter_count": { + "help": "", + "required": true, + "type": "Integer" + }, + "split_intervals_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the SplitIntervals command", + "required": false, + "type": "String" + }, + "tabix_extra_args": { + "allow_empty": true, + "default": "", + "help": "Additional arguments for tabix command", + "required": false, + "type": "String" + }, + "vcf2maf_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the vcf2maf command", + "required": false, + "type": "String" + }, + "work_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": false, + "type": "Path" + } + } + } +} \ No newline at end of file diff --git a/run-nextflow-tests/call-sSNV-F32.json b/run-nextflow-tests/call-sSNV-F32.json new file mode 100644 index 0000000..9b6c6d9 --- /dev/null +++ b/run-nextflow-tests/call-sSNV-F32.json @@ -0,0 +1,822 @@ +{ + "config": [ + "test/global.config", + "test/config/a_mini-all-tools.config" + ], + "params_file": "test/yaml/a_mini_n2-std-input.yaml", + "cpus": 32, + "memory_gb": 64, + "empty_files": [], + "mapped_files": [], + "nf_params": { + "output_dir": "/tmp/outputs" + }, + "envvars": { + "SLURM_JOB_ID": "8543" + }, + "mocks": { + "check_path": "", + "parse_bam_header": { + "read_group": [ + { + "SM": "4915723" + } + ] + } + }, + "dated_fields": [ + "params.log_output_dir", + "report.file", + "timeline.file", + "trace.file", 
+ "params.date" + ], + "expected_result": { + "docker": { + "all_group_ids": "$(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "enabled": true, + "runOptions": "-u $(id -u):$(id -g) $(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "uid_and_gid": "-u $(id -u):$(id -g)" + }, + "manifest": { + "author": "Yuan Zhe (Caden) Bugh, Mao Tian, Sorel Fitz-Gibbon", + "homePage": "/~https://github.com/uclahs-cds/pipeline-call-sSNV", + "mainScript": "main.nf", + "name": "call-sSNV", + "nextflowVersion": ">=20.07.1", + "version": "8.0.0" + }, + "params": { + "BCFtools_version": "1.17", + "GATK_version": "4.5.0.0", + "MuSE_version": "2.0.4", + "algorithm": [ + "somaticsniper", + "strelka2", + "mutect2", + "muse" + ], + "bam_readcount_version": "0.8.0", + "bgzip_extra_args": "", + "cache_intermediate_pipeline_steps": false, + "call_ssnv_r_version": "dev", + "dataset_id": "TWGSAMIN", + "dbSNP": "/hot/ref/database/dbSNP-155/original/GRCh38/GCF_000001405.39.gz", + "docker_container_registry": "ghcr.io/uclahs-cds", + "docker_image_BCFtools": "ghcr.io/uclahs-cds/bcftools:1.17", + "docker_image_GATK": "broadinstitute/gatk:4.5.0.0", + "docker_image_MuSE": "ghcr.io/uclahs-cds/muse:2.0.4", + "docker_image_bam_readcount": "ghcr.io/uclahs-cds/bam-readcount:0.8.0", + "docker_image_manta": "ghcr.io/uclahs-cds/manta:1.6.0", + "docker_image_r_VennDiagram": "ghcr.io/uclahs-cds/call-ssnv-r:dev", + "docker_image_somaticsniper": "ghcr.io/uclahs-cds/somaticsniper:1.0.5.0", + "docker_image_strelka2": "ghcr.io/uclahs-cds/strelka2:2.9.10", + "docker_image_ubuntu": "ubuntu:20.04", + "docker_image_validate_params": "ghcr.io/uclahs-cds/pipeval:4.0.0-rc.2", + "docker_image_vcf2maf": "ghcr.io/mskcc/vcf2maf/vcf2maf:v1.6.18", + "exome": false, + "filter_mutect_calls_extra_args": "", + "gatk_command_mem_diff": "500 MB", + "germline": true, + "germline_resource_gnomad_vcf": "/hot/ref/tool-specific-input/GATK/GRCh38/af-only-gnomad.hg38.vcf.gz", + 
"germline_resource_gnomad_vcf_index": "/hot/ref/tool-specific-input/GATK/GRCh38/af-only-gnomad.hg38.vcf.gz.tbi", + "input": { + "normal": [ + { + "BAM": "/hot/resource/SMC-HET/normal/bams/A-mini/n2/output/HG002.N-n2.bam" + } + ], + "tumor": [ + { + "BAM": "/hot/resource/SMC-HET/tumours/A-mini/bams/n2/output/S2.T-n2.bam", + "contamination_table": "/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/input/data/A-mini/S2.T-n2_getpileupsummaries_calculatecontamination.table" + } + ] + }, + "intersect_regions": "/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/Homo_sapiens_assembly38_no-decoy.bed.gz", + "intersect_regions_index": "/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/Homo_sapiens_assembly38_no-decoy.bed.gz.tbi", + "log_output_dir": "/tmp/outputs/call-sSNV-8.0.0/4915723/log-call-sSNV-8.0.0-20240226T172259Z", + "manta_version": "1.6.0", + "max_cpus": "32", + "max_memory": "64 GB", + "min_cpus": "1", + "min_memory": "1 MB", + "mutect2_extra_args": "", + "ncbi_build": "GRCh38", + "normal_id": "4915723", + "output_dir": "/tmp/outputs", + "output_dir_base": "/tmp/outputs/call-sSNV-8.0.0/4915723", + "patient_id": "TWGSAMIN000001", + "pipeval_version": "4.0.0-rc.2", + "proc_resource_params": { + "call_sIndel_Manta": { + "cpus": "8", + "memory": "6 GB", + "retry_strategy": { + "memory": { + "operand": "5 GB", + "strategy": "add" + } + } + }, + "call_sSNV_MuSE": { + "cpus": "12", + "memory": "48 GB", + "retry_strategy": { + "memory": { + "operand": "16 GB", + "strategy": "add" + } + } + }, + "call_sSNV_Mutect2": { + "cpus": "1", + "memory": "3 GB", + "retry_strategy": { + "memory": { + "operand": "3 GB", + "strategy": "add" + } + } + }, + "call_sSNV_SomaticSniper": { + "cpus": "1", + "memory": "1 GB", + "retry_strategy": { + "memory": { + "operand": "5 GB", + "strategy": "add" + } + } + }, + "call_sSNV_Strelka2": { + "cpus": "8", + "ext": { + "retry_codes": [] + }, + "memory": "2 GB", + "retry_strategy": { + 
"memory": { + "operand": "12 GB", + "strategy": "add" + } + } + }, + "concat_VCFs_BCFtools": { + "cpus": "2", + "memory": "5 GB", + "retry_strategy": { + "memory": { + "operand": "10 GB", + "strategy": "add" + } + } + }, + "convert_BAM2Pileup_SAMtools": { + "cpus": "1", + "memory": "1 GB", + "retry_strategy": { + "memory": { + "operand": "5 GB", + "strategy": "add" + } + } + }, + "create_IndelCandidate_SAMtools": { + "cpus": "1", + "memory": "1 GB", + "retry_strategy": { + "memory": { + "operand": "5 GB", + "strategy": "add" + } + } + }, + "plot_VennDiagram_R": { + "cpus": "2", + "memory": "5 GB", + "retry_strategy": { + "memory": { + "operand": "10 GB", + "strategy": "add" + } + } + }, + "run_LearnReadOrientationModel_GATK": { + "cpus": "1", + "memory": "16 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_sump_MuSE": { + "cpus": "12", + "memory": "48 GB", + "retry_strategy": { + "memory": { + "operand": "16 GB", + "strategy": "add" + } + } + }, + "run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "reference": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta", + "sample_id": "4915723", + "samples_to_process": [ + { + "contamination_table": "null", + "id": "4915723", + "orig_id": "4915723", + "path": "/hot/resource/SMC-HET/normal/bams/A-mini/n2/output/HG002.N-n2.bam", + "sample_type": "normal" + }, + "{orig_id=4915723, id=4915723, path=/hot/resource/SMC-HET/tumours/A-mini/bams/n2/output/S2.T-n2.bam, contamination_table=/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/input/data/A-mini/S2.T-n2_getpileupsummaries_calculatecontamination.table, sample_type=tumor}" + ], + "save_intermediate_files": true, + "scatter_count": "50", + "single_NT_paired": true, + "somaticsniper_version": "1.0.5.0", + "split_intervals_extra_args": "", + "strelka2_version": "2.9.10", + "tabix_extra_args": "", + "tumor_id": "4915723", + "ubuntu_version": "20.04", + "ucla_cds": true, + 
"use_intersect_regions": true, + "vcf2maf_extra_args": "", + "vcf2maf_version": "v1.6.18", + "work_dir": "/scratch/8543" + }, + "params_schema": { + "algorithm": { + "choices": [ + "mutect2", + "somaticsniper", + "strelka2", + "muse" + ], + "default": [ + "mutect2", + "somaticsniper", + "strelka2", + "muse" + ], + "help": "List of sSNV algorithms", + "required": true, + "type": "List" + }, + "base_resource_update": { + "elements": { + "cpus": { + "help": "List of CPU updates", + "required": false, + "type": "ResourceUpdateList" + }, + "memory": { + "help": "List of memory updates", + "required": false, + "type": "ResourceUpdateList" + } + }, + "help": "User-defined modifications for adjusting base resource allocations for processes", + "required": false, + "type": "ResourceUpdateNamespace" + }, + "bgzip_extra_args": { + "allow_empty": true, + "default": "", + "help": "Additional arguments for bgzip command", + "required": false, + "type": "String" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "dbSNP": { + "help": "", + "mode": "r", + "required": true, + "type": "Path" + }, + "exome": { + "default": false, + "help": "The exome option when running manta and strelka2", + "required": false, + "type": "Bool" + }, + "filter_mutect_calls_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the FilterMutectCalls command", + "required": false, + "type": "String" + }, + "germline_resource_gnomad_vcf": { + "allow_empty": true, + "help": "", + "mode": "r", + "required": false, + "type": "Path" + }, + "input": { + "elements": { + "normal": { + "elements": { + "BAM": { + "help": "Absolute path to normal sample BAM files", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "Normal id/path input", + "required": false, + "type": "BAMEntryList" + }, + "tumor": { + "elements": { + "BAM": { + "help": "Absolute path to tumor sample BAM files", + "mode": "r", + "required": true, + "type": 
"Path" + }, + "contamination_table": { + "help": "Absolute path to contamination.tables", + "mode": "r", + "required": false, + "type": "Path" + } + }, + "help": "Tumor id/path input", + "required": true, + "type": "BAMEntryList" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "intersect_regions": { + "help": "call regions bed file used by mutect2, strelka2 and intersect", + "mode": "r", + "required": true, + "type": "Path" + }, + "mutect2_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the Mutect2 command", + "required": false, + "type": "String" + }, + "ncbi_build": { + "help": "NCBI build version, e.g. GRCh38", + "required": true, + "type": "String" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "patient_id": { + "help": "Patient identifier", + "required": true, + "type": "String" + }, + "reference": { + "help": "Absolute path to reference directory", + "mode": "r", + "required": true, + "type": "Path" + }, + "save_intermediate_files": { + "default": false, + "help": "The option to save the intermediate files", + "required": false, + "type": "Bool" + }, + "scatter_count": { + "help": "", + "required": true, + "type": "Integer" + }, + "split_intervals_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the SplitIntervals command", + "required": false, + "type": "String" + }, + "tabix_extra_args": { + "allow_empty": true, + "default": "", + "help": "Additional arguments for tabix command", + "required": false, + "type": "String" + }, + "vcf2maf_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the vcf2maf command", + "required": false, + "type": "String" + }, + "work_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": false, + "type": "Path" + } + }, + "proc_names": "[Ljava.lang.String;@5984742f", + "process": { + "cache": false, + 
"commonRetryCodes": [ + "104", + "134", + "137", + "139", + "143", + "247" + ], + "containerOptions": { + "1": "--cpu-shares 1024 --cpus $task.cpus", + "2": "--cpu-shares 1024 --cpus $task.cpus", + "3": "--cpu-shares 1024 --cpus $task.cpus", + "closure": "--cpu-shares 1024 --cpus $task.cpus" + }, + "cpus": { + "1": "1", + "2": "2", + "3": "3", + "closure": "closure()" + }, + "errorStrategy": { + "1": "terminate", + "2": "terminate", + "3": "terminate", + "closure": "terminate" + }, + "executor": "local", + "maxRetries": "1", + "withLabel:process_high": { + "cpus": { + "1": "12", + "2": "24", + "3": "32", + "closure": "closure()" + }, + "memory": { + "1": "64 GB", + "2": "64 GB", + "3": "64 GB", + "closure": "closure()" + } + }, + "withLabel:process_low": { + "cpus": { + "1": "2", + "2": "4", + "3": "6", + "closure": "closure()" + }, + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "closure()" + } + }, + "withLabel:process_medium": { + "cpus": { + "1": "6", + "2": "12", + "3": "18", + "closure": "closure()" + }, + "memory": { + "1": "42 GB", + "2": "64 GB", + "3": "64 GB", + "closure": "closure()" + } + }, + "withName:call_sIndel_Manta": { + "cpus": "8", + "memory": { + "1": "6 GB", + "2": "11 GB", + "3": "16 GB", + "closure": "retry_updater(6 GB, add, 5 GB, $task.attempt, memory)" + } + }, + "withName:call_sSNV_MuSE": { + "cpus": "12", + "memory": { + "1": "48 GB", + "2": "64 GB", + "3": "64 GB", + "closure": "retry_updater(48 GB, add, 16 GB, $task.attempt, memory)" + } + }, + "withName:call_sSNV_Mutect2": { + "cpus": "1", + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "retry_updater(3 GB, add, 3 GB, $task.attempt, memory)" + } + }, + "withName:call_sSNV_SomaticSniper": { + "cpus": "1", + "memory": { + "1": "1 GB", + "2": "6 GB", + "3": "11 GB", + "closure": "retry_updater(1 GB, add, 5 GB, $task.attempt, memory)" + } + }, + "withName:call_sSNV_Strelka2": { + "cpus": "8", + "ext": { + "retry_codes": [] + }, + "memory": 
{ + "1": "2 GB", + "2": "14 GB", + "3": "26 GB", + "closure": "retry_updater(2 GB, add, 12 GB, $task.attempt, memory)" + } + }, + "withName:concat_VCFs_BCFtools": { + "cpus": "2", + "memory": { + "1": "5 GB", + "2": "15 GB", + "3": "25 GB", + "closure": "retry_updater(5 GB, add, 10 GB, $task.attempt, memory)" + } + }, + "withName:convert_BAM2Pileup_SAMtools": { + "cpus": "1", + "memory": { + "1": "1 GB", + "2": "6 GB", + "3": "11 GB", + "closure": "retry_updater(1 GB, add, 5 GB, $task.attempt, memory)" + } + }, + "withName:create_IndelCandidate_SAMtools": { + "cpus": "1", + "memory": { + "1": "1 GB", + "2": "6 GB", + "3": "11 GB", + "closure": "retry_updater(1 GB, add, 5 GB, $task.attempt, memory)" + } + }, + "withName:plot_VennDiagram_R": { + "cpus": "2", + "memory": { + "1": "5 GB", + "2": "15 GB", + "3": "25 GB", + "closure": "retry_updater(5 GB, add, 10 GB, $task.attempt, memory)" + } + }, + "withName:run_LearnReadOrientationModel_GATK": { + "cpus": "1", + "memory": { + "1": "16 GB", + "2": "32 GB", + "3": "64 GB", + "closure": "retry_updater(16 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_sump_MuSE": { + "cpus": "12", + "memory": { + "1": "48 GB", + "2": "64 GB", + "3": "64 GB", + "closure": "retry_updater(48 GB, add, 16 GB, $task.attempt, memory)" + } + }, + "withName:run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "report": { + "enabled": true, + "file": "/tmp/outputs/call-sSNV-8.0.0/4915723/log-call-sSNV-8.0.0-20240226T172259Z/nextflow-log/report.html" + }, + "sm_tag": "4915723", + "timeline": { + "enabled": true, + "file": "/tmp/outputs/call-sSNV-8.0.0/4915723/log-call-sSNV-8.0.0-20240226T172259Z/nextflow-log/timeline.html" + }, + "trace": { + "enabled": true, + "file": "/tmp/outputs/call-sSNV-8.0.0/4915723/log-call-sSNV-8.0.0-20240226T172259Z/nextflow-log/trace.txt" + }, + "valid_algorithms": [ + "somaticsniper", + "strelka2", + "mutect2", + "muse" + ], + "workDir": "/scratch/8543", + "yaml": { + "algorithm": 
{ + "choices": [ + "mutect2", + "somaticsniper", + "strelka2", + "muse" + ], + "default": [ + "mutect2", + "somaticsniper", + "strelka2", + "muse" + ], + "help": "List of sSNV algorithms", + "required": true, + "type": "List" + }, + "base_resource_update": { + "elements": { + "cpus": { + "help": "List of CPU updates", + "required": false, + "type": "ResourceUpdateList" + }, + "memory": { + "help": "List of memory updates", + "required": false, + "type": "ResourceUpdateList" + } + }, + "help": "User-defined modifications for adjusting base resource allocations for processes", + "required": false, + "type": "ResourceUpdateNamespace" + }, + "bgzip_extra_args": { + "allow_empty": true, + "default": "", + "help": "Additional arguments for bgzip command", + "required": false, + "type": "String" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "dbSNP": { + "help": "", + "mode": "r", + "required": true, + "type": "Path" + }, + "exome": { + "default": false, + "help": "The exome option when running manta and strelka2", + "required": false, + "type": "Bool" + }, + "filter_mutect_calls_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the FilterMutectCalls command", + "required": false, + "type": "String" + }, + "germline_resource_gnomad_vcf": { + "allow_empty": true, + "help": "", + "mode": "r", + "required": false, + "type": "Path" + }, + "input": { + "elements": { + "normal": { + "elements": { + "BAM": { + "help": "Absolute path to normal sample BAM files", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "Normal id/path input", + "required": false, + "type": "BAMEntryList" + }, + "tumor": { + "elements": { + "BAM": { + "help": "Absolute path to tumor sample BAM files", + "mode": "r", + "required": true, + "type": "Path" + }, + "contamination_table": { + "help": "Absolute path to contamination.tables", + "mode": "r", + "required": false, + "type": "Path" + } + }, + "help": 
"Tumor id/path input", + "required": true, + "type": "BAMEntryList" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "intersect_regions": { + "help": "call regions bed file used by mutect2, strelka2 and intersect", + "mode": "r", + "required": true, + "type": "Path" + }, + "mutect2_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the Mutect2 command", + "required": false, + "type": "String" + }, + "ncbi_build": { + "help": "NCBI build version, e.g. GRCh38", + "required": true, + "type": "String" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "patient_id": { + "help": "Patient identifier", + "required": true, + "type": "String" + }, + "reference": { + "help": "Absolute path to reference directory", + "mode": "r", + "required": true, + "type": "Path" + }, + "save_intermediate_files": { + "default": false, + "help": "The option to save the intermediate files", + "required": false, + "type": "Bool" + }, + "scatter_count": { + "help": "", + "required": true, + "type": "Integer" + }, + "split_intervals_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the SplitIntervals command", + "required": false, + "type": "String" + }, + "tabix_extra_args": { + "allow_empty": true, + "default": "", + "help": "Additional arguments for tabix command", + "required": false, + "type": "String" + }, + "vcf2maf_extra_args": { + "allow_empty": true, + "help": "Additional arguments for the vcf2maf command", + "required": false, + "type": "String" + }, + "work_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": false, + "type": "Path" + } + } + } +} \ No newline at end of file diff --git a/run-nextflow-tests/entry.py b/run-nextflow-tests/entry.py index d18a4a0..22051d6 100755 --- a/run-nextflow-tests/entry.py +++ b/run-nextflow-tests/entry.py @@ -23,4 +23,4 @@ def run_pipeline_test(pipeline: 
Path, test_case: Path): parser.add_argument("test_path") args = parser.parse_args() - run_pipeline_test(Path(args.pipeline_path), Path(args.test_path)) + run_pipeline_test(Path(args.pipeline_path).resolve(), Path(args.test_path)) From f2c678d6de54177604a50222bd706fa7fb58b19a Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 26 Feb 2024 09:30:01 -0800 Subject: [PATCH 6/7] Add tests for pipeline-call-sSV --- run-nextflow-tests/call-sSV-F16.json | 467 +++++++++++++++++++++++++++ run-nextflow-tests/call-sSV-F32.json | 467 +++++++++++++++++++++++++++ 2 files changed, 934 insertions(+) create mode 100644 run-nextflow-tests/call-sSV-F16.json create mode 100644 run-nextflow-tests/call-sSV-F32.json diff --git a/run-nextflow-tests/call-sSV-F16.json b/run-nextflow-tests/call-sSV-F16.json new file mode 100644 index 0000000..7a510f8 --- /dev/null +++ b/run-nextflow-tests/call-sSV-F16.json @@ -0,0 +1,467 @@ +{ + "config": [ + "test/global.config", + "test/config/ssv-all-tools.config" + ], + "params_file": "test/yaml/ssv_test-std-input.yaml", + "cpus": 16, + "memory_gb": 31, + "empty_files": [], + "mapped_files": [], + "nf_params": { + "output_dir": "/tmp/outputs" + }, + "envvars": { + "SLURM_JOB_ID": "8543" + }, + "mocks": { + "check_path": "", + "parse_bam_header": { + "read_group": [ + { + "SM": "4915723" + } + ] + } + }, + "dated_fields": [ + "params.log_output_dir", + "report.file", + "timeline.file", + "trace.file", + "params.date" + ], + "expected_result": { + "docker": { + "all_group_ids": "$(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "enabled": true, + "runOptions": "-u $(id -u):$(id -g) $(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "uid_and_gid": "-u $(id -u):$(id -g)" + }, + "manifest": { + "author": "Yu Pan, Ghouse Mohammed, Mohammed Faizal Eeman Mootor", + "description": "A pipeline to call somatic SVs utilizing Delly and Manta", + "name": "call-sSV", + "version": "6.0.0" + }, + "node_cpus": "16", + 
"node_memory_GB": "31", + "params": { + "algorithm": [ + "delly", + "manta" + ], + "bcftools_version": "1.15.1", + "blcds_registered_dataset": false, + "cache_intermediate_pipeline_steps": false, + "dataset_id": "TWGSAMIN000001", + "dataset_registry_prefix": "/hot/data", + "date": "20240226T172732Z", + "delly_version": "1.2.6", + "docker_container_registry": "ghcr.io/uclahs-cds", + "docker_image_bcftools": "ghcr.io/uclahs-cds/bcftools:1.15.1", + "docker_image_delly": "ghcr.io/uclahs-cds/delly:1.2.6", + "docker_image_manta": "ghcr.io/uclahs-cds/manta:1.6.0", + "docker_image_validate": "ghcr.io/uclahs-cds/pipeval:4.0.0-rc.2", + "exclusion_file": "/hot/ref/tool-specific-input/Delly/hg38/human.hg38.excl.tsv", + "filter_condition": "FILTER=\\='PASS'", + "input": { + "BAM": { + "normal": [ + "/hot/software/pipeline/pipeline-call-sSV/Nextflow/development/input/data/TWGSAMIN000001-N003-S03-F.bam" + ], + "tumor": [ + "/hot/software/pipeline/pipeline-call-sSV/Nextflow/development/input/data/TWGSAMIN000001-T003-S03-F.bam" + ] + } + }, + "log_output_dir": "/tmp/outputs/call-sSV-6.0.0/4915723/log-call-sSV-6.0.0-20240226T172732Z", + "mad_cutoff": "15", + "manta_version": "1.6.0", + "map_qual": "20", + "max_cpus": "16", + "max_memory": "31 GB", + "min_clique_size": "5", + "min_cpus": "1", + "min_memory": "1 MB", + "output_dir": "/tmp/outputs", + "output_dir_base": "/tmp/outputs/call-sSV-6.0.0/4915723", + "pipeval_version": "4.0.0-rc.2", + "proc_resource_params": { + "call_sSV_Delly": { + "cpus": "1", + "memory": "16 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "call_sSV_Manta": { + "cpus": "1", + "memory": "16 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "filter_sSV_Delly": { + "cpus": "1", + "memory": "3 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "query_SampleName_BCFtools": { + "cpus": "1", + "memory": "3 
GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "reference_fasta": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta", + "sample": "4915723", + "sample_id": "TWGSAMIN000001", + "samples_to_process": [ + { + "id": "4915723", + "path": "/hot/software/pipeline/pipeline-call-sSV/Nextflow/development/input/data/TWGSAMIN000001-N003-S03-F.bam", + "sample_type": "normal" + }, + "{id=4915723, path=/hot/software/pipeline/pipeline-call-sSV/Nextflow/development/input/data/TWGSAMIN000001-T003-S03-F.bam, sample_type=tumor}" + ], + "save_intermediate_files": false, + "ucla_cds": true, + "verbose": false, + "work_dir": "/scratch/8543" + }, + "params_schema": { + "algorithm": { + "choices": [ + "delly", + "manta" + ], + "default": [ + "delly", + "manta" + ], + "help": "List of available somatic SV callers", + "required": true, + "type": "List" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "exclusion_file": { + "help": "Absoulte path to an exclusion file", + "mode": "r", + "required": true, + "type": "Path" + }, + "filter_condition": { + "default": "FILTER=\\='PASS'", + "help": "", + "required": true, + "type": "String" + }, + "input": { + "elements": { + "BAM": { + "elements": { + "normal": { + "help": "Input normal BAMs", + "required": false, + "type": "BAMEntryList" + }, + "tumor": { + "help": "Input tumor BAMs", + "required": false, + "type": "BAMEntryList" + } + }, + "help": "Input BAMs for somatic structural variant calling", + "required": true, + "type": "InputBAMNamespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "mad_cutoff": { + "default": "15", + "help": "", + "required": true, + "type": "Integer" + }, + "map_qual": { + "default": "20", + "help": "", + "required": true, + "type": "Integer" + }, + "min_clique_size": { + 
"default": "5", + "help": "", + "required": true, + "type": "Integer" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "reference_fasta": { + "help": "Absolute path to a reference FASTA file", + "mode": "r", + "required": true, + "type": "Path" + }, + "sample_id": { + "help": "Sample ID", + "required": true, + "type": "String" + } + }, + "proc_names": "[Ljava.lang.String;@444ebefd", + "process": { + "cache": false, + "commonRetryCodes": [ + "104", + "134", + "137", + "139", + "143", + "247" + ], + "cpus": { + "1": "1", + "2": "2", + "3": "3", + "closure": "closure()" + }, + "echo": true, + "errorStrategy": { + "1": "terminate", + "2": "terminate", + "3": "terminate", + "closure": "terminate" + }, + "executor": "local", + "maxRetries": "1", + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "closure()" + }, + "withLabel:process_high": { + "cpus": { + "1": "12", + "2": "16", + "3": "16", + "closure": "closure()" + }, + "memory": { + "1": "31 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "closure()" + } + }, + "withLabel:process_low": { + "cpus": { + "1": "2", + "2": "4", + "3": "6", + "closure": "closure()" + }, + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "closure()" + } + }, + "withLabel:process_medium": { + "cpus": { + "1": "6", + "2": "12", + "3": "16", + "closure": "closure()" + }, + "memory": { + "1": "31 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "closure()" + } + }, + "withName:call_sSV_Delly": { + "cpus": "1", + "memory": { + "1": "16 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "retry_updater(16 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:call_sSV_Manta": { + "cpus": "1", + "memory": { + "1": "16 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "retry_updater(16 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:filter_sSV_Delly": { + "cpus": "1", + "memory": { + "1": "3 GB", + 
"2": "6 GB", + "3": "12 GB", + "closure": "retry_updater(3 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:query_SampleName_BCFtools": { + "cpus": "1", + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "12 GB", + "closure": "retry_updater(3 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "report": { + "enabled": true, + "file": "/tmp/outputs/call-sSV-6.0.0/4915723/log-call-sSV-6.0.0-20240226T172732Z/nextflow-log/report.html" + }, + "sample": [ + "4915723" + ], + "timeline": { + "enabled": true, + "file": "/tmp/outputs/call-sSV-6.0.0/4915723/log-call-sSV-6.0.0-20240226T172732Z/nextflow-log/timeline.html" + }, + "trace": { + "enabled": true, + "file": "/tmp/outputs/call-sSV-6.0.0/4915723/log-call-sSV-6.0.0-20240226T172732Z/nextflow-log/trace.txt" + }, + "tz": "sun.util.calendar.ZoneInfo[id=\"UTC\",offset=0,dstSavings=0,useDaylight=false,transitions=0,lastRule=null]", + "workDir": "/scratch/8543", + "yaml": { + "algorithm": { + "choices": [ + "delly", + "manta" + ], + "default": [ + "delly", + "manta" + ], + "help": "List of available somatic SV callers", + "required": true, + "type": "List" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "exclusion_file": { + "help": "Absoulte path to an exclusion file", + "mode": "r", + "required": true, + "type": "Path" + }, + "filter_condition": { + "default": "FILTER=\\='PASS'", + "help": "", + "required": true, + "type": "String" + }, + "input": { + "elements": { + "BAM": { + "elements": { + "normal": { + "help": "Input normal BAMs", + "required": false, + "type": "BAMEntryList" + }, + "tumor": { + "help": "Input tumor BAMs", + "required": false, + "type": "BAMEntryList" + } + }, + "help": "Input BAMs for somatic structural variant calling", + "required": true, + "type": "InputBAMNamespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + 
}, + "mad_cutoff": { + "default": "15", + "help": "", + "required": true, + "type": "Integer" + }, + "map_qual": { + "default": "20", + "help": "", + "required": true, + "type": "Integer" + }, + "min_clique_size": { + "default": "5", + "help": "", + "required": true, + "type": "Integer" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "reference_fasta": { + "help": "Absolute path to a reference FASTA file", + "mode": "r", + "required": true, + "type": "Path" + }, + "sample_id": { + "help": "Sample ID", + "required": true, + "type": "String" + } + } + } +} \ No newline at end of file diff --git a/run-nextflow-tests/call-sSV-F32.json b/run-nextflow-tests/call-sSV-F32.json new file mode 100644 index 0000000..976bd92 --- /dev/null +++ b/run-nextflow-tests/call-sSV-F32.json @@ -0,0 +1,467 @@ +{ + "config": [ + "test/global.config", + "test/config/ssv-all-tools.config" + ], + "params_file": "test/yaml/ssv_test-std-input.yaml", + "cpus": 32, + "memory_gb": 64, + "empty_files": [], + "mapped_files": [], + "nf_params": { + "output_dir": "/tmp/outputs" + }, + "envvars": { + "SLURM_JOB_ID": "8543" + }, + "mocks": { + "check_path": "", + "parse_bam_header": { + "read_group": [ + { + "SM": "4915723" + } + ] + } + }, + "dated_fields": [ + "params.log_output_dir", + "report.file", + "timeline.file", + "trace.file", + "params.date" + ], + "expected_result": { + "docker": { + "all_group_ids": "$(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "enabled": true, + "runOptions": "-u $(id -u):$(id -g) $(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "uid_and_gid": "-u $(id -u):$(id -g)" + }, + "manifest": { + "author": "Yu Pan, Ghouse Mohammed, Mohammed Faizal Eeman Mootor", + "description": "A pipeline to call somatic SVs utilizing Delly and Manta", + "name": "call-sSV", + "version": "6.0.0" + }, + "node_cpus": "32", + "node_memory_GB": "64", + "params": { 
+ "algorithm": [ + "delly", + "manta" + ], + "bcftools_version": "1.15.1", + "blcds_registered_dataset": false, + "cache_intermediate_pipeline_steps": false, + "dataset_id": "TWGSAMIN000001", + "dataset_registry_prefix": "/hot/data", + "date": "20240226T172909Z", + "delly_version": "1.2.6", + "docker_container_registry": "ghcr.io/uclahs-cds", + "docker_image_bcftools": "ghcr.io/uclahs-cds/bcftools:1.15.1", + "docker_image_delly": "ghcr.io/uclahs-cds/delly:1.2.6", + "docker_image_manta": "ghcr.io/uclahs-cds/manta:1.6.0", + "docker_image_validate": "ghcr.io/uclahs-cds/pipeval:4.0.0-rc.2", + "exclusion_file": "/hot/ref/tool-specific-input/Delly/hg38/human.hg38.excl.tsv", + "filter_condition": "FILTER=\\='PASS'", + "input": { + "BAM": { + "normal": [ + "/hot/software/pipeline/pipeline-call-sSV/Nextflow/development/input/data/TWGSAMIN000001-N003-S03-F.bam" + ], + "tumor": [ + "/hot/software/pipeline/pipeline-call-sSV/Nextflow/development/input/data/TWGSAMIN000001-T003-S03-F.bam" + ] + } + }, + "log_output_dir": "/tmp/outputs/call-sSV-6.0.0/4915723/log-call-sSV-6.0.0-20240226T172909Z", + "mad_cutoff": "15", + "manta_version": "1.6.0", + "map_qual": "20", + "max_cpus": "32", + "max_memory": "64 GB", + "min_clique_size": "5", + "min_cpus": "1", + "min_memory": "1 MB", + "output_dir": "/tmp/outputs", + "output_dir_base": "/tmp/outputs/call-sSV-6.0.0/4915723", + "pipeval_version": "4.0.0-rc.2", + "proc_resource_params": { + "call_sSV_Delly": { + "cpus": "1", + "memory": "30 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "call_sSV_Manta": { + "cpus": "1", + "memory": "30 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "filter_sSV_Delly": { + "cpus": "1", + "memory": "30 GB", + "retry_strategy": { + "memory": { + "operand": "2", + "strategy": "exponential" + } + } + }, + "query_SampleName_BCFtools": { + "cpus": "1", + "memory": "30 GB", + "retry_strategy": { + "memory": 
{ + "operand": "2", + "strategy": "exponential" + } + } + }, + "run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "reference_fasta": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta", + "sample": "4915723", + "sample_id": "TWGSAMIN000001", + "samples_to_process": [ + { + "id": "4915723", + "path": "/hot/software/pipeline/pipeline-call-sSV/Nextflow/development/input/data/TWGSAMIN000001-N003-S03-F.bam", + "sample_type": "normal" + }, + "{id=4915723, path=/hot/software/pipeline/pipeline-call-sSV/Nextflow/development/input/data/TWGSAMIN000001-T003-S03-F.bam, sample_type=tumor}" + ], + "save_intermediate_files": false, + "ucla_cds": true, + "verbose": false, + "work_dir": "/scratch/8543" + }, + "params_schema": { + "algorithm": { + "choices": [ + "delly", + "manta" + ], + "default": [ + "delly", + "manta" + ], + "help": "List of available somatic SV callers", + "required": true, + "type": "List" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "exclusion_file": { + "help": "Absoulte path to an exclusion file", + "mode": "r", + "required": true, + "type": "Path" + }, + "filter_condition": { + "default": "FILTER=\\='PASS'", + "help": "", + "required": true, + "type": "String" + }, + "input": { + "elements": { + "BAM": { + "elements": { + "normal": { + "help": "Input normal BAMs", + "required": false, + "type": "BAMEntryList" + }, + "tumor": { + "help": "Input tumor BAMs", + "required": false, + "type": "BAMEntryList" + } + }, + "help": "Input BAMs for somatic structural variant calling", + "required": true, + "type": "InputBAMNamespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "mad_cutoff": { + "default": "15", + "help": "", + "required": true, + "type": "Integer" + }, + "map_qual": { + "default": "20", + "help": "", + "required": true, + "type": "Integer" + }, + "min_clique_size": { + "default": "5", + "help": "", + "required": 
true, + "type": "Integer" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "reference_fasta": { + "help": "Absolute path to a reference FASTA file", + "mode": "r", + "required": true, + "type": "Path" + }, + "sample_id": { + "help": "Sample ID", + "required": true, + "type": "String" + } + }, + "proc_names": "[Ljava.lang.String;@444ebefd", + "process": { + "cache": false, + "commonRetryCodes": [ + "104", + "134", + "137", + "139", + "143", + "247" + ], + "cpus": { + "1": "1", + "2": "2", + "3": "3", + "closure": "closure()" + }, + "echo": true, + "errorStrategy": { + "1": "terminate", + "2": "terminate", + "3": "terminate", + "closure": "terminate" + }, + "executor": "local", + "maxRetries": "1", + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "closure()" + }, + "withLabel:process_high": { + "cpus": { + "1": "12", + "2": "24", + "3": "32", + "closure": "closure()" + }, + "memory": { + "1": "64 GB", + "2": "64 GB", + "3": "64 GB", + "closure": "closure()" + } + }, + "withLabel:process_low": { + "cpus": { + "1": "2", + "2": "4", + "3": "6", + "closure": "closure()" + }, + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "closure()" + } + }, + "withLabel:process_medium": { + "cpus": { + "1": "6", + "2": "12", + "3": "18", + "closure": "closure()" + }, + "memory": { + "1": "42 GB", + "2": "64 GB", + "3": "64 GB", + "closure": "closure()" + } + }, + "withName:call_sSV_Delly": { + "cpus": "1", + "memory": { + "1": "30 GB", + "2": "60 GB", + "3": "64 GB", + "closure": "retry_updater(30 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:call_sSV_Manta": { + "cpus": "1", + "memory": { + "1": "30 GB", + "2": "60 GB", + "3": "64 GB", + "closure": "retry_updater(30 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:filter_sSV_Delly": { + "cpus": "1", + "memory": { + "1": "30 GB", + "2": "60 GB", + "3": "64 GB", + "closure": 
"retry_updater(30 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:query_SampleName_BCFtools": { + "cpus": "1", + "memory": { + "1": "30 GB", + "2": "60 GB", + "3": "64 GB", + "closure": "retry_updater(30 GB, exponential, 2, $task.attempt, memory)" + } + }, + "withName:run_validate_PipeVal": { + "cpus": "1", + "memory": "1 GB" + } + }, + "report": { + "enabled": true, + "file": "/tmp/outputs/call-sSV-6.0.0/4915723/log-call-sSV-6.0.0-20240226T172909Z/nextflow-log/report.html" + }, + "sample": [ + "4915723" + ], + "timeline": { + "enabled": true, + "file": "/tmp/outputs/call-sSV-6.0.0/4915723/log-call-sSV-6.0.0-20240226T172909Z/nextflow-log/timeline.html" + }, + "trace": { + "enabled": true, + "file": "/tmp/outputs/call-sSV-6.0.0/4915723/log-call-sSV-6.0.0-20240226T172909Z/nextflow-log/trace.txt" + }, + "tz": "sun.util.calendar.ZoneInfo[id=\"UTC\",offset=0,dstSavings=0,useDaylight=false,transitions=0,lastRule=null]", + "workDir": "/scratch/8543", + "yaml": { + "algorithm": { + "choices": [ + "delly", + "manta" + ], + "default": [ + "delly", + "manta" + ], + "help": "List of available somatic SV callers", + "required": true, + "type": "List" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "exclusion_file": { + "help": "Absoulte path to an exclusion file", + "mode": "r", + "required": true, + "type": "Path" + }, + "filter_condition": { + "default": "FILTER=\\='PASS'", + "help": "", + "required": true, + "type": "String" + }, + "input": { + "elements": { + "BAM": { + "elements": { + "normal": { + "help": "Input normal BAMs", + "required": false, + "type": "BAMEntryList" + }, + "tumor": { + "help": "Input tumor BAMs", + "required": false, + "type": "BAMEntryList" + } + }, + "help": "Input BAMs for somatic structural variant calling", + "required": true, + "type": "InputBAMNamespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputNamespace" + }, + "mad_cutoff": { + "default": 
"15", + "help": "", + "required": true, + "type": "Integer" + }, + "map_qual": { + "default": "20", + "help": "", + "required": true, + "type": "Integer" + }, + "min_clique_size": { + "default": "5", + "help": "", + "required": true, + "type": "Integer" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "reference_fasta": { + "help": "Absolute path to a reference FASTA file", + "mode": "r", + "required": true, + "type": "Path" + }, + "sample_id": { + "help": "Sample ID", + "required": true, + "type": "String" + } + } + } +} \ No newline at end of file From 6564eb0ff0d0e0a9c733949ec700b697d785f53f Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 26 Feb 2024 10:02:22 -0800 Subject: [PATCH 7/7] Add tests for call-mtSNV --- run-nextflow-tests/call-mtSNV-F16.json | 363 +++++++++++++++++++++++++ run-nextflow-tests/call-mtSNV-F32.json | 363 +++++++++++++++++++++++++ 2 files changed, 726 insertions(+) create mode 100644 run-nextflow-tests/call-mtSNV-F16.json create mode 100644 run-nextflow-tests/call-mtSNV-F32.json diff --git a/run-nextflow-tests/call-mtSNV-F16.json b/run-nextflow-tests/call-mtSNV-F16.json new file mode 100644 index 0000000..ca66e74 --- /dev/null +++ b/run-nextflow-tests/call-mtSNV-F16.json @@ -0,0 +1,363 @@ +{ + "config": [ + "test/global.config", + "test/test-paired-001/test.config" + ], + "params_file": "test/yaml/tcga/01-percent/paired.yaml", + "cpus": 16, + "memory_gb": 31, + "empty_files": [], + "mapped_files": [], + "nf_params": { + "output_dir": "/tmp/outputs" + }, + "envvars": { + "SLURM_JOB_ID": "8543" + }, + "mocks": { + "check_path": "", + "parse_bam_header": { + "read_group": [ + { + "SM": "4915723" + } + ] + } + }, + "dated_fields": [ + "params.log_output_dir", + "params.nextflow_log_dir", + "report.file", + "timeline.file", + "trace.file", + "params.date" + ], + "expected_result": { + "docker": { + "all_group_ids": "$(for i in `id --real --groups`; do 
echo -n \"--group-add=$i \"; done)", + "enabled": true, + "runOptions": "-u $(id -u):$(id -g) $(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "uid_and_gid": "-u $(id -u):$(id -g)" + }, + "manifest": { + "author": [ + "Alfredo Enrique Gonzalez", + "Takafumi Yamaguchi", + "Jieun Oh" + ], + "description": "Pipeline for calling mitochonrdial SNVs", + "name": "call-mtSNV", + "version": "3.0.2" + }, + "params": { + "BAMQL_docker_image": "ghcr.io/uclahs-cds/bamql:1.6.1", + "MToolBox_docker_image": "ghcr.io/uclahs-cds/mtoolbox:1.2.1-b52269e", + "bamql_version": "1.6.1", + "cache_intermediate_pipeline_steps": false, + "call_heteroplasmy_version": "1.0.1", + "dataset_id": "NFTEST-PAIRED-001", + "date": "20240226T180007Z", + "docker_container_registry": "ghcr.io/uclahs-cds", + "gmapdb": "/hot/ref/mitochondria_ref/gmapdb/gmapdb_2021-03-08", + "heteroplasmy_script_docker_image": "ghcr.io/uclahs-cds/call-heteroplasmy-script:1.0.1", + "input": { + "normal": { + "BAM": "/hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_normal_01prct.bam" + }, + "tumor": { + "BAM": "/hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_tumor_01prct.bam" + } + }, + "input_channel_list": [ + { + "BAM": "/hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_normal_01prct.bam", + "sample_ID": "4915723", + "sample_type": "normal" + }, + "{sample_type=tumor, sample_ID=4915723, BAM=/hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_tumor_01prct.bam}" + ], + "input_string": "normal: /hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_normal_01prct.bam\\n tumor: /hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_tumor_01prct.bam", + "log_output_dir": 
"/tmp/outputs/call-mtSNV-3.0.2/4915723/log-call-mtSNV-3.0.2-20240226T180007Z//process-log/", + "max_cpus": "16", + "max_memory": "31 GB", + "mitoCaller2vcf_docker_image": "ghcr.io/uclahs-cds/mitocaller2vcf:1.0.0", + "mitoCaller2vcf_version": "1.0.0", + "mitocaller_docker_image": "ghcr.io/uclahs-cds/mitocaller:1.0.0", + "mitocaller_version": "1.0.0", + "mt_ref_genome_dir": "/hot/ref/mitochondria_ref/genome_fasta/", + "mtoolbox_version": "1.2.1-b52269e", + "nextflow_log_dir": "/tmp/outputs/call-mtSNV-3.0.2/4915723/log-call-mtSNV-3.0.2-20240226T180007Z/", + "output_dir": "/tmp/outputs", + "output_dir_base": "/tmp/outputs/call-mtSNV-3.0.2/4915723/mitoCaller-1.0.0/", + "output_dir_prefix": "/tmp/outputs/call-mtSNV-3.0.2/4915723", + "patient_id": "NFTEST_01_PAIRED", + "pipeval_version": "4.0.0-rc.2", + "sample_id": "4915723", + "sample_mode": "paired", + "save_intermediate_files": false, + "ucla_cds": true, + "work_dir": "/scratch/8543" + }, + "params_schema": { + "cache_intermediate_pipeline_steps": { + "default": false, + "help": "Boolean value to indicate whether or not to cache intermediate pipeline steps", + "required": true, + "type": "Bool" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "gmapdb": { + "default": "/hot/ref/mitochondria_ref/gmapdb/gmapdb_2021-03-08", + "help": "Directory to gmapdb genomic index files", + "mode": "r", + "required": true, + "type": "Path" + }, + "input": { + "elements": { + "normal": { + "elements": { + "BAM": { + "help": "Absolute path to normal sample BAM", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "normal sample id and absolute path to BAM", + "required": false, + "type": "Namespace" + }, + "tumor": { + "elements": { + "BAM": { + "help": "Absolute path to tumor sample BAM", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "tumor sample id and absolute path to BAM", + "required": false, + "type": "Namespace" + } + }, + "help": 
"Input samples", + "required": true, + "type": "InputBAMNamespace" + }, + "mt_ref_genome_dir": { + "default": "/hot/ref/mitochondria_ref/genome_fasta/", + "help": "Absolute path to directory with mtDNA reference genome", + "mode": "r", + "required": true, + "type": "Path" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "patient_id": { + "help": "Patient identifier", + "required": true, + "type": "String" + }, + "save_intermediate_files": { + "default": false, + "help": "Boolean value to indicate whether or not to save intermediate files", + "required": true, + "type": "Bool" + } + }, + "process": { + "cache": true, + "cpus": { + "1": "1", + "2": "2", + "3": "3", + "closure": "closure()" + }, + "echo": true, + "errorStrategy": { + "1": "finish", + "2": "finish", + "3": "finish", + "closure": "finish" + }, + "executor": "local", + "maxRetries": "1", + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "closure()" + }, + "withLabel:process_high": { + "cpus": { + "1": "16", + "2": "16", + "3": "16", + "closure": "closure()" + }, + "memory": { + "1": "31 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "closure()" + } + }, + "withLabel:process_low": { + "cpus": { + "1": "2", + "2": "4", + "3": "6", + "closure": "closure()" + }, + "memory": { + "1": "4 GB", + "2": "8 GB", + "3": "12 GB", + "closure": "closure()" + } + }, + "withLabel:process_medium": { + "cpus": { + "1": "6", + "2": "12", + "3": "16", + "closure": "closure()" + }, + "memory": { + "1": "31 GB", + "2": "31 GB", + "3": "31 GB", + "closure": "closure()" + } + }, + "withName:align_mtDNA_MToolBox": { + "cpus": "16", + "memory": "29 GB" + }, + "withName:call_heteroplasmy": { + "cpus": "1", + "memory": "2 GB" + }, + "withName:call_mtSNV_mitoCaller": { + "cpus": "1", + "memory": "2 GB" + }, + "withName:convert_mitoCaller2vcf_mitoCaller": { + "cpus": "13", + "memory": "16 GB" + }, + "withName:extract_mtDNA_BAMQL": 
{ + "cpus": "1", + "memory": "4 GB" + }, + "withName:validate_input": { + "cpus": "1", + "memory": "2 GB" + }, + "withName:validate_output": { + "cpus": "1", + "memory": "2 GB" + } + }, + "report": { + "enabled": true, + "file": "/tmp/outputs/call-mtSNV-3.0.2/4915723/log-call-mtSNV-3.0.2-20240226T180007Z//nextflow-log/report.html" + }, + "timeline": { + "enabled": true, + "file": "/tmp/outputs/call-mtSNV-3.0.2/4915723/log-call-mtSNV-3.0.2-20240226T180007Z//nextflow-log/timeline.html" + }, + "trace": { + "enabled": true, + "file": "/tmp/outputs/call-mtSNV-3.0.2/4915723/log-call-mtSNV-3.0.2-20240226T180007Z//nextflow-log/trace.txt" + }, + "workDir": "/scratch/8543", + "yaml": { + "cache_intermediate_pipeline_steps": { + "default": false, + "help": "Boolean value to indicate whether or not to cache intermediate pipeline steps", + "required": true, + "type": "Bool" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "gmapdb": { + "default": "/hot/ref/mitochondria_ref/gmapdb/gmapdb_2021-03-08", + "help": "Directory to gmapdb genomic index files", + "mode": "r", + "required": true, + "type": "Path" + }, + "input": { + "elements": { + "normal": { + "elements": { + "BAM": { + "help": "Absolute path to normal sample BAM", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "normal sample id and absolute path to BAM", + "required": false, + "type": "Namespace" + }, + "tumor": { + "elements": { + "BAM": { + "help": "Absolute path to tumor sample BAM", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "tumor sample id and absolute path to BAM", + "required": false, + "type": "Namespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputBAMNamespace" + }, + "mt_ref_genome_dir": { + "default": "/hot/ref/mitochondria_ref/genome_fasta/", + "help": "Absolute path to directory with mtDNA reference genome", + "mode": "r", + "required": true, + "type": "Path" + }, + 
"output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "patient_id": { + "help": "Patient identifier", + "required": true, + "type": "String" + }, + "save_intermediate_files": { + "default": false, + "help": "Boolean value to indicate whether or not to save intermediate files", + "required": true, + "type": "Bool" + } + } + } +} diff --git a/run-nextflow-tests/call-mtSNV-F32.json b/run-nextflow-tests/call-mtSNV-F32.json new file mode 100644 index 0000000..d76ca61 --- /dev/null +++ b/run-nextflow-tests/call-mtSNV-F32.json @@ -0,0 +1,363 @@ +{ + "config": [ + "test/global.config", + "test/test-paired-001/test.config" + ], + "params_file": "test/yaml/tcga/01-percent/paired.yaml", + "cpus": 32, + "memory_gb": 64, + "empty_files": [], + "mapped_files": [], + "nf_params": { + "output_dir": "/tmp/outputs" + }, + "envvars": { + "SLURM_JOB_ID": "8543" + }, + "mocks": { + "check_path": "", + "parse_bam_header": { + "read_group": [ + { + "SM": "4915723" + } + ] + } + }, + "dated_fields": [ + "params.log_output_dir", + "params.nextflow_log_dir", + "report.file", + "timeline.file", + "trace.file", + "params.date" + ], + "expected_result": { + "docker": { + "all_group_ids": "$(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "enabled": true, + "runOptions": "-u $(id -u):$(id -g) $(for i in `id --real --groups`; do echo -n \"--group-add=$i \"; done)", + "uid_and_gid": "-u $(id -u):$(id -g)" + }, + "manifest": { + "author": [ + "Alfredo Enrique Gonzalez", + "Takafumi Yamaguchi", + "Jieun Oh" + ], + "description": "Pipeline for calling mitochonrdial SNVs", + "name": "call-mtSNV", + "version": "3.0.2" + }, + "params": { + "BAMQL_docker_image": "ghcr.io/uclahs-cds/bamql:1.6.1", + "MToolBox_docker_image": "ghcr.io/uclahs-cds/mtoolbox:1.2.1-b52269e", + "bamql_version": "1.6.1", + "cache_intermediate_pipeline_steps": false, + "call_heteroplasmy_version": "1.0.1", + "dataset_id": 
"NFTEST-PAIRED-001", + "date": "20240226T180134Z", + "docker_container_registry": "ghcr.io/uclahs-cds", + "gmapdb": "/hot/ref/mitochondria_ref/gmapdb/gmapdb_2021-03-08", + "heteroplasmy_script_docker_image": "ghcr.io/uclahs-cds/call-heteroplasmy-script:1.0.1", + "input": { + "normal": { + "BAM": "/hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_normal_01prct.bam" + }, + "tumor": { + "BAM": "/hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_tumor_01prct.bam" + } + }, + "input_channel_list": [ + { + "BAM": "/hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_normal_01prct.bam", + "sample_ID": "4915723", + "sample_type": "normal" + }, + "{sample_type=tumor, sample_ID=4915723, BAM=/hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_tumor_01prct.bam}" + ], + "input_string": "normal: /hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_normal_01prct.bam\\n tumor: /hot/software/pipeline/pipeline-call-mtSNV/Nextflow/development/input/data/test-bams/tcga_test_file_tumor_01prct.bam", + "log_output_dir": "/tmp/outputs/call-mtSNV-3.0.2/4915723/log-call-mtSNV-3.0.2-20240226T180134Z//process-log/", + "max_cpus": "32", + "max_memory": "64 GB", + "mitoCaller2vcf_docker_image": "ghcr.io/uclahs-cds/mitocaller2vcf:1.0.0", + "mitoCaller2vcf_version": "1.0.0", + "mitocaller_docker_image": "ghcr.io/uclahs-cds/mitocaller:1.0.0", + "mitocaller_version": "1.0.0", + "mt_ref_genome_dir": "/hot/ref/mitochondria_ref/genome_fasta/", + "mtoolbox_version": "1.2.1-b52269e", + "nextflow_log_dir": "/tmp/outputs/call-mtSNV-3.0.2/4915723/log-call-mtSNV-3.0.2-20240226T180134Z/", + "output_dir": "/tmp/outputs", + "output_dir_base": "/tmp/outputs/call-mtSNV-3.0.2/4915723/mitoCaller-1.0.0/", + "output_dir_prefix": "/tmp/outputs/call-mtSNV-3.0.2/4915723", + 
"patient_id": "NFTEST_01_PAIRED", + "pipeval_version": "4.0.0-rc.2", + "sample_id": "4915723", + "sample_mode": "paired", + "save_intermediate_files": false, + "ucla_cds": true, + "work_dir": "/scratch/8543" + }, + "params_schema": { + "cache_intermediate_pipeline_steps": { + "default": false, + "help": "Boolean value to indicate whether or not to cache intermediate pipeline steps", + "required": true, + "type": "Bool" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "gmapdb": { + "default": "/hot/ref/mitochondria_ref/gmapdb/gmapdb_2021-03-08", + "help": "Directory to gmapdb genomic index files", + "mode": "r", + "required": true, + "type": "Path" + }, + "input": { + "elements": { + "normal": { + "elements": { + "BAM": { + "help": "Absolute path to normal sample BAM", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "normal sample id and absolute path to BAM", + "required": false, + "type": "Namespace" + }, + "tumor": { + "elements": { + "BAM": { + "help": "Absolute path to tumor sample BAM", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "tumor sample id and absolute path to BAM", + "required": false, + "type": "Namespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputBAMNamespace" + }, + "mt_ref_genome_dir": { + "default": "/hot/ref/mitochondria_ref/genome_fasta/", + "help": "Absolute path to directory with mtDNA reference genome", + "mode": "r", + "required": true, + "type": "Path" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "patient_id": { + "help": "Patient identifier", + "required": true, + "type": "String" + }, + "save_intermediate_files": { + "default": false, + "help": "Boolean value to indicate whether or not to save intermediate files", + "required": true, + "type": "Bool" + } + }, + "process": { + "cache": true, + "cpus": { + "1": "1", + "2": "2", 
+ "3": "3", + "closure": "closure()" + }, + "echo": true, + "errorStrategy": { + "1": "finish", + "2": "finish", + "3": "finish", + "closure": "finish" + }, + "executor": "local", + "maxRetries": "1", + "memory": { + "1": "3 GB", + "2": "6 GB", + "3": "9 GB", + "closure": "closure()" + }, + "withLabel:process_high": { + "cpus": { + "1": "32", + "2": "32", + "3": "32", + "closure": "closure()" + }, + "memory": { + "1": "64 GB", + "2": "64 GB", + "3": "64 GB", + "closure": "closure()" + } + }, + "withLabel:process_low": { + "cpus": { + "1": "2", + "2": "4", + "3": "6", + "closure": "closure()" + }, + "memory": { + "1": "4 GB", + "2": "8 GB", + "3": "12 GB", + "closure": "closure()" + } + }, + "withLabel:process_medium": { + "cpus": { + "1": "6", + "2": "12", + "3": "18", + "closure": "closure()" + }, + "memory": { + "1": "42 GB", + "2": "64 GB", + "3": "64 GB", + "closure": "closure()" + } + }, + "withName:align_mtDNA_MToolBox": { + "cpus": "32", + "memory": "60 GB" + }, + "withName:call_heteroplasmy": { + "cpus": "1", + "memory": "2 GB" + }, + "withName:call_mtSNV_mitoCaller": { + "cpus": "1", + "memory": "2 GB" + }, + "withName:convert_mitoCaller2vcf_mitoCaller": { + "cpus": "13", + "memory": "16 GB" + }, + "withName:extract_mtDNA_BAMQL": { + "cpus": "1", + "memory": "4 GB" + }, + "withName:validate_input": { + "cpus": "1", + "memory": "2 GB" + }, + "withName:validate_output": { + "cpus": "1", + "memory": "2 GB" + } + }, + "report": { + "enabled": true, + "file": "/tmp/outputs/call-mtSNV-3.0.2/4915723/log-call-mtSNV-3.0.2-20240226T180134Z//nextflow-log/report.html" + }, + "timeline": { + "enabled": true, + "file": "/tmp/outputs/call-mtSNV-3.0.2/4915723/log-call-mtSNV-3.0.2-20240226T180134Z//nextflow-log/timeline.html" + }, + "trace": { + "enabled": true, + "file": "/tmp/outputs/call-mtSNV-3.0.2/4915723/log-call-mtSNV-3.0.2-20240226T180134Z//nextflow-log/trace.txt" + }, + "workDir": "/scratch/8543", + "yaml": { + "cache_intermediate_pipeline_steps": { + "default": 
false, + "help": "Boolean value to indicate whether or not to cache intermediate pipeline steps", + "required": true, + "type": "Bool" + }, + "dataset_id": { + "help": "Dataset identifier", + "required": true, + "type": "String" + }, + "gmapdb": { + "default": "/hot/ref/mitochondria_ref/gmapdb/gmapdb_2021-03-08", + "help": "Directory to gmapdb genomic index files", + "mode": "r", + "required": true, + "type": "Path" + }, + "input": { + "elements": { + "normal": { + "elements": { + "BAM": { + "help": "Absolute path to normal sample BAM", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "normal sample id and absolute path to BAM", + "required": false, + "type": "Namespace" + }, + "tumor": { + "elements": { + "BAM": { + "help": "Absolute path to tumor sample BAM", + "mode": "r", + "required": true, + "type": "Path" + } + }, + "help": "tumor sample id and absolute path to BAM", + "required": false, + "type": "Namespace" + } + }, + "help": "Input samples", + "required": true, + "type": "InputBAMNamespace" + }, + "mt_ref_genome_dir": { + "default": "/hot/ref/mitochondria_ref/genome_fasta/", + "help": "Absolute path to directory with mtDNA reference genome", + "mode": "r", + "required": true, + "type": "Path" + }, + "output_dir": { + "help": "Absolute path to output directory", + "mode": "w", + "required": true, + "type": "Path" + }, + "patient_id": { + "help": "Patient identifier", + "required": true, + "type": "String" + }, + "save_intermediate_files": { + "default": false, + "help": "Boolean value to indicate whether or not to save intermediate files", + "required": true, + "type": "Bool" + } + } + } +} \ No newline at end of file