From 0eb1211e18cf5b3c4420eb9033854be5addbb723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Gudy=C5=9B?= Date: Mon, 25 Nov 2024 14:58:53 +0100 Subject: [PATCH] Several updates * Remove --bin* options in vclust.py * Submodules updated to latest revisions. Co-authored-by: aziele --- .github/workflows/deploy.yml | 2 +- .github/workflows/main.yml | 12 ++++-- .github/workflows/self-hosted.yml | 2 +- 3rd_party/clusty | 2 +- 3rd_party/kmer-db | 2 +- 3rd_party/lz-ani | 2 +- 3rd_party/ref-utils | 2 +- README.md | 2 + makefile | 8 ++-- pyproject.toml | 2 +- vclust.py | 67 +++++++------------------------ 11 files changed, 36 insertions(+), 67 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index d865f7e..3f0d88a 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -52,7 +52,7 @@ jobs: steps: - name: make - run: make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true + run: gmake -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true STATIC_LINK=true - name: tar artifacts run: | mkdir ${DIR} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3d56435..e7b073b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,18 +15,24 @@ jobs: strategy: fail-fast: false matrix: - machine: [ubuntu-latest, macOS-12] + machine: [ubuntu-latest] + gmake_install_command: ['gmake --version'] compiler: [12] + include: + - {machine: macOS-13, gmake_install_command: 'brew install make && gmake --version', compiler: 12} runs-on: ['${{ matrix.machine }}'] steps: - uses: actions/checkout@v4 with: submodules: recursive + + - name: install gmake + run: ${{ matrix.gmake_install_command }} - name: make run: | - make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} + gmake -j CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} STATIC_LINK=true - name: tar artifacts run: 
tar -cvf vclust.tar ./vclust.py ./test.py ./example ./bin/kmer-db ./bin/lz-ani ./bin/clusty ./bin/multi-fasta-split @@ -42,7 +48,7 @@ jobs: strategy: fail-fast: false matrix: - machine: [ubuntu-latest, macOS-12] + machine: [ubuntu-latest, macOS-13] runs-on: ['${{ matrix.machine }}'] diff --git a/.github/workflows/self-hosted.yml b/.github/workflows/self-hosted.yml index 58abdae..9daafb6 100644 --- a/.github/workflows/self-hosted.yml +++ b/.github/workflows/self-hosted.yml @@ -50,7 +50,7 @@ jobs: steps: - name: make - run: make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true + run: gmake -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true STATIC_LINK=true - name: print info run: python3 vclust.py info diff --git a/3rd_party/clusty b/3rd_party/clusty index 7b109d4..6a5430d 160000 --- a/3rd_party/clusty +++ b/3rd_party/clusty @@ -1 +1 @@ -Subproject commit 7b109d42a4c603e26dead5b566d43c0506a858d7 +Subproject commit 6a5430ddf7adc5de7af20438daec61d179e3200b diff --git a/3rd_party/kmer-db b/3rd_party/kmer-db index 742b494..e98e257 160000 --- a/3rd_party/kmer-db +++ b/3rd_party/kmer-db @@ -1 +1 @@ -Subproject commit 742b4942b71271e8b0a1be63405e86b0d1f795ec +Subproject commit e98e257c955ae8795cbc3a512ebbae96e21d6bc9 diff --git a/3rd_party/lz-ani b/3rd_party/lz-ani index e3cc571..c898e1e 160000 --- a/3rd_party/lz-ani +++ b/3rd_party/lz-ani @@ -1 +1 @@ -Subproject commit e3cc571d973aedf634afd349c641dbb1328ea493 +Subproject commit c898e1e6a91dd90c3926fef583feb9ee5a04bb03 diff --git a/3rd_party/ref-utils b/3rd_party/ref-utils index 21d36c7..6f52a54 160000 --- a/3rd_party/ref-utils +++ b/3rd_party/ref-utils @@ -1 +1 @@ -Subproject commit 21d36c7c5a629e23446400d51cfd317c57ac5dc7 +Subproject commit 6f52a541fcf3c4f880c37eaa30cee9f58837729e diff --git a/README.md b/README.md index f61af0a..bbe3191 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ [![Build and 
tests](../../workflows/Build%20and%20tests/badge.svg)](../../actions/workflows/main.yml) [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) + +[![PyPI - Total Downloads](https://static.pepy.tech/personalized-badge/vclust?period=total&units=abbreviation&left_color=grey&right_color=green&left_text=PyPI%20total%20downloads)](https://www.pepy.tech/projects/vclust) [![PyPI - Downloads](https://img.shields.io/pypi/dm/vclust?label=PyPI%20downloads)](https://pypi.org/project/vclust/) [![GitHub downloads](https://img.shields.io/github/downloads/refresh-bio/vclust/total.svg?style=flag&label=GitHub%20downloads)](https://github.com/refresh-bio/vclust/releases) [![Bioconda downloads](https://img.shields.io/conda/dn/bioconda/vclust.svg?style=flag&label=Bioconda%20downloads)](https://anaconda.org/bioconda/vclust) diff --git a/makefile b/makefile index 8a205fd..47f4daa 100644 --- a/makefile +++ b/makefile @@ -14,10 +14,10 @@ prep: cd 3rd_party/clusty && $(MAKE) -j cd 3rd_party/ref-utils && $(MAKE) -j mkdir -p bin - cp 3rd_party/kmer-db/kmer-db ./bin/ - cp 3rd_party/lz-ani/lz-ani ./bin/ - cp 3rd_party/clusty/clusty ./bin/ - cp 3rd_party/ref-utils/multi-fasta-split/multi-fasta-split ./bin/ + cp 3rd_party/kmer-db/bin/kmer-db ./bin/ + cp 3rd_party/lz-ani/bin/lz-ani ./bin/ + cp 3rd_party/clusty/bin/clusty ./bin/ + cp 3rd_party/ref-utils/bin/multi-fasta-split ./bin/ clean: cd 3rd_party/kmer-db && $(MAKE) clean diff --git a/pyproject.toml b/pyproject.toml index 3fa8bdb..494fad5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ py-modules = ["vclust"] where = ["./"] [project] -name = "vclust-test" +name = "vclust" description = """Fast and accurate tool for calculating \ Average Nucleotide Identity (ANI) and clustering virus \ genomes and metagenomic contigs""" diff --git a/vclust.py b/vclust.py index 00c7269..b2dacc3 100755 --- a/vclust.py +++ b/vclust.py @@ -16,7 +16,7 @@ import typing import uuid -__version__ 
= '1.2.8' +__version__ = '1.2.9' DEFAULT_THREAD_COUNT = min(multiprocessing.cpu_count(), 64) @@ -95,7 +95,7 @@ def ranged_float_type(value): metavar='', type=input_path_type, dest='input_path', - help='Input FASTA file or directory with FASTA files', + help='Input FASTA file or directory of files (gzipped or uncompressed)', required=True ) prefilter_required.add_argument( @@ -119,16 +119,15 @@ def ranged_float_type(value): metavar="", type=int, default=20, - help='Filter genome pairs based on minimum number of shared k-mers ' - '[%(default)s]' + help='Minimum number of shared k-mers between two genomes [%(default)s]' ) prefilter_parser.add_argument( '--min-ident', metavar="", type=ranged_float_type, default=0.7, - help='Filter genome pairs based on minimum sequence identity of ' - 'the shorter sequence (0-1) [%(default)s]' + help='Minimum sequence identity (0-1) between two genomes. Calculated ' + 'based on the shorter sequence [%(default)s]' ) prefilter_parser.add_argument( '--batch-size', @@ -144,9 +143,9 @@ def ranged_float_type(value): metavar="", type=ranged_float_type, default=1.0, - help='Fraction of k-mers to analyze for each genome (0-1). A lower ' - 'value reduces RAM usage and speeds up processing (affects sensitivity) ' - '[%(default)s]' + help='Fraction of k-mers to analyze in each genome (0-1). A lower ' + 'value reduces RAM usage and speeds up processing. 
By default, all ' + 'k-mers [%(default)s]' ) prefilter_parser.add_argument( '--max-seqs', @@ -164,22 +163,6 @@ def ranged_float_type(value): action="store_true", help='Keep temporary Kmer-db files [%(default)s]' ) - prefilter_parser.add_argument( - '--bin', - metavar='', - type=pathlib.Path, - dest="bin_kmerdb", - default=f'{BIN_KMERDB}', - help='Path to the Kmer-db binary [%(default)s]' - ) - prefilter_parser.add_argument( - '--bin-fasta', - metavar='', - type=pathlib.Path, - dest="bin_fastasplit", - default=f'{BIN_FASTASPLIT}', - help='Path to the multi-fasta-split binary [%(default)s]' - ) prefilter_parser.add_argument( '-t', '--threads', metavar="", @@ -214,7 +197,7 @@ def ranged_float_type(value): metavar='', type=input_path_type, dest='input_path', - help='Input FASTA file or directory with FASTA files', + help='Input FASTA file or directory of files (gzipped or uncompressed)', required=True ) align_required.add_argument( @@ -298,14 +281,6 @@ def ranged_float_type(value): help='Min. reference coverage (aligned fraction) to output (0-1) ' '[%(default)s]' ) - align_parser.add_argument( - '--bin', - metavar='', - type=pathlib.Path, - dest='bin_lzani', - default=f'{BIN_LZANI}', - help='Path to the LZ-ANI binary [%(default)s]' - ) align_parser.add_argument( '--mal', metavar='', @@ -528,14 +503,6 @@ def ranged_float_type(value): default=2, help='Number of iterations for the Leiden algorithm [%(default)s]' ) - cluster_parser.add_argument( - '--bin', - metavar='', - type=pathlib.Path, - dest="bin_clusty", - default=f'{BIN_CLUSTY}', - help='Path to the Clusty binary [%(default)s]' - ) cluster_parser.add_argument( '-v', '--verbose', action="store_true", @@ -1215,7 +1182,7 @@ def vclust_info() -> None: output_lines.append(f'{RED}Status: error{RESET}') output_lines.extend(f" - {name}: {error}" for name, error in errors) else: - output_lines.append(f'{GREEN}Status: ok{RESET}') + output_lines.append(f'{GREEN}Status: ready{RESET}') # Output the complete information. 
print('\n'.join(output_lines)) @@ -1263,7 +1230,7 @@ def main(): vclust_info() # Prefilter elif args.command == 'prefilter': - args.bin_kmerdb = validate_binary(args.bin_kmerdb) + validate_binary(BIN_KMERDB) args = validate_args_prefilter(args, parser) args = validate_args_fasta_input(args, parser) @@ -1278,13 +1245,12 @@ def main(): else: # Split multi-fasta file. if args.batch_size: - args.bin_fastasplit = validate_binary(args.bin_fastasplit) + validate_binary(BIN_FASTASPLIT) cmd = cmd_fastasplit( input_fasta=args.input_path, out_dir=out_dir, n=args.batch_size, verbose=args.verbose, - bin_path=args.bin_fastasplit, ) p = run(cmd, args.verbose, logger) for f in out_dir.glob('part_*'): @@ -1311,7 +1277,6 @@ def main(): kmer_size=args.k, kmers_fraction=args.kmers_fraction, num_threads=args.num_threads, - bin_path=args.bin_kmerdb, ) p = run(cmd, args.verbose, logger) db_paths.append(db_path) @@ -1333,7 +1298,6 @@ def main(): min_ident=args.min_ident, max_seqs=args.max_seqs, num_threads=args.num_threads, - bin_path=args.bin_kmerdb, ) p = run(cmd, args.verbose, logger) @@ -1342,7 +1306,6 @@ def main(): outfile_distance=args.output_path, min_ident=args.min_ident, num_threads=args.num_threads, - bin_path=args.bin_kmerdb, ) p = run(cmd, args.verbose, logger) @@ -1353,7 +1316,7 @@ def main(): # Align elif args.command == 'align': - args.bin_lzani = validate_binary(args.bin_lzani) + validate_binary(BIN_LZANI) args = validate_args_fasta_input(args, parser) out_dir = args.output_path.parent / get_uuid() @@ -1386,7 +1349,6 @@ def main(): ar=args.ar, num_threads=args.num_threads, verbose=args.verbose, - bin_path=args.bin_lzani, ) p = run(cmd, args.verbose, logger) @@ -1396,7 +1358,7 @@ def main(): # Cluster elif args.command == 'cluster': - args.bin_clusty = validate_binary(args.bin_clusty) + validate_binary(BIN_CLUSTY) args = validate_args_cluster(args, parser) cmd = cmd_clusty( @@ -1416,7 +1378,6 @@ def main(): leiden_resolution=args.leiden_resolution, 
leiden_beta=args.leiden_beta, leiden_iterations=args.leiden_iterations, - bin_path=args.bin_clusty, ) p = run(cmd, args.verbose, logger)