Skip to content

Commit

Permalink
Several updates
Browse files Browse the repository at this point in the history
* Remove --bin* options in vclust.py
* Submodules updated to latest revisions.

Co-authored-by: aziele <a.zielezinski@gmail.com>
  • Loading branch information
agudys and aziele authored Nov 25, 2024
1 parent c5058fc commit 0eb1211
Show file tree
Hide file tree
Showing 11 changed files with 36 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:

steps:
- name: make
run: make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true
run: gmake -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true STATIC_LINK=true
- name: tar artifacts
run: |
mkdir ${DIR}
Expand Down
12 changes: 9 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,24 @@ jobs:
strategy:
fail-fast: false
matrix:
machine: [ubuntu-latest, macOS-12]
machine: [ubuntu-latest]
gmake_install_command: ['gmake --version']
compiler: [12]
include:
- {machine: macOS-13, gmake_install_command: 'brew install make && gmake --version', compiler: 12}
runs-on: ['${{ matrix.machine }}']

steps:
- uses: actions/checkout@v4
with:
submodules: recursive

- name: install gmake
run: ${{ matrix.gmake_install_command }}

- name: make
run: |
make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}}
gmake -j CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} STATIC_LINK=true
- name: tar artifacts
run: tar -cvf vclust.tar ./vclust.py ./test.py ./example ./bin/kmer-db ./bin/lz-ani ./bin/clusty ./bin/multi-fasta-split

Expand All @@ -42,7 +48,7 @@ jobs:
strategy:
fail-fast: false
matrix:
machine: [ubuntu-latest, macOS-12]
machine: [ubuntu-latest, macOS-13]

runs-on: ['${{ matrix.machine }}']

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/self-hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:

steps:
- name: make
run: make -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true
run: gmake -j32 CXX=g++-${{matrix.compiler}} CC=gcc-${{matrix.compiler}} PLATFORM=${{ matrix.platform }} LEIDEN=true STATIC_LINK=true
- name: print info
run: python3 vclust.py info

Expand Down
2 changes: 1 addition & 1 deletion 3rd_party/clusty
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
[![Build and tests](../../workflows/Build%20and%20tests/badge.svg)](../../actions/workflows/main.yml)
[![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)


[![PyPI - Total Downloads](https://static.pepy.tech/personalized-badge/vclust?period=total&units=abbreviation&left_color=grey&right_color=green&left_text=PyPI%20total%20downloads)](https://www.pepy.tech/projects/vclust)
[![PyPI - Downloads](https://img.shields.io/pypi/dm/vclust?label=PyPI%20downloads)](https://pypi.org/project/vclust/)
[![GitHub downloads](https://img.shields.io/github/downloads/refresh-bio/vclust/total.svg?style=flag&label=GitHub%20downloads)](https://github.com/refresh-bio/vclust/releases)
[![Bioconda downloads](https://img.shields.io/conda/dn/bioconda/vclust.svg?style=flag&label=Bioconda%20downloads)](https://anaconda.org/bioconda/vclust)
Expand Down
8 changes: 4 additions & 4 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ prep:
cd 3rd_party/clusty && $(MAKE) -j
cd 3rd_party/ref-utils && $(MAKE) -j
mkdir -p bin
cp 3rd_party/kmer-db/kmer-db ./bin/
cp 3rd_party/lz-ani/lz-ani ./bin/
cp 3rd_party/clusty/clusty ./bin/
cp 3rd_party/ref-utils/multi-fasta-split/multi-fasta-split ./bin/
cp 3rd_party/kmer-db/bin/kmer-db ./bin/
cp 3rd_party/lz-ani/bin/lz-ani ./bin/
cp 3rd_party/clusty/bin/clusty ./bin/
cp 3rd_party/ref-utils/bin/multi-fasta-split ./bin/

clean:
cd 3rd_party/kmer-db && $(MAKE) clean
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ py-modules = ["vclust"]
where = ["./"]

[project]
name = "vclust-test"
name = "vclust"
description = """Fast and accurate tool for calculating \
Average Nucleotide Identity (ANI) and clustering virus \
genomes and metagenomic contigs"""
Expand Down
67 changes: 14 additions & 53 deletions vclust.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import typing
import uuid

__version__ = '1.2.8'
__version__ = '1.2.9'

DEFAULT_THREAD_COUNT = min(multiprocessing.cpu_count(), 64)

Expand Down Expand Up @@ -95,7 +95,7 @@ def ranged_float_type(value):
metavar='<file>',
type=input_path_type,
dest='input_path',
help='Input FASTA file or directory with FASTA files',
help='Input FASTA file or directory of files (gzipped or uncompressed)',
required=True
)
prefilter_required.add_argument(
Expand All @@ -119,16 +119,15 @@ def ranged_float_type(value):
metavar="<int>",
type=int,
default=20,
help='Filter genome pairs based on minimum number of shared k-mers '
'[%(default)s]'
help='Minimum number of shared k-mers between two genomes [%(default)s]'
)
prefilter_parser.add_argument(
'--min-ident',
metavar="<float>",
type=ranged_float_type,
default=0.7,
help='Filter genome pairs based on minimum sequence identity of '
'the shorter sequence (0-1) [%(default)s]'
help='Minimum sequence identity (0-1) between two genomes. Calculated '
'based on the shorter sequence [%(default)s]'
)
prefilter_parser.add_argument(
'--batch-size',
Expand All @@ -144,9 +143,9 @@ def ranged_float_type(value):
metavar="<float>",
type=ranged_float_type,
default=1.0,
help='Fraction of k-mers to analyze for each genome (0-1). A lower '
'value reduces RAM usage and speeds up processing (affects sensitivity) '
'[%(default)s]'
help='Fraction of k-mers to analyze in each genome (0-1). A lower '
'value reduces RAM usage and speeds up processing. By default, all '
'k-mers [%(default)s]'
)
prefilter_parser.add_argument(
'--max-seqs',
Expand All @@ -164,22 +163,6 @@ def ranged_float_type(value):
action="store_true",
help='Keep temporary Kmer-db files [%(default)s]'
)
prefilter_parser.add_argument(
'--bin',
metavar='<file>',
type=pathlib.Path,
dest="bin_kmerdb",
default=f'{BIN_KMERDB}',
help='Path to the Kmer-db binary [%(default)s]'
)
prefilter_parser.add_argument(
'--bin-fasta',
metavar='<file>',
type=pathlib.Path,
dest="bin_fastasplit",
default=f'{BIN_FASTASPLIT}',
help='Path to the multi-fasta-split binary [%(default)s]'
)
prefilter_parser.add_argument(
'-t', '--threads',
metavar="<int>",
Expand Down Expand Up @@ -214,7 +197,7 @@ def ranged_float_type(value):
metavar='<file>',
type=input_path_type,
dest='input_path',
help='Input FASTA file or directory with FASTA files',
help='Input FASTA file or directory of files (gzipped or uncompressed)',
required=True
)
align_required.add_argument(
Expand Down Expand Up @@ -298,14 +281,6 @@ def ranged_float_type(value):
help='Min. reference coverage (aligned fraction) to output (0-1) '
'[%(default)s]'
)
align_parser.add_argument(
'--bin',
metavar='<file>',
type=pathlib.Path,
dest='bin_lzani',
default=f'{BIN_LZANI}',
help='Path to the LZ-ANI binary [%(default)s]'
)
align_parser.add_argument(
'--mal',
metavar='<int>',
Expand Down Expand Up @@ -528,14 +503,6 @@ def ranged_float_type(value):
default=2,
help='Number of iterations for the Leiden algorithm [%(default)s]'
)
cluster_parser.add_argument(
'--bin',
metavar='<file>',
type=pathlib.Path,
dest="bin_clusty",
default=f'{BIN_CLUSTY}',
help='Path to the Clusty binary [%(default)s]'
)
cluster_parser.add_argument(
'-v', '--verbose',
action="store_true",
Expand Down Expand Up @@ -1215,7 +1182,7 @@ def vclust_info() -> None:
output_lines.append(f'{RED}Status: error{RESET}')
output_lines.extend(f" - {name}: {error}" for name, error in errors)
else:
output_lines.append(f'{GREEN}Status: ok{RESET}')
output_lines.append(f'{GREEN}Status: ready{RESET}')

# Output the complete information.
print('\n'.join(output_lines))
Expand Down Expand Up @@ -1263,7 +1230,7 @@ def main():
vclust_info()
# Prefilter
elif args.command == 'prefilter':
args.bin_kmerdb = validate_binary(args.bin_kmerdb)
validate_binary(BIN_KMERDB)
args = validate_args_prefilter(args, parser)
args = validate_args_fasta_input(args, parser)

Expand All @@ -1278,13 +1245,12 @@ def main():
else:
# Split multi-fasta file.
if args.batch_size:
args.bin_fastasplit = validate_binary(args.bin_fastasplit)
validate_binary(BIN_FASTASPLIT)
cmd = cmd_fastasplit(
input_fasta=args.input_path,
out_dir=out_dir,
n=args.batch_size,
verbose=args.verbose,
bin_path=args.bin_fastasplit,
)
p = run(cmd, args.verbose, logger)
for f in out_dir.glob('part_*'):
Expand All @@ -1311,7 +1277,6 @@ def main():
kmer_size=args.k,
kmers_fraction=args.kmers_fraction,
num_threads=args.num_threads,
bin_path=args.bin_kmerdb,
)
p = run(cmd, args.verbose, logger)
db_paths.append(db_path)
Expand All @@ -1333,7 +1298,6 @@ def main():
min_ident=args.min_ident,
max_seqs=args.max_seqs,
num_threads=args.num_threads,
bin_path=args.bin_kmerdb,
)
p = run(cmd, args.verbose, logger)

Expand All @@ -1342,7 +1306,6 @@ def main():
outfile_distance=args.output_path,
min_ident=args.min_ident,
num_threads=args.num_threads,
bin_path=args.bin_kmerdb,
)
p = run(cmd, args.verbose, logger)

Expand All @@ -1353,7 +1316,7 @@ def main():

# Align
elif args.command == 'align':
args.bin_lzani = validate_binary(args.bin_lzani)
validate_binary(BIN_LZANI)
args = validate_args_fasta_input(args, parser)

out_dir = args.output_path.parent / get_uuid()
Expand Down Expand Up @@ -1386,7 +1349,6 @@ def main():
ar=args.ar,
num_threads=args.num_threads,
verbose=args.verbose,
bin_path=args.bin_lzani,
)
p = run(cmd, args.verbose, logger)

Expand All @@ -1396,7 +1358,7 @@ def main():

# Cluster
elif args.command == 'cluster':
args.bin_clusty = validate_binary(args.bin_clusty)
validate_binary(BIN_CLUSTY)
args = validate_args_cluster(args, parser)

cmd = cmd_clusty(
Expand All @@ -1416,7 +1378,6 @@ def main():
leiden_resolution=args.leiden_resolution,
leiden_beta=args.leiden_beta,
leiden_iterations=args.leiden_iterations,
bin_path=args.bin_clusty,
)
p = run(cmd, args.verbose, logger)

Expand Down

0 comments on commit 0eb1211

Please sign in to comment.