Skip to content

Commit

Permalink
[MRG] better handle some pickfile errors (#1924)
Browse files Browse the repository at this point in the history
* better handle some pickfile errors

* Update tests/test_cmd_signature.py

Co-authored-by: Tessa Pierce Ward <bluegenes@users.noreply.github.com>

Co-authored-by: Tessa Pierce Ward <bluegenes@users.noreply.github.com>
  • Loading branch information
ctb and bluegenes authored Apr 6, 2022
1 parent a4d7e2c commit 1229dc1
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 5 deletions.
8 changes: 7 additions & 1 deletion src/sourmash/picklist.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"Picklist code for extracting subsets of signatures."
import csv
import os
from enum import Enum

# set up preprocessing functions for column stuff
Expand Down Expand Up @@ -143,18 +144,23 @@ def load(self, pickfile, column_name):
"load pickset, return num empty vals, and set of duplicate vals."
pickset = self.init()

if not os.path.exists(pickfile) or not os.path.isfile(pickfile):
raise ValueError(f"pickfile '{pickfile}' must exist and be a regular file")

n_empty_val = 0
dup_vals = set()
with open(pickfile, newline='') as csvfile:
x = csvfile.readline()

# skip leading comment line in case there's a manifest header
if x[0] == '#':
if not x or x[0] == '#':
pass
else:
csvfile.seek(0)

r = csv.DictReader(csvfile)
if not r.fieldnames:
raise ValueError(f"empty or improperly formatted pickfile '{pickfile}'")

if column_name not in r.fieldnames:
raise ValueError(f"column '{column_name}' not in pickfile '{pickfile}'")
Expand Down
8 changes: 4 additions & 4 deletions src/sourmash/sourmash_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,15 @@ def load_picklist(args):
if args.picklist:
try:
picklist = SignaturePicklist.from_picklist_args(args.picklist)

notify(f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'")

n_empty_val, dup_vals = picklist.load(picklist.pickfile, picklist.column_name)
except ValueError as exc:
error("ERROR: could not load picklist.")
error(str(exc))
sys.exit(-1)

notify(f"picking column '{picklist.column_name}' of type '{picklist.coltype}' from '{picklist.pickfile}'")

n_empty_val, dup_vals = picklist.load(picklist.pickfile, picklist.column_name)

notify(f"loaded {len(picklist.pickset)} distinct values into picklist.")
if n_empty_val:
notify(f"WARNING: {n_empty_val} empty values in column '{picklist.column_name}' in picklist file")
Expand Down
39 changes: 39 additions & 0 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -1627,6 +1627,45 @@ def test_sig_extract_7_no_ksize(c):
assert len(siglist) == 3


def test_sig_extract_8_empty_picklist_fail(runtmp):
    # 'sig extract' given a zero-byte picklist file should fail, and the
    # failure should be reported on stderr with a specific message.
    sig_paths = [utils.get_test_data(name)
                 for name in ('47.fa.sig', '63.fa.sig')]

    # create the picklist CSV but write nothing into it
    empty_csv = runtmp.output('pick.csv')
    open(empty_csv, 'w', newline='').close()

    picklist_spec = f"{empty_csv}:md5full:md5"

    # the command is expected to exit with an error
    with pytest.raises(SourmashCommandFailed):
        runtmp.sourmash('sig', 'extract', *sig_paths,
                        '--picklist', picklist_spec)

    # echo stderr so it shows up in the pytest report if the assert fails
    stderr_text = runtmp.last_result.err
    print(stderr_text)

    assert "empty or improperly formatted pickfile" in stderr_text


def test_sig_extract_8_nofile_picklist_fail(runtmp):
    # 'sig extract' given a picklist path that was never created should fail,
    # and the failure should be reported on stderr with a specific message.
    sig_paths = [utils.get_test_data(name)
                 for name in ('47.fa.sig', '63.fa.sig')]

    # only the path is built here; this test never writes the file
    missing_csv = runtmp.output('pick.csv')
    picklist_spec = f"{missing_csv}:md5full:md5"

    # the command is expected to exit with an error
    with pytest.raises(SourmashCommandFailed):
        runtmp.sourmash('sig', 'extract', *sig_paths,
                        '--picklist', picklist_spec)

    # echo stderr so it shows up in the pytest report if the assert fails
    stderr_text = runtmp.last_result.err
    print(stderr_text)

    assert "must exist and be a regular file" in stderr_text


def test_sig_extract_8_picklist_md5(runtmp):
# extract 47 from 47, using a picklist w/full md5
sig47 = utils.get_test_data('47.fa.sig')
Expand Down

0 comments on commit 1229dc1

Please sign in to comment.