Skip to content

Commit

Permalink
Merge branch 'latest' into gz-tax
Browse files Browse the repository at this point in the history
  • Loading branch information
bluegenes authored Oct 19, 2022
2 parents d8b2ce6 + 2b86b0d commit f699be2
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 22 deletions.
35 changes: 17 additions & 18 deletions src/sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,8 +363,8 @@ def import_csv(args):
notify(f'loaded signature: {name} {s.md5sum()[:8]}')

notify(f'saving {len(siglist)} signatures to JSON')
with FileOutput(args.output, 'wt') as outfp:
sig.save_signatures(siglist, outfp)
with SaveSignaturesToLocation(args.output) as save_sig:
save_sig.add_many(siglist)


def sbt_combine(args):
Expand Down Expand Up @@ -902,8 +902,8 @@ def gather(args):
abund_query_mh = remaining_query.minhash.inflate(orig_query_mh)
remaining_query.minhash = abund_query_mh

with FileOutput(args.output_unassigned, 'wt') as fp:
sig.save_signatures([ remaining_query ], fp)
with SaveSignaturesToLocation(args.output_unassigned) as save_sig:
save_sig.add(remaining_query)

if picklist:
sourmash_args.report_picklist(args, picklist)
Expand Down Expand Up @@ -1077,10 +1077,9 @@ def multigather(args):
result.write(w)

output_matches = output_base + '.matches.sig'
with open(output_matches, 'wt') as fp:
outname = output_matches
notify(f'saving all matching signatures to "{outname}"')
sig.save_signatures([ r.match for r in found ], fp)
with SaveSignaturesToLocation(output_matches) as save_sig:
notify(f"saving all matching signatures to '{output_matches}'")
save_sig.add_many([ r.match for r in found ])

output_unassigned = output_base + '.unassigned.sig'
with open(output_unassigned, 'wt') as fp:
Expand All @@ -1101,8 +1100,9 @@ def multigather(args):
else:
notify(f'saving unassigned hashes to "{output_unassigned}"')

with SaveSignaturesToLocation(output_unassigned) as save_sig:
# CTB: note, multigather does not save abundances
sig.save_signatures([ remaining_query ], fp)
save_sig.add(remaining_query)
n += 1

# fini, next query!
Expand Down Expand Up @@ -1199,11 +1199,10 @@ def do_search():
similarity)

if args.output:
notify(f'saving signature to {args.output}')
with FileOutput(args.output, 'wt') as fp:
streamsig = sig.SourmashSignature(E, filename='stdin',
name=args.name)
sig.save_signatures([streamsig], fp)
notify(f"saving signature to '{args.output}'")
streamsig = sig.SourmashSignature(E, filename='stdin', name=args.name)
with SaveSignaturesToLocation(args.output) as save_sig:
save_sig.add(streamsig)


def migrate(args):
Expand Down Expand Up @@ -1392,8 +1391,8 @@ def prefetch(args):
ident_mh = ident_mh.inflate(orig_query_mh)

ss = sig.SourmashSignature(ident_mh, name=sig_name)
with open(filename, "wt") as fp:
sig.save_signatures([ss], fp)
with SaveSignaturesToLocation(filename) as save_sig:
save_sig.add(ss)

if args.save_unmatched_hashes:
filename = args.save_unmatched_hashes
Expand All @@ -1409,8 +1408,8 @@ def prefetch(args):
noident_mh = noident_mh.inflate(orig_query_mh)

ss = sig.SourmashSignature(noident_mh, name=sig_name)
with open(filename, "wt") as fp:
sig.save_signatures([ss], fp)
with SaveSignaturesToLocation(filename) as save_sig:
save_sig.add(ss)

if picklist:
sourmash_args.report_picklist(args, picklist)
Expand Down
75 changes: 71 additions & 4 deletions tests/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -4141,6 +4141,36 @@ def test_gather_metagenome_output_unassigned(runtmp):
'NC_011294.1' in runtmp.last_result.out))


def test_gather_metagenome_output_unassigned_as_zip(runtmp):
    # gather with --output-unassigned pointing at a .zip location, then
    # feed that zip back in as the query of a second gather.
    first_db = glob.glob(utils.get_test_data('gather/GCF_000195995*g'))[0]
    query = utils.get_test_data('gather/combined.sig')

    runtmp.sourmash('gather', query, first_db, '-k', '21',
                    '--output-unassigned=unassigned.sig.zip')

    stdout = runtmp.last_result.out
    print(stdout)
    print(runtmp.last_result.err)

    assert 'found 1 matches total' in stdout
    assert 'the recovered matches hit 33.2% of the query' in stdout
    assert '4.9 Mbp 33.2% 100.0%' in stdout
    assert 'NC_003198.1 Salmonella enterica subsp' in stdout

    # the unassigned output must have been written as a real zip file
    unassigned_path = runtmp.output('unassigned.sig.zip')
    assert zipfile.is_zipfile(unassigned_path)

    # now examine unassigned: run it against the first database plus a
    # second one and check the reported match line.
    second_db = glob.glob(
        utils.get_test_data('gather/GCF_000009505.1*.sig'))[0]

    runtmp.sourmash('gather', 'unassigned.sig.zip', first_db, second_db,
                    '-k', '21')

    stdout = runtmp.last_result.out
    print(stdout)
    print(runtmp.last_result.err)
    assert '1.3 Mbp 13.6% 28.2%' in stdout
    assert 'NC_011294.1' in stdout


def test_gather_metagenome_output_unassigned_none(runtmp):
# test what happens when there's nothing unassigned to output
testdata_glob = utils.get_test_data('gather/GCF_*.sig')
Expand Down Expand Up @@ -5124,6 +5154,7 @@ def test_sbt_categorize_multiple_ksizes_moltypes(runtmp):


def test_watch_check_num_bounds_negative(runtmp):
# check that watch properly outputs error on negative num
c = runtmp
testdata0 = utils.get_test_data('genome-s10.fa.gz')
testdata1 = utils.get_test_data('genome-s10.fa.gz.sig')
Expand All @@ -5138,6 +5169,7 @@ def test_watch_check_num_bounds_negative(runtmp):


def test_watch_check_num_bounds_less_than_minimum(runtmp):
# check that watch properly outputs warnings on small num
c = runtmp
testdata0 = utils.get_test_data('genome-s10.fa.gz')
testdata1 = utils.get_test_data('genome-s10.fa.gz.sig')
Expand All @@ -5151,6 +5183,7 @@ def test_watch_check_num_bounds_less_than_minimum(runtmp):


def test_watch_check_num_bounds_more_than_maximum(runtmp):
# check that watch properly outputs warnings on large num
c = runtmp
testdata0 = utils.get_test_data('genome-s10.fa.gz')
testdata1 = utils.get_test_data('genome-s10.fa.gz.sig')
Expand All @@ -5163,8 +5196,9 @@ def test_watch_check_num_bounds_more_than_maximum(runtmp):
assert "WARNING: num value should be <= 50000. Continuing anyway." in c.last_result.err


@utils.in_tempdir
def test_watch(c):
def test_watch(runtmp):
# check basic watch functionality
c = runtmp
testdata0 = utils.get_test_data('genome-s10.fa.gz')
testdata1 = utils.get_test_data('genome-s10.fa.gz.sig')
shutil.copyfile(testdata1, c.output('1.sig'))
Expand All @@ -5178,8 +5212,9 @@ def test_watch(c):
assert 'FOUND: genome-s10, at 1.000' in c.last_result.out


@utils.in_tempdir
def test_watch_deduce_ksize(c):
def test_watch_deduce_ksize(runtmp):
# check that watch guesses ksize automatically from database
c = runtmp
testdata0 = utils.get_test_data('genome-s10.fa.gz')
c.run_sourmash('sketch','dna','-p','k=29,num=500', '-o', '1.sig', testdata0)

Expand All @@ -5194,6 +5229,7 @@ def test_watch_deduce_ksize(c):


def test_watch_coverage(runtmp):
# check output details/coverage of found
testdata0 = utils.get_test_data('genome-s10.fa.gz')
testdata1 = utils.get_test_data('genome-s10.fa.gz.sig')
shutil.copyfile(testdata1, runtmp.output('1.sig'))
Expand All @@ -5215,6 +5251,37 @@ def test_watch_coverage(runtmp):
assert 'FOUND: genome-s10, at 1.000' in runtmp.last_result.out


def test_watch_output_sig(runtmp):
    # test watch --output: the saved signature should carry the requested
    # name and the 'stdin' filename.
    genome_fa = utils.get_test_data('genome-s10.fa.gz')
    genome_sig = utils.get_test_data('genome-s10.fa.gz.sig')
    shutil.copyfile(genome_sig, runtmp.output('1.sig'))

    # build the 'zzz' index from the copied signature
    runtmp.sourmash('index', '--dna', '-k', '21', 'zzz', '1.sig')

    # write overlapping slices (500 long, every 100 positions) of the
    # first record as the query FASTA
    query_path = runtmp.output('query.fa')
    with open(query_path, 'wt') as query_fp:
        first_record = list(screed.open(genome_fa))[0]
        for offset in range(0, len(first_record), 100):
            fragment = first_record.sequence[offset:offset+500]
            query_fp.write('>{}\n{}\n'.format(offset, fragment))

    runtmp.sourmash('watch', '--ksize', '21', '--dna', 'zzz', 'query.fa',
                    '-o', 'out.sig', '--name', 'xyzfoo')

    print(runtmp.last_result.out)
    print(runtmp.last_result.err)

    saved_path = runtmp.output('out.sig')
    assert os.path.exists(saved_path)

    loaded = list(sourmash.load_file_as_signatures(saved_path))
    assert len(loaded) == 1

    only_sig = loaded[0]
    assert only_sig.filename == 'stdin'
    assert only_sig.name == 'xyzfoo'


def test_storage_convert(runtmp):
testdata = utils.get_test_data('v2.sbt.json')
shutil.copyfile(testdata, runtmp.output('v2.sbt.json'))
Expand Down

0 comments on commit f699be2

Please sign in to comment.