Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MRG: make sourmash plot labels/indices arguments make sense #2790

Merged
merged 12 commits into from
Oct 15, 2023
21 changes: 12 additions & 9 deletions doc/command-line.md
Original file line number Diff line number Diff line change
Expand Up @@ -259,15 +259,18 @@ sourmash plot <matrix_file>
```

Options:
```
--pdf -- output PDF files.
--labels -- display the signature names (by default, the filenames) on the plot
--indices -- turn off index display on the plot.
--vmax -- maximum value (default 1.0) for heatmap.
--vmin -- minimum value (default 0.0) for heatmap.
--subsample=<N> -- plot a maximum of <N> samples, randomly chosen.
--subsample-seed=<seed> -- seed for pseudorandom number generator.
```
* `--pdf` -- output PDF files (default is PNG).
* `--labels` -- display the signature names on the plot (default)
* `--indices` -- display sample indices instead of signature names on the plot (overridden by `--labels`).
* `--vmax` -- maximum value (default 1.0) for heatmap.
* `--vmin` -- minimum value (default 0.0) for heatmap.
* `--subsample=<N>` -- plot a maximum of `<N>` samples, randomly chosen.
* `--subsample-seed=<seed>` -- seed for pseudorandom number generator.

Example command lines for label and index display:

* `--indices` will show only numbers;
* `--no-labels --no-indices` will remove all labels!

Example output:

Expand Down
116 changes: 39 additions & 77 deletions doc/plotting-compare.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,70 +51,22 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"\u001b[K\r\n",
"== This is sourmash version 4.8.2. ==\r\n",
"\r",
"\u001b[K== Please cite Brown and Irber (2016), doi:10.21105/joss.00027. ==\r\n",
"\r\n",
"\r",
"\u001b[Kloading '../tests/test-data/demo/SRR2060939_1.sig'\r",
"\r",
"\u001b[KLoaded 1 sigs from '../tests/test-data/demo/SRR2060939_1.sig'\r",
"\r",
"\u001b[Kloading '../tests/test-data/demo/SRR2060939_2.sig'\r",
"\r",
"\u001b[K<<<igs so far. Now reading from file '../tests/test-data/demo/SRR2060939_2.sig'\r",
"\r",
"\u001b[KLoaded 1 sigs from '../tests/test-data/demo/SRR2060939_2.sig'\r",
"\r",
"\u001b[Kloading '../tests/test-data/demo/SRR2241509_1.sig'\r",
"\r",
"\u001b[K<<<igs so far. Now reading from file '../tests/test-data/demo/SRR2241509_1.sig'\r",
"\r",
"\u001b[KLoaded 1 sigs from '../tests/test-data/demo/SRR2241509_1.sig'\r",
"\r",
"\u001b[Kloading '../tests/test-data/demo/SRR2255622_1.sig'\r",
"\r",
"\u001b[K<<<igs so far. Now reading from file '../tests/test-data/demo/SRR2255622_1.sig'\r",
"\r",
"\u001b[KLoaded 1 sigs from '../tests/test-data/demo/SRR2255622_1.sig'\r",
"\r",
"\u001b[Kloading '../tests/test-data/demo/SRR453566_1.sig'\r",
"\r",
"\u001b[K<<<sigs so far. Now reading from file '../tests/test-data/demo/SRR453566_1.sig'\r",
"\r",
"\u001b[KLoaded 1 sigs from '../tests/test-data/demo/SRR453566_1.sig'\r",
"\r",
"\u001b[Kloading '../tests/test-data/demo/SRR453569_1.sig'\r",
"\r",
"\u001b[K<<<sigs so far. Now reading from file '../tests/test-data/demo/SRR453569_1.sig'\r",
"\r",
"\u001b[KLoaded 1 sigs from '../tests/test-data/demo/SRR453569_1.sig'\r",
"\r",
"\u001b[Kloading '../tests/test-data/demo/SRR453570_1.sig'\r",
"\r",
"\u001b[K<<<sigs so far. Now reading from file '../tests/test-data/demo/SRR453570_1.sig'\r",
"\r",
"\u001b[KLoaded 1 sigs from '../tests/test-data/demo/SRR453570_1.sig'\r",
"\r",
"\u001b[K \r",
"\r",
"\u001b[Kloaded 7 signatures total.\r\n",
"\r",
"\u001b[K\r\n",
"0-SRR2060939_1.fa...\t[1. 0.356 0.078 0.086 0. 0. 0. ]\r\n",
"1-SRR2060939_2.fa...\t[0.356 1. 0.072 0.078 0. 0. 0. ]\r\n",
"2-SRR2241509_1.fa...\t[0.078 0.072 1. 0.074 0. 0. 0. ]\r\n",
"3-SRR2255622_1.fa...\t[0.086 0.078 0.074 1. 0. 0. 0. ]\r\n",
"4-SRR453566_1.fas...\t[0. 0. 0. 0. 1. 0.382 0.364]\r\n",
"5-SRR453569_1.fas...\t[0. 0. 0. 0. 0.382 1. 0.386]\r\n",
"6-SRR453570_1.fas...\t[0. 0. 0. 0. 0.364 0.386 1. ]\r\n",
"min similarity in matrix: 0.000\r\n",
"\r",
"\u001b[Ksaving labels to: compare-demo.labels.txt\r\n",
"\r",
"\u001b[Ksaving comparison matrix to: compare-demo\r\n"
"\u001b[K\n",
"== This is sourmash version 4.8.5.dev0. ==\n",
"\u001b[K== Please cite Brown and Irber (2016), doi:10.21105/joss.00027. ==\n",
"\n",
"\u001b[Kloaded 7 signatures total. \n",
"\u001b[K\n",
"0-SRR2060939_1.fa...\t[1. 0.356 0.078 0.086 0. 0. 0. ]\n",
"1-SRR2060939_2.fa...\t[0.356 1. 0.072 0.078 0. 0. 0. ]\n",
"2-SRR2241509_1.fa...\t[0.078 0.072 1. 0.074 0. 0. 0. ]\n",
"3-SRR2255622_1.fa...\t[0.086 0.078 0.074 1. 0. 0. 0. ]\n",
"4-SRR453566_1.fas...\t[0. 0. 0. 0. 1. 0.382 0.364]\n",
"5-SRR453569_1.fas...\t[0. 0. 0. 0. 0.382 1. 0.386]\n",
"6-SRR453570_1.fas...\t[0. 0. 0. 0. 0.364 0.386 1. ]\n",
"min similarity in matrix: 0.000\n",
"\u001b[Ksaving labels to: compare-demo.labels.txt\n",
"\u001b[Ksaving comparison matrix to: compare-demo\n"
]
}
],
Expand Down Expand Up @@ -266,10 +218,15 @@
"source": [
"import scipy.cluster.hierarchy as sch\n",
"\n",
"def plot_composite_matrix(D, labeltext, show_labels=True, show_indices=True,\n",
"def plot_composite_matrix(D, labeltext, show_labels=True,\n",
" vmax=1.0, vmin=0.0, force=False):\n",
" \"\"\"Build a composite plot showing dendrogram + distance matrix/heatmap.\n",
" Returns a matplotlib figure.\"\"\"\n",
"\n",
" Returns a matplotlib figure.\n",
"\n",
" If show_labels is True, display labels. Otherwise, no labels are\n",
" shown on the plot.\n",
" \"\"\"\n",
" if D.max() > 1.0 or D.min() < 0.0:\n",
" error('This matrix doesn\\'t look like a distance matrix - min value {}, max value {}', D.min(), D.max())\n",
" if not force:\n",
Expand All @@ -288,12 +245,8 @@
" # plot dendrogram\n",
" Y = sch.linkage(D, method='single') # centroid\n",
"\n",
" dendrolabels = labeltext\n",
" if not show_labels:\n",
" dendrolabels = [str(i) for i in range(len(labeltext))]\n",
"\n",
" Z1 = sch.dendrogram(Y, orientation='left', labels=dendrolabels,\n",
" no_labels=not show_indices)\n",
" Z1 = sch.dendrogram(Y, orientation='left', labels=labeltext,\n",
" no_labels=not show_labels, get_leaves=True)\n",
" ax1.set_xticks([])\n",
"\n",
" xstart = 0.45\n",
Expand All @@ -302,15 +255,17 @@
" xstart = 0.315\n",
" scale_xstart = xstart + width + 0.01\n",
"\n",
" # plot matrix\n",
" axmatrix = fig.add_axes([xstart, 0.1, width, 0.6])\n",
"\n",
" # (this reorders D by the clustering in Z1)\n",
" # re-order labels along rows, top to bottom\n",
" idx1 = Z1['leaves']\n",
" reordered_labels = [ labeltext[i] for i in idx1 ]\n",
"\n",
" # reorder D by the clustering in the dendrogram\n",
" D = D[idx1, :]\n",
" D = D[:, idx1]\n",
"\n",
" # show matrix\n",
" axmatrix = fig.add_axes([xstart, 0.1, width, 0.6])\n",
"\n",
" im = axmatrix.matshow(D, aspect='auto', origin='lower',\n",
" cmap=pylab.cm.YlGnBu, vmin=vmin, vmax=vmax)\n",
" axmatrix.set_xticks([])\n",
Expand All @@ -320,7 +275,7 @@
" axcolor = fig.add_axes([scale_xstart, 0.1, 0.02, 0.6])\n",
" pylab.colorbar(im, cax=axcolor)\n",
"\n",
" return fig"
" return fig, reordered_labels, D"
]
},
{
Expand All @@ -342,6 +297,13 @@
"source": [
"_ = plot_composite_matrix(matrix, labels)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
16 changes: 12 additions & 4 deletions src/sourmash/cli/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,24 @@ def subparser(subparsers):
help='output PDF; default is PNG'
)
subparser.add_argument(
'--labels', action='store_true',
'--labels', action='store_true', default=None,
help='show sample labels on dendrogram/matrix'
)
subparser.add_argument(
'--no-labels', action='store_false', dest='labels',
help='do not show sample labels'
)
subparser.add_argument(
'--labeltext',
help='filename containing list of labels (overrides signature names)'
help='filename containing list of labels (overrides signature names); implies --labels'
)
subparser.add_argument(
'--indices', action='store_true', default=None,
help='show sample indices but not labels; overridden by --labels'
)
subparser.add_argument(
'--indices', action='store_false',
help='show sample indices but not labels'
'--no-indices', action='store_false', dest='indices',
help='do not show sample indices'
)
subparser.add_argument(
'--vmin', default=0.0, type=float,
Expand Down
64 changes: 40 additions & 24 deletions src/sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,39 +246,55 @@ def plot(args):

# load files
D_filename = args.distances
labelfilename = D_filename + '.labels.txt'

notify(f'loading comparison matrix from {D_filename}...')
D = numpy.load(open(D_filename, 'rb'))
# not sure how to change this to use f-strings
notify('...got {} x {} matrix.', *D.shape)

if args.labeltext:
labelfilename = args.labeltext
notify(f'loading labels from {labelfilename}')
labeltext = [ x.strip() for x in open(labelfilename) ]
if len(labeltext) != D.shape[0]:
error('{} labels != matrix size, exiting', len(labeltext))
sys.exit(-1)

# build filenames, decide on PDF/PNG output
dendrogram_out = os.path.basename(D_filename) + '.dendro'
if args.pdf:
dendrogram_out += '.pdf'
# see sourmash#2790 for details :)
if args.labeltext or args.labels:
display_labels = True
args.labels = True # override => labels always true
elif args.labels is None and not args.indices:
# default to labels
args.labels = True
display_labels = True
elif args.indices or (not args.labels and args.indices is None):
# turn on indices only, not label names
args.indices = True
display_labels = True
else:
dendrogram_out += '.png'
display_labels = False

matrix_out = os.path.basename(D_filename) + '.matrix'
if args.pdf:
matrix_out += '.pdf'
if args.labels:
if args.labeltext:
labelfilename = args.labeltext
else:
labelfilename = D_filename + '.labels.txt'

notify(f'loading labels from {labelfilename}')
labeltext = [ x.strip() for x in open(labelfilename) ]

if len(labeltext) != D.shape[0]:
error('{} labels != matrix size, exiting', len(labeltext))
sys.exit(-1)
elif args.indices:
# construct integer labels
labeltext = [str(i + 1) for i in range(D.shape[0])]
else:
matrix_out += '.png'
assert not display_labels
labeltext = [""] * D.shape[0]

hist_out = os.path.basename(D_filename) + '.hist'
if args.pdf:
hist_out += '.pdf'
ext = '.pdf'
else:
hist_out += '.png'
ext = '.png'

# build filenames, decide on PDF/PNG output
dendrogram_out = os.path.basename(D_filename) + '.dendro' + ext
matrix_out = os.path.basename(D_filename) + '.matrix' + ext
hist_out = os.path.basename(D_filename) + '.hist' + ext

# output to a different directory?
if args.output_dir:
Expand Down Expand Up @@ -314,14 +330,14 @@ def plot(args):

### do clustering
Y = sch.linkage(D, method='single')
sch.dendrogram(Y, orientation='right', labels=labeltext)
sch.dendrogram(Y, orientation='right', labels=labeltext,
no_labels=not display_labels)
fig.savefig(dendrogram_out)
notify(f'wrote dendrogram to: {dendrogram_out}')

### make the dendrogram+matrix:
(fig, rlabels, rmat) = sourmash_fig.plot_composite_matrix(D, labeltext,
show_labels=args.labels,
show_indices=args.indices,
show_labels=display_labels,
vmin=args.vmin,
vmax=args.vmax,
force=args.force)
Expand Down
18 changes: 9 additions & 9 deletions src/sourmash/fig.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /usr/bin/env python
"""
Make plots using the distance matrix+labels output by ``sourmash compare``.
Make plots using the distance matrix+labels output by `sourmash compare`.
"""
from .logging import error, notify
try:
Expand All @@ -20,11 +20,15 @@ def load_matrix_and_labels(basefile):
return (D, labeltext)


def plot_composite_matrix(D, labeltext, show_labels=True, show_indices=True,
def plot_composite_matrix(D, labeltext, show_labels=True,
vmax=1.0, vmin=0.0, force=False):
"""Build a composite plot showing dendrogram + distance matrix/heatmap.

Returns a matplotlib figure."""
Returns a matplotlib figure.

If show_labels is True, display labels. Otherwise, no labels are
shown on the plot.
"""
if D.max() > 1.0 or D.min() < 0.0:
error('This matrix doesn\'t look like a distance matrix - min value {}, max value {}', D.min(), D.max())
if not force:
Expand All @@ -43,12 +47,8 @@ def plot_composite_matrix(D, labeltext, show_labels=True, show_indices=True,
# plot dendrogram
Y = sch.linkage(D, method='single') # centroid

dendrolabels = labeltext
if not show_labels:
dendrolabels = [str(i) for i in range(len(labeltext))]

Z1 = sch.dendrogram(Y, orientation='left', labels=dendrolabels,
no_labels=not show_indices, get_leaves=True)
Z1 = sch.dendrogram(Y, orientation='left', labels=labeltext,
no_labels=not show_labels, get_leaves=True)
ax1.set_xticks([])

xstart = 0.45
Expand Down
Loading