This repository has been archived by the owner on Nov 17, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6.8k
/
Copy pathsparse.py
1637 lines (1432 loc) · 60.9 KB
/
sparse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# coding: utf-8
# pylint: disable=wildcard-import, unused-wildcard-import, too-many-lines
"""Sparse NDArray API of MXNet."""
from __future__ import absolute_import
from __future__ import division
try:
from __builtin__ import slice as py_slice
from __builtin__ import sum as py_sum
except ImportError:
from builtins import slice as py_slice
from builtins import sum as py_sum
import ctypes
import warnings
import operator
from array import array as native_array
__all__ = ["_ndarray_cls", "csr_matrix", "row_sparse_array",
"BaseSparseNDArray", "CSRNDArray", "RowSparseNDArray",
"add", "subtract", "multiply", "divide"]
import numpy as np
from ..base import NotSupportedForSparseNDArray
from ..base import _LIB, numeric_types
from ..base import c_array_buf, mx_real_t, integer_types
from ..base import mx_uint, NDArrayHandle, check_call
from ..context import Context, current_context
from . import _internal
from . import op
try:
from .gen_sparse import retain as gs_retain # pylint: disable=redefined-builtin
except ImportError:
gs_retain = None
from ._internal import _set_ndarray_class
from .ndarray import NDArray, _storage_type, _DTYPE_NP_TO_MX, _DTYPE_MX_TO_NP
from .ndarray import _STORAGE_TYPE_STR_TO_ID, _STORAGE_TYPE_ROW_SPARSE, _STORAGE_TYPE_CSR
from .ndarray import _STORAGE_TYPE_UNDEFINED, _STORAGE_TYPE_DEFAULT
from .ndarray import zeros as _zeros_ndarray
from .ndarray import array as _array
from .ndarray import _ufunc_helper
try:
import scipy.sparse as spsp
except ImportError:
spsp = None
_STORAGE_AUX_TYPES = {
'row_sparse': [np.int64],
'csr': [np.int64, np.int64]
}
def _new_alloc_handle(stype, shape, ctx, delay_alloc, dtype, aux_types, aux_shapes=None):
"""Return a new handle with specified storage type, shape, dtype and context.
Empty handle is only used to hold results
Returns
-------
handle
A new empty ndarray handle
"""
hdl = NDArrayHandle()
for aux_t in aux_types:
if np.dtype(aux_t) != np.dtype("int64"):
raise NotImplementedError("only int64 is supported for aux types")
aux_type_ids = [int(_DTYPE_NP_TO_MX[np.dtype(aux_t).type]) for aux_t in aux_types]
aux_shapes = [(0,) for aux_t in aux_types] if aux_shapes is None else aux_shapes
aux_shape_lens = [len(aux_shape) for aux_shape in aux_shapes]
aux_shapes = py_sum(aux_shapes, ())
num_aux = mx_uint(len(aux_types))
check_call(_LIB.MXNDArrayCreateSparseEx(
ctypes.c_int(int(_STORAGE_TYPE_STR_TO_ID[stype])),
c_array_buf(mx_uint, native_array('I', shape)),
mx_uint(len(shape)),
ctypes.c_int(ctx.device_typeid),
ctypes.c_int(ctx.device_id),
ctypes.c_int(int(delay_alloc)),
ctypes.c_int(int(_DTYPE_NP_TO_MX[np.dtype(dtype).type])),
num_aux,
c_array_buf(ctypes.c_int, native_array('i', aux_type_ids)),
c_array_buf(mx_uint, native_array('I', aux_shape_lens)),
c_array_buf(mx_uint, native_array('I', aux_shapes)),
ctypes.byref(hdl)))
return hdl
class BaseSparseNDArray(NDArray):
"""The base class of an NDArray stored in a sparse storage format.
See CSRNDArray and RowSparseNDArray for more details.
"""
def __repr__(self):
"""Returns a string representation of the sparse array."""
shape_info = 'x'.join(['%d' % x for x in self.shape])
# The data content is not displayed since the array usually has big shape
return '\n<%s %s @%s>' % (self.__class__.__name__,
shape_info, self.context)
def __add__(self, other):
return add(self, other)
def __sub__(self, other):
return subtract(self, other)
def __mul__(self, other):
return multiply(self, other)
def __div__(self, other):
return divide(self, other)
def __iadd__(self, other):
raise NotImplementedError()
def __isub__(self, other):
raise NotImplementedError()
def __imul__(self, other):
raise NotImplementedError()
def __idiv__(self, other):
raise NotImplementedError()
def __itruediv__(self, other):
raise NotImplementedError()
def _sync_copyfrom(self, source_array):
raise NotImplementedError()
def _at(self, idx):
raise NotSupportedForSparseNDArray(self._at, '[idx]', idx)
def _slice(self, start, stop):
raise NotSupportedForSparseNDArray(self._slice, None, start, stop)
def reshape(self, *shape, **kwargs):
raise NotSupportedForSparseNDArray(self.reshape, None, shape)
@property
def size(self):
# the `size` for a sparse ndarray is ambiguous, hence disabled.
raise NotImplementedError()
def _aux_type(self, i):
"""Data-type of the array's ith aux data.
Returns
-------
numpy.dtype
This BaseSparseNDArray's aux data type.
"""
aux_type = ctypes.c_int()
check_call(_LIB.MXNDArrayGetAuxType(self.handle, i, ctypes.byref(aux_type)))
return _DTYPE_MX_TO_NP[aux_type.value]
@property
def _num_aux(self):
"""The number of aux data used to help store the sparse ndarray.
"""
return len(_STORAGE_AUX_TYPES[self.stype])
@property
def _aux_types(self):
"""The data types of the aux data for the BaseSparseNDArray.
"""
aux_types = []
num_aux = self._num_aux
for i in range(num_aux):
aux_types.append(self._aux_type(i))
return aux_types
def asnumpy(self):
"""Return a dense ``numpy.ndarray`` object with value copied from this array
"""
return self.tostype('default').asnumpy()
def astype(self, dtype, copy=True):
"""Return a copy of the array after casting to a specified type.
Parameters
----------
dtype : numpy.dtype or str
The type of the returned array.
copy : bool
Default `True`. By default, astype always returns a newly
allocated ndarray on the same context. If this is set to
`False`, and the dtype requested is the same as the ndarray's
dtype, the ndarray is returned instead of a copy.
Examples
--------
>>> x = mx.nd.sparse.zeros('row_sparse', (2,3), dtype='float32')
>>> y = x.astype('int32')
>>> y.dtype
<type 'numpy.int32'>
"""
if not copy and np.dtype(dtype) == self.dtype:
return self
res = zeros(shape=self.shape, ctx=self.context,
dtype=dtype, stype=self.stype)
self.copyto(res)
return res
def copyto(self, other):
"""Copies the value of this array to another array.
Parameters
----------
other : NDArray or CSRNDArray or RowSparseNDArray or Context
The destination array or context.
Returns
-------
NDArray or CSRNDArray or RowSparseNDArray
The copied array.
"""
# pylint: disable= no-member, protected-access
if isinstance(other, NDArray):
if other.handle is self.handle:
warnings.warn('You are attempting to copy an array to itself', RuntimeWarning)
return False
return _internal._copyto(self, out=other)
elif isinstance(other, Context):
hret = _ndarray_cls(_new_alloc_handle(self.stype, self.shape, other,
True, self.dtype, self._aux_types))
return _internal._copyto(self, out=hret)
else:
raise TypeError('copyto does not support type ' + str(type(other)))
# pylint: enable= no-member, protected-access
def check_format(self, full_check=True):
"""Check whether the NDArray format is valid.
Parameters
----------
full_check : bool, optional
If `True`, rigorous check, O(N) operations. Otherwise
basic check, O(1) operations (default True).
"""
check_call(_LIB.MXNDArraySyncCheckFormat(self.handle, ctypes.c_bool(full_check)))
def _data(self):
"""A deep copy NDArray of the data array associated with the BaseSparseNDArray.
This function blocks. Do not use it in performance critical code.
"""
self.wait_to_read()
hdl = NDArrayHandle()
check_call(_LIB.MXNDArrayGetDataNDArray(self.handle, ctypes.byref(hdl)))
return NDArray(hdl)
def _aux_data(self, i):
""" Get a deep copy NDArray of the i-th aux data array associated with the
BaseSparseNDArray.
This function blocks. Do not use it in performance critical code.
"""
self.wait_to_read()
hdl = NDArrayHandle()
check_call(_LIB.MXNDArrayGetAuxNDArray(self.handle, i, ctypes.byref(hdl)))
return NDArray(hdl)
# pylint: disable=abstract-method
class CSRNDArray(BaseSparseNDArray):
"""A sparse representation of 2D NDArray in the Compressed Sparse Row format.
A CSRNDArray represents an NDArray as three separate arrays: `data`,
`indptr` and `indices`. It uses the CSR representation where the column indices for
row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their corresponding values are stored
in ``data[indptr[i]:indptr[i+1]]``.
The column indices for a given row are expected to be sorted in ascending order.
Duplicate column entries for the same row are not allowed.
Example
-------
>>> a = mx.nd.array([[0, 1, 0], [2, 0, 0], [0, 0, 0], [0, 0, 3]])
>>> a = a.tostype('csr')
>>> a.data.asnumpy()
array([ 1., 2., 3.], dtype=float32)
>>> a.indices.asnumpy()
array([1, 0, 2])
>>> a.indptr.asnumpy()
array([0, 1, 2, 2, 3])
See Also
--------
csr_matrix: Several ways to construct a CSRNDArray
"""
def __reduce__(self):
return CSRNDArray, (None,), super(CSRNDArray, self).__getstate__()
def __iadd__(self, other):
(self + other).copyto(self)
return self
def __isub__(self, other):
(self - other).copyto(self)
return self
def __imul__(self, other):
(self * other).copyto(self)
return self
def __idiv__(self, other):
(self / other).copyto(self)
return self
def __itruediv__(self, other):
(self / other).copyto(self)
return self
def __getitem__(self, key):
"""x.__getitem__(i) <=> x[i]
Returns a newly created NDArray based on the indexing key.
Parameters
----------
key : int or mxnet.ndarray.NDArray.slice
Indexing key.
Examples
--------
>>> indptr = np.array([0, 2, 3, 6])
>>> indices = np.array([0, 2, 2, 0, 1, 2])
>>> data = np.array([1, 2, 3, 4, 5, 6])
>>> a = mx.nd.sparse.csr_matrix((data, indices, indptr), shape=(3, 3))
>>> a.asnumpy()
array([[ 1., 0., 2.],
[ 0., 0., 3.],
[ 4., 5., 6.]], dtype=float32)
>>> a[1:2].asnumpy()
array([[ 0., 0., 3.]], dtype=float32)
>>> a[1].asnumpy()
array([[ 0., 0., 3.]], dtype=float32)
>>> a[-1].asnumpy()
array([[ 4., 5., 6.]], dtype=float32)
"""
# pylint: disable= no-member, protected-access
if isinstance(key, int):
if key == -1:
begin = self.shape[0] - 1
else:
begin = key
return op.slice(self, begin=begin, end=begin+1)
if isinstance(key, py_slice):
if key.step is not None:
raise ValueError('CSRNDArray only supports continuous slicing on axis 0')
if key.start is not None or key.stop is not None:
begin = key.start if key.start else 0
end = key.stop if key.stop else self.shape[0]
return op.slice(self, begin=begin, end=end)
else:
return self
if isinstance(key, tuple):
raise ValueError('Multi-dimension indexing is not supported')
raise ValueError('Undefined behaviour for {}'.format(key))
# pylint: enable= no-member, protected-access
def __setitem__(self, key, value):
"""x.__setitem__(i, y) <=> x[i]=y
Set self[key] to value. Only slice key [:] is supported.
Parameters
----------
key : mxnet.ndarray.NDArray.slice
The indexing key.
value : NDArray or CSRNDArray or numpy.ndarray
The value to set.
Examples
--------
>>> src = mx.nd.sparse.zeros('csr', (3,3))
>>> src.asnumpy()
array([[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.]], dtype=float32)
>>> # assign CSRNDArray with same storage type
>>> x = mx.nd.ones((3,3)).tostype('csr')
>>> x[:] = src
>>> x.asnumpy()
array([[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]], dtype=float32)
>>> # assign NDArray to CSRNDArray
>>> x[:] = mx.nd.ones((3,3)) * 2
>>> x.asnumpy()
array([[ 2., 2., 2.],
[ 2., 2., 2.],
[ 2., 2., 2.]], dtype=float32)
"""
if not self.writable:
raise ValueError('Failed to assign to a readonly CSRNDArray')
if isinstance(key, py_slice):
if key.step is not None or key.start is not None or key.stop is not None:
raise ValueError('Assignment with slice for CSRNDArray is not ' \
'implemented yet.')
if isinstance(value, NDArray):
# avoid copying to itself
if value.handle is not self.handle:
value.copyto(self)
elif isinstance(value, numeric_types):
raise ValueError("Assigning numeric types to CSRNDArray is " \
"not implemented yet.")
elif isinstance(value, (np.ndarray, np.generic)):
# TODO(haibin/anisub) check scipy.sparse and use _sync_copy_from to
# avoid the temporary copy
warnings.warn('Assigning non-NDArray object to CSRNDArray is not efficient',
RuntimeWarning)
tmp = _array(value)
tmp.copyto(self)
else:
raise TypeError('type %s not supported' % str(type(value)))
else:
assert(isinstance(key, (int, tuple)))
raise Exception('CSRNDArray only supports [:] for assignment')
@property
def indices(self):
"""A deep copy NDArray of the indices array of the CSRNDArray.
This generates a deep copy of the column indices of the current `csr` matrix.
Returns
-------
NDArray
This CSRNDArray's indices array.
"""
return self._aux_data(1)
@property
def indptr(self):
"""A deep copy NDArray of the indptr array of the CSRNDArray.
This generates a deep copy of the `indptr` of the current `csr` matrix.
Returns
-------
NDArray
This CSRNDArray's indptr array.
"""
return self._aux_data(0)
@property
def data(self):
"""A deep copy NDArray of the data array of the CSRNDArray.
This generates a deep copy of the `data` of the current `csr` matrix.
Returns
-------
NDArray
This CSRNDArray's data array.
"""
return self._data()
@indices.setter
def indices(self, indices):
raise NotImplementedError()
@indptr.setter
def indptr(self, indptr):
raise NotImplementedError()
@data.setter
def data(self, data):
raise NotImplementedError()
def tostype(self, stype):
"""Return a copy of the array with chosen storage type.
Returns
-------
NDArray or CSRNDArray
A copy of the array with the chosen storage stype
"""
# pylint: disable= no-member, protected-access
if stype == 'row_sparse':
raise ValueError("cast_storage from csr to row_sparse is not supported")
return op.cast_storage(self, stype=stype)
# pylint: enable= no-member, protected-access
def copyto(self, other):
"""Copies the value of this array to another array.
If ``other`` is a ``NDArray`` or ``CSRNDArray`` object, then ``other.shape`` and
``self.shape`` should be the same. This function copies the value from
``self`` to ``other``.
If ``other`` is a context, a new ``CSRNDArray`` will be first created on
the target context, and the value of ``self`` is copied.
Parameters
----------
other : NDArray or CSRNDArray or Context
The destination array or context.
Returns
-------
NDArray or CSRNDArray
The copied array. If ``other`` is an ``NDArray`` or ``CSRNDArray``, then the return
value and ``other`` will point to the same ``NDArray`` or ``CSRNDArray``.
"""
if isinstance(other, Context):
return super(CSRNDArray, self).copyto(other)
elif isinstance(other, NDArray):
stype = other.stype
if stype in ('default', 'csr'):
return super(CSRNDArray, self).copyto(other)
else:
raise TypeError('copyto does not support destination NDArray stype ' + str(stype))
else:
raise TypeError('copyto does not support type ' + str(type(other)))
def asscipy(self):
"""Returns a ``scipy.sparse.csr.csr_matrix`` object with value copied from this array
Examples
--------
>>> x = mx.nd.sparse.zeros('csr', (2,3))
>>> y = x.asscipy()
>>> type(y)
<type 'scipy.sparse.csr.csr_matrix'>
>>> y
<2x3 sparse matrix of type '<type 'numpy.float32'>'
with 0 stored elements in Compressed Sparse Row format>
"""
data = self.data.asnumpy()
indices = self.indices.asnumpy()
indptr = self.indptr.asnumpy()
if not spsp:
raise ImportError("scipy is not available. \
Please check if the scipy python bindings are installed.")
return spsp.csr_matrix((data, indices, indptr), shape=self.shape, dtype=self.dtype)
# pylint: disable=abstract-method
class RowSparseNDArray(BaseSparseNDArray):
"""A sparse representation of a set of NDArray row slices at given indices.
A RowSparseNDArray represents a multidimensional NDArray using two separate arrays: `data` and
`indices`. The number of dimensions has to be at least 2.
- data: an NDArray of any dtype with shape [D0, D1, ..., Dn].
- indices: a 1-D int64 NDArray with shape [D0] with values sorted in ascending order.
The `indices` stores the indices of the row slices with non-zeros,
while the values are stored in `data`. The corresponding NDArray ``dense``
represented by RowSparseNDArray ``rsp`` has
``dense[rsp.indices[i], :, :, :, ...] = rsp.data[i, :, :, :, ...]``
>>> dense.asnumpy()
array([[ 1., 2., 3.],
[ 0., 0., 0.],
[ 4., 0., 5.],
[ 0., 0., 0.],
[ 0., 0., 0.]], dtype=float32)
>>> rsp = dense.tostype('row_sparse')
>>> rsp.indices.asnumpy()
array([0, 2], dtype=int64)
>>> rsp.data.asnumpy()
array([[ 1., 2., 3.],
[ 4., 0., 5.]], dtype=float32)
A RowSparseNDArray is typically used to represent non-zero row slices of a large NDArray
of shape [LARGE0, D1, .. , Dn] where LARGE0 >> D0 and most row slices are zeros.
RowSparseNDArray is used principally in the definition of gradients for operations
that have sparse gradients (e.g. sparse dot and sparse embedding).
See Also
--------
row_sparse_array: Several ways to construct a RowSparseNDArray
"""
def __reduce__(self):
return RowSparseNDArray, (None,), super(RowSparseNDArray, self).__getstate__()
def __iadd__(self, other):
(self + other).copyto(self)
return self
def __isub__(self, other):
(self - other).copyto(self)
return self
def __imul__(self, other):
(self * other).copyto(self)
return self
def __idiv__(self, other):
(self / other).copyto(self)
return self
def __itruediv__(self, other):
(self / other).copyto(self)
return self
def __getitem__(self, key):
"""x.__getitem__(i) <=> x[i]
Returns a sliced view of this array.
Parameters
----------
key : mxnet.ndarray.NDArray.slice
Indexing key.
Examples
--------
>>> x = mx.nd.sparse.zeros('row_sparse', (2, 3))
>>> x[:].asnumpy()
array([[ 0., 0., 0.],
[ 0., 0., 0.]], dtype=float32)
"""
if isinstance(key, int):
raise Exception("__getitem__ with int key is not implemented for RowSparseNDArray yet")
if isinstance(key, py_slice):
if key.step is not None or key.start is not None or key.stop is not None:
raise Exception('RowSparseNDArray only supports [:] for __getitem__')
else:
return self
if isinstance(key, tuple):
raise ValueError('Multi-dimension indexing is not supported')
raise ValueError('Undefined behaviour for {}'.format(key))
def __setitem__(self, key, value):
"""x.__setitem__(i, y) <=> x[i]=y
Set self[key] to value. Only slice key [:] is supported.
Parameters
----------
key : mxnet.ndarray.NDArray.slice
The indexing key.
value : NDArray or numpy.ndarray
The value to set.
Examples
--------
>>> src = mx.nd.row_sparse([[1, 0, 2], [4, 5, 6]], [0, 2], (3,3))
>>> src.asnumpy()
array([[ 1., 0., 2.],
[ 0., 0., 0.],
[ 4., 5., 6.]], dtype=float32)
>>> # assign RowSparseNDArray with same storage type
>>> x = mx.nd.sparse.zeros('row_sparse', (3,3))
>>> x[:] = src
>>> x.asnumpy()
array([[ 1., 0., 2.],
[ 0., 0., 0.],
[ 4., 5., 6.]], dtype=float32)
>>> # assign NDArray to RowSparseNDArray
>>> x[:] = mx.nd.ones((3,3))
>>> x.asnumpy()
array([[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]], dtype=float32)
"""
# pylint: disable= no-member, protected-access
if not self.writable:
raise ValueError('Failed to assign to a readonly RowSparseNDArray')
if isinstance(key, py_slice):
if key.step is not None or key.start is not None or key.stop is not None:
raise ValueError('Assignment with slice for RowSparseNDArray ' \
'is not implmented yet.')
if isinstance(value, NDArray):
# avoid copying to itself
if value.handle is not self.handle:
value.copyto(self)
elif isinstance(value, numeric_types):
_internal._set_value(float(value), out=self)
elif isinstance(value, (np.ndarray, np.generic)):
warnings.warn('Assigning non-NDArray object to RowSparseNDArray is not efficient',
RuntimeWarning)
tmp = _array(value)
tmp.copyto(self)
else:
raise TypeError('type %s not supported' % str(type(value)))
else:
assert(isinstance(key, (int, tuple)))
raise TypeError('RowSparseNDArray only supports [:] for assignment')
# pylint: enable= no-member, protected-access
@property
def indices(self):
"""A deep copy NDArray of the indices array of the RowSparseNDArray.
This generates a deep copy of the row indices of the current `row_sparse` matrix.
Returns
-------
NDArray
This RowSparseNDArray's indices array.
"""
return self._aux_data(0)
@property
def data(self):
"""A deep copy NDArray of the data array of the RowSparseNDArray.
This generates a deep copy of the `data` of the current `row_sparse` matrix.
Returns
-------
NDArray
This RowSparseNDArray's data array.
"""
return self._data()
@indices.setter
def indices(self, indices):
raise NotImplementedError()
@data.setter
def data(self, data):
raise NotImplementedError()
def tostype(self, stype):
"""Return a copy of the array with chosen storage type.
Returns
-------
NDArray or RowSparseNDArray
A copy of the array with the chosen storage stype
"""
# pylint: disable= no-member, protected-access
if stype == 'csr':
raise ValueError("cast_storage from row_sparse to csr is not supported")
return op.cast_storage(self, stype=stype)
# pylint: enable= no-member, protected-access
def copyto(self, other):
"""Copies the value of this array to another array.
If ``other`` is a ``NDArray`` or ``RowSparseNDArray`` object, then ``other.shape``
and ``self.shape`` should be the same. This function copies the value from
``self`` to ``other``.
If ``other`` is a context, a new ``RowSparseNDArray`` will be first created on
the target context, and the value of ``self`` is copied.
Parameters
----------
other : NDArray or RowSparseNDArray or Context
The destination array or context.
Returns
-------
NDArray or RowSparseNDArray
The copied array. If ``other`` is an ``NDArray`` or ``RowSparseNDArray``, then the
return value and ``other`` will point to the same ``NDArray`` or ``RowSparseNDArray``.
"""
if isinstance(other, Context):
return super(RowSparseNDArray, self).copyto(other)
elif isinstance(other, NDArray):
stype = other.stype
if stype in ('default', 'row_sparse'):
return super(RowSparseNDArray, self).copyto(other)
else:
raise TypeError('copyto does not support destination NDArray stype ' + str(stype))
else:
raise TypeError('copyto does not support type ' + str(type(other)))
def retain(self, *args, **kwargs):
"""Convenience fluent method for :py:func:`retain`.
The arguments are the same as for :py:func:`retain`, with
this array as data.
"""
if not gs_retain:
raise ImportError("gen_sparse could not be imported")
return gs_retain(*args, **kwargs)
def _prepare_src_array(source_array, dtype):
"""Prepare `source_array` so that it can be used to construct NDArray.
`source_array` is converted to a `np.ndarray` if it's neither an `NDArray` \
nor an `np.ndarray`.
"""
if not isinstance(source_array, NDArray) and not isinstance(source_array, np.ndarray):
try:
source_array = np.array(source_array, dtype=dtype)
except:
raise TypeError('values must be array like object')
return source_array
def _prepare_default_dtype(src_array, dtype):
"""Prepare the value of dtype if `dtype` is None. If `src_array` is an NDArray, numpy.ndarray
or scipy.sparse.csr.csr_matrix, return src_array.dtype. float32 is returned otherwise."""
if dtype is None:
if isinstance(src_array, (NDArray, np.ndarray)):
dtype = src_array.dtype
elif spsp and isinstance(src_array, spsp.csr.csr_matrix):
dtype = src_array.dtype
else:
dtype = mx_real_t
return dtype
def _check_shape(s1, s2):
"""check s1 == s2 if both are not None"""
if s1 and s2 and s1 != s2:
raise ValueError("Shape mismatch detected. " + str(s1) + " v.s. " + str(s2))
def csr_matrix(arg1, shape=None, ctx=None, dtype=None):
"""Creates a `CSRNDArray`, an 2D array with compressed sparse row (CSR) format.
The CSRNDArray can be instantiated in several ways:
- csr_matrix(D):
to construct a CSRNDArray with a dense 2D array ``D``
- **D** (*array_like*) - An object exposing the array interface, an object whose \
`__array__` method returns an array, or any (nested) sequence.
- **ctx** (*Context, optional*) - Device context \
(default is the current default context).
- **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \
The default dtype is ``D.dtype`` if ``D`` is an NDArray or numpy.ndarray, \
float32 otherwise.
- csr_matrix(S)
to construct a CSRNDArray with a sparse 2D array ``S``
- **S** (*CSRNDArray or scipy.sparse.csr.csr_matrix*) - A sparse matrix.
- **ctx** (*Context, optional*) - Device context \
(default is the current default context).
- **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \
The default dtype is ``S.dtype``.
- csr_matrix((M, N))
to construct an empty CSRNDArray with shape ``(M, N)``
- **M** (*int*) - Number of rows in the matrix
- **N** (*int*) - Number of columns in the matrix
- **ctx** (*Context, optional*) - Device context \
(default is the current default context).
- **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \
The default dtype is float32.
- csr_matrix((data, indices, indptr))
to construct a CSRNDArray based on the definition of compressed sparse row format \
using three separate arrays, \
where the column indices for row i are stored in ``indices[indptr[i]:indptr[i+1]]`` \
and their corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``. \
The column indices for a given row are expected to be **sorted in ascending order.** \
Duplicate column entries for the same row are not allowed.
- **data** (*array_like*) - An object exposing the array interface, which \
holds all the non-zero entries of the matrix in row-major order.
- **indices** (*array_like*) - An object exposing the array interface, which \
stores the column index for each non-zero element in ``data``.
- **indptr** (*array_like*) - An object exposing the array interface, which \
stores the offset into ``data`` of the first non-zero element number of each \
row of the matrix.
- **shape** (*tuple of int, optional*) - The shape of the array. The default \
shape is inferred from the indices and indptr arrays.
- **ctx** (*Context, optional*) - Device context \
(default is the current default context).
- **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \
The default dtype is ``data.dtype`` if ``data`` is an NDArray or numpy.ndarray, \
float32 otherwise.
- csr_matrix((data, (row, col)))
to construct a CSRNDArray based on the COOrdinate format \
using three seperate arrays, \
where ``row[i]`` is the row index of the element, \
``col[i]`` is the column index of the element \
and ``data[i]`` is the data corresponding to the element. All the missing \
elements in the input are taken to be zeroes.
- **data** (*array_like*) - An object exposing the array interface, which \
holds all the non-zero entries of the matrix in COO format.
- **row** (*array_like*) - An object exposing the array interface, which \
stores the row index for each non zero element in ``data``.
- **col** (*array_like*) - An object exposing the array interface, which \
stores the col index for each non zero element in ``data``.
- **shape** (*tuple of int, optional*) - The shape of the array. The default \
shape is inferred from the ``row`` and ``col`` arrays.
- **ctx** (*Context, optional*) - Device context \
(default is the current default context).
- **dtype** (*str or numpy.dtype, optional*) - The data type of the output array. \
The default dtype is float32.
Parameters
----------
arg1: tuple of int, tuple of array_like, array_like, CSRNDArray, scipy.sparse.csr_matrix, \
scipy.sparse.coo_matrix, tuple of int or tuple of array_like
The argument to help instantiate the csr matrix. See above for further details.
shape : tuple of int, optional
The shape of the csr matrix.
ctx: Context, optional
Device context (default is the current default context).
dtype: str or numpy.dtype, optional
The data type of the output array.
Returns
-------
CSRNDArray
A `CSRNDArray` with the `csr` storage representation.
Example
-------
>>> a = mx.nd.sparse.csr_matrix(([1, 2, 3], [1, 0, 2], [0, 1, 2, 2, 3]), shape=(4, 3))
>>> a.asnumpy()
array([[ 0., 1., 0.],
[ 2., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 3.]], dtype=float32)
See Also
--------
CSRNDArray : MXNet NDArray in compressed sparse row format.
"""
# construct a csr matrix from (M, N) or (data, indices, indptr)
if isinstance(arg1, tuple):
arg_len = len(arg1)
if arg_len == 2:
# construct a sparse csr matrix from
# scipy coo matrix if input format is coo
if isinstance(arg1[1], tuple) and len(arg1[1]) == 2:
data, (row, col) = arg1
if isinstance(data, NDArray):
data = data.asnumpy()
if isinstance(row, NDArray):
row = row.asnumpy()
if isinstance(col, NDArray):
col = col.asnumpy()
coo = spsp.coo_matrix((data, (row, col)), shape=shape)
_check_shape(coo.shape, shape)
csr = coo.tocsr()
return array(csr, ctx=ctx, dtype=dtype)
else:
# empty matrix with shape
_check_shape(arg1, shape)
return empty('csr', arg1, ctx=ctx, dtype=dtype)
elif arg_len == 3:
# data, indices, indptr
return _csr_matrix_from_definition(arg1[0], arg1[1], arg1[2], shape=shape,
ctx=ctx, dtype=dtype)
else:
raise ValueError("Unexpected length of input tuple: " + str(arg_len))
else:
# construct a csr matrix from a sparse / dense one
if isinstance(arg1, CSRNDArray) or (spsp and isinstance(arg1, spsp.csr.csr_matrix)):
# construct a csr matrix from scipy or CSRNDArray
_check_shape(arg1.shape, shape)
return array(arg1, ctx=ctx, dtype=dtype)
elif isinstance(arg1, RowSparseNDArray):
raise ValueError("Unexpected input type: RowSparseNDArray")
else:
# construct a csr matrix from a dense one
# prepare default ctx and dtype since mx.nd.array doesn't use default values
# based on source_array
dtype = _prepare_default_dtype(arg1, dtype)
# create dns array with provided dtype. ctx is not passed since copy across
# ctx requires dtype to be the same
dns = _array(arg1, dtype=dtype)
if ctx is not None and dns.context != ctx:
dns = dns.as_in_context(ctx)
_check_shape(dns.shape, shape)
return dns.tostype('csr')
def _csr_matrix_from_definition(data, indices, indptr, shape=None, ctx=None,
dtype=None, indices_type=None, indptr_type=None):
"""Create a `CSRNDArray` based on data, indices and indptr"""
# pylint: disable= no-member, protected-access
storage_type = 'csr'
# context
ctx = current_context() if ctx is None else ctx
# types
dtype = _prepare_default_dtype(data, dtype)
indptr_type = _STORAGE_AUX_TYPES[storage_type][0] if indptr_type is None else indptr_type
indices_type = _STORAGE_AUX_TYPES[storage_type][1] if indices_type is None else indices_type
# prepare src array and types
data = _prepare_src_array(data, dtype)
indptr = _prepare_src_array(indptr, indptr_type)
indices = _prepare_src_array(indices, indices_type)
# TODO(junwu): Convert data, indptr, and indices to mxnet NDArrays
# if they are not for now. In the future, we should provide a c-api
# to accept np.ndarray types to copy from to result.data and aux_data
if not isinstance(data, NDArray):
data = _array(data, ctx, dtype)
if not isinstance(indptr, NDArray):
indptr = _array(indptr, ctx, indptr_type)