# hub/venv/lib/python3.7/site-packages/scipy/sparse/dok.py
#
# 454 lines, 15 KiB, Python
# (web viewer chrome: Raw / Normal View / History)

"""Dictionary Of Keys based matrix"""
__docformat__ = "restructuredtext en"
__all__ = ['dok_matrix', 'isspmatrix_dok']
import itertools
import numpy as np
from .base import spmatrix, isspmatrix
from ._index import IndexMixin
from .sputils import (isdense, getdtype, isshape, isintlike, isscalarlike,
upcast, upcast_scalar, get_index_dtype, check_shape)
try:
    # Prefer the operator-module implementation when this interpreter has one.
    from operator import isSequenceType as _is_sequence
except ImportError:
    def _is_sequence(x):
        """Return True if `x` has a ``__len__``, ``__next__`` or ``next``
        attribute, and False otherwise."""
        probed_attrs = ('__len__', '__next__', 'next')
        return any(hasattr(x, attr) for attr in probed_attrs)
class dok_matrix(spmatrix, IndexMixin, dict):
    """
    Dictionary Of Keys based sparse matrix.

    This is an efficient structure for constructing sparse
    matrices incrementally.

    This can be instantiated in several ways:
        dok_matrix(D)
            with a dense matrix, D

        dok_matrix(S)
            with a sparse matrix, S

        dok_matrix((M,N), [dtype])
            create the matrix with initial shape (M,N)
            dtype is optional, defaulting to dtype='d'

    Attributes
    ----------
    dtype : dtype
        Data type of the matrix
    shape : 2-tuple
        Shape of the matrix
    ndim : int
        Number of dimensions (this is always 2)
    nnz
        Number of nonzero elements

    Notes
    -----

    Sparse matrices can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Allows for efficient O(1) access of individual elements.
    Duplicates are not allowed.
    Can be efficiently converted to a coo_matrix once constructed.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.sparse import dok_matrix
    >>> S = dok_matrix((5, 5), dtype=np.float32)
    >>> for i in range(5):
    ...     for j in range(5):
    ...         S[i, j] = i + j    # Update element

    """
    format = 'dok'

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        """Build the matrix from a shape tuple, a sparse matrix or dense data.

        Parameters
        ----------
        arg1 : tuple, sparse matrix or array_like
            A shape tuple accepted by ``isshape``, any sparse matrix, or
            anything ``np.asarray`` can turn into a 2-D array.
        shape : optional
            Accepted for signature compatibility; not read by this method.
        dtype : optional
            Requested data type, resolved through ``getdtype`` with a
            default of ``float``.
        copy : bool, optional
            Only consulted when `arg1` is sparse: a ``dok_matrix`` input is
            copied when True, otherwise ``todok()`` is used.

        Raises
        ------
        TypeError
            If `arg1` cannot be converted to an array, or the converted
            array is not 2-dimensional.
        """
        dict.__init__(self)
        spmatrix.__init__(self)

        self.dtype = getdtype(dtype, default=float)
        if isinstance(arg1, tuple) and isshape(arg1):  # (M,N)
            M, N = arg1
            self._shape = check_shape((M, N))
        elif isspmatrix(arg1):  # Sparse ctor
            if isspmatrix_dok(arg1) and copy:
                arg1 = arg1.copy()
            else:
                arg1 = arg1.todok()

            if dtype is not None:
                arg1 = arg1.astype(dtype, copy=False)

            dict.update(self, arg1)
            self._shape = check_shape(arg1.shape)
            self.dtype = arg1.dtype
        else:  # Dense ctor
            try:
                arg1 = np.asarray(arg1)
            except Exception as err:
                # Keep the original failure attached as the cause.
                raise TypeError('Invalid input format.') from err

            if len(arg1.shape) != 2:
                raise TypeError('Expected rank <=2 dense array or matrix.')

            # Imported here rather than at module level, as in the original.
            from .coo import coo_matrix
            d = coo_matrix(arg1, dtype=dtype).todok()
            dict.update(self, d)
            self._shape = check_shape(arg1.shape)
            self.dtype = d.dtype

    def update(self, val):
        """Always raise: ``dict.update`` is disabled on this class.

        Raises
        ------
        NotImplementedError
            Unconditionally.
        """
        # Prevent direct usage of update
        raise NotImplementedError("Direct modification to dok_matrix element "
                                  "is not allowed.")

    def _update(self, data):
        """An update method for dict data defined for direct access to
        `dok_matrix` data. Main purpose is to be used for efficient conversion
        from other spmatrix classes. Has no checking if `data` is valid."""
        return dict.update(self, data)

    def set_shape(self, shape):
        """Reshape in place by adopting the state of a reshaped matrix.

        Parameters
        ----------
        shape : tuple
            New shape, forwarded to ``self.reshape``.
        """
        new_matrix = self.reshape(shape, copy=False).asformat(self.format)
        # Take over both the attributes and the stored entries of the result.
        self.__dict__ = new_matrix.__dict__
        dict.clear(self)
        dict.update(self, new_matrix)

    shape = property(fget=spmatrix.get_shape, fset=set_shape)

    def getnnz(self, axis=None):
        # The docstring is copied from spmatrix.getnnz below.
        if axis is not None:
            raise NotImplementedError("getnnz over an axis is not implemented "
                                      "for DOK format.")
        return dict.__len__(self)

    def count_nonzero(self):
        # The docstring is copied from spmatrix.count_nonzero below.
        return sum(x != 0 for x in self.values())

    getnnz.__doc__ = spmatrix.getnnz.__doc__
    count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__

    # What should len(sparse) return? For consistency with dense matrices,
    # perhaps it should be the number of rows?  For now it returns the number
    # of non-zeros.
    def __len__(self):
        """Return the number of stored entries."""
        return dict.__len__(self)

    def get(self, key, default=0.):
        """This overrides the dict.get method, providing type checking
        but otherwise equivalent functionality.

        Parameters
        ----------
        key : pair of int-like
            ``(row, column)`` index, checked against ``self.shape``.
        default : optional
            Value returned when `key` is not stored. Defaults to ``0.``.

        Raises
        ------
        IndexError
            If `key` is not a pair of int-like values, or lies outside the
            matrix bounds.
        """
        try:
            i, j = key
            is_int_pair = isintlike(i) and isintlike(j)
        except (TypeError, ValueError) as err:
            raise IndexError('Index must be a pair of integers.') from err
        # Explicit check instead of ``assert`` so that validation still runs
        # when Python is started with -O.
        if not is_int_pair:
            raise IndexError('Index must be a pair of integers.')
        if (i < 0 or i >= self.shape[0] or j < 0 or j >= self.shape[1]):
            raise IndexError('Index out of bounds.')
        return dict.get(self, key, default)

    def _get_intXint(self, row, col):
        """Return the stored value at ``(row, col)``, or a zero of the
        matrix dtype when nothing is stored there."""
        return dict.get(self, (row, col), self.dtype.type(0))

    def _get_intXslice(self, row, col):
        """Handle int-by-slice indexing as a one-row slice-by-slice lookup."""
        return self._get_sliceXslice(slice(row, row+1), col)

    def _get_sliceXint(self, row, col):
        """Handle slice-by-int indexing as a one-column slice-by-slice
        lookup."""
        return self._get_sliceXslice(row, slice(col, col+1))

    def _get_sliceXslice(self, row, col):
        """Return the sub-matrix selected by two slices as a new
        ``dok_matrix``."""
        row_start, row_stop, row_step = row.indices(self.shape[0])
        col_start, col_stop, col_step = col.indices(self.shape[1])
        row_range = range(row_start, row_stop, row_step)
        col_range = range(col_start, col_stop, col_step)
        shape = (len(row_range), len(col_range))
        # Switch paths only when advantageous
        # (count the iterations in the loops, adjust for complexity)
        if len(self) >= 2 * shape[0] * shape[1]:
            # O(nr*nc) path: loop over <row x col>
            return self._get_columnXarray(row_range, col_range)
        # O(nnz) path: loop over entries of self
        newdok = dok_matrix(shape, dtype=self.dtype)
        for key in self.keys():
            # Map the stored index to a position in the slice; a non-zero
            # remainder means the step skips over this row or column.
            i, ri = divmod(int(key[0]) - row_start, row_step)
            if ri != 0 or i < 0 or i >= shape[0]:
                continue
            j, rj = divmod(int(key[1]) - col_start, col_step)
            if rj != 0 or j < 0 or j >= shape[1]:
                continue
            x = dict.__getitem__(self, key)
            dict.__setitem__(newdok, (i, j), x)
        return newdok

    def _get_intXarray(self, row, col):
        """Handle int-by-array indexing via the outer-indexing helper."""
        return self._get_columnXarray([row], col)

    def _get_arrayXint(self, row, col):
        """Handle array-by-int indexing via the outer-indexing helper."""
        return self._get_columnXarray(row, [col])

    def _get_sliceXarray(self, row, col):
        """Expand the row slice to a list, then use outer indexing."""
        row = list(range(*row.indices(self.shape[0])))
        return self._get_columnXarray(row, col)

    def _get_arrayXslice(self, row, col):
        """Expand the column slice to a list, then use outer indexing."""
        col = list(range(*col.indices(self.shape[1])))
        return self._get_columnXarray(row, col)

    def _get_columnXarray(self, row, col):
        """Return every (row, col) combination as a new ``dok_matrix``."""
        # outer indexing
        newdok = dok_matrix((len(row), len(col)), dtype=self.dtype)

        for i, r in enumerate(row):
            for j, c in enumerate(col):
                v = dict.get(self, (r, c), 0)
                if v:
                    dict.__setitem__(newdok, (i, j), v)
        return newdok

    def _get_arrayXarray(self, row, col):
        """Return element-wise paired (row, col) lookups as a new
        ``dok_matrix`` shaped like the broadcast index arrays."""
        # inner indexing
        i, j = map(np.atleast_2d, np.broadcast_arrays(row, col))
        newdok = dok_matrix(i.shape, dtype=self.dtype)

        for key in itertools.product(range(i.shape[0]), range(i.shape[1])):
            v = dict.get(self, (i[key], j[key]), 0)
            if v:
                dict.__setitem__(newdok, key, v)
        return newdok

    def _set_intXint(self, row, col, x):
        """Store `x` at ``(row, col)``; a falsy `x` removes any stored
        entry instead."""
        key = (row, col)
        if x:
            dict.__setitem__(self, key, x)
        elif dict.__contains__(self, key):
            del self[key]

    def _set_arrayXarray(self, row, col, x):
        """Store the values of `x` at the paired ``(row, col)`` positions,
        then drop entries that ended up equal to zero."""
        row = list(map(int, row.ravel()))
        col = list(map(int, col.ravel()))
        x = x.ravel()
        dict.update(self, zip(zip(row, col), x))

        for i in np.nonzero(x == 0)[0]:
            key = (row[i], col[i])
            if dict.__getitem__(self, key) == 0:
                # may have been superseded by later update
                del self[key]

    def __add__(self, other):
        """Return ``self + other``.

        A scalar is added to every position of the matrix. A ``dok_matrix``
        must have the same shape. Other sparse matrices are added through
        CSC, and dense operands through ``todense()``.

        Raises
        ------
        ValueError
            If `other` is a ``dok_matrix`` with a different shape.
        """
        if isscalarlike(other):
            res_dtype = upcast_scalar(self.dtype, other)
            new = dok_matrix(self.shape, dtype=res_dtype)
            # Add this scalar to every element.
            M, N = self.shape
            for key in itertools.product(range(M), range(N)):
                aij = dict.get(self, (key), 0) + other
                if aij:
                    new[key] = aij
        elif isspmatrix_dok(other):
            if other.shape != self.shape:
                raise ValueError("Matrix dimensions are not equal.")
            # We could alternatively set the dimensions to the largest of
            # the two matrices to be summed.  Would this be a good idea?
            res_dtype = upcast(self.dtype, other.dtype)
            new = dok_matrix(self.shape, dtype=res_dtype)
            dict.update(new, self)
            with np.errstate(over='ignore'):
                dict.update(new,
                            ((k, new[k] + other[k]) for k in other.keys()))
        elif isspmatrix(other):
            csc = self.tocsc()
            new = csc + other
        elif isdense(other):
            new = self.todense() + other
        else:
            return NotImplemented
        return new

    def __radd__(self, other):
        """Return ``other + self``.

        Takes the same branches as ``__add__`` and uses the same upcast
        result dtype, so the operand order does not change the dtype of
        the result.

        Raises
        ------
        ValueError
            If `other` is a ``dok_matrix`` with a different shape.
        """
        if isscalarlike(other):
            # Upcast as __add__ does; with self.dtype the assignment below
            # would cast each sum back to the matrix dtype.
            res_dtype = upcast_scalar(self.dtype, other)
            new = dok_matrix(self.shape, dtype=res_dtype)
            M, N = self.shape
            for key in itertools.product(range(M), range(N)):
                aij = dict.get(self, (key), 0) + other
                if aij:
                    new[key] = aij
        elif isspmatrix_dok(other):
            if other.shape != self.shape:
                raise ValueError("Matrix dimensions are not equal.")
            res_dtype = upcast(self.dtype, other.dtype)
            new = dok_matrix(self.shape, dtype=res_dtype)
            dict.update(new, self)
            with np.errstate(over='ignore'):
                dict.update(new,
                            ((k, new[k] + other[k]) for k in other.keys()))
        elif isspmatrix(other):
            csc = self.tocsc()
            new = csc + other
        elif isdense(other):
            new = other + self.todense()
        else:
            return NotImplemented
        return new

    def __neg__(self):
        """Return a new matrix with every stored value negated.

        Raises
        ------
        NotImplementedError
            If the matrix dtype is boolean.
        """
        if self.dtype.kind == 'b':
            raise NotImplementedError('Negating a sparse boolean matrix is not'
                                      ' supported.')
        new = dok_matrix(self.shape, dtype=self.dtype)
        dict.update(new, ((k, -v) for k, v in self.items()))
        return new

    def _mul_scalar(self, other):
        """Return a new matrix with every stored value multiplied by
        `other`."""
        res_dtype = upcast_scalar(self.dtype, other)
        # Multiply this scalar by every element.
        new = dok_matrix(self.shape, dtype=res_dtype)
        dict.update(new, ((k, v * other) for k, v in self.items()))
        return new

    def _mul_vector(self, other):
        """Return the matrix-vector product as a 1-D array of length
        ``self.shape[0]``."""
        # matrix * vector
        result = np.zeros(self.shape[0], dtype=upcast(self.dtype, other.dtype))
        for (i, j), v in self.items():
            result[i] += v * other[j]
        return result

    def _mul_multivector(self, other):
        """Return the product with a 2-D `other` as an array of shape
        ``(self.shape[0], other.shape[1])``."""
        # matrix * multivector
        result_shape = (self.shape[0], other.shape[1])
        result_dtype = upcast(self.dtype, other.dtype)
        result = np.zeros(result_shape, dtype=result_dtype)
        for (i, j), v in self.items():
            result[i, :] += v * other[j, :]
        return result

    def __imul__(self, other):
        """Multiply every stored value by a scalar in place.

        Returns ``NotImplemented`` for a non-scalar `other`.
        """
        if isscalarlike(other):
            # Only existing keys are rewritten, so the dict keeps its size
            # while it is being iterated.
            dict.update(self, ((k, v * other) for k, v in self.items()))
            return self
        return NotImplemented

    def __truediv__(self, other):
        """Return ``self / other``.

        A scalar divides every stored value. Anything else is delegated
        to the CSR form of this matrix.
        """
        if isscalarlike(other):
            res_dtype = upcast_scalar(self.dtype, other)
            new = dok_matrix(self.shape, dtype=res_dtype)
            dict.update(new, ((k, v / other) for k, v in self.items()))
            return new
        return self.tocsr() / other

    def __itruediv__(self, other):
        """Divide every stored value by a scalar in place.

        Returns ``NotImplemented`` for a non-scalar `other`.
        """
        if isscalarlike(other):
            dict.update(self, ((k, v / other) for k, v in self.items()))
            return self
        return NotImplemented

    def __reduce__(self):
        # this approach is necessary because __setstate__ is called after
        # __setitem__ upon unpickling and since __init__ is not called there
        # is no shape attribute hence it is not possible to unpickle it.
        return dict.__reduce__(self)

    def transpose(self, axes=None, copy=False):
        # The docstring is copied from spmatrix.transpose below.
        if axes is not None:
            raise ValueError("Sparse matrices do not support "
                             "an 'axes' parameter because swapping "
                             "dimensions is the only logical permutation.")

        M, N = self.shape
        new = dok_matrix((N, M), dtype=self.dtype, copy=copy)
        dict.update(new, (((right, left), val)
                          for (left, right), val in self.items()))
        return new

    transpose.__doc__ = spmatrix.transpose.__doc__

    def conjtransp(self):
        """Return the conjugate transpose."""
        M, N = self.shape
        new = dok_matrix((N, M), dtype=self.dtype)
        dict.update(new, (((right, left), np.conj(val))
                          for (left, right), val in self.items()))
        return new

    def copy(self):
        # The docstring is copied from spmatrix.copy below.
        new = dok_matrix(self.shape, dtype=self.dtype)
        dict.update(new, self)
        return new

    copy.__doc__ = spmatrix.copy.__doc__

    def tocoo(self, copy=False):
        # The docstring is copied from spmatrix.tocoo below.
        # `copy` is not read here; new arrays are always built.
        from .coo import coo_matrix
        if self.nnz == 0:
            return coo_matrix(self.shape, dtype=self.dtype)

        idx_dtype = get_index_dtype(maxval=max(self.shape))
        data = np.fromiter(self.values(), dtype=self.dtype, count=self.nnz)
        row = np.fromiter((i for i, _ in self.keys()), dtype=idx_dtype,
                          count=self.nnz)
        col = np.fromiter((j for _, j in self.keys()), dtype=idx_dtype,
                          count=self.nnz)
        A = coo_matrix((data, (row, col)), shape=self.shape, dtype=self.dtype)
        # NOTE(review): presumably safe because dict keys are unique, so
        # there are no duplicate coordinates; confirm against coo_matrix.
        A.has_canonical_format = True
        return A

    tocoo.__doc__ = spmatrix.tocoo.__doc__

    def todok(self, copy=False):
        # The docstring is copied from spmatrix.todok below.
        if copy:
            return self.copy()
        return self

    todok.__doc__ = spmatrix.todok.__doc__

    def tocsc(self, copy=False):
        # The docstring is copied from spmatrix.tocsc below.
        return self.tocoo(copy=False).tocsc(copy=copy)

    tocsc.__doc__ = spmatrix.tocsc.__doc__

    def resize(self, *shape):
        # The docstring is copied from spmatrix.resize below.
        shape = check_shape(shape)
        newM, newN = shape
        M, N = self.shape
        if newM < M or newN < N:
            # Remove all elements outside new dimensions
            # (iterate a snapshot of the keys because entries are deleted)
            for (i, j) in list(self.keys()):
                if i >= newM or j >= newN:
                    del self[i, j]
        self._shape = shape

    resize.__doc__ = spmatrix.resize.__doc__
def isspmatrix_dok(x):
    """Report whether `x` is a ``dok_matrix`` instance.

    Parameters
    ----------
    x
        Object to test.

    Returns
    -------
    bool
        True when `x` is an instance of ``dok_matrix``, False otherwise.

    Examples
    --------
    >>> from scipy.sparse import dok_matrix, isspmatrix_dok
    >>> isspmatrix_dok(dok_matrix([[5]]))
    True

    >>> from scipy.sparse import dok_matrix, csr_matrix, isspmatrix_dok
    >>> isspmatrix_dok(csr_matrix([[5]]))
    False
    """
    is_dok = isinstance(x, dok_matrix)
    return is_dok