torch/_lowrank.py - platform/external/pytorch - Git at Google

 """Implement various linear algebra algorithms for low rank matrices.
 """

 __all__ = ['svd_lowrank', 'pca_lowrank']

 from torch import Tensor
 import torch
 from . import _linalg_utils as _utils
 from .overrides import has_torch_function, handle_torch_function

 from typing import Optional, Tuple

 def get_approximate_basis(A: Tensor,
                           q: int,
                           niter: Optional[int] = 2,
                           M: Optional[Tensor] = None
                           ) -> Tensor:
     """Return tensor :math:`Q` with :math:`q` orthonormal columns such
     that :math:`Q Q^H A` approximates :math:`A`. If :math:`M` is
     specified, then :math:`Q` is such that :math:`Q Q^H (A - M)`
     approximates :math:`A - M`.

     .. note:: The implementation is based on the Algorithm 4.4 from
               Halko et al, 2009.

     .. note:: For an adequate approximation of a k-rank matrix
               :math:`A`, where k is not known in advance but could be
               estimated, the number of :math:`Q` columns, q, can be
               choosen according to the following criteria: in general,
               :math:`k <= q <= min(2*k, m, n)`. For large low-rank
               matrices, take :math:`q = k + 5..10`.  If k is
               relatively small compared to :math:`min(m, n)`, choosing
               :math:`q = k + 0..2` may be sufficient.

     .. note:: To obtain repeatable results, reset the seed for the
               pseudorandom number generator

     Args::
         A (Tensor): the input tensor of size :math:`(*, m, n)`

         q (int): the dimension of subspace spanned by :math:`Q`
                  columns.

         niter (int, optional): the number of subspace iterations to
                                conduct; ``niter`` must be a
                                nonnegative integer. In most cases, the
                                default value 2 is more than enough.

         M (Tensor, optional): the input tensor's mean of size
                               :math:`(*, 1, n)`.

     References::
         - Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
           structure with randomness: probabilistic algorithms for
           constructing approximate matrix decompositions,
           arXiv:0909.4061 [math.NA; math.PR], 2009 (available at
           `arXiv <http://arxiv.org/abs/0909.4061>`_).
     """

     niter = 2 if niter is None else niter
     m, n = A.shape[-2:]
     dtype = _utils.get_floating_dtype(A)
     matmul = _utils.matmul

     R = torch.randn(n, q, dtype=dtype, device=A.device)

     # The following code could be made faster using torch.geqrf + torch.ormqr
     # but geqrf is not differentiable
     A_H = _utils.transjugate(A)
     if M is None:
         Q = torch.linalg.qr(matmul(A, R)).Q
         for i in range(niter):
             Q = torch.linalg.qr(matmul(A_H, Q)).Q
             Q = torch.linalg.qr(matmul(A, Q)).Q
     else:
         M_H = _utils.transjugate(M)
         Q = torch.linalg.qr(matmul(A, R) - matmul(M, R)).Q
         for i in range(niter):
             Q = torch.linalg.qr(matmul(A_H, Q) - matmul(M_H, Q)).Q
             Q = torch.linalg.qr(matmul(A, Q) - matmul(M, Q)).Q

     return Q


 def svd_lowrank(A: Tensor, q: Optional[int] = 6, niter: Optional[int] = 2,
                 M: Optional[Tensor] = None) -> Tuple[Tensor, Tensor, Tensor]:
     r"""Return the singular value decomposition ``(U, S, V)`` of a matrix,
     batches of matrices, or a sparse matrix :math:`A` such that
     :math:`A \approx U diag(S) V^T`. In case :math:`M` is given, then
     SVD is computed for the matrix :math:`A - M`.

     .. note:: The implementation is based on the Algorithm 5.1 from
               Halko et al, 2009.

     .. note:: To obtain repeatable results, reset the seed for the
               pseudorandom number generator

     .. note:: The input is assumed to be a low-rank matrix.

     .. note:: In general, use the full-rank SVD implementation
               :func:`torch.linalg.svd` for dense matrices due to its 10-fold
               higher performance characteristics. The low-rank SVD
               will be useful for huge sparse matrices that
               :func:`torch.linalg.svd` cannot handle.

     Args::
         A (Tensor): the input tensor of size :math:`(*, m, n)`

         q (int, optional): a slightly overestimated rank of A.

         niter (int, optional): the number of subspace iterations to
                                conduct; niter must be a nonnegative
                                integer, and defaults to 2

         M (Tensor, optional): the input tensor's mean of size
                               :math:`(*, 1, n)`.

     References::
         - Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
           structure with randomness: probabilistic algorithms for
           constructing approximate matrix decompositions,
           arXiv:0909.4061 [math.NA; math.PR], 2009 (available at
           `arXiv <https://arxiv.org/abs/0909.4061>`_).

     """
     if not torch.jit.is_scripting():
         tensor_ops = (A, M)
         if (not set(map(type, tensor_ops)).issubset((torch.Tensor, type(None))) and has_torch_function(tensor_ops)):
             return handle_torch_function(svd_lowrank, tensor_ops, A, q=q, niter=niter, M=M)
     return _svd_lowrank(A, q=q, niter=niter, M=M)


 def _svd_lowrank(A: Tensor, q: Optional[int] = 6, niter: Optional[int] = 2,
                  M: Optional[Tensor] = None) -> Tuple[Tensor, Tensor, Tensor]:
     q = 6 if q is None else q
     m, n = A.shape[-2:]
     matmul = _utils.matmul
     if M is None:
         M_t = None
     else:
         M_t = _utils.transpose(M)
     A_t = _utils.transpose(A)

     # Algorithm 5.1 in Halko et al 2009, slightly modified to reduce
     # the number conjugate and transpose operations
     if m < n or n > q:
         # computing the SVD approximation of a transpose in
         # order to keep B shape minimal (the m < n case) or the V
         # shape small (the n > q case)
         Q = get_approximate_basis(A_t, q, niter=niter, M=M_t)
         Q_c = _utils.conjugate(Q)
         if M is None:
             B_t = matmul(A, Q_c)
         else:
             B_t = matmul(A, Q_c) - matmul(M, Q_c)
         assert B_t.shape[-2] == m, (B_t.shape, m)
         assert B_t.shape[-1] == q, (B_t.shape, q)
         assert B_t.shape[-1] <= B_t.shape[-2], B_t.shape
         U, S, Vh = torch.linalg.svd(B_t, full_matrices=False)
         V = Vh.mH
         V = Q.matmul(V)
     else:
         Q = get_approximate_basis(A, q, niter=niter, M=M)
         Q_c = _utils.conjugate(Q)
         if M is None:
             B = matmul(A_t, Q_c)
         else:
             B = matmul(A_t, Q_c) - matmul(M_t, Q_c)
         B_t = _utils.transpose(B)
         assert B_t.shape[-2] == q, (B_t.shape, q)
         assert B_t.shape[-1] == n, (B_t.shape, n)
         assert B_t.shape[-1] <= B_t.shape[-2], B_t.shape
         U, S, Vh = torch.linalg.svd(B_t, full_matrices=False)
         V = Vh.mH
         U = Q.matmul(U)

     return U, S, V


 def pca_lowrank(A: Tensor, q: Optional[int] = None, center: bool = True,
                 niter: int = 2) -> Tuple[Tensor, Tensor, Tensor]:
     r"""Performs linear Principal Component Analysis (PCA) on a low-rank
     matrix, batches of such matrices, or sparse matrix.

     This function returns a namedtuple ``(U, S, V)`` which is the
     nearly optimal approximation of a singular value decomposition of
     a centered matrix :math:`A` such that :math:`A = U diag(S) V^T`.

     .. note:: The relation of ``(U, S, V)`` to PCA is as follows:

                 - :math:`A` is a data matrix with ``m`` samples and
                   ``n`` features

                 - the :math:`V` columns represent the principal directions

                 - :math:`S ** 2 / (m - 1)` contains the eigenvalues of
                   :math:`A^T A / (m - 1)` which is the covariance of
                   ``A`` when ``center=True`` is provided.

                 - ``matmul(A, V[:, :k])`` projects data to the first k
                   principal components

     .. note:: Different from the standard SVD, the size of returned
               matrices depend on the specified rank and q
               values as follows:

                 - :math:`U` is m x q matrix

                 - :math:`S` is q-vector

                 - :math:`V` is n x q matrix

     .. note:: To obtain repeatable results, reset the seed for the
               pseudorandom number generator

     Args:

         A (Tensor): the input tensor of size :math:`(*, m, n)`

         q (int, optional): a slightly overestimated rank of
                            :math:`A`. By default, ``q = min(6, m,
                            n)``.

         center (bool, optional): if True, center the input tensor,
                                  otherwise, assume that the input is
                                  centered.

         niter (int, optional): the number of subspace iterations to
                                conduct; niter must be a nonnegative
                                integer, and defaults to 2.

     References::

         - Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
           structure with randomness: probabilistic algorithms for
           constructing approximate matrix decompositions,
           arXiv:0909.4061 [math.NA; math.PR], 2009 (available at
           `arXiv <http://arxiv.org/abs/0909.4061>`_).

     """

     if not torch.jit.is_scripting():
         if type(A) is not torch.Tensor and has_torch_function((A,)):
             return handle_torch_function(pca_lowrank, (A,), A, q=q, center=center, niter=niter)

     (m, n) = A.shape[-2:]

     if q is None:
         q = min(6, m, n)
     elif not (q >= 0 and q <= min(m, n)):
         raise ValueError('q(={}) must be non-negative integer'
                          ' and not greater than min(m, n)={}'
                          .format(q, min(m, n)))
     if not (niter >= 0):
         raise ValueError('niter(={}) must be non-negative integer'
                          .format(niter))

     dtype = _utils.get_floating_dtype(A)

     if not center:
         return _svd_lowrank(A, q, niter=niter, M=None)

     if _utils.is_sparse(A):
         if len(A.shape) != 2:
             raise ValueError('pca_lowrank input is expected to be 2-dimensional tensor')
         c = torch.sparse.sum(A, dim=(-2,)) / m
         # reshape c
         column_indices = c.indices()[0]
         indices = torch.zeros(2, len(column_indices),
                               dtype=column_indices.dtype,
                               device=column_indices.device)
         indices[0] = column_indices
         C_t = torch.sparse_coo_tensor(
             indices, c.values(), (n, 1), dtype=dtype, device=A.device)

         ones_m1_t = torch.ones(A.shape[:-2] + (1, m), dtype=dtype, device=A.device)
         M = _utils.transpose(torch.sparse.mm(C_t, ones_m1_t))
         return _svd_lowrank(A, q, niter=niter, M=M)
     else:
         C = A.mean(dim=(-2,), keepdim=True)
         return _svd_lowrank(A - C, q, niter=niter, M=None)
	"""Implement various linear algebra algorithms for low rank matrices.
	"""

	__all__ = ['svd_lowrank', 'pca_lowrank']

	from torch import Tensor
	import torch
	from . import _linalg_utils as _utils
	from .overrides import has_torch_function, handle_torch_function

	from typing import Optional, Tuple

	def get_approximate_basis(A: Tensor,
	q: int,
	niter: Optional[int] = 2,
	M: Optional[Tensor] = None
	) -> Tensor:
	"""Return tensor :math:`Q` with :math:`q` orthonormal columns such
	that :math:`Q Q^H A` approximates :math:`A`. If :math:`M` is
	specified, then :math:`Q` is such that :math:`Q Q^H (A - M)`
	approximates :math:`A - M`.

	.. note:: The implementation is based on the Algorithm 4.4 from
	Halko et al, 2009.

	.. note:: For an adequate approximation of a k-rank matrix
	:math:`A`, where k is not known in advance but could be
	estimated, the number of :math:`Q` columns, q, can be
	choosen according to the following criteria: in general,
	:math:`k <= q <= min(2*k, m, n)`. For large low-rank
	matrices, take :math:`q = k + 5..10`. If k is
	relatively small compared to :math:`min(m, n)`, choosing
	:math:`q = k + 0..2` may be sufficient.

	.. note:: To obtain repeatable results, reset the seed for the
	pseudorandom number generator

	Args::
	A (Tensor): the input tensor of size :math:`(*, m, n)`

	q (int): the dimension of subspace spanned by :math:`Q`
	columns.

	niter (int, optional): the number of subspace iterations to
	conduct; ``niter`` must be a
	nonnegative integer. In most cases, the
	default value 2 is more than enough.

	M (Tensor, optional): the input tensor's mean of size
	:math:`(*, 1, n)`.

	References::
	- Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
	structure with randomness: probabilistic algorithms for
	constructing approximate matrix decompositions,
	arXiv:0909.4061 [math.NA; math.PR], 2009 (available at
	`arXiv <http://arxiv.org/abs/0909.4061>`_).
	"""

	niter = 2 if niter is None else niter
	m, n = A.shape[-2:]
	dtype = _utils.get_floating_dtype(A)
	matmul = _utils.matmul

	R = torch.randn(n, q, dtype=dtype, device=A.device)

	# The following code could be made faster using torch.geqrf + torch.ormqr
	# but geqrf is not differentiable
	A_H = _utils.transjugate(A)
	if M is None:
	Q = torch.linalg.qr(matmul(A, R)).Q
	for i in range(niter):
	Q = torch.linalg.qr(matmul(A_H, Q)).Q
	Q = torch.linalg.qr(matmul(A, Q)).Q
	else:
	M_H = _utils.transjugate(M)
	Q = torch.linalg.qr(matmul(A, R) - matmul(M, R)).Q
	for i in range(niter):
	Q = torch.linalg.qr(matmul(A_H, Q) - matmul(M_H, Q)).Q
	Q = torch.linalg.qr(matmul(A, Q) - matmul(M, Q)).Q

	return Q


	def svd_lowrank(A: Tensor, q: Optional[int] = 6, niter: Optional[int] = 2,
	M: Optional[Tensor] = None) -> Tuple[Tensor, Tensor, Tensor]:
	r"""Return the singular value decomposition ``(U, S, V)`` of a matrix,
	batches of matrices, or a sparse matrix :math:`A` such that
	:math:`A \approx U diag(S) V^T`. In case :math:`M` is given, then
	SVD is computed for the matrix :math:`A - M`.

	.. note:: The implementation is based on the Algorithm 5.1 from
	Halko et al, 2009.

	.. note:: To obtain repeatable results, reset the seed for the
	pseudorandom number generator

	.. note:: The input is assumed to be a low-rank matrix.

	.. note:: In general, use the full-rank SVD implementation
	:func:`torch.linalg.svd` for dense matrices due to its 10-fold
	higher performance characteristics. The low-rank SVD
	will be useful for huge sparse matrices that
	:func:`torch.linalg.svd` cannot handle.

	Args::
	A (Tensor): the input tensor of size :math:`(*, m, n)`

	q (int, optional): a slightly overestimated rank of A.

	niter (int, optional): the number of subspace iterations to
	conduct; niter must be a nonnegative
	integer, and defaults to 2

	M (Tensor, optional): the input tensor's mean of size
	:math:`(*, 1, n)`.

	References::
	- Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
	structure with randomness: probabilistic algorithms for
	constructing approximate matrix decompositions,
	arXiv:0909.4061 [math.NA; math.PR], 2009 (available at
	`arXiv <https://arxiv.org/abs/0909.4061>`_).

	"""
	if not torch.jit.is_scripting():
	tensor_ops = (A, M)
	if (not set(map(type, tensor_ops)).issubset((torch.Tensor, type(None))) and has_torch_function(tensor_ops)):
	return handle_torch_function(svd_lowrank, tensor_ops, A, q=q, niter=niter, M=M)
	return _svd_lowrank(A, q=q, niter=niter, M=M)


	def _svd_lowrank(A: Tensor, q: Optional[int] = 6, niter: Optional[int] = 2,
	M: Optional[Tensor] = None) -> Tuple[Tensor, Tensor, Tensor]:
	q = 6 if q is None else q
	m, n = A.shape[-2:]
	matmul = _utils.matmul
	if M is None:
	M_t = None
	else:
	M_t = _utils.transpose(M)
	A_t = _utils.transpose(A)

	# Algorithm 5.1 in Halko et al 2009, slightly modified to reduce
	# the number conjugate and transpose operations
	if m < n or n > q:
	# computing the SVD approximation of a transpose in
	# order to keep B shape minimal (the m < n case) or the V
	# shape small (the n > q case)
	Q = get_approximate_basis(A_t, q, niter=niter, M=M_t)
	Q_c = _utils.conjugate(Q)
	if M is None:
	B_t = matmul(A, Q_c)
	else:
	B_t = matmul(A, Q_c) - matmul(M, Q_c)
	assert B_t.shape[-2] == m, (B_t.shape, m)
	assert B_t.shape[-1] == q, (B_t.shape, q)
	assert B_t.shape[-1] <= B_t.shape[-2], B_t.shape
	U, S, Vh = torch.linalg.svd(B_t, full_matrices=False)
	V = Vh.mH
	V = Q.matmul(V)
	else:
	Q = get_approximate_basis(A, q, niter=niter, M=M)
	Q_c = _utils.conjugate(Q)
	if M is None:
	B = matmul(A_t, Q_c)
	else:
	B = matmul(A_t, Q_c) - matmul(M_t, Q_c)
	B_t = _utils.transpose(B)
	assert B_t.shape[-2] == q, (B_t.shape, q)
	assert B_t.shape[-1] == n, (B_t.shape, n)
	assert B_t.shape[-1] <= B_t.shape[-2], B_t.shape
	U, S, Vh = torch.linalg.svd(B_t, full_matrices=False)
	V = Vh.mH
	U = Q.matmul(U)

	return U, S, V


	def pca_lowrank(A: Tensor, q: Optional[int] = None, center: bool = True,
	niter: int = 2) -> Tuple[Tensor, Tensor, Tensor]:
	r"""Performs linear Principal Component Analysis (PCA) on a low-rank
	matrix, batches of such matrices, or sparse matrix.

	This function returns a namedtuple ``(U, S, V)`` which is the
	nearly optimal approximation of a singular value decomposition of
	a centered matrix :math:`A` such that :math:`A = U diag(S) V^T`.

	.. note:: The relation of ``(U, S, V)`` to PCA is as follows:

	- :math:`A` is a data matrix with ``m`` samples and
	``n`` features

	- the :math:`V` columns represent the principal directions

	- :math:`S ** 2 / (m - 1)` contains the eigenvalues of
	:math:`A^T A / (m - 1)` which is the covariance of
	``A`` when ``center=True`` is provided.

	- ``matmul(A, V[:, :k])`` projects data to the first k
	principal components

	.. note:: Different from the standard SVD, the size of returned
	matrices depend on the specified rank and q
	values as follows:

	- :math:`U` is m x q matrix

	- :math:`S` is q-vector

	- :math:`V` is n x q matrix

	.. note:: To obtain repeatable results, reset the seed for the
	pseudorandom number generator

	Args:

	A (Tensor): the input tensor of size :math:`(*, m, n)`

	q (int, optional): a slightly overestimated rank of
	:math:`A`. By default, ``q = min(6, m,
	n)``.

	center (bool, optional): if True, center the input tensor,
	otherwise, assume that the input is
	centered.

	niter (int, optional): the number of subspace iterations to
	conduct; niter must be a nonnegative
	integer, and defaults to 2.

	References::

	- Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
	structure with randomness: probabilistic algorithms for
	constructing approximate matrix decompositions,
	arXiv:0909.4061 [math.NA; math.PR], 2009 (available at
	`arXiv <http://arxiv.org/abs/0909.4061>`_).

	"""

	if not torch.jit.is_scripting():
	if type(A) is not torch.Tensor and has_torch_function((A,)):
	return handle_torch_function(pca_lowrank, (A,), A, q=q, center=center, niter=niter)

	(m, n) = A.shape[-2:]

	if q is None:
	q = min(6, m, n)
	elif not (q >= 0 and q <= min(m, n)):
	raise ValueError('q(={}) must be non-negative integer'
	' and not greater than min(m, n)={}'
	.format(q, min(m, n)))
	if not (niter >= 0):
	raise ValueError('niter(={}) must be non-negative integer'
	.format(niter))

	dtype = _utils.get_floating_dtype(A)

	if not center:
	return _svd_lowrank(A, q, niter=niter, M=None)

	if _utils.is_sparse(A):
	if len(A.shape) != 2:
	raise ValueError('pca_lowrank input is expected to be 2-dimensional tensor')
	c = torch.sparse.sum(A, dim=(-2,)) / m
	# reshape c
	column_indices = c.indices()[0]
	indices = torch.zeros(2, len(column_indices),
	dtype=column_indices.dtype,
	device=column_indices.device)
	indices[0] = column_indices
	C_t = torch.sparse_coo_tensor(
	indices, c.values(), (n, 1), dtype=dtype, device=A.device)

	ones_m1_t = torch.ones(A.shape[:-2] + (1, m), dtype=dtype, device=A.device)
	M = _utils.transpose(torch.sparse.mm(C_t, ones_m1_t))
	return _svd_lowrank(A, q, niter=niter, M=M)
	else:
	C = A.mean(dim=(-2,), keepdim=True)
	return _svd_lowrank(A - C, q, niter=niter, M=None)