test/test_sparse.py - platform/external/pytorch - Git at Google

 # Owner(s): ["module: sparse"]

 import torch
 import itertools
 import functools
 import operator
 import random
 import unittest
 from torch.testing import make_tensor
 from torch.testing._internal.common_utils import TestCase, run_tests, skipIfRocm, do_test_dtypes, \
     load_tests, TEST_NUMPY, TEST_SCIPY, IS_WINDOWS, gradcheck, coalescedonoff, \
     DeterministicGuard, first_sample, TEST_WITH_CROSSREF, TEST_WITH_ROCM, skipIfTorchDynamo, \
     parametrize, subtest, is_coalesced_indices, suppress_warnings, instantiate_parametrized_tests, \
     skipIfCrossRef
 from torch.testing._internal.common_cuda import TEST_CUDA
 from numbers import Number
 from typing import Dict, Any
 from packaging import version
 from torch.testing._internal.common_cuda import \
     (SM53OrLater, SM80OrLater, TEST_MULTIGPU)
 from torch.testing._internal.common_device_type import \
     (instantiate_device_type_tests, ops, dtypes, dtypesIfCUDA, onlyCPU, onlyCUDA, precisionOverride,
      deviceCountAtLeast, OpDTypes, onlyNativeDeviceTypes)
 from torch.testing._internal.common_methods_invocations import \
     (op_db, reduction_ops, sparse_unary_ufuncs, sparse_masked_reduction_ops, binary_ufuncs)
 from torch.testing._internal.common_dtype import (
     all_types, all_types_and_complex, all_types_and_complex_and, floating_and_complex_types,
     floating_and_complex_types_and, integral_types, floating_types_and,
 )
 from torch.testing._internal.opinfo.definitions.sparse import validate_sample_input_sparse
 from torch.testing._internal.opinfo.refs import (
     ElementwiseBinaryPythonRefInfo,
     ReductionPythonRefInfo
 )

 def _op_supports_any_sparse(op):
     return (op.supports_sparse
             or op.supports_sparse_csr
             or op.supports_sparse_csc
             or op.supports_sparse_bsr
             or op.supports_sparse_bsc)


 reduction_ops_with_sparse_support = [
     op for op in reduction_ops if 'masked.' not in op.name and
     _op_supports_any_sparse(op) and not isinstance(op, ReductionPythonRefInfo)]

 binary_ufuncs_with_sparse_support = [
     op for op in binary_ufuncs if _op_supports_any_sparse(op) and
     not isinstance(op, ElementwiseBinaryPythonRefInfo)]

 like_fns_with_sparse_support = [op for op in op_db if _op_supports_any_sparse(op) and '_like' in op.name]

 if TEST_SCIPY:
     import scipy.sparse

 # load_tests from torch.testing._internal.common_utils is used to automatically filter tests for
 # sharding on sandcastle. This line silences flake warnings
 load_tests = load_tests

 # batched grad doesn't support sparse
 gradcheck = functools.partial(gradcheck, check_batched_grad=False)

 CUSPARSE_SPMM_COMPLEX128_SUPPORTED = (
     IS_WINDOWS and torch.version.cuda and version.parse(torch.version.cuda) > version.parse("11.2")
 ) or (not IS_WINDOWS and not TEST_WITH_ROCM)

 HIPSPARSE_SPMM_COMPLEX128_SUPPORTED = torch.version.hip and version.parse(torch.version.hip.split("-")[0]) >= version.parse("6.0")

 def all_sparse_layouts(test_name='layout', include_strided=False):
     return parametrize(test_name, [
         subtest(torch.strided, name='Strided'),
         subtest(torch.sparse_coo, name='SparseCOO'),
         subtest(torch.sparse_csr, name='SparseCSR'),
         subtest(torch.sparse_csc, name='SparseCSC'),
         subtest(torch.sparse_bsr, name='SparseBSR'),
         subtest(torch.sparse_bsc, name='SparseBSC'),
     ][(0 if include_strided else 1):])

 def gradcheck_semantics(test_name='gradcheck'):
     gradcheck_sparse = functools.partial(gradcheck, masked=False)
     gradcheck_masked = functools.partial(gradcheck, masked=True)
     gradcheck_sparse.masked = False
     gradcheck_masked.masked = True
     return parametrize(test_name, [
         subtest(gradcheck_sparse, name='sparse'),
         subtest(gradcheck_masked, name='masked')])


 class CrossRefSparseFakeMode(torch._subclasses.CrossRefFakeMode):
     def __init__(self) -> None:
         super().__init__(
             self.ignore_op, check_strides=False,
             check_aliasing=False,
         )  # TODO: enable stride/alias checking

     # empty_like excluded for now due to sparse complex
     # aten._to_dense.default this one is getting called with csc
     @staticmethod
     def ignore_op(func):
         return func in (
             torch.ops.aten.empty_like.default,
             torch.ops.aten.set_.source_Storage_storage_offset,
             torch.ops.aten.sspaddmm.out,
             torch.ops.aten._spdiags.default,
             torch.ops.aten._to_dense.default,
             torch.ops.aten.indices.default,
             torch.ops.aten._indices.default,
             torch.ops.aten.values.default,
             torch.ops.aten._values.default,
         )

 class TestSparseLegacyAndDeprecation(TestCase):

     @skipIfTorchDynamo("TorchDynamo fails with unknown reason")
     def test_legacy_warnings(self):

         def f1():
             "torch.sparse.SparseTensor() is deprecated."\
                 "  Please use torch.sparse_coo_tensor((0,), dtype=)"
             x_ref = torch.sparse_coo_tensor((0,), dtype=torch.float64)
             x = torch.sparse.DoubleTensor()
             self.assertEqual(x, x_ref)

         def f2():
             "torch.sparse.SparseTensor(cdata=x._cdata) is deprecated."\
                 "  Please use torch.sparse_coo_tensor(x._indices(), x._values(), x.shape)"
             x_ref = torch.tensor([[1, 2], [3, 4]], dtype=torch.float64).to_sparse()
             x = torch.sparse.DoubleTensor(cdata=x_ref._cdata)
             y = torch.sparse_coo_tensor(x._indices(), x._values(), x.shape)
             self.assertEqual(x, x_ref)
             self.assertEqual(y, x_ref)

         def f3():
             "torch.sparse.SparseTensor(indices, values, *, device=) is deprecated."\
                 "  Please use torch.sparse_coo_tensor(indices, values, dtype=, device=)"
             x_ref = torch.sparse_coo_tensor([[0, 0, 1, 1], [0, 1, 0, 1]], [1, 2, 3, 4], dtype=torch.float64)
             x = torch.sparse.DoubleTensor(torch.tensor([[0, 0, 1, 1], [0, 1, 0, 1]]),
                                           torch.tensor([1, 2, 3, 4], dtype=torch.float64))
             self.assertEqual(x, x_ref)

         def f4():
             "torch.sparse.SparseTensor(indices, values, shape, *, device=) is deprecated."\
                 "  Please use torch.sparse_coo_tensor(indices, values, shape, dtype=, device=)"
             x_ref = torch.sparse_coo_tensor([[0, 0, 1, 1], [0, 1, 0, 1]], [1, 2, 3, 4], (2, 3), dtype=torch.float64)
             x = torch.sparse.DoubleTensor(torch.tensor([[0, 0, 1, 1], [0, 1, 0, 1]]),
                                           torch.tensor([1, 2, 3, 4], dtype=torch.float64), (2, 3))
             self.assertEqual(x, x_ref)

         def f5():
             "torch.sparse.SparseTensor(shape, *, device=) is deprecated."\
                 "  Please use torch.sparse_coo_tensor(shape, dtype=, device=)"
             x_ref = torch.sparse_coo_tensor((2, 3), dtype=torch.float64)
             x = torch.sparse.DoubleTensor(2, 3)
             self.assertEqual(x, x_ref)

         for test_f in [f1, f2, f3, f4, f5]:

             with self.assertWarns(UserWarning, msg=test_f.__doc__) as cm:
                 test_f()
                 test_f()

             # Check warn-once:
             self.assertEqual(len(cm.warnings), 1)


 class TestSparseBase(TestCase):
     def run(self, result=None):
         if TEST_WITH_CROSSREF:
             with CrossRefSparseFakeMode():
                 return super().run(result)
         else:
             return super().run(result)

 class TestSparse(TestSparseBase):

     def setUp(self):
         TestCase.setUp(self)

         self.index_tensor = lambda *args, **kwargs: torch.tensor(*args, **kwargs, dtype=torch.int64)

         def sparse_empty_factory(*args, **kwargs):
             kwargs['layout'] = kwargs.get('layout', torch.sparse_coo)
             return torch.empty(*args, **kwargs)
         self.sparse_empty = sparse_empty_factory

         def sparse_tensor_factory(*args, **kwargs):
             return torch.sparse_coo_tensor(*args, **kwargs)
         self.sparse_tensor = sparse_tensor_factory

     def _gen_sparse(self, sparse_dim, nnz, with_size, dtype, device, coalesced):
         if isinstance(with_size, Number):
             with_size = [with_size] * sparse_dim

         x, i, v = self.genSparseTensor(with_size, sparse_dim, nnz, not coalesced, dtype=dtype, device=device)

         if not coalesced:
             self.assert_uncoalesced(x)

         return x, i, v

     def assert_uncoalesced(self, x):
         """
         Test if a CPU tensor is uncoalesced.  This is used to ensure
         correctness of the uncoalesced tensor generation algorithm.
         """
         assert not x.is_coalesced()
         existing_indices = set()
         indices = x._indices()
         for i in range(x._nnz()):
             index = str(indices[:, i])
             if index in existing_indices:
                 return True
             else:
                 existing_indices.add(index)

     def randn(self, *args, **kwargs):
         """
         Variant of torch.randn that also works in the TEST_CUDA case.
         """
         # TODO: Put this in torch.cuda.randn
         return torch.empty(*args, **kwargs).normal_()

     @dtypes(torch.double)
     def test_print_coalesced(self, device, dtype):
         self._test_print(device, dtype, True)

     @dtypes(torch.double)
     def test_print_uncoalesced(self, device, dtype):
         self._test_print(device, dtype, False)

     def _test_print(self, device, dtype, coalesced):
         shape_sparse_dim_nnz = [
             ((), 0, 2),
             ((0,), 0, 10),
             ((2,), 0, 3),
             ((100, 3), 1, 3),
             ((100, 20, 3), 2, 0),
             ((10, 0, 3), 0, 3),
             ((10, 0, 3), 0, 0),
         ]
         printed = []
         for shape, sparse_dim, nnz in shape_sparse_dim_nnz:
             indices_shape = torch.Size((sparse_dim, nnz))
             values_shape = torch.Size((nnz,) + shape[sparse_dim:])
             printed.append(f"# shape: {torch.Size(shape)}")
             printed.append(f"# nnz: {nnz}")
             printed.append(f"# sparse_dim: {sparse_dim}")
             printed.append(f"# indices shape: {indices_shape}")
             printed.append(f"# values shape: {values_shape}")

             indices = torch.arange(indices_shape.numel(), dtype=self.index_tensor(0).dtype,
                                    device=device).view(indices_shape)
             for d in range(sparse_dim):
                 indices[d].clamp_(max=(shape[d] - 1))  # make it valid index
             if not coalesced and indices.numel() > 0:
                 indices[:, -1] = indices[:, 0]  # make it uncoalesced
             values_numel = values_shape.numel()
             values = torch.arange(values_numel, dtype=dtype,
                                   device=device).view(values_shape).div_(values_numel / 2.)
             sp_tensor = self.sparse_tensor(indices, values, shape, dtype=dtype, device=device)

             dtypes = [torch.int32]
             if values.dtype == torch.double:
                 dtypes.append(torch.float)
             else:
                 dtypes.append(torch.double)
             for dtype in dtypes:
                 printed.append(f"########## {dtype} ##########")
                 x = sp_tensor.detach().to(dtype)
                 printed.append("# sparse tensor")
                 printed.append(str(x))
                 if x.dtype.is_floating_point:
                     printed.append("# after requires_grad_")
                     printed.append(str(x.requires_grad_()))
                     printed.append("# after addition")
                     printed.append(str(x + x))
                 printed.append("# _indices")
                 printed.append(str(x._indices()))
                 printed.append("# _values")
                 printed.append(str(x._values()))
             printed.append('')
         self.assertExpected('\n'.join(printed))

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_basic(self, device, dtype, coalesced):
         def test_shape(sparse_dims, nnz, with_size):
             if isinstance(with_size, Number):
                 with_size = [with_size] * sparse_dims
             x, i, v = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)
             self.assertEqual(i, x._indices())
             self.assertEqual(v, x._values())
             self.assertEqual(x.ndimension(), len(with_size))
             self.assertEqual(x.coalesce()._nnz(), nnz if x.is_coalesced() else nnz // 2)
             self.assertEqual(list(x.size()), with_size)

             # Test .indices() and .values()
             if not coalesced:
                 with self.assertRaisesRegex(RuntimeError, "Cannot get indices on an uncoalesced tensor"):
                     x.indices()
                 with self.assertRaisesRegex(RuntimeError, "Cannot get values on an uncoalesced tensor"):
                     x.values()
             else:
                 self.assertEqual(x.indices(), x._indices())
                 self.assertEqual(x.values(), x._values())

         test_shape(3, 10, 100)
         test_shape(3, 10, [100, 100, 100])
         test_shape(3, 10, [100, 100, 100, 5, 5, 5, 0])
         test_shape(3, 0, [0, 0, 100, 5, 5, 5, 0])

         # Make sure that coalesce handles duplicate indices correctly
         i = self.index_tensor([[9, 0, 0, 0, 8, 1, 1, 1, 2, 7, 2, 2, 3, 4, 6, 9]], device=device)
         v = torch.tensor([[idx**2, idx] for idx in range(i.size(1))], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([10, 2]), dtype=dtype, device=device)
         self.assertEqual(x.coalesce()._nnz(), 9)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble, torch.bfloat16)
     @precisionOverride({torch.bfloat16: 1e-2})
     @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1991")
     def test_coalesce(self, device, dtype, coalesced):

         def _test_coalesce(t):
             tc = t.coalesce()
             self.assertEqual(tc.to_dense(), t.to_dense())
             self.assertTrue(tc.is_coalesced())
             # Our code below doesn't work when nnz is 0, because
             # then it's a 0D tensor, not a 2D tensor.
             if t._nnz() == 0:
                 self.assertEqual(t._indices(), tc._indices())
                 self.assertEqual(t._values(), tc._values())
                 return tc

             value_map: Dict[Any, Any] = {}
             for idx, val in zip(t._indices().t(), t._values()):
                 idx_tup = tuple(idx.tolist())
                 if idx_tup in value_map:
                     value_map[idx_tup] += val
                 else:
                     value_map[idx_tup] = val.clone() if isinstance(val, torch.Tensor) else val

             new_indices = sorted(value_map.keys())
             _new_values = [value_map[idx] for idx in new_indices]
             if t._values().ndimension() < 2:
                 new_values = t._values().new(_new_values)
             else:
                 new_values = torch.stack(_new_values)

             new_indices = t._indices().new(new_indices).t()
             tg = t.new(new_indices, new_values, t.size())

             self.assertEqual(tc._indices(), tg._indices())
             self.assertEqual(tc._values(), tg._values())

             if t.is_coalesced():
                 self.assertEqual(tc._indices(), t._indices())
                 self.assertEqual(tc._values(), t._values())

         for empty_i, empty_v, empty_nnz in itertools.product([True, False], repeat=3):
             sparse_size = [] if empty_i else [2, 1]
             dense_size = [1, 0, 2] if empty_v else [1, 2]
             nnz = 0 if empty_nnz else 5

             t, _, _ = self._gen_sparse(len(sparse_size), nnz, sparse_size + dense_size, dtype, device, coalesced)
             _test_coalesce(t)  # this tests correctness

     @dtypes(torch.double)
     @skipIfTorchDynamo("https://github.com/pytorch/pytorch/issues/89395")
     def test_coalesce_reference_cycle(self, device, dtype):
         # Test coalesce doesn't create autograd graph cycles (gh-52253)

         # Sanity check that the helper class works as expected
         t = torch.rand(2)
         t_ref = torch._C._WeakTensorRef(t)
         self.assertFalse(t_ref.expired())

         del t
         self.assertTrue(t_ref.expired())

         def test_sparse_sum():
             i = torch.tensor([[0], [4]], dtype=torch.long, device=device)
             v = torch.tensor([[[-0.4567, -1.8797, 0.0380, 1.4316]]],
                              dtype=dtype, device=device)
             S = torch.sparse_coo_tensor(i, v)
             S = S.coalesce()
             S.requires_grad_(True)
             S2 = S.coalesce()
             self.assertTrue(S2.is_coalesced())
             return torch._C._WeakTensorRef(S2)

         ref = test_sparse_sum()
         self.assertTrue(ref.expired())

     @dtypes(torch.double)
     def test_ctor_large_sizes(self, device, dtype):
         # Test that integer overflow is detected when computing numel
         # of a sparse tensor with large dimensions (gh-57416). Notice
         # that numel is computed internally when constructing a
         # tensor, hence the overflow may appear during the tensor
         # construction step.
         N = 100000
         indices = torch.tensor([[N, N - 1]] * 4, dtype=torch.int64, device=device)
         values = torch.tensor([1, 2], dtype=dtype, device=device)
         self.assertRaises(RuntimeError,
                           lambda: torch.sparse_coo_tensor(
                               indices, values, (N + 1,) * 4, device=device))

     @dtypes(torch.double, torch.cdouble)
     def test_ctor_size_checks(self, device, dtype):
         indices = self.index_tensor([
             [0, 0, 0],
             [0, 3, 0],
             [0, 0, 0],
             [0, 0, 0],
         ], device=device)
         values = torch.tensor([2, 1, 3, 4], dtype=dtype, device=device)

         # indices inconsistent with size
         self.assertRaises(
             RuntimeError,
             lambda: self.sparse_tensor(indices, values, torch.Size([2, 1, 1])))

         # values inconsistent with size
         values = torch.tensor([
             [2, 1, 2, 1],
             [1, 0, 5, 2],
         ], dtype=dtype, device=device)
         self.assertRaises(
             RuntimeError,
             lambda: self.sparse_tensor(indices, values, torch.Size([2, 4, 2, 1])))

     @coalescedonoff
     @dtypes(torch.double)
     def test_ctor_is_coalesced_with_gradcheck(self, device, dtype, coalesced):
         for sparse_size, nnz in (((3, 3), 5), ((2, 3, 1, 5), 11)):
             t, _, _ = self._gen_sparse(len(sparse_size), nnz, sparse_size, dtype, device, coalesced)
             self.assertEqual(t.is_coalesced(), coalesced)

             def func(indices, values, shape, is_coalesced):
                 s = torch.sparse_coo_tensor(indices, values, shape, check_invariants=True, is_coalesced=is_coalesced)
                 self.assertEqual(s.is_coalesced(), is_coalesced)
                 return s.to_dense(masked_grad=False)

             if coalesced:
                 torch.autograd.gradcheck(func, (t._indices(), t._values().requires_grad_(True), t.shape, False))
                 torch.autograd.gradcheck(func, (t._indices(), t._values().requires_grad_(True), t.shape, True))
             else:
                 torch.autograd.gradcheck(func, (t._indices(), t._values().requires_grad_(True), t.shape, False))
                 with self.assertRaisesRegex(RuntimeError,
                                             "cannot set is_coalesced to true if indices correspond to uncoalesced COO tensor"):
                     torch.autograd.gradcheck(func, (t._indices(), t._values().requires_grad_(True), t.shape, True))

     @dtypes(*floating_and_complex_types_and(torch.float16, torch.bfloat16))
     @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
     @gradcheck_semantics()
     def test_to_dense_with_gradcheck(self, device, dtype, gradcheck):

         def test_tensor(x, res):
             x.to_dense()  # Tests triple to_dense for memory corruption
             x.to_dense()
             x.to_dense()
             dense_x = x.to_dense()
             safe_dense_x = self.safeToDense(x)
             dense_x = dense_x.to(res.dtype)
             safe_dense_x = safe_dense_x.to(res.dtype)
             self.assertEqual(res, dense_x)
             self.assertEqual(res, safe_dense_x)

             # Only run autograd test for float64
             if x.dtype != torch.float64:
                 return

             def fn(x):
                 return x.to_dense(masked_grad=gradcheck.masked)
             x.requires_grad_(True)
             gradcheck(fn, (x,))

         for value_type in [torch.double, torch.cdouble]:
             i = self.index_tensor([
                 [0, 1, 2, 2],
                 [0, 0, 0, 3],
                 [0, 0, 1, 4],
             ], device=device)
             # we don't have to_dense for half types on CPU because it is implemented
             # with a slower add_ operation
             v = torch.tensor([2, 1, 3, 4], dtype=dtype, device=device)
             x = self.sparse_tensor(i, v, torch.Size([3, 4, 5]), dtype=value_type, device=device)
             res = torch.tensor([
                 [[2, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0]],
                 [[1, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0]],
                 [[0, 3, 0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 4]],
             ], dtype=dtype, device=device)

             test_tensor(x, res)
             test_tensor(res, res)

             i = self.index_tensor([
                 [0, 1, 2, 2],
                 [0, 0, 0, 3],
                 [0, 0, 1, 4],
             ], device=device)
             v = torch.empty(4, 0, dtype=dtype, device=device)
             x = self.sparse_tensor(i, v, torch.Size([3, 4, 5, 0]), dtype=value_type, device=device)
             res = torch.empty((3, 4, 5, 0), dtype=dtype, device=device)
             test_tensor(x, res)

     @coalescedonoff
     @dtypes(torch.float16, torch.bfloat16, torch.float64, torch.int, torch.cfloat, torch.cdouble)
     def test_to_sparse(self, device, dtype, coalesced):
         shape = [5, 2, 10, 4]
         max_nnz = 1
         for value_type in [torch.double, torch.cdouble]:
             for dim, dim_sz in enumerate(shape, 1):
                 max_nnz *= dim_sz
                 rnnz = torch.randint(2, max_nnz, (1,)).item()
                 for nnz in [0, 1, rnnz]:
                     expected, _, _ = self._gen_sparse(dim, nnz, shape, dtype=value_type, device=device,
                                                       coalesced=coalesced)
                     expected = expected.to(dtype)

                     d = expected.to_dense()
                     result = d.to_sparse(dim)
                     self.assertEqual(d, result.to_dense())
                     self.assertEqual(expected.size(), result.size())
                     self.assertEqual(dim, result.sparse_dim())

     @dtypes(torch.double, torch.cdouble)
     def test_sparse_bool(self, device, dtype):
         a = torch.tensor([True, False], dtype=dtype, device=device).to(torch.bool)
         b = a.to_sparse().to_dense()
         self.assertEqual(a, b)

     @skipIfTorchDynamo("https://github.com/pytorch/pytorch/issues/108667")
     @dtypes(torch.double, torch.cdouble)
     def test_scalar(self, device, dtype):
         # tensor with value
         a = self.sparse_tensor(self.index_tensor([], device=device).unsqueeze(1), 12.3, [], dtype=dtype, device=device)
         self.assertEqual(1, a._values().numel())
         self.assertEqual(a, a.clone())
         a_coalesced = a.coalesce()
         self.assertTrue(a_coalesced.is_coalesced())
         self.assertEqual(torch.tensor(12.3, dtype=dtype, device=device), a.to_dense())
         self.assertEqual(a, a.to_dense().to_sparse())

         # tensor with multiple values
         a = self.sparse_tensor(self.index_tensor([], device=device).unsqueeze(1).expand(0, 2),
                                [12.3, 12.3], [], dtype=dtype, device=device)
         self.assertEqual(2, a._values().numel())
         self.assertEqual(a, a.clone())
         a_coalesced = a.coalesce()
         self.assertTrue(a_coalesced.is_coalesced())
         self.assertEqual(torch.tensor(12.3 * 2, dtype=dtype, device=device), a.to_dense())
         self.assertEqual(a.coalesce(), a.coalesce().to_dense().to_sparse())

         # tensor without value
         a = self.sparse_empty((), dtype=dtype, device=device)
         self.assertEqual(0, a._values().numel())
         self.assertEqual(a, a.clone())
         a_coalesced = a.coalesce()
         self.assertTrue(a_coalesced.is_coalesced())
         self.assertEqual(torch.tensor(0, dtype=dtype, device=device), a.to_dense())
         self.assertEqual(a, a.to_dense().to_sparse())

     @dtypes(torch.double, torch.cdouble)
     def test_shared(self, device, dtype):
         i = self.index_tensor([[2]], device=device)
         v = torch.tensor([5], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3]))
         v[0] = 6
         self.assertEqual(torch.tensor([0, 0, 6], dtype=dtype, device=device), self.safeToDense(x))
         i[0][0] = 0
         self.assertEqual(torch.tensor([6, 0, 0], dtype=dtype, device=device), self.safeToDense(x))

         i = self.index_tensor([[2]], device=device)
         v = torch.empty((1, 0), dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 0]))
         i[0][0] = 0
         self.assertEqual(torch.empty((3, 0), dtype=dtype, device=device), self.safeToDense(x))

     @dtypes(torch.double, torch.cdouble)
     @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
     @gradcheck_semantics()
     def test_to_dense_hybrid(self, device, dtype, gradcheck):

         def test_tensor(x, res):
             x.to_dense()  # Tests double to_dense for memory corruption
             x.to_dense()
             x.to_dense()
             self.assertEqual(res, x.to_dense())
             self.assertEqual(res, self.safeToDense(x))

             def fn(x):
                 return x.to_dense(masked_grad=gradcheck.masked)
             x.requires_grad_(True)
             gradcheck(fn, (x,))

         i = self.index_tensor([
             [0, 1, 2, 2],
             [0, 0, 0, 3],
         ], device=device)
         v = torch.tensor([[2, 3], [1, 2], [3, 4], [4, 5]], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 4, 2]))
         res = torch.tensor([
             [[2, 3],
              [0, 0],
              [0, 0],
              [0, 0]],
             [[1, 2],
              [0, 0],
              [0, 0],
              [0, 0]],
             [[3, 4],
              [0, 0],
              [0, 0],
              [4, 5]],
         ], dtype=dtype, device=device)
         test_tensor(x, res)

         i = self.index_tensor([
             [0, 1, 2, 2],
             [0, 0, 0, 3],
         ], device=device)
         v = torch.empty((4, 2, 0), dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 4, 2, 0]))
         res = torch.empty((3, 4, 2, 0), dtype=dtype, device=device)
         test_tensor(x, res)

     @dtypes(torch.double, torch.cdouble)
     def test_contig(self, device, dtype):
         def test_tensor(x, exp_i, exp_v):
             x = x.coalesce()
             self.assertEqual(exp_i, x._indices())
             self.assertEqual(exp_v, x._values())

         i = self.index_tensor([
             [1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
             [92, 31, 62, 50, 22, 65, 89, 74, 56, 34],
         ], device=device)
         v = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([100, 100]))
         exp_i = self.index_tensor([
             [0, 1, 6, 14, 27, 35, 39, 40, 66, 71],
             [31, 92, 65, 50, 34, 62, 22, 56, 74, 89],
         ], device=device)
         exp_v = torch.tensor([2, 1, 6, 4, 10, 3, 5, 9, 8, 7], dtype=dtype, device=device)
         test_tensor(x, exp_i, exp_v)

         i = self.index_tensor([
             [2, 0, 2, 1],
             [0, 0, 3, 0],
             [1, 0, 4, 0],
         ], device=device)
         v = torch.tensor([3, 2, 4, 1], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 4, 5]))
         exp_i = self.index_tensor([
             [0, 1, 2, 2],
             [0, 0, 0, 3],
             [0, 0, 1, 4],
         ], device=device)
         exp_v = torch.tensor([2, 1, 3, 4], dtype=dtype, device=device)
         test_tensor(x, exp_i, exp_v)

         i = self.index_tensor([
             [2, 0, 2, 1],
             [0, 0, 3, 0],
             [1, 0, 4, 0],
         ], device=device)
         v = torch.empty([4, 0], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 4, 5, 0]))
         exp_i = self.index_tensor([
             [0, 1, 2, 2],
             [0, 0, 0, 3],
             [0, 0, 1, 4],
         ], device=device)
         exp_v = torch.empty([4, 0], dtype=dtype, device=device)
         test_tensor(x, exp_i, exp_v)

         # Duplicate indices
         i = self.index_tensor([
             [0, 0, 2, 0],
             [0, 0, 3, 0],
             [0, 0, 4, 0],
         ], device=device)
         v = torch.tensor([3, 2, 4, 1], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 4, 5]))
         exp_i = self.index_tensor([
             [0, 2],
             [0, 3],
             [0, 4],
         ], device=device)
         exp_v = torch.tensor([6, 4], dtype=dtype, device=device)
         test_tensor(x, exp_i, exp_v)

         i = self.index_tensor([
             [0, 0, 2, 0],
             [0, 0, 3, 0],
             [0, 0, 4, 0],
         ], device=device)
         v = torch.empty([4, 0], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 4, 5, 0]))
         exp_i = self.index_tensor([
             [0, 2],
             [0, 3],
             [0, 4],
         ], device=device)
         exp_v = torch.empty([2, 0], dtype=dtype, device=device)
         test_tensor(x, exp_i, exp_v)

     @dtypes(torch.double, torch.cdouble)
     def test_contig_hybrid(self, device, dtype):
         def test_tensor(x, exp_i, exp_v):
             x = x.coalesce()
             self.assertEqual(exp_i, x._indices())
             self.assertEqual(exp_v, x._values())

         i = self.index_tensor([
             [1, 0, 35, 14, 39, 6, 71, 66, 40, 27],
             [92, 31, 62, 50, 22, 65, 89, 74, 56, 34],
         ], device=device)
         v = torch.tensor([
             [1, 2], [2, 3], [3, 4], [4, 5], [5, 6],
             [6, 7], [7, 8], [8, 9], [9, 10], [10, 11],
         ], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([100, 100, 2]))
         exp_i = self.index_tensor([
             [0, 1, 6, 14, 27, 35, 39, 40, 66, 71],
             [31, 92, 65, 50, 34, 62, 22, 56, 74, 89],
         ], device=device)
         exp_v = torch.tensor([
             [2, 3], [1, 2], [6, 7], [4, 5], [10, 11],
             [3, 4], [5, 6], [9, 10], [8, 9], [7, 8],
         ], dtype=dtype, device=device)
         test_tensor(x, exp_i, exp_v)

         i = self.index_tensor([
             [2, 0, 2, 1],
             [0, 0, 3, 0],
             [1, 0, 4, 0],
         ], device=device)
         v = torch.tensor([[3, 3, 3], [2, 2, 2], [4, 4, 4], [1, 1, 1]], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 4, 5, 3]))
         exp_i = self.index_tensor([
             [0, 1, 2, 2],
             [0, 0, 0, 3],
             [0, 0, 1, 4],
         ], device=device)
         exp_v = torch.tensor([[2, 2, 2], [1, 1, 1], [3, 3, 3], [4, 4, 4]], dtype=dtype, device=device)
         test_tensor(x, exp_i, exp_v)

         i = self.index_tensor([
             [2, 0, 2, 1],
             [0, 0, 3, 0],
             [1, 0, 4, 0],
         ], device=device)
         v = torch.empty([4, 3, 0], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 4, 5, 3, 0]))
         exp_i = self.index_tensor([
             [0, 1, 2, 2],
             [0, 0, 0, 3],
             [0, 0, 1, 4],
         ], device=device)
         exp_v = torch.empty([4, 3, 0], dtype=dtype, device=device)
         test_tensor(x, exp_i, exp_v)

         # Duplicate indices
         i = self.index_tensor([
             [0, 0, 2, 0],
             [0, 0, 3, 0],
             [0, 0, 4, 0],
         ], device=device)
         v = torch.tensor([[3, 2, 3], [2, 1, 1], [4, 3, 4], [1, 1, 1]], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 4, 5, 3]))
         exp_i = self.index_tensor([
             [0, 2],
             [0, 3],
             [0, 4],
         ], device=device)
         exp_v = torch.tensor([[6, 4, 5], [4, 3, 4]], dtype=dtype, device=device)
         test_tensor(x, exp_i, exp_v)

         i = self.index_tensor([
             [0, 0, 2, 0],
             [0, 0, 3, 0],
             [0, 0, 4, 0],
         ], device=device)
         v = torch.empty([4, 3, 0], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 4, 5, 3, 0]))
         exp_i = self.index_tensor([
             [0, 2],
             [0, 3],
             [0, 4],
         ], device=device)
         exp_v = torch.empty([2, 3, 0], dtype=dtype, device=device)
         test_tensor(x, exp_i, exp_v)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_clone(self, device, dtype, coalesced):
         def test_shape(sparse_dims, nnz, with_size):
             x = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)[0]
             if not coalesced:
                 self.assertFalse(x.is_coalesced())
                 y = x.clone()
                 self.assertFalse(y.is_coalesced())
             x = x.coalesce()
             self.assertTrue(x.is_coalesced())
             y = x.clone()
             self.assertTrue(y.is_coalesced())

         test_shape(4, 20, 5)
         test_shape(3, 10, [100, 100, 100, 5, 5, 5, 0])
         test_shape(3, 0, [0, 0, 100, 5, 5, 5, 0])

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble, torch.bfloat16)
     @precisionOverride({torch.bfloat16: 2e-2})
     def test_Sparse_to_Sparse_copy_(self, device, dtype, coalesced):
         # This is for testing torch.copy_(SparseTensor, SparseTensor)
         sparse_dims = 3
         nnz = 10
         sizes = [2, 3, 4, 5]  # hybrid sparse
         x1, _, _ = self._gen_sparse(sparse_dims, nnz, sizes, dtype, device, coalesced)
         x2, _, _ = self._gen_sparse(sparse_dims, nnz + 10, sizes, dtype, device, coalesced)

         # test copy
         x2_dense = x2.to_dense()
         x1.copy_(x2)
         self.assertEqual(x2_dense, x1.to_dense())

         # test type conversion (when x1.copy_(x2), x1.dtype should stay the same)
         x1 = x1.to(torch.float32)

         x2 = x2.to(torch.float16)
         x1_dtype = x1.dtype
         x1.copy_(x2)
         self.assertEqual(x1_dtype, x1.dtype)

         x2 = x2.to(torch.float64)
         x1_dtype = x1.dtype
         x1.copy_(x2)
         self.assertEqual(x1_dtype, x1.dtype)

         # test no broadcast
         self.assertRaises(RuntimeError, lambda: x1.copy_(x2.narrow_copy(0, 0, 1)))

         # test raise error on copy_() between dense and sparse Tensors
         self.assertRaises(RuntimeError, lambda: x1.copy_(torch.randn(5, 5)))

         # test autograd
         x1, _, _ = self._gen_sparse(sparse_dims, nnz, sizes, dtype, device, coalesced)
         x2, _, _ = self._gen_sparse(sparse_dims, nnz + 10, sizes, dtype, device, coalesced)
         x2.requires_grad_(True)
         x1.copy_(x2)
         y = x1 * 2
         x2_clone = x2.clone()
         y.backward(x2_clone)
         expected_grad = x2_clone * 2
         self.assertEqual(expected_grad.to_dense(), x2.grad.to_dense())
         self.assertEqual(None, x1.grad)

     @coalescedonoff
     @unittest.skipIf(not TEST_MULTIGPU, "multi-GPU not supported")
     @dtypes(torch.double, torch.cdouble)
     def test_Sparse_to_Sparse_copy_multi_gpu(self, device, dtype, coalesced):
         # This is for testing torch.copy_(SparseTensor, SparseTensor) across GPU devices
         sparse_dims = 3
         nnz = 10
         sizes = [2, 3, 4, 5]  # hybrid sparse
         x1, _, _ = self._gen_sparse(sparse_dims, nnz, sizes, dtype, device, coalesced)
         x2, _, _ = self._gen_sparse(sparse_dims, nnz + 10, sizes, dtype, device, coalesced)
         x1 = x1.to('cuda:0')

         def test_cross_device(x1, x2):
             x1_device = x1.device
             x1.copy_(x2)
             self.assertEqual(x2.to('cuda:0').to_dense(), x1.to_dense())
             self.assertEqual(x1_device, x1.device)

         test_cross_device(x1, x2.to('cuda:1'))  # test across gpu devices
         test_cross_device(x1, x2.to('cpu'))  # test between cpu and gpu

         # test autograd
         x2 = x2.to('cuda:1')
         x2.requires_grad_(True)
         x1.copy_(x2)
         y = x1 * 2
         x2_clone = x2.clone().to('cuda:0')
         y.backward(x2_clone)
         expected_grad = x2_clone * 2
         self.assertEqual(expected_grad.to_dense(), x2.grad.to('cuda:0').to_dense())
         self.assertEqual(None, x1.grad)

     @onlyCUDA
     def test_cuda_empty(self, device):
         def test_tensor(x):
             y = x.to(device)
             self.assertEqual(x.sparse_dim(), y.sparse_dim())
             self.assertEqual(x.dense_dim(), y.dense_dim())
             x = y.cpu()
             self.assertEqual(y.sparse_dim(), x.sparse_dim())
             self.assertEqual(y.dense_dim(), x.dense_dim())

         x = torch.sparse_coo_tensor((2, 3, 4), dtype=torch.float32)
         test_tensor(x)

         x = torch.sparse_coo_tensor((2, 3, 4), dtype=torch.float16)
         test_tensor(x)

         x = torch.sparse_coo_tensor((2, 3, 4), dtype=torch.float16)
         test_tensor(x)

         x = torch.sparse_coo_tensor((2, 3, 4, 0), dtype=torch.float32)
         test_tensor(x)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_transpose(self, device, dtype, coalesced):
         def test_shape(sparse_dims, nnz, with_size):
             x = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)[0]
             y = self.safeToDense(x)

             for i, j in itertools.combinations(range(4), 2):
                 x = x.transpose_(i, j)
                 y = y.transpose(i, j)
                 self.assertEqual(self.safeToDense(x), y)

                 x = x.transpose(i, j)
                 y = y.transpose(i, j)
                 self.assertEqual(self.safeToDense(x), y)

         test_shape(4, 6, 3)
         test_shape(4, 3, [7, 7, 7, 3, 3, 3, 0])
         test_shape(4, 0, [0, 0, 7, 3, 3, 3, 0])

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
     @gradcheck_semantics()
     def test_permute(self, device, dtype, coalesced, gradcheck):
         # trivial checks
         s = torch.rand(3, 3, 3, device=device, dtype=dtype).to_sparse()
         with self.assertRaisesRegex(RuntimeError, "does not match the length"):
             s.permute(dims=(1, 0))
         with self.assertRaisesRegex(RuntimeError, "duplicate dims"):
             s.permute(dims=(1, 1, 1))
         # Calling permute on a sparse tensor with an empty tuple used to segfault,
         # see https://github.com/pytorch/pytorch/issues/116325
         x = torch.rand((), device=device, dtype=dtype).to_sparse()
         x.permute(())
         self.assertEqual(len(x.values()), 1)

         def test_shape(sparse_dims, nnz, with_size):
             ndim = len(with_size)
             valid_sparse_dims = torch.arange(-ndim, -ndim + sparse_dims)
             valid_dense_dims = torch.arange(-ndim + sparse_dims, 0)

             for dims in itertools.permutations(range(-ndim, 0)):
                 s = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)[0]
                 d = self.safeToDense(s)

                 dims_sparse, _ = torch.tensor(dims[:sparse_dims]).sort()
                 dims_dense, _ = torch.tensor(dims[sparse_dims:]).sort()

                 if (valid_sparse_dims == dims_sparse).all() and (valid_dense_dims == dims_dense).all():
                     # if valid permutation, test for correctness
                     s_permuted = s.permute(dims)
                     self.assertEqual(s_permuted, d.permute(dims))

                     # if s is coalesced, and perm does not touch 0-dim,
                     # the result has to be coalesced as well
                     if dims[0] == 0:
                         self.assertEqual(s_permuted.is_coalesced(), s.is_coalesced())
                     else:
                         self.assertFalse(s_permuted.is_coalesced())

                     gradcheck(lambda t: t.permute(dims).to_dense(masked_grad=gradcheck.masked), s.requires_grad_())
                 else:
                     # otherwise check if exception is thrown
                     fail_message = "transpositions between sparse and dense dimensions are not allowed"
                     with self.assertRaisesRegex(RuntimeError, fail_message):
                         s.permute(dims)

         test_shape(2, 3, [2, 3, 4, 5])
         test_shape(2, 3, [2, 2, 0])
         # if nnz=0, it is not true that t == t.to_dense().to_sparse()
         # unless t.sparse_dim == t.dim (i.e. t is not hybrid)
         test_shape(3, 0, [0, 0, 2])

     @coalescedonoff
     @onlyCPU
     @dtypes(torch.double)
     def test_coalesce_transpose_mm(self, device, dtype, coalesced):
         def test_shape(di, dj, dk, nnz):
             x, _, _ = self._gen_sparse(2, nnz, [dj, di], dtype, device, coalesced)
             y = torch.randn(dj, dk, dtype=dtype, device=device)

             x_coalesced = x.coalesce()
             self.assertTrue(x_coalesced.is_coalesced())

             x_coalesced_t = x_coalesced.t()
             # Transpose is `colasced`-preserving if the indices tensor is empty.
             self.assertEqual(x_coalesced_t.is_coalesced(), di * nnz == 0)

             res = torch.mm(x_coalesced_t, y)
             expected = torch.mm(self.safeToDense(x_coalesced_t), y)
             self.assertEqual(res, expected)

         test_shape(10, 20, 30, 20)
         test_shape(0, 20, 30, 0)
         test_shape(10, 0, 30, 0)
         test_shape(10, 20, 0, 0)
         test_shape(10, 20, 0, 20)

     @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1166")
     @dtypes(torch.double, torch.cdouble)
     def test_t_empty(self, device, dtype):
         def test_in_place(x):
             shape_original = x.shape
             x.t_()
             self.assertEqual(torch.Size([shape_original[1], shape_original[0]]), x.size())
             self.assertEqual(0, x._indices().numel())
             self.assertEqual(0, x._values().numel())
             self.assertEqual(x.sparse_dim(), 2)
             self.assertEqual(x.dense_dim(), 0)

         def test_not_in_place(x):
             shape_original = x.shape
             y = x.t()
             self.assertEqual(torch.Size([shape_original[1], shape_original[0]]), y.size())
             self.assertEqual(0, y._indices().numel())
             self.assertEqual(0, y._values().numel())
             self.assertEqual(x.sparse_dim(), 2)
             self.assertEqual(x.dense_dim(), 0)

         x = self.sparse_empty(2, 3, dtype=dtype, device=device)
         test_in_place(x)
         test_not_in_place(x)

         x = self.sparse_empty(2, 0, dtype=dtype, device=device)
         test_in_place(x)
         test_not_in_place(x)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_add_zeros(self, device, dtype, coalesced):
         def test_shape(sparse_dims, nnz, sizes):
             x, _, _ = self._gen_sparse(sparse_dims, nnz, sizes, dtype, device, coalesced)
             zeros = torch.sparse_coo_tensor(sizes, device=x.device)
             r1 = zeros + x
             r2 = x + zeros
             self.assertEqual(r1, x)
             self.assertEqual(r2, x)

         test_shape(1, 20, [1])
         test_shape(4, 20, [3, 17, 19, 5])
         test_shape(2, 20, [3, 17, 19, 5])
         test_shape(2, 20, [3, 17, 19, 0])

     @dtypes(torch.double, torch.cdouble)
     def test_add_sub_nnz(self, device, dtype):
         # nnz should not grow unbounded (gh-34964)
         x = torch.randn(10, dtype=dtype, device=device).to_sparse()
         x.add_(x)
         x.add_(x)
         self.assertLessEqual(x._nnz(), 10)

         x.sub_(2 * x)
         x.sub_(2 * x)
         self.assertLessEqual(x._nnz(), 10)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_cat(self, device, dtype, coalesced):
         # shapes: list of tuples (sparse_dims, nnz, sizes)
         def test_shapes(shapes, dim, fail_message=None):
             inputs = [self._gen_sparse(shape[0], shape[1], shape[2], dtype, device, coalesced)[0]
                       for shape in shapes]
             if fail_message:
                 with self.assertRaisesRegex(RuntimeError, fail_message):
                     torch.cat(inputs, dim)
             else:
                 result = torch.cat(inputs, dim)
                 dense_result = torch.cat([t.to_dense() for t in inputs], dim)
                 self.assertEqual(dense_result, result.to_dense())

         test_shapes(
             [(3, 10, [2, 3, 4]), (3, 10, [2, 1, 4]), (3, 10, [2, 4, 4])], 1)

         # mismatched sizes
         test_shapes([(3, 10, [2, 3, 4]), (3, 10, [2, 1, 4])], 0,
                     "All tensors must have the same shape: \\[2, 3, 4].*\\[2, 1, 4]")
         # hybrid sparse/dense
         test_shapes(
             [(2, 10, [2, 3, 4]), (2, 10, [2, 1, 4]), (2, 10, [2, 4, 4])], 1)
         # cat along dense dim
         test_shapes([(2, 10, [2, 3, 4]), (2, 10, [2, 3, 7])], 2)
         test_shapes([(1, 10, [2, 3, 4]), (1, 10, [2, 3, 4])], 1)
         test_shapes([(1, 10, [2, 3, 4]), (1, 10, [2, 3, 4])], 2)
         # mismatched dimensions
         test_shapes([(2, 10, [2, 3, 4]), (3, 10, [2, 3, 4])], 0,
                     "All tensors must have the same.*2, 1, but tensor at position 1 has 3, 0.")
         # wrapped dimension
         test_shapes(
             [(3, 10, [2, 3, 4]), (3, 10, [2, 1, 4]), (3, 10, [2, 4, 4])], -2)

         # sparse with dense
         sp = self._gen_sparse(3, 10, [2, 3, 4], dtype, device, coalesced)[0]
         dn = sp.to_dense()
         with self.assertRaisesRegex(RuntimeError,
                                     "Concatenating sparse tensors, but a dense tensor was found at position 1."):
             torch.cat((sp, dn))

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_unsqueeze(self, device, dtype, coalesced):
         def test_shape(sparse_dims, nnz, sizes, unsqueeze_dim, fail_message=None):
             x, _, _ = self._gen_sparse(sparse_dims, nnz, sizes, dtype, device, coalesced)
             if fail_message:
                 with self.assertRaisesRegex(IndexError, fail_message):
                     torch.unsqueeze(x, unsqueeze_dim)
             else:
                 result = torch.unsqueeze(x, unsqueeze_dim)
                 dense_result = torch.unsqueeze(x.to_dense(), unsqueeze_dim)
                 self.assertEqual(dense_result, result.to_dense())

         # basic case
         test_shape(3, 10, [5, 7, 11], 0)

         # hybrid sparse/dense, unsqueeze along sparse dim
         test_shape(3, 10, [5, 7, 11, 13, 17], 0)
         test_shape(3, 10, [5, 7, 11, 13, 17], 3)

         # unsqueeze along dense dimensions
         test_shape(3, 10, [5, 7, 11, 13, 17], 4)
         test_shape(3, 10, [5, 7, 11, 13, 17], 5)

         # wrapped dimensions
         test_shape(3, 10, [5, 7, 11, 13, 17], -1)
         test_shape(3, 10, [5, 7, 11, 13, 17], -6)

         # bounds
         test_shape(3, 10, [5, 7, 11, 13, 17], -7, "Dimension out of range")
         test_shape(3, 10, [5, 7, 11, 13, 17], 6, "Dimension out of range")

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_select(self, device, dtype, coalesced):
         def test_shape(sparse_dims, nnz, sizes, select_dim, select_index, fail_message=None):
             x, _, _ = self._gen_sparse(sparse_dims, nnz, sizes, dtype, device, coalesced)
             if fail_message:
                 with self.assertRaisesRegex(IndexError, fail_message):
                     torch.select(x, select_dim, select_index)
             else:
                 result = torch.select(x, select_dim, select_index)
                 if result.is_sparse:
                     result = result.to_dense()
                 dense_result = torch.select(x.to_dense(), select_dim, select_index)
                 self.assertEqual(dense_result, result)


         sizes = [5, 7, 11, 13, 17]
         # hybrid sparse/dense, select sparse dim, result is dense
         for i in range(sizes[0]):
             test_shape(1, 10, sizes, 0, i)
         test_shape(1, 10, sizes, 0, sizes[0] + 1, r'select[(][)][:] index \d out of range.*')

         # hybrid sparse/dense, select sparse dim, result is sparse
         for d in range(3):
             for i in range(sizes[d]):
                 test_shape(3, 10, sizes, d, i)

         # hybrid sparse/dense, select dense dim, result is sparse
         for d in range(1, 3):
             for i in range(sizes[d]):
                 test_shape(1, 10, sizes, d, i)

     @dtypes(*integral_types())
     def test_select_no_type_promotion(self, device, dtype):
         # see https://github.com/pytorch/pytorch/issues/82150
         idx = torch.tensor([[0, 0, 0, 1, 1, 1], [0, 0, 0, 1, 1, 1]])
         val = torch.ones(6, dtype=dtype)
         s = torch.sparse_coo_tensor(idx, val, size=(3, 3))

         for t in (s, s * torch.tensor(0, dtype=dtype)):
             # empty checks
             self.assertEqual(t.dtype, t[2].dtype)
             self.assertEqual(t.dtype, t[0, 1].dtype)
             # sum should not promote
             self.assertEqual(t.dtype, t[0, 0].dtype)
             self.assertEqual(t.dtype, t[1, 1].dtype)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_index_select(self, device, dtype, coalesced):
         def test_shape(sparse_dims, nnz, sizes, select_dim, select_index, fail_message=None):
             if isinstance(select_index, int):
                 select_index = [select_index]
             if isinstance(select_index, list):
                 select_index = torch.tensor(select_index, device=device, dtype=torch.long)
             x, _, _ = self._gen_sparse(sparse_dims, nnz, sizes, dtype, device, coalesced)
             if fail_message:
                 with self.assertRaisesRegex(IndexError, fail_message):
                     torch.index_select(x, select_dim, select_index)
             else:
                 result = torch.index_select(x, select_dim, select_index)
                 if result.is_sparse:
                     result = result.to_dense()
                 dense_result = torch.index_select(x.to_dense(), select_dim, select_index)
                 self.assertEqual(dense_result, result)

         sizes = [5, 7, 11, 13, 17]
         for d in range(len(sizes)):
             for index in [0, sizes[d] - 1, [0, sizes[d] // 2, sizes[d] - 1]]:
                 test_shape(1, 10, sizes, d, index)
                 test_shape(len(sizes) // 2, 10, sizes, d, index)
                 test_shape(len(sizes), 10, sizes, d, index)

     def _test_index_select_exhaustive_index(self, sizes, dims, device, dtype, coalesced):
         t = make_tensor(sizes, dtype=dtype, device=device)
         t_sparse = t.to_sparse().coalesce() if coalesced else t.to_sparse()
         t_small_sparse, _, _ = self._gen_sparse(len(sizes), 2, sizes, dtype, device, coalesced)
         t_small = t_small_sparse.to_dense()
         for d in dims:
             # NOTE: indices are negative
             idx_dim_d_range = list(range(-sizes[d], 0))
             for idx_len in range(sizes[d], sizes[d] + 1):
                 # creates all possible valid indices into dim d of lenght idx_len
                 for idx in itertools.product(*itertools.repeat(idx_dim_d_range, idx_len)):
                     t_idx = torch.tensor(idx, dtype=torch.long, device=device)

                     # NOTE: index_select for dense does not support negative indices,
                     # hence + sizes[d]. See https://github.com/pytorch/pytorch/issues/76347

                     # tests the nnz > sizes[d] branch
                     dense_result = t.index_select(d, t_idx + sizes[d])
                     sparse_result = t_sparse.index_select(d, t_idx)
                     self.assertEqual(dense_result, sparse_result)

                     # tests the nnz <= sizes[d] branch
                     small_dense_result = t_small.index_select(d, t_idx + sizes[d])
                     small_sparse_result = t_small_sparse.index_select(d, t_idx)
                     self.assertEqual(small_dense_result, small_sparse_result)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_index_select_exhaustive_index_small(self, device, dtype, coalesced):
         # will trigger brute-force algo
         self._test_index_select_exhaustive_index((3, 3, 4), range(3), device, dtype, coalesced)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_index_select_exhaustive_index_large(self, device, dtype, coalesced):
         # will trigger more sophisticated algos
         self._test_index_select_exhaustive_index((100, 50, 3, 3), (2, 3), device, dtype, coalesced)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_index_select_empty_and_non_contiguous_index(self, device, dtype, coalesced):
         # empty index
         idx_empty = torch.tensor([], dtype=torch.long, device=device)
         t = make_tensor((5, 5), dtype=dtype, device=device)
         res_dense = t.index_select(0, idx_empty)
         res_sparse = t.to_sparse().index_select(0, idx_empty)
         self.assertEqual(res_dense, res_sparse)

         # non-contigous index
         idx = torch.randint(low=0, high=5, size=(10, 2), device=device)[:, 0]

         def run_test(sizes):
             # case nnz > size[d]
             t = make_tensor(sizes, dtype=dtype, device=device)
             res_dense = t.index_select(0, idx)
             res_sparse = t.to_sparse().index_select(0, idx)
             self.assertEqual(res_dense, res_sparse)

             # case nnz <= size[d]
             t_small_sparse, _, _ = self._gen_sparse(len(sizes), 2, sizes, dtype, device, coalesced)
             res_sparse = t_small_sparse.index_select(0, idx)
             res_dense = t_small_sparse.to_dense().index_select(0, idx)
             self.assertEqual(res_dense, res_sparse)

         # brute-force
         run_test((10, 10))
         # more sophisticated algos
         run_test((10, 100, 100))

     @onlyCPU
     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_index_select_parallelization(self, device, dtype, coalesced):
         """
         Test with sizes that will trigger parallelization (i.e. with sizes
         that are >= at::internal::GRAIN_SIZE)
         """
         def run_test(nnz, size):
             t_sparse, _, _ = self._gen_sparse(1, nnz, (size,), dtype, device, coalesced)
             t_dense = t_sparse.to_dense()

             # idx_small to (sort) and (binary) search into t_sparse
             idx_small = torch.randint(size, (nnz // 2,), device=device)
             # idx_large to (sort) and (binary) search into idx_large
             # NOTE: when coalesced=True, the (binary) search will be
             # done over t_sparse anyway, as it is already sorted.
             idx_large = torch.randint(size, (nnz * 2,), device=device)
             for idx in (idx_small, idx_large):
                 res_dense = t_dense.index_select(0, idx)
                 res_sparse = t_sparse.index_select(0, idx)
                 self.assertEqual(res_dense, res_sparse)

         # NOTE: GRAIN_SIZE = 32768
         # case nnz <= size[d]
         tlen = 70000  # > 2 * GRAIN_SIZE
         run_test(tlen, tlen)

         # case nnz > size[d]
         run_test(tlen, tlen // 2)

     @onlyCPU
     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_mm(self, device, dtype, coalesced):
         def test_shape(di, dj, dk, nnz):
             x, _, _ = self._gen_sparse(2, nnz, [di, dj], dtype, device, coalesced)
             t = torch.randn(di, dk, dtype=dtype, device=device)
             y = torch.randn(dj, dk, dtype=dtype, device=device)
             alpha = random.random()
             beta = random.random()

             res = torch.addmm(t, x, y, beta=beta, alpha=alpha)
             expected = torch.addmm(t, self.safeToDense(x), y, beta=beta, alpha=alpha)
             self.assertEqual(res, expected)

             res = torch.addmm(t, x, y)
             expected = torch.addmm(t, self.safeToDense(x), y)
             self.assertEqual(res, expected)

             res = torch.mm(x, y)
             expected = torch.mm(self.safeToDense(x), y)
             self.assertEqual(res, expected)

         test_shape(10, 100, 100, 20)
         test_shape(100, 1000, 200, 20)
         test_shape(64, 10000, 300, 20)
         test_shape(0, 100, 100, 0)
         test_shape(10, 0, 100, 0)
         test_shape(10, 100, 0, 0)
         test_shape(10, 100, 0, 20)

     @unittest.skipIf(
         IS_WINDOWS and TEST_CUDA,
         "bmm sparse-dense CUDA is not yet supported in Windows, at least up to CUDA 10.1"
     )
     @coalescedonoff
     @dtypes(torch.double)
     def test_bmm(self, device, dtype, coalesced):
         def test_shape(num_mats, dim_i, dim_j, dim_k, nnz):
             a_list = []
             b_list = []
             for mat_idx in range(num_mats):
                 a_mat = self._gen_sparse(2, nnz, [dim_i, dim_j], dtype, device, coalesced)[0]
                 b_mat = torch.randn([dim_j, dim_k], dtype=dtype, device=device)
                 a_list.append(a_mat)
                 b_list.append(b_mat)

             a = torch.stack(a_list)
             b = torch.stack(b_list)
             ab = a.bmm(b)

             # Compare each matrix against result from mm()
             for mat_idx in range(num_mats):
                 a_mat = a_list[mat_idx]
                 b_mat = b_list[mat_idx]
                 ab_mat_bmm = ab[mat_idx]
                 ab_mat_mm = a_mat.mm(b_mat)
                 self.assertEqual(ab_mat_bmm, ab_mat_mm)

         test_shape(10, 10, 100, 99, 20)
         test_shape(10, 100, 1000, 200, 20)
         test_shape(10, 64, 10000, 300, 20)
         test_shape(10, 0, 100, 99, 0)
         test_shape(10, 10, 0, 100, 0)
         test_shape(10, 10, 100, 0, 0)
         test_shape(10, 10, 100, 0, 20)
         test_shape(10, 10, 100, 0, 20)

         a = torch.rand([10, 23, 32], dtype=dtype, device=device)
         a[3] = torch.zeros(23, 32, dtype=dtype, device=device)
         a[6] = torch.zeros(23, 32, dtype=dtype, device=device)
         a = a.to_sparse()
         b = torch.rand([10, 32, 10], dtype=dtype, device=device)
         b[4] = torch.zeros(32, 10, dtype=dtype, device=device)
         b[6] = torch.zeros(32, 10, dtype=dtype, device=device)
         ab = a.bmm(b)
         for mat_idx in range(ab.size(0)):
             ab_mat = ab[mat_idx]
             ab_mat_check = a[mat_idx].mm(b[mat_idx])
             self.assertEqual(ab_mat, ab_mat_check)

         ab_traspose_check = b.transpose(1, 2).to_sparse().bmm(
             a.transpose(1, 2).to_dense()
         ).transpose(1, 2)
         self.assertEqual(ab, ab_traspose_check)

     @onlyCUDA
     @coalescedonoff
     @dtypes(torch.double)
     @unittest.skipIf(
         IS_WINDOWS,
         "bmm sparse-dense CUDA is not yet supported in Windows, at least up to CUDA 10.1"
     )
     def test_bmm_deterministic(self, device, dtype, coalesced):
         def test_shape(num_mats, dim_i, dim_j, dim_k, nnz):
             a_list = []
             b_list = []
             for mat_idx in range(num_mats):
                 a_list.append(self._gen_sparse(2, nnz, [dim_i, dim_j], dtype, device, coalesced)[0])
                 b_list.append(torch.randn([dim_j, dim_k], dtype=dtype, device=device))

             a = torch.stack(a_list).cuda()
             b = torch.stack(b_list).cuda()
             with DeterministicGuard(torch.are_deterministic_algorithms_enabled()):
                 torch.use_deterministic_algorithms(False)
                 ab_nondeterministic = torch.bmm(a, b)
                 torch.use_deterministic_algorithms(True)
                 ab_deterministic = torch.bmm(a, b)
             diff_abs = (ab_deterministic - ab_nondeterministic).abs()
             diff_rel = diff_abs / ab_deterministic.abs()
             diff_rel[torch.isnan(diff_rel)] = 0

             # deterministic and non-deterministic results should either be
             # equal or within a small relative difference
             equal_abs_or_rel = diff_abs.eq(0).logical_or(diff_rel.lt(0.001))
             self.assertTrue(equal_abs_or_rel.all())

         test_shape(10, 10, 100, 99, 20)
         test_shape(10, 100, 1000, 200, 20)
         test_shape(10, 64, 10000, 300, 20)
         test_shape(10, 0, 100, 99, 0)
         test_shape(10, 10, 0, 100, 0)
         test_shape(10, 10, 100, 0, 0)
         test_shape(10, 10, 100, 0, 20)
         test_shape(10, 10, 100, 0, 20)

     @onlyCUDA
     @unittest.skipIf(
         not IS_WINDOWS or not TEST_WITH_ROCM,
         "this test ensures bmm sparse-dense CUDA gives an error when run on Windows with CUDA < 11.0"
     )
     @dtypes(torch.double)
     def test_bmm_windows_error(self, device, dtype):
         a = torch.rand(2, 2, 2, dtype=dtype).to_sparse().cuda()
         b = torch.rand(2, 2, 2, dtype=dtype).cuda()
         with self.assertRaisesRegex(
                 RuntimeError,
                 "bmm sparse-dense CUDA is not supported on Windows with cuda before 11.0"):
             ab = a.bmm(b)

     @onlyCPU
     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_saddmm(self, device, dtype, coalesced):
         def test_shape(di, dj, dk, nnz):
             x = self._gen_sparse(2, nnz, [di, dj], dtype, device, coalesced)[0]
             t = self._gen_sparse(2, nnz, [di, dk], dtype, device, coalesced)[0]
             y = torch.randn(dj, dk, dtype=dtype, device=device)
             alpha = random.random()
             beta = random.random()

             res = torch.saddmm(t, x, y, beta=beta, alpha=alpha)
             expected = torch.addmm(self.safeToDense(t), self.safeToDense(x), y, beta=beta, alpha=alpha)
             self.assertEqual(self.safeToDense(res), expected)

             res = torch.saddmm(t, x, y)
             expected = torch.addmm(self.safeToDense(t), self.safeToDense(x), y)
             self.assertEqual(self.safeToDense(res), expected)

             res = torch.smm(x, y)
             expected = torch.mm(self.safeToDense(x), y)
             self.assertEqual(self.safeToDense(res), expected)

         test_shape(7, 5, 3, 20)
         test_shape(1000, 100, 100, 20)
         test_shape(3000, 64, 300, 20)
         test_shape(0, 100, 100, 0)
         test_shape(1000, 0, 100, 0)
         test_shape(1000, 100, 0, 0)

     @onlyCPU
     @coalescedonoff
     # adding a graph break before self.assertFalse(weight._indices().is_contiguous())
     # makes the test pass so some existent sparse related bug
     @skipIfTorchDynamo("skip")
     @dtypes(torch.double, torch.cdouble)
     def test_sspaddmm(self, device, dtype, coalesced):

         def test_shape(di, dj, dk, nnz):
             x = self._gen_sparse(2, nnz, [di, dj], dtype, device, coalesced)[0]
             t = self._gen_sparse(2, nnz, [di, dk], dtype, device, coalesced)[0]
             y = torch.randn(dj, dk, dtype=dtype, device=device)
             alpha = random.random()
             beta = random.random()

             res = t.sspaddmm(x, y, beta=beta, alpha=alpha)
             expected = torch.addmm(self.safeToDense(t), self.safeToDense(x), y, beta=beta, alpha=alpha)
             self.assertEqual(self.safeToDense(res), expected)

             res = t.sspaddmm(x, y)
             expected = torch.addmm(self.safeToDense(t), self.safeToDense(x), y)
             self.assertEqual(self.safeToDense(res), expected)

         test_shape(7, 5, 3, 20)
         test_shape(1000, 100, 100, 20)
         test_shape(3000, 64, 300, 20)
         test_shape(0, 100, 100, 0)
         test_shape(1000, 0, 100, 0)
         test_shape(1000, 100, 0, 0)

         # Test code from issue https://github.com/pytorch/pytorch/issues/45113
         batch_size, input_size, hidden_size = 5, 3, 7

         # Create coalesced sparse tensor with non-contiguous indices
         weight = torch.randn(hidden_size, input_size, dtype=dtype, device=device).to_sparse()
         self.assertTrue(weight.is_coalesced())
         non_contig_indices = weight.indices().mT.contiguous().mT
         weight = torch.sparse_coo_tensor(
             indices=non_contig_indices, values=weight.values(), size=weight.shape)
         weight._coalesced_(True)
         self.assertFalse(weight._indices().is_contiguous())
         # Create un/coalesced sparse tensor
         bias = torch.randn((hidden_size, 1), dtype=dtype, device=device).to_sparse()
         bias = torch.cat([bias] * batch_size, dim=1)

         if coalesced:
             bias = bias.coalesce()

         x = torch.randn(input_size, batch_size, dtype=dtype, device=device)
         res = bias.sspaddmm(weight, x)

         true_result = (bias.to_dense() + torch.matmul(weight.to_dense(), x)).to_sparse()
         self.assertEqual(self.safeToDense(res), self.safeToDense(true_result))

     @coalescedonoff
     @precisionOverride({torch.bfloat16: 5e-2, torch.float16: 5e-2})
     @dtypes(torch.double, torch.cdouble, torch.bfloat16, torch.float16)
     def test_sparse_addmm(self, device, dtype, coalesced):
         if (dtype is torch.bfloat16 or dtype is torch.float16) and device.startswith("cuda"):
             self.skipTest('addmm_sparse_cuda is not implemented for BFloat16 and Half')


         def test_shape(m, n, p, nnz, broadcast, alpha_beta=None):
             if alpha_beta is None:
                 alpha = random.random()
                 beta = random.random()
             else:
                 alpha, beta = alpha_beta
             if broadcast:
                 D1 = make_tensor((), dtype=dtype, device=device, requires_grad=True)
             else:
                 D1 = make_tensor([n, p], dtype=dtype, device=device, requires_grad=True)
             D2 = make_tensor([m, p], dtype=dtype, device=device, requires_grad=True)
             S = self._gen_sparse(2, nnz, [n, m], dtype, device, coalesced)[0]
             S_dense = S.to_dense().requires_grad_(True)
             S.requires_grad_(True)
             Y = torch.sparse.addmm(D1, S, D2, beta=beta, alpha=alpha)
             Y_dense = torch.addmm(D1, S_dense, D2, beta=beta, alpha=alpha)
             self.assertEqual(Y, Y_dense)

             if dtype not in {torch.double, torch.cdouble}:
                 # gradcheck will likely fail with low-precision input dtypes.
                 return

             def fn(S, D1, D2, beta=beta, alpha=alpha):
                 return torch.sparse.addmm(D1, S, D2, beta=beta, alpha=alpha)
             gradcheck(fn, (S, D1, D2), masked=True)

         test_shape(7, 8, 9, 20, False, None)
         test_shape(7, 8, 9, 20, True, None)
         test_shape(7, 8, 9, 20, False, (1, 0))
         test_shape(7, 8, 9, 20, True, (1, 0))
         test_shape(7, 8, 9, 20, False, (1, 1))
         test_shape(7, 8, 9, 20, True, (1, 1))

     @coalescedonoff
     @dtypes(torch.double)
     @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
     def test_sparse_mm(self, device, dtype, coalesced):
         def test_shape(d1, d2, d3, nnz, transposed):
             if transposed:
                 D = torch.randn(d3, d2, dtype=dtype,
                                 device=device).t_().requires_grad_(True)
             else:
                 D = torch.randn(d2, d3, dtype=dtype, device=device).requires_grad_(True)
             S = self._gen_sparse(2, nnz, [d1, d2], dtype, device, coalesced)[0]
             S_dense = S.to_dense().requires_grad_(True)
             S.requires_grad_(True)
             self.assertEqual(torch.sparse.mm(S, D), torch.mm(S_dense, D))

             def fn(S, D):
                 return torch.sparse.mm(S, D)
             gradcheck(fn, (S, D), masked=True)

         test_shape(7, 8, 9, 20, False)
         test_shape(7, 8, 9, 20, True)

     @coalescedonoff
     @dtypes(torch.double)
     @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
     @gradcheck_semantics()
     def test_sparse_mul(self, device, dtype, coalesced, gradcheck):
         # https://github.com/pytorch/pytorch/issues/79914
         a = torch.tensor([[0., 1]], dtype=dtype, device=device).to_sparse().requires_grad_(True)
         b = torch.tensor([[0., 1]], dtype=dtype, device=device).to_sparse().requires_grad_(True)
         gradcheck(lambda x, y: torch.sparse.sum(x * y).to_dense(masked_grad=gradcheck.masked), [a, b])

         def test_shape(sparse_dims, nnz, with_shape):
             a = self._gen_sparse(sparse_dims, nnz, with_shape, dtype, device, coalesced)[0].requires_grad_(True)
             b = self._gen_sparse(sparse_dims, nnz, with_shape, dtype, device, coalesced)[0].requires_grad_(True)

             self.assertEqual((a * b).to_dense(), a.to_dense() * b.to_dense(), masked=True)
             gradcheck(lambda x, y: (x * y).to_dense(), [a, b])
             # Issues with 0-dim indices/values
             gradcheck(lambda x, y: torch.sparse.sum(x * y).to_dense(), [a, b], masked=True)

         # TODO: Re-enable these
         # test_shape(2, 3, [2, 3, 4, 5])
         # test_shape(2, 3, [2, 2, 0])

     @coalescedonoff
     @dtypes(torch.double)
     def test_dsmm(self, device, dtype, coalesced):
         def test_shape(di, dj, dk, nnz):
             x = self._gen_sparse(2, nnz, [di, dj], dtype, device, coalesced)[0]
             y = self.randn(dj, dk, dtype=dtype, device=device)

             res = torch.dsmm(x, y)
             expected = torch.mm(self.safeToDense(x), y)
             self.assertEqual(res, expected)

         test_shape(7, 5, 3, 20)
         test_shape(1000, 100, 100, 20)
         test_shape(3000, 64, 300, 20)
         test_shape(0, 100, 100, 0)
         test_shape(1000, 0, 100, 0)
         test_shape(1000, 100, 0, 0)
         test_shape(1000, 100, 0, 20)

     @coalescedonoff
     @dtypes(torch.double)
     def test_hsmm(self, device, dtype, coalesced):
         def test_shape(di, dj, dk, nnz):
             x = self._gen_sparse(2, nnz, [di, dj], dtype, device, coalesced)[0]
             y = self.randn(dj, dk, dtype=dtype, device=device)

             res = torch.hsmm(x, y)
             expected = torch.mm(self.safeToDense(x), y)
             self.assertEqual(res.to_dense(), expected)

         test_shape(7, 5, 3, 20)
         test_shape(1000, 100, 100, 20)
         test_shape(3000, 64, 300, 20)
         test_shape(0, 100, 100, 0)
         test_shape(1000, 0, 100, 0)
         test_shape(1000, 100, 0, 0)
         test_shape(1000, 100, 0, 20)

     @coalescedonoff
     @dtypes(torch.double)
     def test_spadd(self, device, dtype, coalesced):

         def _test_spadd_shape(nnz, shape_i, shape_v=None):
             shape = shape_i + (shape_v or [])
             x, _, _ = self._gen_sparse(len(shape_i), nnz, shape, dtype, device, coalesced)
             y = self.randn(*shape, dtype=dtype, device=device)
             r = random.random()

             res = torch.add(y, x, alpha=r)
             expected = y + r * self.safeToDense(x)

             self.assertEqual(res, expected)

             # Non contiguous dense tensor
             s = list(shape)
             s[0] = shape[-1]
             s[-1] = shape[0]
             y = self.randn(*s, dtype=dtype, device=device)
             y.transpose_(0, len(s) - 1)
             r = random.random()

             res = torch.add(y, x, alpha=r)
             expected = y + r * self.safeToDense(x)

             self.assertEqual(res, expected)

             x, i, v = self._gen_sparse(len(shape_i), nnz, shape, dtype, device, coalesced)
             nnz = i.size(1)

             # Non contiguous sparse indices tensor
             x_ = self.sparse_tensor(i[:, ::2], v[:(nnz + 1) // 2], x.shape, dtype=dtype, device=device)
             res = torch.add(y, x_, alpha=r)
             expected = y + r * self.safeToDense(x_)
             self.assertEqual(res, expected)

             # Non contiguous sparse values tensor

             x_ = self.sparse_tensor(i[:, :(nnz + 1) // 2], v[::2], x.shape, dtype=dtype, device=device)
             res = torch.add(y, x_, alpha=r)
             expected = y + r * self.safeToDense(x_)
             self.assertEqual(res, expected)

             # Non contiguous sparse indices and values tensors
             x_ = self.sparse_tensor(i[:, 1::2], v[1::2], x.shape, dtype=dtype, device=device)
             res = torch.add(y, x_, alpha=r)
             expected = y + r * self.safeToDense(x_)
             self.assertEqual(res, expected)

         def _test_spadd():
             _test_spadd_shape(10, [5, 6])
             _test_spadd_shape(10, [10, 10, 10])
             _test_spadd_shape(10, [50, 30, 20])
             _test_spadd_shape(10, [5, 5, 5, 5, 5, 5])
             _test_spadd_shape(0, [0, 30, 20])
             _test_spadd_shape(0, [50, 0, 20])
             _test_spadd_shape(0, [50, 30, 0])

         def _test_spadd_hybrid():
             _test_spadd_shape(10, [5, 6], [2, 3])
             _test_spadd_shape(10, [10, 10, 10], [3])
             _test_spadd_shape(10, [50, 30, 20], [2])
             _test_spadd_shape(10, [5, 5, 5, 5, 5, 5], [2])
             _test_spadd_shape(0, [0, 30, 20], [2, 0])
             _test_spadd_shape(0, [50, 0, 20], [2, 0])
             _test_spadd_shape(0, [50, 30, 0], [2, 0])
             _test_spadd_shape(10, [50, 30, 20], [2, 0])

         _test_spadd()
         _test_spadd_hybrid()

     @coalescedonoff
     @dtypes(torch.float)
     def test_sparse_add_out_bfloat16(self, device, dtype, coalesced):
         # fp32
         x, _, _ = self._gen_sparse(3, 5, 10, dtype, device, coalesced)
         y, _, _ = self._gen_sparse(3, 5, 10, dtype, device, coalesced)
         res_fp32 = torch.add(x, y)

         # bfloat16
         x = x.bfloat16()
         y = y.bfloat16()
         res_bf16 = torch.add(x, y)
         res_bf16 = res_bf16.float()  # to compare with reference
         self.assertEqual(res_fp32, res_bf16, atol=1e-2, rtol=0)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_norm(self, device, dtype, coalesced):
         def test_shape(sparse_dims, nnz, with_size):
             x, _, _ = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)
             y = x.coalesce()
             self.assertEqual(x.norm(), y._values().norm())

         test_shape(3, 10, 100)
         test_shape(4, 10, [100, 100, 100, 5, 5, 5, 0])
         test_shape(4, 0, [0, 0, 100, 5, 5, 5, 0])

         # Unsupported arguments should error
         kwarg_error_pairs = [
             ({'keepdim': True},
              RuntimeError, r'norm_sparse currently does not support keepdim=True'),
             ({'dim': 0},
              RuntimeError, r'norm_sparse currently only supports full reductions'),
             ({'dtype': torch.double, 'p': 'fro'},
              ValueError, r'dtype argument is not supported in frobenius norm'),
             ({'dtype': torch.double, 'p': 0},
              RuntimeError, r"norm_sparse currently does not support 'dtype' argument")
         ]
         x = self._gen_sparse(3, 10, 100, dtype, device, coalesced)[0]
         for kwargs, err, msg in kwarg_error_pairs:
             with self.assertRaisesRegex(err, msg):
                 x.norm(**kwargs)

     @coalescedonoff
     @dtypes(torch.double)
     @unittest.skipIf(TEST_WITH_CROSSREF, "fallback triggers cuda device error")
     def test_sparse_sum(self, device, dtype, coalesced):

         def run_tests(S, td=None):
             D = S.coalesce().to_dense().detach().requires_grad_(True)
             if td is None:
                 S_sum = torch.sparse.sum(S)
                 D_sum = D.sum()
                 self.assertEqual(S_sum.item(), D_sum.item())

                 def fn(S):
                     return torch.sparse.sum(S)
                 gradcheck(fn, (S,), masked=True)
             else:
                 S_sum = torch.sparse.sum(S, td)
                 D_sum = D.sum(td)
                 self.assertEqual(S_sum.to_dense() if S_sum.is_sparse else S_sum, D_sum)

                 def fn(S):
                     res = torch.sparse.sum(S, td)
                     return res.to_dense(masked_grad=True)
                 gradcheck(fn, (S,), masked=True)

         nnz = 10
         sparse_dims = 2
         with_size = [5, 5, 1, 4]  # use a dense dim = 1 to test for squeeze
         test_dims = []
         for i in range(1, 5):
             test_dims += itertools.combinations(range(len(with_size)), i)

         # https://github.com/pytorch/pytorch/issues/16501
         x = torch.tensor([[1., 0., 0., 1.],
                           [0., 1., 0., 0.],
                           [0., 1., 1., 0.],
                           [0., 1., 0., 2.]], dtype=dtype, device=device).to_sparse()
         self.assertEqual(torch.sparse.sum(x, dim=0), torch.sparse.sum(x, dim=-2))
         self.assertEqual(torch.sum(x.to_dense(), dim=0), torch.sparse.sum(x, dim=0).to_dense())

         S = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)[0]

         # dim out of range
         self.assertRaises(IndexError, lambda: torch.sparse.sum(S, 5))

         # dim 0 appears multiple times in the list of dims
         self.assertRaises(RuntimeError, lambda: torch.sparse.sum(S, [0, 0]))

         # sum an empty tensor
         empty_S = torch.sparse_coo_tensor(size=with_size, dtype=dtype, device=device)
         self.assertEqual(torch.sparse.sum(empty_S, [0]).to_dense(), torch.sum(empty_S.to_dense(), [0]))
         self.assertEqual(torch.sparse.sum(empty_S), torch.tensor(0, dtype=dtype, device=device))
         empty_S.requires_grad_(True)
         empty_S_sum = torch.sparse.sum(empty_S)
         empty_S_sum.backward()
         self.assertEqual(empty_S.grad.to_dense(), empty_S.clone().detach().to_dense())

         # test values().sum()
         S = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)[0]
         run_tests(S.requires_grad_(True))

         for test_dim in test_dims:
             S = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)[0]
             run_tests(S.requires_grad_(True), test_dim)

     def _test_basic_ops_shape(self, nnz_x1, nnz_x2, shape_i, shape_v, dtype, device, coalesced):
         shape = shape_i + (shape_v)
         x1, _, _ = self._gen_sparse(len(shape_i), nnz_x1, shape, dtype, device, coalesced)
         x2, _, _ = self._gen_sparse(len(shape_i), nnz_x2, shape, dtype, device, coalesced)

         y1 = x1 + x2
         y2 = x1.clone()
         y2.add_(x2)
         expected = self.safeToDense(x1) + self.safeToDense(x2)
         self.assertEqual(self.safeToDense(y1), expected)
         self.assertEqual(self.safeToDense(y2), expected)

         y1 = x1 - x2
         y2 = x1.clone()
         y2.sub_(x2)
         expected = self.safeToDense(x1) - self.safeToDense(x2)
         self.assertEqual(self.safeToDense(y1), expected)
         self.assertEqual(self.safeToDense(y2), expected)

         y1 = x1 * x2
         y2 = x1.clone()
         y2.mul_(x2)
         expected = self.safeToDense(x1) * self.safeToDense(x2)
         self.assertEqual(self.safeToDense(y1), expected)
         self.assertEqual(self.safeToDense(y2), expected)

         y1 = x1 * 37.5
         y2 = x1.clone()
         y2.mul_(37.5)
         expected = self.safeToDense(x1) * 37.5
         self.assertEqual(self.safeToDense(y1), expected)
         self.assertEqual(self.safeToDense(y2), expected)

         y1 = x1 / 37.5
         y2 = x1.clone()
         y2.div_(37.5)
         expected = self.safeToDense(x1) / 37.5
         self.assertEqual(self.safeToDense(y1), expected)
         self.assertEqual(self.safeToDense(y2), expected)

         y1 = x1 // 37.5
         y2 = x1.clone()
         y2.floor_divide_(37.5)
         expected = self.safeToDense(x1) // 37.5
         self.assertEqual(self.safeToDense(y1), expected)
         self.assertEqual(self.safeToDense(y2), expected)

         # TODO: add back inplace support
         y1 = x1 ** 2
         y2 = x1.clone()
         y2 = y2.pow(2)
         expected = self.safeToDense(x1) ** 2
         self.assertEqual(self.safeToDense(y1), expected)
         self.assertEqual(self.safeToDense(y2), expected)

         y = x1.clone()
         y.zero_()
         expected = torch.zeros(x1.size(), dtype=dtype, device=device)
         self.assertEqual(self.safeToDense(y), expected)

         self.assertEqual(x1.is_coalesced(), coalesced)
         y = x1.coalesce()
         z = x1.coalesce()
         self.assertEqual(x1.is_coalesced(), coalesced)
         self.assertTrue(y.is_coalesced())
         y._values().add_(1)
         if not x1.is_coalesced():
             # check that coalesce is out of place if the original tensor is not
             # coalesced.
             self.assertEqual(z._values() + 1, y._values())
         else:
             # check that coalesce is in-place if the original tensor is
             # coalesced.
             self.assertEqual(z._values(), y._values())

     @coalescedonoff
     @dtypes(torch.double)
     def test_basic_ops(self, device, dtype, coalesced):

         def _test_basic_ops():
             self._test_basic_ops_shape(9, 12, [5, 6], [], dtype, device, coalesced)
             self._test_basic_ops_shape(9, 12, [10, 10, 10], [], dtype, device, coalesced)
             self._test_basic_ops_shape(9, 12, [50, 30, 20], [], dtype, device, coalesced)
             self._test_basic_ops_shape(9, 12, [5, 5, 5, 5, 5, 5], [], dtype, device, coalesced)
             self._test_basic_ops_shape(0, 12, [10, 10, 10], [], dtype, device, coalesced)
             self._test_basic_ops_shape(9, 0, [10, 10, 10], [], dtype, device, coalesced)
             self._test_basic_ops_shape(0, 0, [10, 10, 10], [], dtype, device, coalesced)
             self._test_basic_ops_shape(0, 0, [10, 10, 0], [], dtype, device, coalesced)
             self._test_basic_ops_shape(0, 0, [], [], dtype, device, coalesced)

         def _test_basic_ops_hybrid():
             self._test_basic_ops_shape(9, 12, [5, 6], [2, 3], dtype, device, coalesced)
             self._test_basic_ops_shape(9, 12, [10, 10, 10], [3], dtype, device, coalesced)
             self._test_basic_ops_shape(9, 12, [50, 30, 20], [2], dtype, device, coalesced)
             self._test_basic_ops_shape(9, 12, [5, 5, 5, 5, 5, 5], [2], dtype, device, coalesced)
             self._test_basic_ops_shape(0, 12, [10, 10, 10], [2], dtype, device, coalesced)
             self._test_basic_ops_shape(9, 0, [10, 10, 10], [2], dtype, device, coalesced)
             self._test_basic_ops_shape(0, 0, [10, 10, 10], [2], dtype, device, coalesced)
             self._test_basic_ops_shape(9, 12, [10, 10, 10], [2, 0], dtype, device, coalesced)
             self._test_basic_ops_shape(0, 12, [10, 10, 10], [2, 0], dtype, device, coalesced)
             self._test_basic_ops_shape(9, 0, [10, 10, 10], [2, 0], dtype, device, coalesced)
             self._test_basic_ops_shape(0, 0, [10, 10, 10], [2, 0], dtype, device, coalesced)
             self._test_basic_ops_shape(0, 0, [10, 10, 0], [2, 0], dtype, device, coalesced)

         _test_basic_ops()
         _test_basic_ops_hybrid()

     @dtypes(torch.double, torch.cdouble)
     def test_add_dense_sparse_mismatch(self, device, dtype):
         def test_shape(dense_size, sparse_dims_shape, dense_dims_shape, sparse_size):
             x = torch.zeros(dense_size, dtype=dtype, device=device)
             sparse_y = self.sparse_tensor(torch.zeros(sparse_dims_shape, dtype=torch.int64, device=device),
                                           torch.randn(dense_dims_shape, dtype=dtype, device=device),
                                           torch.Size(sparse_size))
             with self.assertRaisesRegex(
                     RuntimeError,
                     "add: expected 'self' and 'other' to have same size"):
                 x + sparse_y

         test_shape([3, 4], [1, 4], [4, 4, 4], [3, 4, 4])
         test_shape([3, 4, 0], [1, 4], [4, 4, 4, 0], [3, 4, 4, 0])

     @skipIfTorchDynamo("Not a TorchDynamo suitable test")
     @dtypes(torch.double, torch.cdouble)
     def test_add_noncontiguous(self, device, dtype):
         indices = self.index_tensor([[1, 2], [0, 2]], device=device)
         values = torch.tensor([1.], dtype=dtype, device=device).expand(2, 3, 4, 5)
         x = self.sparse_tensor(indices, values, dtype=dtype, device=device)
         assert not x._values().is_contiguous()
         y = x + x
         expected = self.safeToDense(x) + self.safeToDense(x)
         self.assertEqual(self.safeToDense(y), expected)

     def _test_sparse_mask_shape(self, nnz_x1, nnz_x2, shape_i, shape_v, dtype, device, coalesced):
         shape = shape_i + (shape_v or [])
         x1, _, _ = self._gen_sparse(len(shape_i), nnz_x1, shape, dtype, device, coalesced)
         x2, _, _ = self._gen_sparse(len(shape_i), nnz_x2, shape, dtype, device, coalesced)

         y1 = x1 + x2
         y2 = x1.clone()
         y2.add_(x2)
         expected = self.safeToDense(x1) + self.safeToDense(x2)
         self.assertEqual(self.safeToDense(y1), expected)
         self.assertEqual(self.safeToDense(y2), expected)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_sparse_mask(self, device, dtype, coalesced):
         def _test_sparse_mask_fixed():
             i = self.index_tensor([
                 [1, 3, 0, 4],
                 [2, 1, 2, 3],
             ], device=device)
             v = torch.tensor([1, 2, 3, 4], dtype=dtype, device=device)
             x = self.sparse_tensor(i, v, torch.Size([5, 4]), dtype=dtype, device=device).coalesce()
             dense = torch.tensor([
                 [1, 2, 3, 4],
                 [5, 6, 7, 8],
                 [9, 10, 11, 12],
                 [13, 14, 15, 16],
                 [17, 18, 19, 20],
             ], dtype=dtype, device=device)
             exp_v = torch.tensor([7, 14, 3, 20], dtype=dtype, device=device)
             res_dense_lhs = dense.sparse_mask(x)
             sparse = dense.to_sparse()
             res_sparse_lhs = sparse.sparse_mask(x)
             expected = self.sparse_tensor(i, exp_v, torch.Size([5, 4]), dtype=dtype, device=device)
             self.assertEqual(res_dense_lhs.coalesce(), expected.coalesce())
             # check no side effects for the coalesce flag.
             self.assertTrue(sparse.is_coalesced())
             self.assertEqual(res_sparse_lhs.coalesce(), expected.coalesce())

             i = self.index_tensor([
                 [1, 3, 0, 4],
                 [2, 1, 2, 3],
             ], device=device)
             v = torch.empty([4, 0], dtype=dtype, device=device)
             x = self.sparse_tensor(i, v, torch.Size([5, 4, 0])).coalesce()
             dense = torch.empty([5, 4, 0], dtype=dtype, device=device)
             exp_v = torch.empty([4, 0], dtype=dtype, device=device)
             res_dense_lhs = dense.sparse_mask(x)
             sparse = dense.to_sparse(2)
             res_sparse_lhs = sparse.sparse_mask(x)
             expected = self.sparse_tensor(i, exp_v, torch.Size([5, 4, 0]), dtype=dtype, device=device)
             self.assertEqual(res_dense_lhs.coalesce(), expected.coalesce())
             # check no side effects for the coalesce flag.
             self.assertTrue(sparse.is_coalesced())
             self.assertEqual(res_sparse_lhs.coalesce(), expected.coalesce())

         _test_sparse_mask_fixed()

         self._test_sparse_mask_shape(9, 12, [5, 6], [], dtype, device, coalesced)
         self._test_sparse_mask_shape(9, 12, [10, 10, 10], [], dtype, device, coalesced)
         self._test_sparse_mask_shape(9, 12, [50, 30, 20], [], dtype, device, coalesced)
         self._test_sparse_mask_shape(9, 12, [5, 5, 5, 5, 5, 5], [], dtype, device, coalesced)
         self._test_sparse_mask_shape(0, 12, [10, 10, 10], [], dtype, device, coalesced)
         self._test_sparse_mask_shape(9, 0, [10, 10, 10], [], dtype, device, coalesced)
         self._test_sparse_mask_shape(0, 0, [10, 10, 10], [], dtype, device, coalesced)
         self._test_sparse_mask_shape(0, 0, [10, 10, 0], [], dtype, device, coalesced)

         # check repetitions and matchings in the intersection
         lhs = torch.randint(0, 5, (100,), device=device)
         rhs = torch.randint(0, 5, (100,), device=device).to_sparse()
         self.assertEqual(lhs.to_sparse().sparse_mask(rhs), lhs.sparse_mask(rhs))

         # check coalesce
         sparse_c = torch.rand(3, 3, device=device).to_sparse()
         sparse_unc = torch.rand(3, 3, device=device).to_sparse()._coalesced_(False)
         for lhs, rhs in [(sparse_c, sparse_unc), (sparse_unc, sparse_c)]:
             res_all_sparse = lhs.sparse_mask(rhs)
             res_dense_sparse = lhs.to_dense().sparse_mask(rhs)
             self.assertEqual(res_all_sparse.coalesce(), res_dense_sparse.coalesce())
             self.assertEqual(rhs.is_coalesced(), res_all_sparse.is_coalesced())

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_sparse_mask_hybrid(self, device, dtype, coalesced):
         def _test_sparse_mask_hybrid_fixed():
             i = self.index_tensor([
                 [1, 3, 0, 4],
                 [2, 1, 2, 3],
             ])
             v = torch.tensor([[1, 2], [2, 3], [3, 4], [4, 5]])
             # TODO: This is also testing that, if coalesce is a no-op,
             # the indices don't get permuted. I don't know if we actually
             # want to give this invariant.
             x = self.sparse_tensor(i, v, torch.Size([5, 4, 2])).coalesce()
             dense = torch.tensor([
                 [[1, 3], [2, 2], [3, 3], [4, 2]],
                 [[5, 7], [6, 7], [7, 9], [8, 9]],
                 [[9, 2], [10, 4], [11, 1], [12, 3]],
                 [[13, 5], [14, 1], [15, 1], [16, 6]],
                 [[17, 7], [18, 2], [19, 7], [20, 1]],
             ])
             res_dense_lhs = dense.sparse_mask(x)
             sparse = dense.to_sparse(2)
             res_sparse_lhs = sparse.sparse_mask(x)
             exp_v = torch.tensor([[7, 9], [14, 1], [3, 3], [20, 1]])
             expected = self.sparse_tensor(i, exp_v, torch.Size([5, 4, 2]))
             self.assertEqual(res_dense_lhs.coalesce(), expected.coalesce())
             # check no side effects for the coalesce flag
             self.assertTrue(sparse.is_coalesced())
             self.assertEqual(res_sparse_lhs.coalesce(), expected.coalesce())

             i = self.index_tensor([
                 [1, 3, 0, 4],
                 [2, 1, 2, 3],
             ])
             v = torch.empty(4, 2, 0)
             x = self.sparse_tensor(i, v, torch.Size([5, 4, 2, 0])).coalesce()
             dense = torch.empty(5, 4, 2, 0)
             res_dense_lhs = dense.sparse_mask(x)
             sparse = dense.to_sparse(2)
             res_sparse_lhs = sparse.sparse_mask(x)
             exp_v = torch.empty(4, 2, 0)
             expected = self.sparse_tensor(i, exp_v, torch.Size([5, 4, 2, 0]))
             self.assertEqual(res_dense_lhs.coalesce(), expected.coalesce())
             # check no side effects for the coalesce flag
             self.assertTrue(sparse.is_coalesced())
             self.assertEqual(res_sparse_lhs.coalesce(), expected.coalesce())

         _test_sparse_mask_hybrid_fixed()

         self._test_sparse_mask_shape(9, 12, [5, 6], [2, 3], dtype, device, coalesced)
         self._test_sparse_mask_shape(9, 12, [10, 10, 10], [3], dtype, device, coalesced)
         self._test_sparse_mask_shape(9, 12, [50, 30, 20], [2], dtype, device, coalesced)
         self._test_sparse_mask_shape(9, 12, [5, 5, 5, 5, 5, 5], [2], dtype, device, coalesced)
         self._test_sparse_mask_shape(0, 12, [10, 10, 10], [2], dtype, device, coalesced)
         self._test_sparse_mask_shape(9, 0, [10, 10, 10], [2], dtype, device, coalesced)
         self._test_sparse_mask_shape(0, 0, [10, 10, 10], [2], dtype, device, coalesced)
         self._test_sparse_mask_shape(9, 12, [10, 10, 10], [2, 0], dtype, device, coalesced)
         self._test_sparse_mask_shape(0, 12, [10, 10, 10], [2, 0], dtype, device, coalesced)
         self._test_sparse_mask_shape(9, 0, [10, 10, 10], [2, 0], dtype, device, coalesced)
         self._test_sparse_mask_shape(0, 0, [10, 10, 10], [2, 0], dtype, device, coalesced)
         self._test_sparse_mask_shape(0, 0, [10, 10, 0], [2, 0], dtype, device, coalesced)

     @dtypes(torch.double, torch.cdouble)
     @skipIfCrossRef
     def test_sparse_mask_backward(self, device, dtype):
         from itertools import product, repeat

         shape = (5, 5)
         sparse_dims = len(shape)
         nnzs = (0, 5, 15, 25)

         lhs_data = torch.arange(1, 26, device=device).reshape(shape).to(dtype).to_sparse(sparse_dims)
         rhs_data = lhs_data.clone()

         for nnz in nnzs:
             for lhs_is_coalesced, rhs_is_coalesced in product(*repeat((True, False), 2)):
                 lhs = torch.sparse_coo_tensor(
                     lhs_data._indices()[:, :nnz],
                     lhs_data._values()[:nnz],
                     lhs_data.shape
                 ).clone()._coalesced_(lhs_is_coalesced).requires_grad_(True)

                 rhs = torch.sparse_coo_tensor(
                     lhs_data._indices()[:, -nnz:],
                     lhs_data._values()[-nnz:],
                     lhs_data.shape
                 ).clone()._coalesced_(rhs_is_coalesced)

                 # To test masked semantics we need to make sure that
                 # sparsity_pattern(lhs) == sparsity_pattern(lhs.grad).
                 # lhs.sparse_mask(lhs_mask) accomplishes that.
                 lhs_mask = lhs.detach().clone()
                 gradcheck(lambda x: x.sparse_mask(lhs_mask).sparse_mask(rhs).to_dense(masked_grad=True), (lhs,), masked=True)
                 gradcheck(lambda x: x.sparse_mask(rhs).to_dense(masked_grad=False), (lhs,), masked=False)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_zeros(self, device, dtype, coalesced):
         def _test_zeros(nnzs, shape, out_shape_i, out_shape_v=None):
             out_shape = out_shape_i + (out_shape_v or [])
             for nnz in nnzs:
                 out, _, _ = self._gen_sparse(len(out_shape_i), nnz, out_shape, dtype, device, coalesced)
                 torch.zeros(*shape, out=out, dtype=dtype, device=device)
                 self.assertEqual(tuple(out.size()), tuple(shape))
                 self.assertTrue(out._indices().numel() == out._values().numel() == 0)
                 self.assertEqual(out._nnz(), 0)
                 self.assertEqual(out.sparse_dim(), len(shape))
                 self.assertEqual(out.dense_dim(), 0)

         def test_shape(i_shapes, v_shapes, shape, nnzs):
             for i_dim in range(1, len(i_shapes) + 1):
                 for v_dim in range(len(v_shapes) + 1):
                     _test_zeros(nnzs, shape, i_shapes[:i_dim], v_shapes[:v_dim])
         test_shape([2, 3, 4], [3, 4, 5, 6], [2, 3, 4], [9, 12])
         test_shape([0, 3, 4], [3, 4, 5, 6], [2, 3, 4], [0])
         test_shape([2, 3, 4], [0, 4, 5, 6], [2, 3, 4], [9, 12])
         test_shape([2, 3, 4], [3, 4, 5, 6], [2, 3, 0], [9, 12])
         test_shape([0, 3, 4], [3, 4, 5, 6], [2, 3, 0], [0])
         test_shape([2, 3, 4], [0, 4, 5, 6], [2, 3, 0], [9, 12])

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_zeros_like(self, device, dtype, coalesced):
         def _test_zeros_like(nnzs, template_shape_i, template_shape_v=None):
             template_shape_v = template_shape_v or []
             template_shape = template_shape_i + template_shape_v
             for nnz in nnzs:
                 t, _, _ = self._gen_sparse(len(template_shape_i), nnz, template_shape, dtype, device, coalesced)
                 res = torch.zeros_like(t)
                 self.assertEqual(tuple(res.size()), tuple(template_shape))
                 self.assertTrue(res._indices().numel() == res._values().numel() == 0)
                 self.assertEqual(res._nnz(), 0)
                 self.assertEqual(res.sparse_dim(), len(template_shape_i))
                 self.assertEqual(res.dense_dim(), len(template_shape_v))

         def test_shape(i_shapes, v_shapes, nnzs):
             for i_dim in range(1, len(i_shapes) + 1):
                 for v_dim in range(len(v_shapes) + 1):
                     _test_zeros_like(nnzs, i_shapes[:i_dim], v_shapes[:v_dim])
         test_shape([2, 3, 4], [3, 4, 5, 6], [9, 12])
         test_shape([0, 3, 4], [3, 4, 5, 6], [0])
         test_shape([2, 3, 4], [0, 4, 5, 6], [9, 12])
         test_shape([2, 3, 4], [3, 4, 5, 6], [9, 12])
         test_shape([0, 3, 4], [3, 4, 5, 6], [0])
         test_shape([2, 3, 4], [0, 4, 5, 6], [9, 12])

         sparse_tensor, _, _ = self._gen_sparse(len([2, 3]), 9, [2, 3] + [5, 6], dtype, device, coalesced)
         data = (sparse_tensor, sparse_tensor, sparse_tensor, sparse_tensor.unsqueeze(0))
         mem_formats = [torch.channels_last, torch.contiguous_format, torch.preserve_format, torch.channels_last_3d]
         for x, mem_format in zip(data, mem_formats):

             with self.assertRaisesRegex(RuntimeError, "memory format option is only supported by strided tensors"):
                 result = torch.zeros_like(x, memory_format=mem_format)

             result = torch.zeros_like(x, layout=torch.strided, memory_format=mem_format)
             self.assertTrue(result.layout == torch.strided)

         dense_tensor = sparse_tensor.to_dense()
         result = torch.zeros_like(dense_tensor, layout=torch.sparse_coo)
         self.assertEqual(dense_tensor.shape, result.shape)
         self.assertEqual(result.layout, torch.sparse_coo)

         sparse_zeros = torch.sparse_coo_tensor(dense_tensor.shape)
         self.assertEqual(result._indices().shape, sparse_zeros._indices().shape)
         self.assertEqual(result._values().shape, sparse_zeros._values().shape)

     def _assert_sparse_invars(self, t):
         # SparseTensor has the following invariants:
         # - sparse_dim + dense_dim = len(SparseTensor.shape)
         # - SparseTensor._indices().shape = (sparse_dim, nnz)
         # - SparseTensor._values().shape = (nnz, SparseTensor.shape[sparse_dim:])
         self.assertEqual(t.sparse_dim() + t.dense_dim(), len(t.shape))
         self.assertEqual(tuple(t._indices().shape), (t.sparse_dim(), t._nnz()))
         self.assertEqual(tuple(t._values().shape), (t._nnz(), ) + t.shape[t.sparse_dim():])

     def _test_empty_like(self, sparse_tensor, dtype, device, coalesced):

         result = torch.empty_like(sparse_tensor)
         self.assertTrue(result.is_sparse)
         self._assert_sparse_invars(result)
         self.assertEqual(result.shape, sparse_tensor.shape)
         self.assertEqual(result.dtype, sparse_tensor.dtype)
         self.assertEqual(result.device, sparse_tensor.device)
         self.assertEqual(result.sparse_dim(), sparse_tensor.sparse_dim())
         self.assertEqual(result.dense_dim(), sparse_tensor.dense_dim())

         sparse_tensor, _, _ = self._gen_sparse(len([2, 3]), 9, [2, 3] + [5, 6], dtype, device, coalesced)
         data = (sparse_tensor, sparse_tensor, sparse_tensor, sparse_tensor.unsqueeze(0))
         mem_formats = [torch.channels_last, torch.contiguous_format, torch.preserve_format, torch.channels_last_3d]
         for x, mem_format in zip(data, mem_formats):

             with self.assertRaisesRegex(RuntimeError, "memory format option is only supported by strided tensors"):
                 result = torch.empty_like(x, memory_format=mem_format)

             result = torch.empty_like(x, layout=torch.strided, memory_format=mem_format)
             self.assertTrue(result.layout == torch.strided)

         with self.assertRaisesRegex(
             RuntimeError, r"Could not run 'aten::empty_strided' with arguments from the 'Sparse(CPU|CUDA)' backend"
         ):
             dense_tensor = sparse_tensor.to_dense()
             result = torch.empty_like(dense_tensor, layout=torch.sparse_coo)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_empty_like(self, device, dtype, coalesced):
         # tests https://github.com/pytorch/pytorch/issues/43699

         if coalesced:
             input_coalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[0, 1, 2]]),
                 values=torch.tensor([3.0, -4.0, 5.0]),
                 size=[3, ],
                 dtype=dtype,
                 device=device
             ).coalesce()
             self._test_empty_like(input_coalesced, dtype, device, coalesced)

             # hybrid sparse input
             input_coalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[1, 3], [2, 4]]),
                 values=torch.tensor([[-1.0, 3.0], [-5.0, 7.0]]),
                 size=[4, 5, 2],
                 dtype=dtype,
                 device=device
             ).coalesce()
             self._test_empty_like(input_coalesced, dtype, device, coalesced)

         if not coalesced:
             # test uncoalesced input
             input_uncoalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[0], [1], [2], [0], [1], [2]]).transpose(1, 0),
                 values=torch.tensor([2.0, -3.0, -4.0, 1.0, -1.0, 1.5]),
                 size=[3, ],
                 dtype=dtype,
                 device=device
             )
             self._test_empty_like(input_uncoalesced, dtype, device, coalesced)

             # test on empty sparse tensor
             input_uncoalesced = torch.sparse_coo_tensor(
                 indices=torch.zeros([2, 0]),
                 values=torch.zeros([0, 5, 5, 5, 5, 5, 5, 0]),
                 size=[0, 0, 5, 5, 5, 5, 5, 5, 0],
                 dtype=dtype,
                 device=device
             )
             self._test_empty_like(input_uncoalesced, dtype, device, coalesced)

     def _test_narrow(self, input, narrow_args):
         expected = input.to_dense().narrow(*narrow_args)
         self.assertEqual(expected, input.narrow_copy(*narrow_args).to_dense())

     def _all_narrow_combs(self, shape):
         for dim, dim_sz in enumerate(shape):
             for start in range(dim_sz):
                 for length in range(dim_sz - start):
                     yield [dim, start, length]

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_narrow(self, device, dtype, coalesced):
         shape = [3, 3, 4, 2]
         input, _, _ = self._gen_sparse(4, 19, shape, dtype, device, coalesced)
         for narrow_args in self._all_narrow_combs(shape):
             self._test_narrow(input, narrow_args)

         self.assertRaises(RuntimeError, lambda: input.narrow_copy(-1, 0, 3))  # dim < 0
         self.assertRaises(RuntimeError, lambda: input.narrow_copy(10, 0, 3))  # dim > input.dim()
         self.assertRaises(RuntimeError, lambda: input.narrow_copy(0, shape[0] + 1, 3))  # start > size of dim
         self.assertRaises(RuntimeError, lambda: input.narrow_copy(0, 2, shape[0]))  # start+length > size of dim

         with_dense, _, _ = self._gen_sparse(2, 7, shape, dtype, device, coalesced)
         for narrow_args in self._all_narrow_combs(shape):
             self._test_narrow(with_dense, narrow_args)

         self.assertRaises(RuntimeError, lambda: with_dense.narrow_copy(10, 0, 3))  # dim > sparseDim + denseDim

     def _test_log1p_tensor(self, sparse_tensor, coalesced):
         def is_integral(dtype):
             return dtype in integral_types()

         dense_tensor = sparse_tensor.to_dense()
         expected_output = dense_tensor.log1p()
         is_integral_dtype = is_integral(sparse_tensor.dtype)
         self.assertEqual(expected_output, sparse_tensor.log1p().to_dense())
         if is_integral_dtype:
             with self.assertRaisesRegex(RuntimeError, "result type .* can't be cast to"):
                 sparse_tensor.coalesce().log1p_()
         else:
             self.assertEqual(expected_output, sparse_tensor.coalesce().log1p_().to_dense())

         if not coalesced:
             # test in-place op on uncoalesced input
             with self.assertRaisesRegex(RuntimeError, "log1p_ requires coalesced input"):
                 sparse_tensor.log1p_()

         if is_integral_dtype:
             with self.assertRaisesRegex(RuntimeError, "only Tensors of floating point dtype can require gradients"):
                 sparse_tensor.requires_grad_()

     @coalescedonoff
     @dtypes(*all_types())
     def test_log1p(self, device, dtype, coalesced):
         if coalesced:
             input_coalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[0], [1], [2]]).transpose(1, 0),
                 values=torch.tensor([3.0, 4.0, 5.0]),
                 size=[3, ],
                 device=device,
                 dtype=dtype
             ).coalesce()
             self._test_log1p_tensor(input_coalesced, coalesced)

             # hybrid sparse input
             input_coalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[1, 3], [2, 4]]),
                 values=torch.tensor([[1.0, 3.0], [5.0, 7.0]]),
                 size=[4, 5, 2],
                 device=device,
                 dtype=dtype
             ).coalesce()
             self._test_log1p_tensor(input_coalesced, coalesced)

         if not coalesced:
             # test uncoalesced input
             input_uncoalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[0], [1], [2], [0], [1], [2]]).transpose(1, 0),
                 values=torch.tensor([2.0, 3.0, 4.0, 1.0, 1.0, 1.0]),
                 size=[3, ],
                 device=device,
                 dtype=dtype
             )
             self._test_log1p_tensor(input_uncoalesced, coalesced)

             # test on empty sparse tensor
             input_uncoalesced = torch.sparse_coo_tensor(
                 indices=torch.zeros([2, 0]),
                 values=torch.zeros([0, 5, 5, 5, 5, 5, 5, 0]),
                 size=[0, 0, 5, 5, 5, 5, 5, 5, 0],
                 device=device,
                 dtype=dtype
             )
             # empty tensors are coalesced at creation (nnz < 2) we must force the uncoalesced state
             input_uncoalesced._coalesced_(False)
             self._test_log1p_tensor(input_uncoalesced, coalesced)

     def _test_neg_negative(self, sparse_tensor):
         dense_tensor = sparse_tensor.to_dense()
         expected_output = dense_tensor.neg()

         ops = (
             torch.neg, torch.Tensor.neg, torch.Tensor.neg_,
             torch.negative, torch.Tensor.negative, torch.Tensor.negative_,
             operator.neg
         )
         for op in ops:
             sparse_tensor_copy = sparse_tensor.clone()
             self.assertEqual(expected_output, op(sparse_tensor_copy).to_dense())

             if op in (torch.neg, torch.negative):
                 sparse_tensor_out = torch.zeros_like(sparse_tensor)
                 op(sparse_tensor, out=sparse_tensor_out)
                 self.assertEqual(expected_output, sparse_tensor_out.to_dense())

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_neg_negative(self, device, dtype, coalesced):

         if coalesced:
             input_coalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[0, 1, 2]]),
                 values=torch.tensor([3.0, -4.0, 5.0]),
                 size=[3, ],
                 dtype=dtype,
                 device=device
             ).coalesce()
             self._test_neg_negative(input_coalesced)

             # hybrid sparse input
             input_coalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[1, 3], [2, 4]]),
                 values=torch.tensor([[-1.0, 3.0], [-5.0, 7.0]]),
                 size=[4, 5, 2],
                 dtype=dtype,
                 device=device
             ).coalesce()
             self._test_neg_negative(input_coalesced)

         if not coalesced:
             # test uncoalesced input
             input_uncoalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[0], [1], [2], [0], [1], [2]]).transpose(1, 0),
                 values=torch.tensor([2.0, -3.0, -4.0, 1.0, -1.0, 1.5]),
                 size=[3, ],
                 dtype=dtype,
                 device=device
             )
             self._test_neg_negative(input_uncoalesced)

             # test on empty sparse tensor
             input_uncoalesced = torch.sparse_coo_tensor(
                 indices=torch.zeros([2, 0]),
                 values=torch.zeros([0, 5, 5, 5, 5, 5, 5, 0]),
                 size=[0, 0, 5, 5, 5, 5, 5, 5, 0],
                 dtype=dtype,
                 device=device
             )
             self._test_neg_negative(input_uncoalesced)

     def _test_asin_arcsin(self, sparse_tensor, coalesced):
         def is_integral(dtype):
             return dtype in integral_types()
         is_integral_dtype = is_integral(sparse_tensor.dtype)

         dense_tensor = sparse_tensor.to_dense()
         expected_output = dense_tensor.asin()

         ops = (
             torch.asin, torch.Tensor.asin,
             torch.arcsin, torch.Tensor.arcsin,
         )
         for op in ops:
             self.assertEqual(expected_output, op(sparse_tensor).to_dense())
             if op in (torch.asin, torch.arcsin):
                 sparse_tensor_out = torch.zeros_like(sparse_tensor)
                 if not is_integral_dtype:
                     op(sparse_tensor, out=sparse_tensor_out)
                     self.assertEqual(expected_output, sparse_tensor_out.to_dense())
                 else:
                     with self.assertRaisesRegex(RuntimeError, "result type .* can't be cast to"):
                         op(sparse_tensor, out=sparse_tensor_out)

         for op in (torch.Tensor.asin_, torch.Tensor.arcsin_):
             if is_integral_dtype:
                 # test coalesce on integral dtype tensor
                 with self.assertRaisesRegex(RuntimeError, "result type .* can't be cast to"):
                     op(sparse_tensor.clone().coalesce()).to_dense()
             else:
                 self.assertEqual(expected_output, op(sparse_tensor.clone().coalesce()).to_dense())

             if not coalesced:
                 # test in-place op on uncoalesced input
                 with self.assertRaisesRegex(RuntimeError, "asin_ requires coalesced input"):
                     op(sparse_tensor)

     @coalescedonoff
     @dtypes(*all_types())
     def test_asin_arcsin(self, device, dtype, coalesced):
         if coalesced:
             input_coalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[0, 1, 2, 3]]),
                 values=torch.tensor([0.5, -0.5, 0.7, -0.7]),
                 size=[4, ],
                 dtype=dtype,
                 device=device
             ).coalesce()
             self._test_asin_arcsin(input_coalesced, coalesced)

             # hybrid sparse input
             input_coalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[1, 3], [2, 4]]),
                 values=torch.tensor([[-0.1, 0.24], [-0.44, 0.1]]),
                 size=[4, 5, 2],
                 dtype=dtype,
                 device=device
             ).coalesce()
             self._test_asin_arcsin(input_coalesced, coalesced)

         if not coalesced:
             # test uncoalesced input
             input_uncoalesced = torch.sparse_coo_tensor(
                 indices=torch.tensor([[0], [1], [2], [0], [1], [2]]).transpose(1, 0),
                 values=torch.tensor([0.3, -0.3, -0.4, 0.3, -0.5, 0.15]),
                 size=[3, ],
                 dtype=dtype,
                 device=device
             )
             self._test_asin_arcsin(input_uncoalesced, coalesced)

             # test on empty sparse tensor
             input_uncoalesced = torch.sparse_coo_tensor(
                 indices=torch.zeros([2, 0]),
                 values=torch.zeros([0, 5, 5, 5, 5, 5, 5, 0]),
                 size=[0, 0, 5, 5, 5, 5, 5, 5, 0],
                 dtype=dtype,
                 device=device
             )
             # empty tensors are coalesced at creation (nnz < 2) we must force the uncoalesced state
             input_uncoalesced._coalesced_(False)
             self._test_asin_arcsin(input_uncoalesced, coalesced)

     @coalescedonoff
     @dtypes(torch.double)
     def test_mv(self, device, dtype, coalesced):
         def test_shape(di, dj, dk, nnz):
             x, _, _ = self._gen_sparse(2, nnz, [di, dj], dtype, device, coalesced)
             t = torch.randn(dk, dtype=dtype, device=device)

             res = x.matmul(t)
             expected = self.safeToDense(x).matmul(t)
             self.assertEqual(res, expected)

         test_shape(10, 100, 100, 20)
         test_shape(100, 1000, 1000, 20)
         test_shape(64, 10000, 10000, 20)
         test_shape(0, 100, 100, 0)
         test_shape(10, 0, 0, 0)
         test_shape(10, 100, 100, 0)
         test_shape(10, 100, 100, 20)

         with self.assertRaisesRegex(RuntimeError, r"mv: expected self\.size\(-1\) == vec\.size\(-1\)"):
             test_shape(10, 100, 10, 20)

         with self.assertRaisesRegex(RuntimeError, "mv: two tensor dim should be 2 and 1"):
             x, _, _ = self._gen_sparse(2, 20, [10, 100], dtype, device, coalesced)
             y, _, _ = self._gen_sparse(2, 20, [10, 100], dtype, device, coalesced)
             res = x.mv(y)

     @dtypes(*floating_and_complex_types())
     def test_sparse_add_coalesce(self, device, dtype):
         i = self.index_tensor([[1, 2, 1]], device=device)
         v = torch.tensor([3, 4, 5], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3]))
         y = self.sparse_tensor(i, v, torch.Size([3]))
         z = x + y

         self.assertFalse(z._indices().numel() != 2 and z.is_coalesced())

         i = self.index_tensor([[1, 2, 1]], device=device)
         v = torch.empty([3, 0], dtype=dtype, device=device)
         x = self.sparse_tensor(i, v, torch.Size([3, 0]))
         y = self.sparse_tensor(i, v, torch.Size([3, 0]))
         z = x + y

         self.assertFalse(z._indices().numel() != 2 and z.is_coalesced())

     @onlyCUDA
     def test_storage_not_null(self, device):
         x = torch.sparse_coo_tensor((2,), dtype=torch.float32, device=device)
         self.assertNotEqual(x.get_device(), -1)

         x = torch.sparse_coo_tensor((2, 0), dtype=torch.float32, device=device)
         self.assertNotEqual(x.get_device(), -1)

     @onlyCUDA
     @deviceCountAtLeast(2)
     def test_same_gpu(self, devices):
         def check_device(x, device_id):
             self.assertEqual(x.get_device(), device_id)
             self.assertEqual(x._values().get_device(), device_id)
             self.assertEqual(x._indices().get_device(), device_id)

         dev1, dev2 = devices[0], devices[1]

         i = self.index_tensor([[2]], device=dev2)
         v = torch.tensor([5], device=dev2)
         x = self.sparse_tensor(i, v, torch.Size([3]), device=1)
         check_device(x, 1)

         i = self.index_tensor([[2]], device=dev2)
         v = torch.empty(1, 0, device=dev2)
         x = self.sparse_tensor(i, v, torch.Size([3, 0]), device=1)
         check_device(x, 1)

         x = self.sparse_empty(3, device=1)
         check_device(x, 1)

         x = self.sparse_empty(3, 0, device=1)
         check_device(x, 1)

     def _test_new_device(self, size, device=torch.cuda):
         with torch.cuda.device(device):
             x = torch.sparse_coo_tensor(size, device='cuda', dtype=torch.float64)
         self.assertEqual(x.get_device(), device)
         x1 = x.new()
         x2 = x.new(2, 3)
         self.assertEqual(x1.get_device(), device)
         self.assertEqual(x2.get_device(), device)

     @onlyCUDA
     def test_new_device_single_gpu(self):
         self._test_new_device((), 0)
         self._test_new_device((30, 20), 0)
         self._test_new_device((30, 20, 10), 0)
         self._test_new_device((30, 20, 10, 0), 0)

     @onlyCUDA
     @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected")
     def test_new_device_multi_gpu(self):
         self._test_new_device((), 1)
         self._test_new_device((30, 20), 1)
         self._test_new_device((30, 20, 10), 1)
         self._test_new_device((30, 20, 10, 0), 1)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_new(self, device, dtype, coalesced):
         def test_shape(sparse_dims, nnz, with_size):
             x, indices, values = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)
             if not x.is_cuda:
                 # CUDA sparse tensors currently requires the size to be
                 # specified if nDimV > 0
                 out = x.new(indices, values).coalesce()
                 x_c = x.coalesce()
                 self.assertEqual((out.indices(), out.values()), (x_c.indices(), x_c.values()))
             self.assertEqual(x.new(indices, values, x.size()), x)

         test_shape(3, 10, 100)
         test_shape(3, 0, [100, 100, 0])

     @onlyCPU  # not really, but we only really want to run this once
     @dtypes(torch.float64, torch.float32, torch.float16, torch.cfloat, torch.cdouble)
     def test_factory(self, device, dtype):
         for test_empty_tensor in [True, False]:
             if test_empty_tensor:
                 default_size = torch.Size([1, 3, 0])
                 size = torch.Size([3, 3, 0])
             else:
                 default_size = torch.Size([1, 3])
                 size = torch.Size([3, 3])
             for include_size in [True, False]:
                 for use_tensor_idx in [True, False]:
                     for use_tensor_val in [True, False]:
                         for use_cuda in ([False] if not torch.cuda.is_available() else [True, False]):
                             # have to include size with cuda sparse tensors
                             include_size = include_size or use_cuda
                             long_dtype = torch.int64
                             device = torch.device('cpu') if not use_cuda else \
                                 torch.device(torch.cuda.device_count() - 1)
                             indices = torch.tensor(([0], [2]), dtype=long_dtype) if use_tensor_idx else ([0], [2])
                             if test_empty_tensor:
                                 values = torch.empty(1, 0).to(dtype)
                             else:
                                 if use_tensor_val:
                                     values = torch.tensor([1.], dtype=dtype)
                                 else:
                                     values = 1.
                             if include_size:
                                 sparse_tensor = torch.sparse_coo_tensor(indices, values, size, dtype=dtype,
                                                                         device=device, requires_grad=True)
                             else:
                                 sparse_tensor = torch.sparse_coo_tensor(indices, values, dtype=dtype,
                                                                         device=device, requires_grad=True)
                             self.assertEqual(indices, sparse_tensor._indices())
                             self.assertEqual(values, sparse_tensor._values())
                             self.assertEqual(size if include_size else default_size, sparse_tensor.size())
                             self.assertEqual(dtype, sparse_tensor.dtype)
                             if use_cuda:
                                 self.assertEqual(device, sparse_tensor._values().device)
                             self.assertEqual(True, sparse_tensor.requires_grad)

     @dtypes(torch.double, torch.cdouble)
     def test_factory_size_check(self, device, dtype):
         indices = self.index_tensor([[1, 2],
                                     [0, 2]], device=device)
         values = torch.tensor([.5, .5], dtype=dtype, device=device)
         sizes = torch.Size([2, 3])
         with self.assertRaisesRegex(RuntimeError, "size is inconsistent with indices"):
             torch.sparse_coo_tensor(indices, values, sizes, dtype=dtype, device=device)

         indices.fill_(-1)
         with self.assertRaisesRegex(RuntimeError, "found negative index"):
             torch.sparse_coo_tensor(indices, values, sizes, dtype=dtype, device=device)

         indices = self.index_tensor([[1, 2],
                                     [0, 2]], device=device)
         values = torch.empty([2, 1, 0], dtype=dtype, device=device)
         sizes = torch.Size([2, 3, 1, 0])
         with self.assertRaisesRegex(RuntimeError, "size is inconsistent with indices"):
             torch.sparse_coo_tensor(indices, values, sizes, dtype=dtype, device=device)

         indices = self.index_tensor([[1, 2],
                                     [0, 2]], device=device)
         values = torch.empty([2, 2, 2], dtype=dtype, device=device)
         sizes = torch.Size([0, 0, 2, 2])
         with self.assertRaisesRegex(RuntimeError, "size is inconsistent with indices"):
             torch.sparse_coo_tensor(indices, values, sizes, dtype=dtype, device=device)

         indices = self.index_tensor([[1, 2],
                                     [0, 2]], device=device)
         values = torch.tensor([[1, 1, 1], [1, 1, 1]], dtype=dtype, device=device)
         sizes = torch.Size([3, 3, 2])
         with self.assertRaisesRegex(RuntimeError, "values has incorrect size"):
             torch.sparse_coo_tensor(indices, values, sizes, dtype=dtype, device=device)

         indices = self.index_tensor([[1, 2],
                                     [0, 2]], device=device)
         values = torch.empty([2, 1, 0], dtype=dtype, device=device)
         sizes = torch.Size([3, 3, 2, 0])
         with self.assertRaisesRegex(RuntimeError, "values has incorrect size"):
             torch.sparse_coo_tensor(indices, values, sizes, dtype=dtype, device=device)

     def test_factory_empty_indices(self, device):
         tensor = torch.sparse_coo_tensor(torch.Size([2, 0]), device=device)
         expected_indices = torch.empty((2, 0), dtype=torch.long, device=device)
         self.assertEqual(tensor._indices(), expected_indices)

         tensor = torch.sparse_coo_tensor(torch.Size([2, 2, 0]), device=device)
         expected_indices = torch.empty((3, 0), dtype=torch.long, device=device)
         self.assertEqual(tensor._indices(), expected_indices)

         tensor = torch.sparse_coo_tensor(torch.Size([2, 2, 0, 0]), device=device)
         expected_indices = torch.empty((4, 0), dtype=torch.long, device=device)
         self.assertEqual(tensor._indices(), expected_indices)

     @dtypes(torch.double, torch.cdouble)
     def test_factory_nnz(self, device, dtype):
         indices = self.index_tensor([[0]], device=device)  # (sparse_dim, nnz): (1, 1)
         values = torch.tensor([[1, 1], [1, 1]], dtype=dtype, device=device)  # (nnz, ...): (2, 2)
         sizes = torch.Size([2, 2])
         with self.assertRaisesRegex(RuntimeError, "indices and values must have same nnz"):
             torch.sparse_coo_tensor(indices, values, sizes, dtype=dtype, device=device)

         indices = self.index_tensor([[0]], device=device)  # (sparse_dim, nnz): (1, 1)
         values = torch.empty([2, 0], dtype=dtype, device=device)  # (nnz, ...): (2, 0)
         sizes = torch.Size([2, 0])
         with self.assertRaisesRegex(RuntimeError, "indices and values must have same nnz"):
             torch.sparse_coo_tensor(indices, values, sizes, dtype=dtype, device=device)

     @dtypes(torch.double, torch.cdouble)
     def test_factory_nnz_zero(self, device, dtype):
         def test_shape(i_shape, v_shape, size, expected_size):
             if size:
                 t = torch.sparse_coo_tensor(torch.empty(i_shape), torch.empty(v_shape), torch.Size(size),
                                             dtype=dtype, device=device)
             else:
                 t = torch.sparse_coo_tensor(torch.empty(i_shape), torch.empty(v_shape), dtype=dtype, device=device)
             expected_indices = torch.empty(i_shape, device=device, dtype=torch.int64)
             expected_values = torch.empty(v_shape, device=device, dtype=dtype)
             expected_size = torch.Size(expected_size)
             self.assertEqual(t._indices(), expected_indices)
             self.assertEqual(t._values(), expected_values)
             self.assertEqual(t.size(), expected_size)

         test_shape([1, 0], [0, 2, 4, 0], None, [0, 2, 4, 0])
         test_shape([3, 0], [0, 2, 4, 0], None, [0, 0, 0, 2, 4, 0])
         test_shape([1, 0], [0, 2, 4, 0], [0, 2, 4, 0], [0, 2, 4, 0])
         test_shape([3, 0], [0, 2, 4, 0], [0, 0, 0, 2, 4, 0], [0, 0, 0, 2, 4, 0])
         test_shape([3, 0], [0, 2, 4, 0], [1, 2, 3, 2, 4, 0], [1, 2, 3, 2, 4, 0])

     @dtypes(torch.double, torch.cdouble)
     def test_factory_dense_dim(self, device, dtype):
         indices = self.index_tensor([[0]], device=device)
         values = torch.tensor([[[1, 1, 1], [1, 1, 1]]], dtype=dtype, device=device)
         sizes = torch.Size([1, 3, 4])
         with self.assertRaisesRegex(RuntimeError, "values has incorrect size"):
             torch.sparse_coo_tensor(indices, values, sizes)

         indices = self.index_tensor([[0]], device=device)
         values = torch.empty([1, 2, 3, 0], dtype=dtype, device=device)
         sizes = torch.Size([1, 3, 4, 0])
         with self.assertRaisesRegex(RuntimeError, "values has incorrect size"):
             torch.sparse_coo_tensor(indices, values, sizes)

     @onlyCPU
     @dtypes(torch.float16, torch.float32, torch.float64, torch.cfloat, torch.cdouble, torch.int64)
     def test_factory_type_inference(self, device, dtype):
         t = torch.sparse_coo_tensor(torch.tensor(([0], [2])), torch.tensor([1.], dtype=dtype))
         self.assertEqual(dtype, t.dtype)
         t = torch.sparse_coo_tensor(torch.tensor(([0], [2])), torch.tensor([1]))
         self.assertEqual(torch.int64, t.dtype)

         t = torch.sparse_coo_tensor(torch.tensor(([0], [2])), torch.HalfTensor(1, 0))
         self.assertEqual(torch.float16, t.dtype)
         t = torch.sparse_coo_tensor(torch.tensor(([0], [2])), torch.FloatTensor(1, 0))
         self.assertEqual(torch.float32, t.dtype)
         t = torch.sparse_coo_tensor(torch.tensor(([0], [2])), torch.DoubleTensor(1, 0))
         self.assertEqual(torch.float64, t.dtype)
         t = torch.sparse_coo_tensor(torch.tensor(([0], [2])), torch.LongTensor(1, 0))
         self.assertEqual(torch.int64, t.dtype)

     @onlyCUDA
     def test_factory_device_type_inference(self, device):
         # both indices/values are CUDA

         cpu_cuda = ('cpu', 'cuda')
         cpu_cuda_none = cpu_cuda + (None,)
         for indices_device, values_device, device in itertools.product(cpu_cuda,
                                                                        cpu_cuda,
                                                                        cpu_cuda_none):
             indices = torch.tensor(([0], [2]), device=indices_device)
             values = torch.tensor([1.], device=values_device)
             empty_values = torch.empty(1, 0).to(values_device)
             shape = (1, 3)
             empty_shape = (1, 3, 0)
             if device is None and indices_device != values_device:
                 with self.assertRaises(RuntimeError):
                     torch.sparse_coo_tensor(indices, values, shape, device=device)
                 with self.assertRaises(RuntimeError):
                     torch.sparse_coo_tensor(indices, empty_values, empty_shape, device=device)
             else:
                 t = torch.sparse_coo_tensor(indices, values, shape, device=device)
                 t_empty = torch.sparse_coo_tensor(indices, empty_values, empty_shape, device=device)
                 should_be_cuda = (device == 'cuda' or (device is None and values_device == 'cuda'))
                 self.assertEqual(should_be_cuda, t.is_cuda)
                 self.assertEqual(t.is_cuda, t_empty.is_cuda)

     @onlyCPU
     def test_factory_copy(self, device):
         def test_tensor(indices, values, indices_equal, values_equal):
             sparse_tensor = torch.sparse_coo_tensor(indices, values, dtype=torch.float64, device=device)
             if indices_equal:
                 self.assertEqual(indices.data_ptr(), sparse_tensor._indices().data_ptr())
             else:
                 self.assertNotEqual(indices.data_ptr(), sparse_tensor._indices().data_ptr())
             if values_equal:
                 self.assertEqual(values.data_ptr(), sparse_tensor._values().data_ptr())
             else:
                 self.assertNotEqual(values.data_ptr(), sparse_tensor._values().data_ptr())

         # both correct
         indices = torch.tensor(([0], [2]), dtype=torch.int64)
         values = torch.tensor([1.], dtype=torch.float64)
         test_tensor(indices, values, True, True)

         indices = torch.tensor(([0], [2]), dtype=torch.int64)
         values = torch.DoubleTensor(1, 0)
         test_tensor(indices, values, True, True)

         # only indices correct
         indices = torch.tensor(([0], [2]), dtype=torch.int64)
         values = torch.tensor([1.], dtype=torch.float32)
         test_tensor(indices, values, True, False)

         indices = torch.tensor(([0], [2]), dtype=torch.int64)
         values = torch.tensor([1.], dtype=torch.float16)
         test_tensor(indices, values, True, False)

         indices = torch.tensor(([0], [2]), dtype=torch.int64)
         values = torch.FloatTensor(1, 0)
         test_tensor(indices, values, True, True)  # An empty tensor's data_ptr is always equal to 0

         # only values correct
         indices = torch.tensor(([0], [2]), dtype=torch.int32)
         values = torch.tensor([1.], dtype=torch.float64)
         test_tensor(indices, values, False, True)

         indices = torch.tensor(([0], [2]), dtype=torch.int32)
         values = torch.DoubleTensor(1, 0)
         test_tensor(indices, values, False, True)

         # neither correct
         indices = torch.tensor(([0], [2]), dtype=torch.int32)
         values = torch.tensor([1.], dtype=torch.float32)
         test_tensor(indices, values, False, False)

         indices = torch.tensor(([0], [2]), dtype=torch.int32)
         values = torch.FloatTensor(1, 0)
         test_tensor(indices, values, False, True)  # An empty tensor's data_ptr is always equal to 0

         # complex support
         indices = torch.tensor(([0], [2]), dtype=torch.int64)
         values = make_tensor([1, ], dtype=torch.cdouble, device=device)
         test_tensor(indices, values, True, False)

         indices = torch.tensor(([0], [2]), dtype=torch.int32)
         values = make_tensor([1, 1], dtype=torch.cdouble, device=device)
         test_tensor(indices, values, False, False)

     @onlyCPU  # just run once, we test both cpu and cuda
     def test_legacy_new_device(self, device):
         i = torch.tensor([[0, 1, 1], [2, 0, 2]])
         v = torch.tensor([3., 4., 5.])
         size = torch.Size([2, 3])

         x = torch.sparse_coo_tensor(i, v, size, device='cpu')
         self.assertRaises(RuntimeError, lambda: x.new(device='cuda'))
         self.assertRaises(RuntimeError, lambda: x.new(i, v, device='cuda'))
         self.assertRaises(RuntimeError, lambda: x.new(i, v, size, device='cuda'))
         self.assertRaises(RuntimeError, lambda: x.new(torch.Size([2, 3, 4]), device='cuda'))

         if torch.cuda.is_available():
             x = torch.sparse_coo_tensor(i, v, size, device='cuda')
             self.assertRaises(RuntimeError, lambda: x.new(device='cpu'))
             self.assertRaises(RuntimeError, lambda: x.new(i, v, device='cpu'))
             self.assertRaises(RuntimeError, lambda: x.new(i, v, size, device='cpu'))
             self.assertRaises(RuntimeError, lambda: x.new(torch.Size([2, 3, 4]), device='cpu'))

     def test_legacy_new(self, device):
         i = torch.tensor([[0, 1, 1], [2, 0, 2]])
         v = torch.tensor([3., 4., 5.])
         size = torch.Size([2, 3])
         s = torch.sparse_coo_tensor(i, v, size)

         self.assertEqual(torch.sparse_coo, s.new(device='cpu').layout)
         self.assertRaises(TypeError, lambda: s.new(v.untyped_storage()))
         self.assertRaises(TypeError, lambda: s.new(v))
         self.assertEqual(torch.sparse_coo, s.new(torch.Size([2, 3])).layout)
         self.assertRaises(TypeError, lambda: s.new([6]))

     @onlyCPU  # not really, but we only really want to run this once
     def test_dtypes(self, device):
         all_sparse_dtypes = all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)
         do_test_dtypes(self, all_sparse_dtypes, torch.sparse_coo, torch.device('cpu'))
         if torch.cuda.is_available():
             do_test_dtypes(self, all_sparse_dtypes, torch.sparse_coo, torch.device('cuda:0'))

     def _test_empty_full(self, device, dtype, requires_grad):
         shape = (2, 3)
         layout = torch.sparse_coo

         def check_value(tensor, value=None, dtype=dtype, requires_grad=requires_grad):
             self.assertEqual(shape, tensor.shape)
             self.assertIs(dtype, tensor.dtype)
             self.assertIs(layout, tensor.layout)
             self.assertEqual(tensor.requires_grad, requires_grad)
             if tensor.is_cuda and device is not None:
                 self.assertEqual(device, tensor.device)
             if value is not None:
                 fill = tensor.empty(shape, dtype=dtype).fill_(value)
                 self.assertEqual(tensor, fill)

         v = torch.sparse_coo_tensor(shape, dtype=dtype, device=device, requires_grad=requires_grad)
         check_value(v)

         out = v.new()
         check_value(torch.zeros(shape, out=out, device=device, requires_grad=requires_grad))

         int64_dtype = torch.int64
         check_value(v.new_empty(shape), requires_grad=False)
         check_value(v.new_empty(shape, dtype=int64_dtype, device=device, requires_grad=False),
                     dtype=int64_dtype, requires_grad=False)
         check_value(torch.empty_like(v), requires_grad=False)
         check_value(torch.empty_like(v, dtype=int64_dtype, layout=layout, device=device, requires_grad=False),
                     dtype=int64_dtype, requires_grad=False)

     @onlyCPU  # not really, but we only really want to run this once
     @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
     @parametrize('requires_grad', (True, False))
     def test_empty_full(self, device, dtype, requires_grad):
         if requires_grad and not (dtype.is_floating_point or dtype.is_complex):
             self.skipTest(f'requires_grad==True requires float or complex dtype, got {dtype}')

         self._test_empty_full(device, dtype, requires_grad)
         if torch.cuda.is_available():
             self._test_empty_full(None, dtype, requires_grad)
             self._test_empty_full(torch.device('cuda:0'), dtype, requires_grad)

     def test_is_sparse(self, device):
         x = torch.randn(3, 3)
         self.assertFalse(x.is_sparse)

         x = torch.randn(3, 3, 0)
         self.assertFalse(x.is_sparse)

         x = self.sparse_empty(1, 0, device=device)
         self.assertTrue(x.is_sparse)

     def test_resize_as(self, device):
         def do_test(t):
             y = t.new().resize_as_(t).zero_()
             self.assertEqual(y.shape, t.shape)
             # Check that y can be added to t. Currently, this requires that
             # sparse_dim and dense_dim match.
             self.assertEqual(t, t + y)

         do_test(self.sparse_empty([3, 0], device=device))
         do_test(self.sparse_empty([3, 3], device=device))

     def _test_resize_shape(self, x_i, x_v, x_size, y_i, y_v, y_size, dtype, device):
         x_v_numel = torch.zeros(x_v).numel()
         x = torch.sparse_coo_tensor(torch.zeros(x_i),
                                     torch.arange(x_v_numel).resize_(x_v).to(torch.float),
                                     torch.Size(x_size), dtype=dtype, device=device)
         x_dense = x.to_dense()
         y = torch.sparse_coo_tensor(torch.zeros(y_i),
                                     torch.ones(y_v).to(torch.float),
                                     torch.Size(y_size), dtype=dtype, device=device)
         y_dense = y.to_dense()
         x.resize_as_(y)
         x_dense.resize_as_(y_dense)
         self.assertEqual(x.shape, y.shape)
         self.assertEqual(x.sparse_dim(), y.sparse_dim())
         self.assertEqual(x.dense_dim(), y.dense_dim())
         self.assertEqual(x.shape, x_dense.shape)
         self.assertEqual(y.shape, y_dense.shape)
         # Here we make sure that the original data are preserved after resizing
         self.assertEqual(x.to_dense().view(-1)[0:x_v_numel].view(x_v),
                          x_dense.view(-1)[0:x_v_numel].view(x_v))

     @dtypes(torch.double, torch.cdouble)
     def test_resize(self, device, dtype):
         # 1. Expand the size of some dense dimensions [Supported]
         self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
                                 [1, 1], [1, 2, 4], [2, 2, 4],
                                 dtype=dtype, device=device)

         self._test_resize_shape([1, 1], [1, 2, 0], [2, 2, 0],
                                 [1, 1], [1, 2, 4], [2, 2, 4],
                                 dtype=dtype, device=device)

         # 2. Expand the size of some sparse dimensions [Supported]
         self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
                                 [1, 1], [1, 2, 3], [4, 2, 3],
                                 dtype=dtype, device=device)

         # 3. Change the shapes of both sparse and dense dimensions when nnz is zero [Supported]
         self._test_resize_shape([1, 0], [0, 2, 3], [2, 2, 3],
                                 [2, 0], [0, 2, 4, 5], [1, 1, 2, 4, 5],
                                 dtype=dtype, device=device)

         self._test_resize_shape([1, 0], [0, 2, 3], [2, 2, 3],
                                 [2, 0], [0, 2, 4, 0], [1, 1, 2, 4, 0],
                                 dtype=dtype, device=device)

         # 4. Add dims to dense dimensions [Not Supported]
         with self.assertRaisesRegex(RuntimeError, "changing the number of dense dimensions"):
             self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
                                     [1, 1], [1, 2, 3, 4], [2, 2, 3, 4],
                                     dtype=dtype, device=device)

         with self.assertRaisesRegex(RuntimeError, "changing the number of dense dimensions"):
             self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
                                     [1, 1], [1, 2, 3, 0], [2, 2, 3, 0],
                                     dtype=dtype, device=device)

         # 5. Remove dims from dense dimensions [Not Supported]
         with self.assertRaisesRegex(RuntimeError, "changing the number of dense dimensions"):
             self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
                                     [1, 1], [1, 2], [2, 2],
                                     dtype=dtype, device=device)

         # 6. Change the number of sparse dimensions on a non-empty sparse tensor [Not Supported]
         with self.assertRaisesRegex(RuntimeError, "changing the number of sparse dimensions"):
             self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
                                     [2, 1], [1, 2, 3], [1, 2, 2, 3],
                                     dtype=dtype, device=device)

         # 7. Shrink the size of some sparse dimensions on a non-empty sparse tensor [Not Supported]
         with self.assertRaisesRegex(RuntimeError, "shrinking the size of sparse dimensions"):
             self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
                                     [1, 1], [1, 2, 3], [1, 2, 3],
                                     dtype=dtype, device=device)

         # 8. Shrink the size of some dense dimensions on a non-empty sparse tensor [Not Supported]
         with self.assertRaisesRegex(RuntimeError, "shrinking the size of dense dimensions"):
             self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
                                     [1, 1], [1, 2, 2], [2, 2, 2],
                                     dtype=dtype, device=device)

         with self.assertRaisesRegex(RuntimeError, "shrinking the size of dense dimensions"):
             self._test_resize_shape([1, 1], [1, 2, 3], [2, 2, 3],
                                     [1, 1], [1, 2, 0], [2, 2, 0],
                                     dtype=dtype, device=device)

     def test_is_nonzero(self, device):
         self.assertTrue(torch.sparse_coo_tensor(([0],), 1., (1,), device=device).is_nonzero())
         self.assertFalse(torch.sparse_coo_tensor(([0],), 0., (1,), device=device).is_nonzero())
         self.assertFalse(torch.sparse_coo_tensor(([0], [0]), 0., (1, 1), device=device).is_nonzero())
         self.assertFalse(torch.sparse_coo_tensor(([0, 0],), (0., 0.), (1,), device=device).is_nonzero())
         self.assertFalse(torch.sparse_coo_tensor(([0, 0],), (-1., 1.), (1,), device=device).is_nonzero())

         # scalar sparse tensor
         self.assertTrue(torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [], device=device).is_nonzero())
         with self.assertRaisesRegex(RuntimeError, "Boolean value of Tensor with no values is ambiguous"):
             torch.sparse_coo_tensor(([0, 1],), torch.empty(2, 0), (4, 0), device=device).is_nonzero()
         self.assertTrue(torch.sparse_coo_tensor(([0],), 2.3 - 4.5j, (1,), dtype=torch.cfloat, device=device)
                         .is_nonzero())
         self.assertTrue(torch.sparse_coo_tensor(([0],), 2.3 - 4.5j, (1,), dtype=torch.cdouble, device=device)
                         .is_nonzero())
         self.assertFalse(torch.sparse_coo_tensor(([0],), 0. + 0j, (1,), dtype=torch.cfloat, device=device)
                          .is_nonzero())
         self.assertFalse(torch.sparse_coo_tensor(([0],), 0. + 0j, (1,), dtype=torch.cdouble, device=device)
                          .is_nonzero())

     @dtypes(torch.double, torch.cdouble)
     def test_change_tensor_metadata(self, device, dtype):
         i = self.index_tensor([[0], [1]], device=device)
         v = torch.tensor([[3, 4, 5]], dtype=dtype, device=device)
         t = torch.sparse_coo_tensor(i, v, torch.Size([1, 2, 3]), dtype=dtype, device=device)
         i.resize_(2, 3)
         v.resize_(4, 5)
         self.assertEqual(list(t.coalesce().indices().size()), [2, 1])
         self.assertEqual(list(t.coalesce().values().size()), [1, 3])

         i = self.index_tensor([[0], [1]], device=device)
         v = torch.tensor([[3, 4, 5]], dtype=dtype, device=device)
         t = torch.sparse_coo_tensor(i, v, torch.Size([1, 2, 3]))
         i.resize_as_(self.index_tensor([0, 1], device=device))
         v.resize_as_(torch.tensor([3, 4, 5], dtype=dtype, device=device))
         self.assertEqual(list(t.coalesce().indices().size()), [2, 1])
         self.assertEqual(list(t.coalesce().values().size()), [1, 3])

         i = self.index_tensor([[0], [1]], device=device)
         v = torch.tensor([[3, 4, 5]], dtype=dtype, device=device)
         t = torch.sparse_coo_tensor(i, v, torch.Size([1, 2, 3]))
         i.as_strided_((2, 1), (1, 1))
         v.as_strided_((1, 3), (1, 1))
         self.assertEqual(list(t.coalesce().indices().size()), [2, 1])
         self.assertEqual(list(t.coalesce().values().size()), [1, 3])

         i = self.index_tensor([[0], [1]], device=device)
         v = torch.tensor([[3, 4, 5]], dtype=dtype, device=device)
         t = torch.sparse_coo_tensor(i, v, torch.Size([1, 2, 3]))
         i.set_(self.index_tensor([0, 1], device=device))
         v.set_(torch.tensor([3, 4, 5], dtype=dtype, device=device))
         self.assertEqual(list(t.coalesce().indices().size()), [2, 1])
         self.assertEqual(list(t.coalesce().values().size()), [1, 3])

         i = self.index_tensor([[0], [1]], device=device)
         v = torch.tensor([[3, 4, 5]], dtype=dtype, device=device)
         t = torch.sparse_coo_tensor(i, v, torch.Size([1, 2, 3]))
         i.transpose_(0, 1)
         v.transpose_(0, 1)
         self.assertEqual(list(t.coalesce().indices().size()), [2, 1])
         self.assertEqual(list(t.coalesce().values().size()), [1, 3])

     @coalescedonoff
     @dtypes(torch.double)
     def test_pickle(self, device, dtype, coalesced):
         import pickle

         shape_sparse_dim_nnz = [
             ((), 0, 2),
             ((0,), 0, 10),
             ((2,), 0, 3),
             ((100, 3), 1, 3),
             ((100, 20, 3), 2, 0),
             ((10, 0, 3), 0, 3),
             ((10, 0, 3), 0, 0),
         ]

         for shape, sparse_dim, nnz in shape_sparse_dim_nnz:
             indices_shape = torch.Size((sparse_dim, nnz))
             values_shape = torch.Size((nnz,) + shape[sparse_dim:])
             indices = torch.arange(indices_shape.numel(), dtype=self.index_tensor(0).dtype,
                                    device=device).view(indices_shape)
             for d in range(sparse_dim):
                 indices[d].clamp_(max=(shape[d] - 1))  # make it valid index
             if not coalesced and indices.numel() > 0:
                 indices[:, -1] = indices[:, 0]  # make it uncoalesced
             values_numel = values_shape.numel()
             values = torch.arange(values_numel, dtype=dtype,
                                   device=device).view(values_shape).div_(values_numel / 2.)
             sp_tensor = self.sparse_tensor(indices, values, shape)
             serialized = pickle.dumps(sp_tensor)
             sp_tensor_loaded = pickle.loads(serialized)
             self.assertEqual(sp_tensor, sp_tensor_loaded)

     def test_any(self, device):
         t = torch.sparse_coo_tensor(torch.tensor(([0, 0], [2, 0])), torch.tensor([False, False]), device=device)
         t_any = torch.tensor(False)
         self.assertEqual(torch.any(t), t_any)
         t = torch.sparse_coo_tensor(torch.tensor(([0, 0], [2, 0])), torch.tensor([True, False]), device=device)
         t_any = torch.tensor(True)
         self.assertEqual(torch.any(t), t_any)

     def test_isnan(self, device):
         t = torch.sparse_coo_tensor(torch.tensor(([0, 0], [0, 2])), torch.tensor([1, 4]), device=device)
         t_nan = torch.sparse_coo_tensor(torch.tensor(([0, 0], [0, 2])), torch.tensor([False, False]), device=device)
         self.assertEqual(torch.isnan(t).int(), t_nan.int())
         t = torch.sparse_coo_tensor(torch.tensor(([0, 0], [0, 2])), torch.tensor([1, float("nan")]), device=device)
         t_nan = torch.sparse_coo_tensor(torch.tensor(([0, 0], [0, 2])), torch.tensor([False, True]), device=device)
         self.assertEqual(torch.isnan(t).int(), t_nan.int())

     @coalescedonoff
     @dtypes(torch.float32, torch.float64)
     def test_div_rounding_mode(self, device, dtype, coalesced):
         sparse, _, _ = self._gen_sparse(2, 10, (10, 10), dtype,
                                         device, coalesced)
         dense = self.safeToDense(sparse)

         for mode in (None, 'floor', 'trunc'):
             actual = sparse.div(-2, rounding_mode=mode)
             expect = dense.div(-2, rounding_mode=mode)
             self.assertEqual(self.safeToDense(actual), expect)

             # Test inplace
             actual = sparse.clone().div_(-2, rounding_mode=mode)
             self.assertEqual(self.safeToDense(actual), expect)

             # Test out argument
             actual.zero_()
             torch.div(sparse, -2, rounding_mode=mode, out=actual)
             self.assertEqual(self.safeToDense(actual), expect)

     def test_div_by_sparse_error(self, device):
         self.assertRaisesRegex(RuntimeError, 'Sparse division requires',
                                lambda: torch.tensor(1., device=device).to_sparse()
                                / torch.tensor(1., device=device).to_sparse())

     def test_floor_divide_by_sparse_error(self, device):
         self.assertRaisesRegex(RuntimeError, 'Sparse floor division requires',
                                lambda: torch.tensor(1., device=device).to_sparse()
                                // torch.tensor(1., device=device).to_sparse())

     @unittest.skipIf(not TEST_NUMPY, "Numpy not found")
     @onlyCPU
     def test_sparse_to_numpy(self, device):
         t = torch.sparse_coo_tensor(torch.tensor(([0, 0], [2, 0])), torch.tensor([1, 4]))
         self.assertRaises(TypeError, lambda: t.numpy())

     @coalescedonoff
     @dtypes(torch.double)
     def test_softmax(self, device, dtype, coalesced):
         import torch.nn.functional as F

         def to_dense(sparse, fill_value=None):
             """
             Return dense tensor from a sparse tensor using given fill value.
             """
             if fill_value is None or fill_value == 0:
                 return sparse.to_dense()
             sparse = sparse.coalesce()
             dense = torch.full(sparse.shape, fill_value, dtype=sparse.dtype, device=sparse.device)
             for idx, value in zip(sparse._indices().t(), sparse._values()):
                 dense[tuple(idx)] = value
             return dense

         def softmax_to_dense(sparse, dim):
             """Dense softmax of a sparse tensor. Useful only for testing softmax
             correctness.

             When computing softmax of a sparse tensor, the value of
             unspecified items is negative infinity rather than zero so
             that

               softmax(sparse.to_dense(fill_value=-inf), dim) == softmax(sparse, dim).to_dense()

             holds for non-empty lines. One empty lines, the softmax
             values are defined as 0 in order to preserve the sparsity
             of result.

             Note that in PyTorch, ``to_dense`` method does not
             implement the ``fill_value`` keyword argument.
             """
             dtype = sparse.dtype
             device = sparse.device
             dense = to_dense(sparse, fill_value=-float('inf'))
             r = F.softmax(dense, dim)
             # softmax on empty lines results nan, replace with zeros to match the definition
             r[r != r] = 0
             return r

         def sparse_softmax(sparse, dim):
             """Pure Python softmax of a sparse tensor. Assuming -inf for
             unspecified sparse tensor data. This is a prototype of
             sparse softmax algorithm in Python.
             """
             dtype = sparse.dtype
             device = sparse.device

             # softmax is non-linear operation, so sparse tensors must
             # be coalesced.
             sparse = sparse.coalesce()
             inf = float('inf')
             indices = sparse._indices()
             values = sparse._values()

             if dim < sparse.sparse_dim():
                 nnz = sparse._nnz()

                 # compute pool indices
                 size = sparse.size()
                 strides = torch.ones((sparse.sparse_dim(), 1), dtype=indices.dtype, device=indices.device)
                 for i in reversed(range(sparse.sparse_dim() - 1)):
                     strides[i, 0] = strides[i + 1, 0] * size[i + 1]
                 strides[dim, 0] = 0

                 pool = (indices * strides).sum(dim=0)
                 i2p = {}
                 for i in range(nnz):
                     c = int(pool[i])
                     if c not in i2p:
                         i2p[c] = len(i2p)
                     pool[i] = i2p[c]

                 # compute max
                 dense_size = tuple(size[sparse.sparse_dim():])
                 mx = torch.empty((pool.max() + 1,) + dense_size, dtype=dtype, device=device)
                 mx[:] = -inf
                 for n in range(nnz):
                     p = pool[n]
                     mx[p] = torch.max(mx[p], values[n])

                 # apply exp to (v - mx) and sum the results
                 exp_values = torch.empty_like(values)
                 exp_sums = torch.zeros_like(mx)
                 for n in range(nnz):
                     p = pool[n]
                     v = exp_values[n] = (values[n] - mx[p]).exp()
                     exp_sums[p] = exp_sums[p] + v

                 # normalize with the sum of exponents
                 for n in range(nnz):
                     p = pool[n]
                     exp_values[n] = exp_values[n] / exp_sums[p]

                 return torch.sparse_coo_tensor(indices,
                                                exp_values,
                                                sparse.size(),
                                                dtype=dtype, device=device)

             elif dim < sparse.sparse_dim() + sparse.dense_dim():
                 return torch.sparse_coo_tensor(indices,
                                                F.softmax(values, dim - sparse.sparse_dim() + 1),
                                                sparse.size(),
                                                dtype=dtype, device=device)
             else:
                 raise ValueError(
                     f'`dim(={dim})` must be smaller than `sparse_dim(={sparse.sparse_dim()}) + dense_dim(={sparse.dense_dim()})`')

         def softmax_jacobian_analytic(x, dim):
             """Return Jacobian of softmax using analytic formula

                D_jS_i = S_i * (1[i==j] - S_j).

             where S = softmax(x, dim), x is dense tensor, i,j in
             range(x.shape[dim]).
             """
             y = F.softmax(x, dim)
             y[y != y] = 0  # replace nan-s with zeros
             J = torch.zeros((x.shape[dim],) + tuple(x.shape), dtype=x.dtype, device=x.device)
             si = [slice(None)] * len(y.shape)
             sj = [slice(None)] * len(y.shape)
             s = [slice(None)] * len(J.shape)
             for i in range(y.shape[dim]):
                 si[dim] = i
                 s[dim + 1] = i
                 yi = y[tuple(si)]
                 for j in range(y.shape[dim]):
                     sj[dim] = j
                     s[0] = j
                     if i == j:
                         J[tuple(s)] = yi * (1 - yi)
                     else:
                         yj = y[tuple(sj)]
                         J[tuple(s)] = - yi * yj
                     sj[dim] = slice(None)
                 si[dim] = slice(None)
                 s[dim + 1] = slice(None)
             return J

         def softmax_jacobian_autograd(x, dim, log=False):
             """Return Jacobian of softmax using PyTorch autograd feature.

             x can be dense or sparse tensor.
             """
             import itertools

             if x.is_sparse:
                 x = x.coalesce()

             dtype = x.dtype
             device = x.device
             shape = tuple(x.shape)
             J = torch.zeros((shape[dim],) + shape, dtype=dtype, device=device)
             for i in range(shape[dim]):
                 if x.is_sparse:
                     sparse_dim = x.sparse_dim()
                     dense_dim = x.dense_dim()
                     if dim < sparse_dim:
                         ranges = []
                         for j, sz in enumerate(shape[:sparse_dim]):
                             if dim == j:
                                 ranges.append([i])
                             else:
                                 ranges.append(list(range(sz)))
                         indices = torch.tensor(list(itertools.product(*ranges)), dtype=torch.long, device=device).t()
                         values = torch.ones((indices.shape[1],) + shape[sparse_dim:], dtype=dtype, device=device)
                     else:
                         ranges = []
                         for j, sz in enumerate(shape[:sparse_dim]):
                             ranges.append(list(range(sz)))
                         indices = torch.tensor(list(itertools.product(*ranges)), dtype=torch.long, device=device).t()
                         values = torch.zeros((indices.shape[1],) + shape[sparse_dim:], dtype=dtype, device=device)
                         sv = [slice(None)] * (dense_dim + 1)
                         sv[dim - sparse_dim + 1] = i
                         values[tuple(sv)] = 1
                     v = torch.sparse_coo_tensor(indices, values, shape, dtype=dtype, device=device)
                 else:
                     v = torch.zeros_like(x)
                     sv = [slice(None)] * len(v.shape)
                     sv[dim] = i
                     v[tuple(sv)] = 1
                 x_ = x.clone()
                 x_.requires_grad_(True)

                 if log:
                     if x_.is_sparse:
                         y = torch.sparse.log_softmax(x_, dim)
                     else:
                         y = F.log_softmax(x_, dim)
                 else:
                     if x_.is_sparse:
                         y = torch.sparse.softmax(x_, dim)
                     else:
                         y = F.softmax(x_, dim)
                         # replace nan-s with zeros
                         y.data[y != y] = 0
                 y.backward(v)
                 g = x_.grad
                 if not g.is_sparse:
                     # replace nan-s with zeros
                     g.data[g != g] = 0
                 J[i] = g.to_dense() if g.is_sparse else g
             return J

         @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1166")
         def test_op(sparse_dims, nnz, with_size, coalesced):
             if isinstance(with_size, Number):
                 with_size = [with_size] * sparse_dims

             x, i, v = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)

             def sparse_log(x):
                 return torch.sparse_coo_tensor(x._indices(), x._values().log(),
                                                x.size(), dtype=x.dtype, device=x.device)

             # Check dim out of bounds
             with self.assertRaisesRegex(IndexError, r"Dimension out of range"):
                 torch.sparse.softmax(x, x.dim())
             with self.assertRaisesRegex(IndexError, r"Dimension out of range"):
                 torch.sparse.softmax(x, -x.dim() - 1)

             for dim in range(x.dim()):
                 # Check sparse softmax definition

                 # check Python sparse softmax
                 y = sparse_softmax(x, dim)
                 r1 = softmax_to_dense(x, dim)
                 r2 = y.to_dense()
                 self.assertEqual(r1, r2)

                 # check C++ sparse softmax
                 for d in (dim, dim - x.dim()):
                     y1 = torch.sparse.softmax(x, d)
                     self.assertEqual(y, y1)

                     # check C++ sparse log_softmax
                     ly1 = torch.sparse.log_softmax(x, d)
                     self.assertEqual(ly1, sparse_log(y1))

                 # Check autograd support on sparse softmax

                 # check softmax Jacobian definition for dense input
                 x1 = to_dense(x, fill_value=float('-inf'))
                 J = softmax_jacobian_analytic(x1, dim)
                 assert J.shape[0] == x.shape[dim]
                 assert J.shape[dim + 1] == x.shape[dim]

                 # check softmax Jacobian from autograd, dense input
                 J2 = softmax_jacobian_autograd(x1, dim)
                 self.assertEqual(J, J2)

                 # check softmax Jacobian from autograd, sparse input
                 J3 = softmax_jacobian_autograd(x, dim)
                 self.assertEqual(J, J3)

                 '''
                 y = softmax(x, dim)
                 z = log(y) = log_softmax(x, dim)
                 Dy/Dx = J
                 Dz/Dx = Dz/Dy Dy/Dx = 1/y * J
                 => J = J_log * y
                 '''
                 # log_softmax Jacobian from autograd, dense input
                 J2_log = softmax_jacobian_autograd(x1, dim, log=True)

                 # log_softmax Jacobian from autograd, sparse input
                 J3_log = softmax_jacobian_autograd(x, dim, log=True)

                 J = J.transpose(0, dim + 1)
                 J2_log = J2_log.transpose(0, dim + 1)
                 J3_log = J3_log.transpose(0, dim + 1)
                 self.assertEqual(J, J2_log * r1)
                 self.assertEqual(J, J3_log * r1)

                 if dim == 0:
                     # check dtype argument
                     other_dtype = torch.float32
                     y2 = torch.sparse.softmax(x, dim, dtype=other_dtype)
                     self.assertEqual(y2.dtype, other_dtype)
                     self.assertEqual(y2, y1.type(other_dtype))

                     ly2 = torch.sparse.log_softmax(x, dim, dtype=other_dtype)
                     self.assertEqual(ly2.dtype, other_dtype)
                     self.assertEqual(ly2, ly1.type(other_dtype))

         test_op(1, 10, [3], coalesced)
         test_op(1, 10, [2, 3], coalesced)
         test_op(1, 10, [3, 2], coalesced)
         test_op(2, 10, [2, 3, 4], coalesced)
         test_op(2, 10, [3, 4], coalesced)
         test_op(2, 5, [5, 4], coalesced)
         test_op(2, 10, [3, 4, 2], coalesced)
         test_op(3, 10, [3, 4, 2], coalesced)
         test_op(3, 100, [3, 4, 2], coalesced)
         test_op(3, 100, [3, 4, 2, 3], coalesced)
         test_op(3, 100, [3, 4, 2, 3, 5, 2], coalesced)
         test_op(4, 100, [3, 4, 2, 3, 5, 2], coalesced)


     def _check_zero_nnz_softmax_op(self, func, ndim, device, dtype):
         # create a sparse tensor with shape (0,..., 3) it has no materialize values
         t = torch.sparse_coo_tensor([[] for _ in range(ndim)], [], (0,) * (ndim - 1) + (3,), device=device, dtype=dtype)
         out = func(t, 0)
         self.assertEqual(out, torch.zeros_like(t))

         # gradient
         t = t.requires_grad_()
         gradcheck(lambda x: func(x, 0).to_dense(), (t,), masked=True)


     @dtypes(torch.double, torch.float)
     @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
     def test_softmax_zero_nnz(self, device, dtype):
         self._check_zero_nnz_softmax_op(torch.sparse.softmax, 1, device, dtype)
         self._check_zero_nnz_softmax_op(torch.sparse.softmax, 10, device, dtype)

     @dtypes(torch.double, torch.float)
     @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error")
     def test_log_softmax_zero_nnz(self, device, dtype):
         self._check_zero_nnz_softmax_op(torch.sparse.log_softmax, 1, device, dtype)
         self._check_zero_nnz_softmax_op(torch.sparse.log_softmax, 10, device, dtype)

     # TODO: Check after why ROCm's cusparseXcsrgemm2Nnz function doesn't return the same nnz value as CUDA
     @skipIfRocm
     @coalescedonoff
     @dtypes(*floating_and_complex_types())
     @dtypesIfCUDA(*floating_types_and(*[torch.half] if SM53OrLater else [],
                                       *[torch.bfloat16] if SM80OrLater else [],
                                       torch.complex64,
                                       *[torch.complex128] if CUSPARSE_SPMM_COMPLEX128_SUPPORTED else []))
     @unittest.skipIf(TEST_WITH_CROSSREF, "not working with fake tensor")
     @precisionOverride({torch.bfloat16: 1e-2, torch.float16: 1e-2, torch.complex64: 1e-2, torch.float32: 1e-2})
     def test_sparse_matmul(self, device, dtype, coalesced):
         """
         This function test `torch.sparse.mm` when both the mat1 and mat2 are sparse tensors.
         """

         def ref_sparse_mm(a, b):
             return a.to_dense() @ b.to_dense()

         def grad_with_custom_sparsity_pattern_test_helper(sparse_dims, nnz, shape_a, shape_b):
             def test_grad_dense(a_s, b_s, g_s):
                 a = a_s.to_dense().detach()
                 b = b_s.to_dense().detach()
                 g = g_s.to_dense().detach()

                 a.requires_grad_(True)
                 b.requires_grad_(True)
                 c = a @ b
                 c.backward(g)
                 return a.grad.sparse_mask(a_s.coalesce()), b.grad.sparse_mask(b_s.coalesce())

             a, _, _ = self._gen_sparse(sparse_dims, nnz, shape_a, dtype, device, coalesced)
             b, _, _ = self._gen_sparse(sparse_dims, nnz, shape_b, dtype, device, coalesced)
             a.requires_grad_(True)
             b.requires_grad_(True)

             c = torch.sparse.mm(a, b)
             c2 = c.to_dense().detach()
             c2 = torch.rand_like(c2)
             g = c2.sparse_mask(c.coalesce())

             c.backward(g)

             a_grad, b_grad = test_grad_dense(a, b, g)

             # We convert grad to dense since dense and sparse mm
             # implementations handle materialized zeroes differently.
             self.assertEqual(a.grad.to_dense(), a_grad.to_dense())
             self.assertEqual(b.grad.to_dense(), b_grad.to_dense())

         def test_sparse_matmul(sparse_dims, nnz, shape_a, shape_b):
             a, i_a, v_a = self._gen_sparse(sparse_dims, nnz, shape_a, dtype, device, coalesced)
             b, i_b, v_b = self._gen_sparse(sparse_dims, nnz, shape_b, dtype, device, coalesced)

             # dense implementation
             r1 = ref_sparse_mm(a, b)

             # cpp implementation
             r2 = torch.sparse.mm(a, b)
             self.assertEqual(r1, r2.to_dense())

             # Check result is truly coalesced
             self.assertTrue(r2.is_coalesced() and is_coalesced_indices(r2))

             if dtype in [torch.double, torch.cdouble]:
                 a.requires_grad_(True)
                 b.requires_grad_(True)

                 # check autograd support on sparse matmul
                 def fn(D1, D2):
                     return torch.sparse.mm(D1, D2).to_dense()

                 if a.is_cuda:
                     # For cuda, `nondet_tol` is set with `1e-5`
                     # This is because cuSparse sometimes returns approximate zero values like `~e-323`
                     # TODO: Check this cuSparse issue.
                     # This happens when you do chain multiplication `torch.sparse.mm` operations
                     gradcheck(fn, (a, b), nondet_tol=1e-5, masked=True)
                 else:
                     gradcheck(fn, (a, b), masked=True)
                 grad_with_custom_sparsity_pattern_test_helper(sparse_dims, nnz, shape_a, shape_b)

         def test_error_cases():
             def fn(sparse_dims, nnz, shape_a, shape_b):
                 a, i_a, v_a = self._gen_sparse(sparse_dims, nnz, shape_a, dtype, device, coalesced)
                 b, i_b, v_b = self._gen_sparse(sparse_dims, nnz, shape_b, dtype, device, coalesced)
                 r2 = torch.sparse.mm(a, b)

             # This is not a matrix
             self.assertRaises(RuntimeError, lambda: fn(3, 4, [2, 2, 2], [2, 2, 2]))

             # Shapes does not
             self.assertRaisesRegex(RuntimeError,
                                    r"mat1 and mat2 shapes cannot be multiplied \(2x3 and 4x2\)",
                                    lambda: fn(2, 10, [2, 3], [4, 2]))

             def different_dtypes():
                 a, i_a, v_a = self._gen_sparse(2, 10, [2, 2], dtype, device, coalesced)
                 b, i_b, v_b = self._gen_sparse(2, 10, [2, 2], dtype, device, coalesced)
                 r2 = torch.sparse.mm(a.to(torch.float64), a.to(torch.float32))

             self.assertRaisesRegex(RuntimeError, 'mat1 dtype Double does not match mat2 dtype Float', different_dtypes)

         def test_backward_noncontiguous():
             # Sparse.mm backward used to wrong with non-contiguous grads,
             # see https://github.com/pytorch/pytorch/issues/102493.
             n_reps = 7
             for _ in range(n_reps):
                 A = torch.eye(5).to_sparse().requires_grad_(True)
                 B = torch.eye(5).to_sparse()
                 out = torch.sparse.mm(A, B)
                 out.coalesce().values().sum().backward()
                 self.assertEqual(A.grad, A)

         for n in range(2, 5):
             for m in range(2, 8):
                 for p in range(2, 8):
                     test_sparse_matmul(2, 10, [n, m], [m, p])

         test_sparse_matmul(2, 0, [0, 0], [0, 0])
         test_sparse_matmul(2, 0, [0, 10], [10, 0])
         test_error_cases()
         test_backward_noncontiguous()

     @coalescedonoff
     @dtypes(torch.double)
     def test_assign(self, device, dtype, coalesced):
         def assign_to():
             a, i_a, v_a = self._gen_sparse(2, 5, [2, 3], dtype, device, coalesced)
             a[0] = 100

         self.assertRaises(TypeError, assign_to)

     @dtypes(torch.double, torch.cdouble)
     def test_full_broadcast_to(self, device, dtype):
         def can_broadcast(s0, s1):
             s0 = tuple(reversed(s0))
             s1 = tuple(reversed(s1))
             for i in range(len(s0)):
                 if s0[i] != 1 and s0[i] != s1[i]:
                     return False
             return True
         sizes = (
             (), (1,), (2,), (1, 1), (3, 1), (3, 2), (4, 1, 1), (4, 3, 2)
         )
         for s0, s1 in itertools.combinations(sizes, r=2):
             t = make_tensor(s0, dtype=dtype, device=device, low=-9, high=9)
             for sparse_dims in range(1, len(s0) + 1):
                 s = t.to_sparse(sparse_dims)
                 if can_broadcast(s0, s1):
                     t_res = torch.broadcast_to(t, s1)
                     s_res = torch._sparse_broadcast_to(s, s1)
                     torch._validate_sparse_coo_tensor_args(s_res._indices(), s_res._values(), s_res.shape)
                     if s_res.is_coalesced():
                         # ensure that is_coalesced is estimated correctly
                         self.assertEqual(s_res, torch.sparse_coo_tensor(s_res._indices(), s_res._values(), s_res.shape).coalesce())
                     self.assertEqual(s_res.to_dense(), t_res)
                 else:
                     with self.assertRaisesRegex(RuntimeError,
                                                 r"The expanded size of the tensor \(\d\) "
                                                 r"must match the existing size \(\d\)"):
                         torch._sparse_broadcast_to(s, s1)

     @coalescedonoff
     @dtypes(torch.double, torch.cdouble)
     def test_sparse_broadcast_to(self, device, dtype, coalesced):
         def test(sparse_dims, nnz, with_size, new_size):
             x = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)[0]
             y = self.safeToDense(x)
             x1 = torch._sparse_broadcast_to(x, new_size)
             y1 = y.broadcast_to(new_size)
             self.assertEqual(self.safeToDense(x1), y1)

         test(4, 6, [7, 3, 1, 3, 0], [7, 3, 4, 3, 0])
         test(4, 6, [7, 3, 1, 3, 0], [2, 7, 3, 1, 3, 0])
         test(4, 6, [7, 3, 1, 3, 1, 3], [7, 3, 1, 3, 2, 3])
         test(4, 6, [7, 3, 1, 3, 2, 1], [7, 3, 1, 3, 2, 3])

     def _test_mul_skips(self, device, dtype, coalesced):
         skipTestIfUncoalesced = False
         # This case always coalesce inputs and that could lead to loss of precision,
         # hence it is inhibited for float16/bfloat16 by providing already coalesced tensors.
         if not coalesced and dtype in {torch.float16, torch.bfloat16}:
             skipTestIfUncoalesced = True
         # to_dense is problematic for boolean non-coalesced CUDA tensors
         # see https://github.com/pytorch/pytorch/issues/81648
         if not coalesced and dtype == torch.bool and torch.device(device).type == "cuda":
             skipTestIfUncoalesced = True

         if skipTestIfUncoalesced:
             self.skipTest(f"Test with dtype={dtype}, device={device} runs only with coalesced inputs")

     @coalescedonoff
     # NOTE: addcmul_out is not implemented for bool.
     @dtypes(*all_types_and_complex_and(torch.bfloat16, torch.float16))
     @precisionOverride({torch.bfloat16: 1e-2, torch.float16: 1e-2})
     def test_sparse_sparse_mul(self, device, dtype, coalesced):
         self._test_mul_skips(device, dtype, coalesced)

         shape = (2, 3, 4, 10)
         nnz = 10

         def check(self, x, y):
             res_sparse = x * y
             res_dense = x.to_dense() * y.to_dense()
             self.assertEqual(res_sparse.to_dense(), res_dense)

         def check_empty(sparse_shape, nnz, dense_shape, coalesce):
             from itertools import product
             for nnz_val, shape_suffix in product((nnz, 0), ((), (0,))):
                 empty_sparse_shape = sparse_shape + shape_suffix
                 empty_dense_shape = dense_shape + shape_suffix
                 x = self._gen_sparse(sparse_dim, nnz_val, empty_sparse_shape, dtype, device, coalesce)[0]
                 check(self, x, x)

         # TODO: uncomment once backward is implemented for sparse tensors that broadcast in dense dims.
         # def check_autograd(x, y):
         #     if dtype in {torch.double, torch.cdouble}:
         #         xa = x.detach().clone().requires_grad_(True)
         #         ya = y.detach().clone().requires_grad_(True)
         #         gradcheck(lambda a, b: (a * b).to_dense(), (xa, ya), masked=True)
         #         gradcheck(lambda a, b: (a * b).to_dense(), (ya, xa), masked=True)

         for dim in range(len(shape) + 1):
             sub_shape = shape[dim:]
             sparse_dim = len(sub_shape) // 2

             check_empty(sub_shape, nnz, shape, coalesced)

             x = self._gen_sparse(sparse_dim, nnz, sub_shape, dtype, device, coalesced)[0]
             y = self._gen_sparse(sparse_dim, nnz, sub_shape, dtype, device, coalesced)[0]
             check(self, x, y)
             # TODO: uncomment once supported
             # check_autograd(x, y)

             # check broadcasting in dense dims
             for d in range(sparse_dim, len(sub_shape)):
                 new_shape = sub_shape[:d] + (1,) + sub_shape[d + 1:]
                 y = self._gen_sparse(sparse_dim, nnz, new_shape, dtype, device, coalesced)[0]
                 check(self, x, y)
                 # TODO: uncomment once supported
                 # check_autograd(x, y)

     @coalescedonoff
     @dtypes(*all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16))
     @precisionOverride({torch.bfloat16: 1e-2, torch.float16: 1e-2})
     def test_sparse_dense_mul(self, device, dtype, coalesced):
         self._test_mul_skips(device, dtype, coalesced)

         shape = (2, 3, 4, 10)
         nnz = 10

         def check(self, s, d):
             res = d * s

             # check commutativity
             self.assertEqual(res, s * d)

             # check correctness
             self.assertEqual(res.to_dense(), s.to_dense() * d)

             # check in-placeness for dense
             if d.dim() >= s.dim():
                 dc = d.clone()
                 self.assertEqual(d.mul_(s), dc.mul_(s.to_dense()))

             # check in-placeness for sparse
             if s.dim() >= d.dim():
                 # for sparse
                 sc = s.clone()
                 self.assertEqual(s.mul_(d).to_dense(), sc.to_dense().mul_(d))

         for dim in range(len(shape) + 1):
             sub_shape = shape[dim:]
             sparse_dim = len(sub_shape) // 2

             def check_empty(sparse_shape, nnz, dense_shape, coalesce):
                 from itertools import product
                 for nnz_val, shape_suffix in product((nnz, 0), ((), (0,))):
                     empty_sparse_shape = sparse_shape + shape_suffix
                     empty_dense_shape = dense_shape + shape_suffix
                     s = self._gen_sparse(sparse_dim, nnz_val, empty_sparse_shape, dtype, device, coalesce)[0]
                     d = make_tensor(empty_dense_shape, dtype=dtype, device=device)
                     check(self, s, d)

             # check scalar multiplication
             s = self._gen_sparse(sparse_dim, nnz, sub_shape, dtype, device, coalesced)[0]
             for scalar in (True, 1, 1.0):
                 res_sparse_right = s * scalar
                 res_sparse_left = scalar * s
                 res_dense = s.to_dense() * scalar
                 # check correctness and dtype
                 self.assertEqual(s.to(res_sparse_right.dtype), res_sparse_right)
                 self.assertEqual(res_sparse_right, res_sparse_left)
                 self.assertEqual(res_sparse_right.dtype, res_dense.dtype)
                 self.assertEqual(res_sparse_left.dtype, res_dense.dtype)
                 # check scalar as 0-dim sparse tensor
                 tscalar = torch.tensor(scalar, device=device)
                 sscalar = tscalar.to_sparse()
                 res_sparse_right = s * sscalar
                 res_sparse_left = sscalar * s
                 self.assertEqual(res_sparse_right, res_sparse_left)
                 self.assertEqual(s.to(res_sparse_right.dtype), res_sparse_right)

             # check non-coalesced 0-dim scalar
             # we skip torch.bool because for such tensors
             # coalesce.to_dense != to_dense
             if dtype == torch.bool:
                 return

             for scalar_dtype in (int, float):
                 scalar = scalar_dtype(1)
                 idx = torch.tensor([], device=device).reshape(0, 2)
                 val = torch.tensor([scalar, scalar], device=device)
                 sscalar = torch.sparse_coo_tensor(idx, val, ())
                 res_dense = s.to_dense() * sscalar.to_dense()
                 self.assertEqual((s * sscalar).to_dense(), res_dense)
                 self.assertEqual((sscalar * s).to_dense(), res_dense)

             # Case 1: sparse broadcasts over dense
             s = self._gen_sparse(sparse_dim, nnz, sub_shape, dtype, device, coalesced)[0]
             d = make_tensor(shape, dtype=dtype, device=device)
             check(self, s, d)
             check_empty(sub_shape, nnz, shape, coalesced)

             # Case 2: dense broadcasts over sparse
             s = self._gen_sparse(3, nnz, shape, dtype, device, coalesced)[0]
             d = make_tensor(sub_shape, dtype=dtype, device=device)
             check(self, s, d)
             check_empty(shape, nnz, sub_shape, coalesced)

     @unittest.skipIf(not TEST_NUMPY, "NumPy is not available")
     @onlyCPU
     @dtypes(*all_types_and_complex_and(torch.bool))
     def test_sparse_spdiags(self, device, dtype):

         make_diags = functools.partial(make_tensor, dtype=dtype, device=device)
         make_offsets = functools.partial(torch.tensor, dtype=torch.long, device=device)

         if TEST_SCIPY:
             def reference(diags, offsets, shape):
                 return scipy.sparse.spdiags(diags, offsets, *shape).toarray()

         else:
             def reference(diags, offsets, shape):
                 result = torch.zeros(shape, dtype=dtype, device=device)
                 for i, off in enumerate(offsets):
                     res_view = result.diagonal(off)
                     data = diags[i]
                     if off > 0:
                         data = data[off:]

                     m = min(res_view.shape[0], data.shape[0])
                     res_view[:m] = data[:m]
                 return result

         def check_valid(diags, offsets, shape, layout=None):
             ref_out = reference(diags, offsets, shape)
             out = torch.sparse.spdiags(diags, offsets, shape, layout=layout)
             if layout is None:
                 ex_layout = torch.sparse_coo
             else:
                 ex_layout = layout
             out_dense = out.to_dense()
             self.assertTrue(out.layout == ex_layout, f"Output layout {out.layout} expected {ex_layout}")
             self.assertEqual(out_dense, ref_out, f"Result:\n{out_dense} does not match reference:\n{ref_out}")

         def check_invalid(args, error):
             with self.assertRaisesRegex(RuntimeError, error):
                 torch.sparse.spdiags(*args)

         def valid_cases():
             # some normal cases
             yield (make_diags((1, 5)), make_offsets([0]), (5, 5))
             yield (make_diags((3, 3)), make_offsets([-1, 0, 1]), (4, 4))
             # noncontigous diags
             yield (make_diags((5, 4), noncontiguous=True), make_offsets([-1, 1, 0, 2, -2]), (5, 5))
             # noncontigous offsets
             yield (make_diags((3, 4)), make_offsets([1, -1, 0, -2, 2])[::2], (5, 5))
             # noncontigous diags + offsets
             yield (make_diags((3, 4), noncontiguous=True), make_offsets([1, -1, 0, -2, 2])[::2], (5, 5))
             # correct dimensionality, 2d, 2d , and shapes match, but the number of diagonals is zero
             yield (make_diags((0, 3)), make_offsets([]), (3, 3))
             # forward rotation of upper diagonals
             yield (make_diags((3, 8)), make_offsets([1, 2, 3]), (4, 4))
             # rotation exausts input space to read from
             yield (make_diags((2, 3)), make_offsets([2, 1]), (3, 3))
             # Simple cases repeated with special output format
             yield (make_diags((1, 5)), make_offsets([0]), (5, 5), torch.sparse_csc)
             yield (make_diags((3, 3)), make_offsets([-1, 0, 1]), (4, 4), torch.sparse_csr)
             # vector diags
             yield (make_diags((3, )), make_offsets([1]), (4, 4))
             # Scalar offset
             yield (make_diags((1, 3)), make_offsets(2), (4, 4))
             # offsets out of range
             yield (make_diags((1, 3)), make_offsets([3]), (3, 3))
             yield (make_diags((1, 3)), make_offsets([-3]), (3, 3))

         for case in valid_cases():
             check_valid(*case)

         def invalid_cases():
             yield (make_diags((1, 3)), make_offsets([0]), (3, 2, 3)), "Output shape must be 2d"
             yield (make_diags((2, 3)), make_offsets([[1, 2], [0, 3]]), (3, 3)), "Offsets must be scalar or vector"
             yield (make_diags((3, 2, 3)), make_offsets([0, 1, 2]), (4, 4)), "Diagonals must be vector or matrix"
             yield (make_diags((3, 3)), make_offsets([-1, 0]), (3, 3)), \
                 r"Number of diagonals \(\d\) does not match the number of offsets \(\d\)"
             yield (make_diags((5,)), make_offsets([0, 1, 2, 3, 4]), (3, 3)), \
                 r"Number of diagonals \(\d\) does not match the number of offsets \(\d\)"
             yield (make_diags((2, 2)), make_offsets([-1, 0]), (2, 3), torch.strided), \
                 r"Only output layouts \(\w+, \w+, \w+\) are supported, got \w+"
             yield (make_diags((2, 5)), make_offsets([0, 0]), (5, 5)), "Offset tensor contains duplicate values"
             yield (make_diags((1, 5)), make_offsets([0]).to(torch.int32), (5, 5)), r"Offset Tensor must have dtype Long but got \w+"


         for case, error_regex in invalid_cases():
             check_invalid(case, error_regex)

     def test_small_nnz_coalesced(self):
         # creating a coo tensor with nnz == 0 is always coalesced
         self.assertTrue(torch.sparse_coo_tensor([[], []], [], (2, 2)).is_coalesced())
         # same for a coo tensor with only 1 nnz
         self.assertTrue(torch.sparse_coo_tensor([[0], [0]], [1], (2, 2)).is_coalesced())
         # two or more nnz coalesced is false as it can't be verified without an expensive check
         self.assertFalse(torch.sparse_coo_tensor([[0, 0], [0, 0]], [1, 2], (2, 2)).is_coalesced())
         # even if there are no duplicates
         self.assertFalse(torch.sparse_coo_tensor([[0, 1], [0, 1]], [1, 2], (2, 2)).is_coalesced())

     @coalescedonoff
     @dtypes(*all_types_and_complex_and(torch.bool))
     def test_sum(self, device, dtype, coalesced):
         def run_test(shape, nnz):
             a = self._gen_sparse(2, nnz, shape, dtype, device, coalesced)[0]
             self.assertEqual(a.sum(), a._values().sum())
             if dtype.is_floating_point or dtype.is_complex:
                 a.requires_grad_(True)
                 a_inter = a.sum()
                 a_inter.abs().backward()
                 with torch.no_grad():
                     self.assertEqual(a.grad, torch.ones(shape, dtype=dtype, device=device) * torch.sgn(a_inter))
         for shape in [(10, 5), (10, 10)]:
             run_test(shape, 0)
             run_test(shape, max(shape))
             run_test(shape, shape[0] * shape[1])


 class TestSparseOneOff(TestCase):
     @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
     def test_cuda_from_cpu(self):
         with self.assertRaisesRegex(
                 RuntimeError,
                 "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"):
             torch.sparse_coo_tensor(torch.zeros(1, 4).long().cuda(),
                                     torch.randn(4, 4, 4),
                                     [3, 4, 4])

         with self.assertRaisesRegex(
                 RuntimeError,
                 "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"):
             torch.sparse_coo_tensor(torch.zeros(1, 4).long().cuda(),
                                     torch.randn(4, 4, 4, 0),
                                     [3, 4, 4, 0])

         with self.assertRaisesRegex(
                 RuntimeError,
                 "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"):
             torch.sparse_coo_tensor(torch.empty(1, 0).long().cuda(),
                                     torch.randn(0, 4, 4, 0),
                                     [0, 4, 4, 0])

     @unittest.skipIf(not TEST_CUDA, 'CUDA not available')
     def test_cuda_sparse_cpu_dense_add(self):
         x = torch.zeros(3, 4, 4)
         sparse_y = torch.sparse_coo_tensor(torch.zeros(1, 4).long().cuda(),
                                            torch.randn(4, 4, 4).cuda(),
                                            [3, 4, 4])
         with self.assertRaisesRegex(RuntimeError, "add: expected 'self' to be a CUDA tensor, but got a CPU tensor"):
             x + sparse_y

         x = torch.zeros(3, 4, 4, 0)
         sparse_y = torch.sparse_coo_tensor(torch.zeros(1, 4).long().cuda(),
                                            torch.randn(4, 4, 4, 0).cuda(),
                                            [3, 4, 4, 0])
         with self.assertRaisesRegex(RuntimeError, "add: expected 'self' to be a CUDA tensor, but got a CPU tensor"):
             x + sparse_y

         x = torch.zeros(0, 4, 4, 0)
         sparse_y = torch.sparse_coo_tensor(torch.empty(1, 0).long().cuda(),
                                            torch.randn(0, 4, 4, 0).cuda(),
                                            [0, 4, 4, 0])
         with self.assertRaisesRegex(RuntimeError, "add: expected 'self' to be a CUDA tensor, but got a CPU tensor"):
             x + sparse_y


 def _sparse_to_dense(tensor):
     if tensor.dtype != torch.bool:
         return tensor.to_dense(masked_grad=True)

     # to_dense uses coalesce which isn't implemented for bool
     return tensor.to(torch.int8).to_dense().to(torch.bool)


 _sparse_unary_ops = ops(sparse_unary_ufuncs, dtypes=OpDTypes.supported,
                         allowed_dtypes=all_types_and_complex())
 class TestSparseUnaryUfuncs(TestCase):
     exact_dtype = True


     @_sparse_unary_ops
     def test_sparse_consistency(self, device, dtype, op):
         sample = first_sample(self, op.sample_inputs(device, dtype))
         assert isinstance(sample.input, torch.Tensor)

         expected = op(sample.input, *sample.args, **sample.kwargs)
         assert torch.is_tensor(expected)
         output = op(sample.input.to_sparse(), *sample.args, **sample.kwargs)
         assert torch.is_tensor(output)
         self.assertEqual(_sparse_to_dense(output), expected)

     @_sparse_unary_ops
     def test_out(self, device, dtype, op):
         if not op.supports_out:
             self.skipTest("Skipped! Out not supported")

         sample = first_sample(self, op.sample_inputs(device, dtype))
         sample.input = sample.input.to_sparse()
         expect = op(sample.input, *sample.args, **sample.kwargs)

         out = torch.sparse_coo_tensor(sample.input.shape, device=device,
                                       dtype=expect.dtype)
         op(sample.input, *sample.args, **sample.kwargs, out=out)
         self.assertEqual(out, expect)

     @_sparse_unary_ops
     def test_inplace(self, device, dtype, op):
         if op.inplace_variant is None:
             self.skipTest("Skipped! Out not supported")

         sample = first_sample(self, op.sample_inputs(device, dtype))
         sample.input = sample.input.to_sparse().coalesce()
         expect = op(sample.input, *sample.args, **sample.kwargs)

         if not torch.can_cast(expect.dtype, dtype):
             with self.assertRaisesRegex(RuntimeError, "result type .* can't be cast to"):
                 op.inplace_variant(sample.input, *sample.args, **sample.kwargs)
             return

         actual = op.inplace_variant(sample.input, *sample.args, **sample.kwargs)
         self.assertIs(actual, sample.input)
         self.assertEqual(actual, expect)

     @_sparse_unary_ops
     def test_sparse_zero_dims(self, device, dtype, op):
         # test 0x0 sparse_coo_tensor
         indices = torch.empty(2, 0, dtype=torch.int64)
         values = torch.empty(0, dtype=dtype)
         sparse_0x0 = torch.sparse_coo_tensor(indices, values, (0, 0))
         expected = torch.sparse_coo_tensor(indices, op(values), (0, 0))
         actual = op(sparse_0x0)
         self.assertEqual(expected, actual)

     @_sparse_unary_ops
     def test_sparse_zeros(self, device, dtype, op):
         samples = op.sample_inputs(device, dtype)

         zero_input = torch.zeros((), device=device, dtype=dtype)
         sparse_input = torch.sparse_coo_tensor((), dtype=dtype, device=device)

         expect = op(zero_input)
         actual = op(sparse_input)
         self.assertEqual(expect, _sparse_to_dense(actual))

     @ops(sparse_unary_ufuncs, dtypes=OpDTypes.supported,
          allowed_dtypes=[torch.double, torch.cdouble])
     def test_sparse_fn_grad(self, device, dtype, op):
         if not op.supports_autograd:
             self.skipTest("Skipped! Op doesn't support autograd")

         for sample in op.sample_inputs(device, dtype):
             sparse_input = sample.input.to_sparse().detach().requires_grad_(True)

             def fn(x):
                 return _sparse_to_dense(
                     op(x, *sample.args, **sample.kwargs))

             self.assertTrue(gradcheck(
                 fn,
                 (sparse_input,),
                 check_batched_grad=False,
                 check_grad_dtypes=True,
                 nondet_tol=op.gradcheck_nondet_tol,
                 fast_mode=op.gradcheck_fast_mode,
                 masked=True))


 class TestSparseMaskedReductions(TestCase):
     exact_dtype = True

     fp16_low_precision_list = {
         'masked.prod',
     }

     @ops(sparse_masked_reduction_ops)
     def test_future_empty_dim(self, device, dtype, op):
         """Currently, `dim=()` in reductions operations means "reduce over
         all dimensions" while in future, it will read "no reduce". See
         https://github.com/pytorch/pytorch/issues/29137

         For sparse masked reductions, we'll implement the current behavior.

         For testing, we'll use samples with `dim=0` and map it to
         `dim=()` until
         torch.testing._internal.common_methods_invocations._generate_reduction_kwargs
         is made to generate samples with `dim=()` for non-scalar
         inputs. With this and after gh-29137 is resolved, this test
         can be deleted. See also `torch.masked._canonical_dim`
         implementation about changing the `dim=()` behavior.
         """

         samples = op.sample_inputs_func(op, device, dtype, requires_grad=False)
         op_name = op.name.replace('masked.', '')
         for sample_input in samples:
             if sample_input.kwargs.get('dim') != 0:
                 continue
             sample_input_kwargs = dict(sample_input.kwargs)
             sample_input_kwargs['dim'] = ()    # reduce over all dimensions

             t = sample_input.input
             mask = sample_input_kwargs.get('mask')
             if mask is None and op_name in {'prod', 'amax', 'amin'}:
                 # FIXME: for now reductions with non-zero reduction identity and
                 # unspecified mask are not supported for sparse COO
                 # tensors, see torch.masked.prod implementation
                 # for details.
                 continue
             sparse_op_kwargs = dict(sample_input_kwargs)
             actual = op(t.to_sparse(), *sample_input.args, **sample_input_kwargs)
             self.assertEqual(actual.layout, torch.sparse_coo)

             expected = op(t, *sample_input.args, **sample_input_kwargs).to_sparse()
             atol = None
             rtol = None
             if op.name in self.fp16_low_precision_list and dtype == torch.half:
                 atol = 1e-5
                 rtol = 2e-3
             self.assertEqual(actual, expected, atol=atol, rtol=rtol)


 class TestSparseMeta(TestCase):
     exact_dtype = True

     def _test_meta_sparse_coo(self, dtype):
         r = torch.empty(4, 4, layout=torch.sparse_coo, device='meta', dtype=dtype)
         self.assertTrue(r.is_meta)
         self.assertEqual(r.device.type, "meta")
         r2 = torch.empty_like(r)
         self.assertTrue(r2.is_meta)
         self.assertEqual(r, r2)
         r3 = torch.sparse_coo_tensor(size=(4, 4), device='meta', dtype=dtype)
         self.assertTrue(r3.is_meta)
         self.assertEqual(r, r3)
         r.sparse_resize_((4, 4), 1, 1)
         r.sparse_resize_and_clear_((4, 4, 4), 2, 1)
         self.assertEqual(r.sparse_dim(), 2)
         self.assertEqual(r.dense_dim(), 1)
         self.assertEqual(r._dimV(), 1)
         self.assertEqual(r._nnz(), 0)
         # nnz zero sparse tensors should always be coalesced at creation
         self.assertEqual(r.is_coalesced(), True)
         # but we can force them into the uncoalesed state
         r._coalesced_(False)
         self.assertEqual(r.is_coalesced(), False)
         # return the coalesced state for indices/values access
         r._coalesced_(True)
         # TODO: this sort of aliasing will need to be handled by
         # functionalization
         self.assertEqual(r._indices(), torch.empty(2, 0, device='meta', dtype=torch.int64))
         self.assertEqual(r._values(), torch.empty(0, 4, device='meta', dtype=dtype))
         self.assertEqual(r.indices(), torch.empty(2, 0, device='meta', dtype=torch.int64))
         self.assertEqual(r.values(), torch.empty(0, 4, device='meta', dtype=dtype))

     def _test_meta_sparse_compressed(self, dtype, layout, batchsize, densesize):
         index_dtype = torch.int64
         blocksize = (2, 3) if layout in {torch.sparse_bsr, torch.sparse_bsc} else ()
         sparsesize = (4, 6)
         nnz = 0

         shape = (*batchsize, *sparsesize, *densesize)
         compressed_dim = 0 if layout in {torch.sparse_csr, torch.sparse_bsr} else 1
         nof_compressed_indices = (sparsesize[compressed_dim] // blocksize[compressed_dim] + 1 if blocksize
                                   else sparsesize[compressed_dim] + 1)
         compressed_indices = torch.empty((*batchsize, nof_compressed_indices), device='meta', dtype=index_dtype)
         plain_indices = torch.empty((*batchsize, nnz), device='meta', dtype=index_dtype)

         values = torch.empty((*batchsize, nnz, *blocksize, *densesize), device='meta', dtype=dtype)
         r = torch.sparse_compressed_tensor(
             compressed_indices,
             plain_indices,
             values,
             shape,
             layout=layout
         )
         self.assertTrue(r.is_meta)
         self.assertEqual(r.device.type, "meta")

         self.assertEqual(r.sparse_dim(), 2)
         self.assertEqual(r.dense_dim(), len(densesize))
         self.assertEqual(r._nnz(), nnz)
         batch_dims = r.ndim - r.sparse_dim() - r.dense_dim()
         r_blocksize = r.values().shape[batch_dims + 1: batch_dims + 1 + len(blocksize)]
         self.assertEqual(r_blocksize, blocksize)

         r_compressed_indices = r.crow_indices() if layout in {torch.sparse_csr, torch.sparse_bsr} else r.ccol_indices()
         r_plain_indices = r.col_indices() if layout in {torch.sparse_csr, torch.sparse_bsr} else r.row_indices()

         self.assertEqual(r_compressed_indices,
                          torch.empty((*batchsize, nof_compressed_indices), device='meta', dtype=index_dtype))
         self.assertEqual(r_plain_indices, torch.empty((*batchsize, nnz), device='meta', dtype=index_dtype))
         self.assertEqual(r.values(), torch.empty((*batchsize, nnz, *blocksize, *densesize), device='meta', dtype=dtype))

         r2 = torch.empty_like(r)
         self.assertTrue(r2.is_meta)
         self.assertEqual(r2, r)

         if layout in {torch.sparse_csr, torch.sparse_csc}:
             r3 = torch.empty((*batchsize, *sparsesize), dtype=dtype, layout=layout, device="meta")
             self.assertTrue(r3.is_meta)
             if not densesize:
                 # dense dimensions cannot be specified for torch.empty
                 self.assertEqual(r3, r)

     @all_sparse_layouts('layout', include_strided=False)
     @parametrize("dtype", [torch.float64])
     def test_meta(self, dtype, layout):
         if layout is torch.sparse_coo:
             self._test_meta_sparse_coo(dtype)
         else:
             for batchsize, densesize in itertools.product([(), (2,)], [(), (3,)]):
                 self._test_meta_sparse_compressed(dtype, layout, batchsize, densesize)

     def _test_print_meta_data(self, dtype, layout, batchsize, sparsesize, densesize):
         index_dtype = torch.int64
         nnz = 0
         blocksize = (2, 3) if layout in {torch.sparse_bsr, torch.sparse_bsc} else ()
         shape = (*batchsize, *sparsesize, *densesize)
         values = torch.empty((*batchsize, nnz, *blocksize, *densesize), device='meta', dtype=dtype)
         if layout is torch.sparse_coo:
             indices = torch.empty((len(sparsesize), nnz), device='meta', dtype=index_dtype)
             x = torch.sparse_coo_tensor(indices, values, shape)
         else:
             compressed_dim = 0 if layout in {torch.sparse_csr, torch.sparse_bsr} else 1
             nof_compressed_indices = (sparsesize[compressed_dim] // blocksize[compressed_dim] + 1 if blocksize
                                       else sparsesize[compressed_dim] + 1)
             compressed_indices = torch.empty((*batchsize, nof_compressed_indices), device='meta', dtype=index_dtype)
             plain_indices = torch.empty((*batchsize, nnz), device='meta', dtype=index_dtype)
             x = torch.sparse_compressed_tensor(
                 compressed_indices,
                 plain_indices,
                 values,
                 shape,
                 layout=layout
             )

         printed = []
         printed.append(f"########## {dtype}/{index_dtype}/size={batchsize}+{sparsesize}+{blocksize}+{densesize} ##########")
         printed.append("# sparse meta tensor")
         printed.append(str(x))

         return printed

     @all_sparse_layouts('layout', include_strided=False)
     @parametrize("dtype", [torch.float64])
     def test_print_meta(self, dtype, layout):
         printed = []
         for batchsize, sparsesize, densesize in itertools.product(
                 [(), (2,)], [(4, 6), (3, 5, 7)], [(), (3,)]
         ):
             if layout is torch.sparse_coo and batchsize:
                 # COO tensors don't have batch dimensions
                 continue
             if layout is not torch.sparse_coo and len(sparsesize) != 2:
                 # CSR/CSC/BSR/BSC tensors must have 2 sparse dimensions
                 continue
             printed += self._test_print_meta_data(dtype, layout, batchsize, sparsesize, densesize)

         orig_maxDiff = self.maxDiff
         self.maxDiff = None
         try:
             self.assertExpected('\n'.join(printed))
             self.maxDiff = orig_maxDiff
         except Exception:
             self.maxDiff = orig_maxDiff
             raise

     def assertEqualMeta(self, x, y, expected_nnz):
         self.assertEqual(x.layout, y.layout)
         self.assertEqual(x.shape, y.shape)
         self.assertEqual(x.dtype, y.dtype)
         self.assertEqual(x.sparse_dim(), y.sparse_dim())
         self.assertEqual(x.dense_dim(), y.dense_dim())

         def assertEqualAttrs(x, y, expected_shape):
             self.assertEqual(x.shape, expected_shape)
             self.assertEqual(x.dtype, y.dtype)
             self.assertEqual(x.layout, y.layout)
             if not x.is_meta:
                 self.assertEqual(x.device, y.device)

         if x.layout is torch.sparse_coo:
             assertEqualAttrs(x._indices(), y._indices(), (*y._indices().shape[:-1], expected_nnz))
             assertEqualAttrs(x._values(), y._values(), (expected_nnz, *y._values().shape[1:]))
         elif x.layout in {torch.sparse_csr, torch.sparse_bsr}:
             assertEqualAttrs(x.crow_indices(), y.crow_indices(), y.crow_indices().shape)
             assertEqualAttrs(x.col_indices(), y.col_indices(), (*y.col_indices().shape[:-1], expected_nnz))
             batch_dim = x.col_indices().ndim - 1
             values_shape = (*y.values().shape[:batch_dim], expected_nnz, *y.values().shape[batch_dim + 1:])
             self.assertEqual(x.values().layout, y.values().layout)
             self.assertEqual(x.values().dtype, y.values().dtype)
             self.assertEqual(x.values().shape, values_shape)
         elif x.layout in {torch.sparse_csc, torch.sparse_bsc}:
             assertEqualAttrs(x.ccol_indices(), y.ccol_indices(), y.ccol_indices().shape)
             assertEqualAttrs(x.row_indices(), y.row_indices(), (*y.row_indices().shape[:-1], expected_nnz))
             batch_dim = x.row_indices().ndim - 1
             values_shape = (*y.values().shape[:batch_dim], expected_nnz, *y.values().shape[batch_dim + 1:])
             self.assertEqual(x.values().layout, y.values().layout)
             self.assertEqual(x.values().dtype, y.values().dtype)
             self.assertEqual(x.values().shape, values_shape)

     @all_sparse_layouts('layout', include_strided=False)
     @parametrize("dtype", [torch.float64])
     def test_to_meta(self, dtype, layout):
         index_dtype = torch.int64
         device = 'cpu'
         for t in self.generate_simple_inputs(layout, device=device, dtype=dtype, index_dtype=index_dtype):
             m = t.to(device="meta")
             self.assertEqual(m.device.type, "meta")
             self.assertEqualMeta(m, t, 0)

     @all_sparse_layouts('layout', include_strided=False)
     @parametrize("dtype", [torch.float64])
     def test_zeros_like_meta(self, dtype, layout):
         index_dtype = torch.int64
         device = 'cpu'
         for t in self.generate_simple_inputs(layout, device=device, dtype=dtype, index_dtype=index_dtype):
             m = torch.zeros_like(t, device="meta")
             self.assertEqual(m.device.type, "meta")
             self.assertEqualMeta(m, t, 0)

     @all_sparse_layouts('layout', include_strided=False)
     @parametrize("dtype", [torch.float64])
     def test_fake(self, dtype, layout):
         from torch._subclasses.fake_tensor import FakeTensorMode, FakeTensor
         fake_mode = FakeTensorMode()
         index_dtype = torch.int64
         device = 'cpu'
         for t in self.generate_simple_inputs(layout, device=device, dtype=dtype, index_dtype=index_dtype):
             f = FakeTensor.from_tensor(t, fake_mode)
             self.assertIsInstance(f, FakeTensor)
             self.assertEqualMeta(f, t, 0)

             d = f.detach()
             self.assertIsInstance(d, FakeTensor)
             self.assertEqualMeta(d, t, 0)

     @all_sparse_layouts('layout', include_strided=False)
     @parametrize("dtype", [torch.float64])
     def test_zeros_like_fake(self, dtype, layout):
         from torch._subclasses.fake_tensor import FakeTensorMode, FakeTensor
         from torch.utils._mode_utils import no_dispatch
         fake_mode = FakeTensorMode()
         index_dtype = torch.int64
         device = 'cpu'
         for t in self.generate_simple_inputs(layout, device=device, dtype=dtype, index_dtype=index_dtype):
             f = FakeTensor.from_tensor(t, fake_mode)
             expected = torch.zeros_like(t)
             with no_dispatch():
                 result = torch.zeros_like(f, device=f.fake_device)
             self.assertEqual(result, expected)
             self.assertEqualMeta(result, expected, 0)

     @all_sparse_layouts('layout', include_strided=False)
     @parametrize("dtype", [torch.float64])
     def test_sum_meta(self, dtype, layout):
         device = 'cpu'
         index_dtype = torch.int64
         for t in self.generate_simple_inputs(layout, device=device, dtype=dtype, index_dtype=index_dtype):
             m = t.to(device='meta')
             r = torch.sum(m)
             expected = torch.sum(t).to(device="meta")
             self.assertTrue(r.is_meta)
             self.assertEqualMeta(r, expected, 0)

     @all_sparse_layouts('layout', include_strided=False)
     @parametrize("dtype", [torch.float64])
     def test_add_meta(self, dtype, layout):
         device = 'cpu'
         index_dtype = torch.int64
         for t in self.generate_simple_inputs(layout, device=device, dtype=dtype, index_dtype=index_dtype):
             expected = torch.add(t, t).to(device='meta')
             m = t.to(device='meta')
             r = torch.add(m, m)
             self.assertEqualMeta(r, expected, 0)


 class _SparseDataset(torch.utils.data.Dataset):
     # An utility class used in TestSparseAny.test_dataloader method.

     def __init__(self, sparse_tensors):
         self.sparse_tensors = sparse_tensors

     def __len__(self):
         return len(self.sparse_tensors)

     def __getitem__(self, index):
         return self.sparse_tensors[index]


 class TestSparseAny(TestCase):

     @onlyCPU
     @all_sparse_layouts('layout', include_strided=False)
     @torch.sparse.check_sparse_tensor_invariants(enable=False)
     def test_check_sparse_tensor_invariants(self, layout):

         if layout is torch.sparse_coo:

             def create_invalid_tensor(check_invariants=None):
                 shape = (2, 2)
                 invalid_indices = torch.tensor([[0], [3]])  # column index is out of range
                 values = torch.tensor([1])
                 if check_invariants is None:
                     return torch.sparse_coo_tensor(invalid_indices, values, shape)
                 else:
                     return torch.sparse_coo_tensor(invalid_indices, values, shape, check_invariants=check_invariants)

             expected_exception_message = 'size is inconsistent with indices: for dim 1, size is 2 but found index 3'

         elif layout in {torch.sparse_csr, torch.sparse_csc, torch.sparse_bsr, torch.sparse_bsc}:

             def create_invalid_tensor(check_invariants=None):
                 shape = (2, 2)
                 compressed_indices = torch.tensor([0, 0, 1])
                 invalid_plain_indices = torch.tensor([3])  # index is out of range
                 if layout in {torch.sparse_bsr, torch.sparse_bsc}:
                     values = torch.tensor([[[1]]])
                 else:
                     values = torch.tensor([1])
                 if check_invariants is None:
                     return torch.sparse_compressed_tensor(compressed_indices, invalid_plain_indices, values, shape, layout=layout)
                 else:
                     return torch.sparse_compressed_tensor(compressed_indices, invalid_plain_indices, values, shape, layout=layout,
                                                           check_invariants=check_invariants)

             if layout in {torch.sparse_csr, torch.sparse_bsr}:
                 expected_exception_message = r'`0 <= col_indices < ncols` is not satisfied.'
             else:
                 expected_exception_message = r'`0 <= row_indices < nrows` is not satisfied.'

         else:
             raise NotImplementedError(layout)

         # First, consider the case where invariant checks are disabled
         # "globally" (read: within the context of this test method
         # caller) as defined by check_sparse_tensor_invariants(False)
         # decorator:
         self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())

         # Enable the invariant checks in a local context:
         with torch.sparse.check_sparse_tensor_invariants():
             self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())

         # Leaving the local context must restore the "global" state of
         # the invariant check feature:
         self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())

         # Since invariant checks are disabled by default, we can
         # create an invalid sparse tensor without raising an
         # exception:
         r = create_invalid_tensor()
         self.assertEqual(r.layout, layout)

         # Or, when disabling the invariants check explicitly:
         r = create_invalid_tensor(check_invariants=False)
         self.assertEqual(r.layout, layout)

         # Enabling invariant check via constructor's optional argument
         # will raise an exception when sparse tensor invariants are
         # violated:
         with self.assertRaisesRegex(RuntimeError, expected_exception_message):
             create_invalid_tensor(check_invariants=True)

         # Check that the global invariant check flag has been restored
         # after raising the exception above:
         self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())

         # Next, consider the case where invariant checks are enabled
         # within a local context:
         with torch.sparse.check_sparse_tensor_invariants():
             self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())

             # Since invariant checks are now enabled by default, an
             # attempt to create an invalid sparse tensor will lead to
             # an exception:
             with self.assertRaisesRegex(RuntimeError, expected_exception_message):
                 create_invalid_tensor()

             # Similarly, when enabling the invariant checks
             # explicitly, invalid sparse tensor construction will lead
             # to an exception:
             with self.assertRaisesRegex(RuntimeError, expected_exception_message):
                 create_invalid_tensor(check_invariants=True)

             # However, invariants check can be disabled via
             # constructor's optional argument so that the invalid
             # tensor is succesfully constructed:
             r = create_invalid_tensor(check_invariants=False)
             self.assertEqual(r.layout, layout)

             # Check that the invariant check flag has been restored
             # when leaving the constructor:
             self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())

         # Double-check restoring the global state when leaving the
         # local context:
         self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())

         # Test nesting of pre-defined context managers
         check_ctx = torch.sparse.check_sparse_tensor_invariants(True)
         no_check_ctx = torch.sparse.check_sparse_tensor_invariants(False)
         with check_ctx:
             self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())
             with no_check_ctx:
                 self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())
             self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())
         self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())

         # Test an attempt to re-use an activate context manager instance
         check_ctx2 = torch.sparse.check_sparse_tensor_invariants(True)
         with check_ctx:
             self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())
             with no_check_ctx:
                 self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())
                 with self.assertRaisesRegex(RuntimeError, "This context manager instance is already activated."
                                             " Use a different context manager instance for context nesting"):
                     with check_ctx:
                         self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())
                 self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())
                 with check_ctx2:
                     self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())
                 self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())
             self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled())
         self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled())

     def test_generate_simple_inputs(self):
         layouts = [torch.strided, torch.sparse_coo, torch.sparse_csr, torch.sparse_csc, torch.sparse_bsr, torch.sparse_bsc]

         tested_combinations = set()
         for tensors in zip(*map(self.generate_simple_inputs, layouts)):
             for i, t in enumerate(tensors):
                 self.assertEqual(t.layout, layouts[i])

                 # all layouts must produce semantically the same tensors
                 self.assertEqual(t, tensors[0])

                 if t.layout is torch.strided:
                     is_hybrid = None
                 else:
                     is_hybrid = t.dense_dim() > 0
                 if t.layout in {torch.sparse_csr, torch.sparse_bsr}:
                     is_batch = t.crow_indices().ndim > 1
                 elif t.layout in {torch.sparse_csc, torch.sparse_bsc}:
                     is_batch = t.ccol_indices().ndim > 1
                 else:
                     is_batch = None
                 if t.layout in {torch.sparse_bsr, torch.sparse_bsc}:
                     blocksize = t.values().shape[1:3]
                     nontrivial_blocksize = 1 not in blocksize
                 else:
                     nontrivial_blocksize = None
                 if t.layout in {torch.sparse_csr, torch.sparse_bsr}:
                     contiguous_indices = t.crow_indices().is_contiguous() and t.col_indices().is_contiguous()
                     contiguous_values = t.values().is_contiguous()
                 elif t.layout in {torch.sparse_csc, torch.sparse_bsc}:
                     contiguous_indices = t.ccol_indices().is_contiguous() and t.row_indices().is_contiguous()
                     contiguous_values = t.values().is_contiguous()
                 elif t.layout is torch.sparse_coo:
                     contiguous_indices = t._indices().is_contiguous()
                     contiguous_values = t._values().is_contiguous()
                 else:
                     contiguous_indices = None
                     contiguous_values = t.is_contiguous()

                 tested_combinations.add((t.layout, is_hybrid, is_batch, nontrivial_blocksize,
                                          contiguous_indices, contiguous_values))

         # Ensure that the inputs generation covers all layout,
         # non-hybrid/hybrid, non-batch/batch, and contiguity
         # combinations:
         untested_combinations = set()
         for layout in layouts:
             for is_hybrid in [False, True]:
                 if layout is torch.strided:
                     is_hybrid = None
                 for is_batch in [False, True]:
                     if layout in {torch.sparse_coo, torch.strided}:
                         is_batch = None
                     for nontrivial_blocksize in [False, True]:
                         if layout not in {torch.sparse_bsr, torch.sparse_bsc}:
                             nontrivial_blocksize = None
                         for contiguous_indices in [False, True]:
                             if layout is torch.strided:
                                 contiguous_indices = None
                             elif not is_batch:
                                 # indices are contiguous per-patch
                                 contiguous_indices = True
                             for contiguous_values in [False, True]:
                                 key = (layout, is_hybrid, is_batch, nontrivial_blocksize,
                                        contiguous_indices, contiguous_values)
                                 if key not in tested_combinations:
                                     untested_combinations.add(
                                         f'layout={layout}, is_hybrid={is_hybrid}, is_batch={is_batch},'
                                         f' nontrivial_blocksize={nontrivial_blocksize},'
                                         f' contiguous_indices{contiguous_indices}, contiguous_values={contiguous_values}')
         assert not untested_combinations, untested_combinations

     @all_sparse_layouts('layout', include_strided=False)
     def test_constructor_autograd(self, device, layout):

         def specific_constructor(*args, **kwargs):
             if layout is torch.sparse_csr:
                 return torch.sparse_csr_tensor(*args, **kwargs)
             elif layout is torch.sparse_csc:
                 return torch.sparse_csc_tensor(*args, **kwargs)
             elif layout is torch.sparse_bsc:
                 return torch.sparse_bsc_tensor(*args, **kwargs)
             elif layout is torch.sparse_bsr:
                 return torch.sparse_bsr_tensor(*args, **kwargs)
             elif layout is torch.sparse_coo:
                 return torch.sparse_coo_tensor(*args, **kwargs)
             else:
                 raise NotImplementedError(layout)

         def generic_constructor(*args, **kwargs):
             if layout in {torch.sparse_csr, torch.sparse_csc, torch.sparse_bsr, torch.sparse_bsc}:
                 kwargs.update(layout=layout)
                 return torch.sparse_compressed_tensor(*args, **kwargs)
             elif layout is torch.sparse_coo:
                 return torch.sparse_coo_tensor(*args, **kwargs)
             else:
                 raise NotImplementedError(layout)

         if layout is torch.sparse_coo:
             constructors = (specific_constructor,)
         else:
             constructors = (specific_constructor, generic_constructor)

         for args, kwargs in self.generate_simple_inputs(
                 layout, device=device, dtype=torch.float64,
                 enable_batch=False,  # TODO: remove after gh-104868 is resolved
                 output_tensor=False):
             values_offset = 1 if layout is torch.sparse_coo else 2

             for cnstr in constructors:
                 for requires_grad in (False, True):
                     values = args[values_offset].detach().requires_grad_(requires_grad)
                     args = (*args[:values_offset], values, *args[values_offset + 1:])
                     kwargs_ = dict(kwargs)
                     args_ = args + (kwargs_.pop('size'),)

                     sparse = cnstr(*args, **kwargs)

                     self.assertEqual(sparse.requires_grad, requires_grad)

                     if requires_grad:
                         for masked in (False, True):
                             if layout is torch.sparse_coo:
                                 torch.autograd.gradcheck(
                                     lambda i, v: cnstr(i, v, **kwargs).to_dense(masked_grad=masked),
                                     args, masked=masked)
                                 torch.autograd.gradcheck(
                                     lambda i, v, sz: cnstr(i, v, sz, **kwargs_).to_dense(masked_grad=masked),
                                     args_, masked=masked)
                             else:
                                 if layout in {torch.sparse_csc, torch.sparse_bsr, torch.sparse_bsc} and 0:
                                     # TODO: remove this if-block after gh-107370 is resolved
                                     continue
                                 torch.autograd.gradcheck(
                                     lambda ci, pi, v: cnstr(ci, pi, v, **kwargs).to_dense(masked_grad=masked),
                                     args, masked=masked)
                                 torch.autograd.gradcheck(
                                     lambda ci, pi, v, sz: cnstr(ci, pi, v, sz, **kwargs_).to_dense(masked_grad=masked),
                                     args_, masked=masked)

     @all_sparse_layouts('from_layout', include_strided=False)
     @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
     @parametrize("index_dtype", [torch.int32, torch.int64])
     def test_to_dense(self, from_layout, device, dtype, index_dtype):
         """
         This test tests conversion from any layout to strided layout.
         """
         for t in self.generate_simple_inputs(
                 from_layout, device=device, dtype=dtype, index_dtype=index_dtype):
             r = t.to_dense()
             self.assertEqual(r.layout, torch.strided)
             self.assertEqual(r, t)

     @all_sparse_layouts('from_layout', include_strided=False)
     @dtypes(torch.float64, torch.complex128)
     @parametrize("index_dtype", [torch.int64])
     @gradcheck_semantics()
     def test_gradcheck_to_dense(self, from_layout, device, dtype, index_dtype, gradcheck):
         for t in self.generate_simple_inputs(
                 from_layout, device=device, dtype=dtype, index_dtype=index_dtype):
             batch_dim = t.dim() - t.dense_dim() - t.sparse_dim()
             if batch_dim > 0:
                 # TODO: implement batch support in _convert_indices_from_csr_to_coo
                 continue
             t = t.clone().detach().requires_grad_(True)
             r = gradcheck(lambda x: torch.Tensor.to_dense(x, masked_grad=gradcheck.masked), t)
             self.assertTrue(r)

     @all_sparse_layouts('from_layout', include_strided=True)
     @all_sparse_layouts('to_layout', include_strided=False)
     @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
     @parametrize("index_dtype", [torch.int32, torch.int64])
     def test_to_sparse(self, from_layout, to_layout, device, dtype, index_dtype):
         """
         This test tests conversion from any layout to any sparse layout.
         """
         for t in self.generate_simple_inputs(
                 from_layout, device=device, dtype=dtype, index_dtype=index_dtype,
                 enable_hybrid=(
                     # TODO: to support conversion strided->hybrid
                     # CSR/CSC/BSR/BSC, to_sparse() requires extra keyword
                     # argument, either nof_batch_dims or
                     # nof_dense_dims
                     not (from_layout is torch.strided and to_layout in
                          {torch.sparse_bsr, torch.sparse_bsc, torch.sparse_csr, torch.sparse_csc}))):

             if to_layout in {torch.sparse_bsr, torch.sparse_bsc}:
                 if from_layout == torch.sparse_bsr:
                     batch_ndim = t.crow_indices().dim() - 1
                     blocksize = t.values().shape[batch_ndim + 1:batch_ndim + 3]
                 elif from_layout == torch.sparse_bsc:
                     batch_ndim = t.ccol_indices().dim() - 1
                     blocksize = t.values().shape[batch_ndim + 1:batch_ndim + 3]
                 else:
                     blocksize = (1, 1)
             else:
                 blocksize = None

             if from_layout is torch.strided:
                 is_batch = None
                 is_hybrid = None
             else:
                 is_batch = t.dim() > (t.sparse_dim() + t.dense_dim())
                 is_hybrid = t.dense_dim() > 0

             def explicit_to_sparse(x):
                 # Used to check that the explicit conversion methods
                 # are consistent with the `to_sparse(*, layout,
                 # blocksize)` method.
                 if to_layout is torch.sparse_coo:
                     return x.to_sparse_coo()
                 elif to_layout is torch.sparse_csr:
                     return x.to_sparse_csr()
                 elif to_layout is torch.sparse_csc:
                     return x.to_sparse_csc()
                 elif to_layout is torch.sparse_bsr:
                     return x.to_sparse_bsr(blocksize)
                 elif to_layout is torch.sparse_bsc:
                     return x.to_sparse_bsc(blocksize)
                 else:
                     assert 0  # unreachable

             # TODO: The following exception cases all correspond to
             # not implemented conversions
             if from_layout in {
                     torch.sparse_csr, torch.sparse_csc} and to_layout in {torch.sparse_bsr, torch.sparse_bsc} and is_batch:
                 with self.assertRaisesRegex(
                         RuntimeError,
                         r"conversion from Sparse(Csr|Csc) to Sparse(Bsr|Bsc) for batched inputs is not supported"):
                     t.to_sparse(layout=to_layout, blocksize=blocksize)
                 with self.assertRaisesRegex(
                         RuntimeError,
                         r"conversion from Sparse(Csr|Csc) to Sparse(Bsr|Bsc) for batched inputs is not supported"):
                     explicit_to_sparse(t)
                 continue
             elif from_layout is torch.sparse_coo and to_layout in {
                     torch.sparse_csr, torch.sparse_csc, torch.sparse_bsr, torch.sparse_bsc} and t.sparse_dim() != 2:
                 with self.assertRaisesRegex(
                         RuntimeError,
                         r"conversion from Sparse to .* for input tensors with sparse_dim\(\)!=2 is not supported"):
                     t.to_sparse(layout=to_layout, blocksize=blocksize)
                 with self.assertRaisesRegex(
                         RuntimeError,
                         r"conversion from Sparse to .* for input tensors with sparse_dim\(\)!=2 is not supported"):
                     explicit_to_sparse(t)
                 continue
             elif (from_layout, to_layout) in {(torch.sparse_bsc, torch.sparse_csr), (torch.sparse_bsc, torch.sparse_csc),
                                               (torch.sparse_bsr, torch.sparse_csr), (torch.sparse_bsr, torch.sparse_csc)}:
                 with self.assertRaisesRegex(
                         RuntimeError,
                         r"sparse_compressed_to_sparse_(csr|csc|bsr|bsc): expected\s*(Sparse(Csc|Csr)[,]|)\s*Sparse(Csr|Bsr)"
                         " or Sparse(Csc|Bsc) layout but got Sparse(Csr|Csc|Bsr|Bsc)"):
                     t.to_sparse(layout=to_layout, blocksize=blocksize)
                 with self.assertRaisesRegex(
                         RuntimeError,
                         r"sparse_compressed_to_sparse_(csr|csc|bsr|bsc): expected\s*(Sparse(Csc|Csr)[,]|)\s*Sparse(Csr|Bsr)"
                         " or Sparse(Csc|Bsc) layout but got Sparse(Csr|Csc|Bsr|Bsc)"):
                     explicit_to_sparse(t)
                 self.skipTest('NOT IMPL')
             else:
                 r = t.to_sparse(layout=to_layout, blocksize=blocksize)

                 self.assertEqual(r.layout, to_layout)

                 # to_sparse method uses unsafe construction of sparse
                 # tensors. Here we explicitly validate the results to
                 # make sure that the sparse tensors are consistent
                 # with the corresponding sparse tensor invariants.
                 if r.layout in {torch.sparse_csr, torch.sparse_bsr, torch.sparse_csc, torch.sparse_bsc}:
                     if r.layout in {torch.sparse_csr, torch.sparse_bsr}:
                         compressed_indices, plain_indices = r.crow_indices(), r.col_indices()
                     else:
                         compressed_indices, plain_indices = r.ccol_indices(), r.row_indices()
                     torch._validate_sparse_compressed_tensor_args(compressed_indices, plain_indices, r.values(),
                                                                   r.shape, r.layout)
                     if from_layout in {torch.strided, torch.sparse_coo}:
                         self.assertEqual(compressed_indices.dtype, torch.int64)
                         self.assertEqual(plain_indices.dtype, torch.int64)
                     else:
                         self.assertEqual(compressed_indices.dtype, index_dtype)
                         self.assertEqual(plain_indices.dtype, index_dtype)
                     self.assertEqual(r.values().dtype, dtype)
                 elif r.layout is torch.sparse_coo:
                     if t.layout is torch.sparse_coo:
                         self.assertEqual(t.is_coalesced(), r.is_coalesced())

                     # Check r is truly coalesced when r.is_coalesced == True
                     if r.is_coalesced():
                         self.assertTrue(is_coalesced_indices(r))

                     torch._validate_sparse_coo_tensor_args(r._indices(), r._values(), r.shape)
                     self.assertEqual(r._indices().dtype, torch.int64)
                     self.assertEqual(r._values().dtype, dtype)
                 else:
                     assert 0  # unreachable

                 # Finally, we'll test tensor equality:
                 self.assertEqual(r, t)

                 # Also, check consistency with explicit conversion methods:
                 r2 = explicit_to_sparse(t)
                 self.assertEqual(r2, r)

                 # Check inverse conversion from sparse compressed block tensors
                 if from_layout == torch.sparse_bsr:
                     batch_ndim = t.crow_indices().dim() - 1
                     from_blocksize = t.values().shape[batch_ndim + 1:batch_ndim + 3]
                 elif from_layout == torch.sparse_bsc:
                     batch_ndim = t.ccol_indices().dim() - 1
                     from_blocksize = t.values().shape[batch_ndim + 1:batch_ndim + 3]
                 else:
                     continue
                 if r.ndim != 2:
                     continue

                 t2 = r.to_sparse(layout=from_layout, blocksize=from_blocksize)
                 self.assertEqual(t2, t)

         # extra tests
         if (from_layout, to_layout) == (torch.sparse_csr, torch.sparse_bsr):
             # See gh-90910
             t = torch.tensor([[0, 0, 1, 0], [0, 1, 0, 0]], dtype=dtype, device=device).to_sparse_csr()
             r = t.to_sparse_bsr((2, 2))
             torch._validate_sparse_compressed_tensor_args(r.crow_indices(), r.col_indices(), r.values(), r.shape, r.layout)
             self.assertEqual(r, t)

         if (from_layout, to_layout) in {(torch.sparse_csr, torch.sparse_csc),
                                         (torch.sparse_csc, torch.sparse_csr)}:
             # See gh-91007
             compressed_indices = torch.tensor([0, 4, 8, 8, 12, 16, 20], dtype=index_dtype, device=device)
             plain_indices = torch.tensor([0, 1, 2, 3] * 5, dtype=index_dtype, device=device)
             t = torch.sparse_compressed_tensor(compressed_indices, plain_indices, range(20),
                                                dtype=dtype, device=device, layout=from_layout)
             r = t.to_sparse(layout=to_layout)
             if r.layout in {torch.sparse_csr, torch.sparse_bsr}:
                 compressed_indices, plain_indices = r.crow_indices(), r.col_indices()
             else:
                 compressed_indices, plain_indices = r.ccol_indices(), r.row_indices()
             torch._validate_sparse_compressed_tensor_args(compressed_indices, plain_indices, r.values(), r.shape, r.layout)
             self.assertEqual(r, t)

     @onlyNativeDeviceTypes
     @suppress_warnings
     @ops(reduction_ops_with_sparse_support)
     @precisionOverride({torch.bfloat16: 5e-4, torch.float16: 5e-3})
     @all_sparse_layouts('layout', include_strided=False)
     def test_reductions(self, layout, device, dtype, op):
         count = 0
         for sample in op.sample_inputs_sparse(layout, device, dtype):
             count += 1

             t_inp, t_args, t_kwargs = sample.input, sample.args, sample.kwargs
             result = op.op(t_inp, *t_args, **t_kwargs)

             #  Checking invariant rop(inp, ...).to_dense() == rop(inp.to_dense(), ...)
             dense = op.op(t_inp.to_dense(), *t_args, **t_kwargs)
             self.assertEqual(result, dense)

         if count == 0:
             # we count samples to avoid false-positive test reports
             self.skipTest('no sample inputs')

     @onlyNativeDeviceTypes
     @suppress_warnings
     @ops(reduction_ops_with_sparse_support, allowed_dtypes=(torch.float32, torch.float64, torch.complex64, torch.complex128))
     @all_sparse_layouts('layout', include_strided=False)
     def test_reductions_backward(self, layout, device, dtype, op):
         count = 0
         for sample in op.sample_inputs_sparse(layout, device, dtype, requires_grad=True):
             t_inp, t_args, t_kwargs = sample.input, sample.args, sample.kwargs
             r = op.op(t_inp, *t_args, **t_kwargs)
             if r.numel() != 0:
                 r = r.sum()

             if op.name == 'sum':
                 count += 1
                 r.abs().backward()
                 self.assertEqual(t_inp.grad, torch.ones(t_inp.shape, dtype=dtype, device=device) * torch.sgn(r))
             else:
                 self.skipTest('NOT IMPL')

         if count == 0:
             # we count samples to avoid false-positive test reports
             self.skipTest('no sample inputs')

     @onlyNativeDeviceTypes
     @suppress_warnings
     @parametrize("mth", [subtest(mth, name=mth.__name__)
                          for mth in [torch.Tensor.is_coalesced,
                                      torch.Tensor.coalesce,
                                      torch.Tensor.indices,
                                      torch.Tensor.values,
                                      torch.Tensor.crow_indices,
                                      torch.Tensor.col_indices,
                                      torch.Tensor.ccol_indices,
                                      torch.Tensor.row_indices,
                                      ]])
     @all_sparse_layouts('layout', include_strided=True)
     def test_unsupported_backend_error_message(self, mth, layout, device):
         inp = torch.tensor([[1, 2], [3, 4]], device=device).to_sparse(
             layout=layout,
             blocksize=(1, 1) if layout in {torch.sparse_bsr, torch.sparse_bsc} else None)
         assert inp.layout is layout

         expected_behaviour = dict(
             # <mth name> = (<supported layouts>, <exception message on other layouts>)
             is_coalesced=({torch.sparse_coo},
                           "is_coalesced expected sparse coordinate tensor layout but got (Sparse(Csr|Csc|Bsr|Bsc)|Strided)"),
             coalesce=({torch.sparse_coo},
                       "coalesce expected sparse coordinate tensor layout but got (Sparse(Csr|Csc|Bsr|Bsc)|Strided)"),
             indices=({torch.sparse_coo},
                      "indices expected sparse coordinate tensor layout but got (Sparse(Csr|Csc|Bsr|Bsc)|Strided)"),
             values=({torch.sparse_coo, torch.sparse_csr, torch.sparse_csc, torch.sparse_bsr, torch.sparse_bsc},
                     "values expected sparse tensor layout but got Strided"),
             crow_indices=({torch.sparse_csr, torch.sparse_bsr},
                           "crow_indices expected sparse row compressed tensor layout but got (Sparse(Csc|Bsc|)|Strided)"),
             col_indices=({torch.sparse_csr, torch.sparse_bsr},
                          "col_indices expected sparse row compressed tensor layout but got (Sparse(Csc|Bsc|)|Strided)"),
             ccol_indices=({torch.sparse_csc, torch.sparse_bsc},
                           "ccol_indices expected sparse column compressed tensor layout but got (Sparse(Csr|Bsr|)|Strided)"),
             row_indices=({torch.sparse_csc, torch.sparse_bsc},
                          "row_indices expected sparse column compressed tensor layout but got (Sparse(Csr|Bsr|)|Strided)"),
         )[mth.__name__]

         if layout in expected_behaviour[0]:
             mth(inp)
         else:
             with self.assertRaisesRegex(RuntimeError, expected_behaviour[1]):
                 mth(inp)

     @onlyNativeDeviceTypes
     @all_sparse_layouts('layout', include_strided=not True)
     @dtypes(torch.float64, torch.cdouble)
     @parametrize("masked", [subtest(False, name='sparse'), subtest(True, name='masked')])
     @parametrize("fast_mode", [subtest(False, name='slow'), subtest(True, name='fast')])
     def test_gradcheck_mm(self, layout, dtype, device, masked, fast_mode):
         # This function does not check the following cases:
         # - batch or hybrid tensors because addmm does not support
         #   such inputs yet
         # - check_forward_ad=True because of the lack of sparse tensor
         #   support in aten::view_as_real, torch._VF._make_dual, etc.

         ref_x = torch.tensor([[1, 2, 0, 0],
                               [0, 6, 0, 0],
                               [0, 0, 0, 0],
                               [13, 14, 0, 15]], dtype=dtype, device=device)
         ref_y = torch.tensor([[11, 12, 13, 14],
                               [21, 22, 23, 24],
                               [31, 32, 33, 34],
                               [41, 42, 43, 44]],
                              dtype=dtype, device=device)

         mm = torch.sparse.mm if masked else torch.mm

         blocksize = (2, 2) if layout in {torch.sparse_bsr, torch.sparse_bsc} else None
         x = ref_x.to_sparse(layout=layout, blocksize=blocksize).requires_grad_(True)
         y = ref_y.requires_grad_(True)

         if layout is torch.sparse_bsr and not masked or layout is torch.sparse_bsc:
             with self.assertRaisesRegex(
                     RuntimeError,
                     r"addmm: computation on (CPU|CUDA) is not implemented for Strided \+ Sparse(Bsr|Bsc) @ Strided"):
                 torch.autograd.gradcheck(mm, (x, y), fast_mode=fast_mode, masked=masked)
             self.skipTest('NOT IMPL')
         elif layout in {torch.sparse_csc, torch.sparse_bsr, torch.sparse_bsc} and masked:
             with self.assertRaisesRegex(
                     RuntimeError,
                     r"(sparse_addmm_sparse_backward: unsupported combination of layouts,"
                     r" grad: Strided, mat1: Sparse(Csc|Bsr|Bsc), mat2: Strided"
                     r"|addmm: computation on (CPU|CUDA) is not implemented for "
                     r"Strided \+ Sparse(Csc|Bsr|Bsc) @ Strided without MKL)"):
                 torch.autograd.gradcheck(mm, (x, y), fast_mode=fast_mode, masked=masked)
             self.skipTest('NOT IMPL')
         else:
             torch.autograd.gradcheck(mm, (x, y), fast_mode=fast_mode, masked=masked)

     @onlyNativeDeviceTypes
     @suppress_warnings
     @ops(binary_ufuncs_with_sparse_support)
     @all_sparse_layouts('layout', include_strided=False)
     def test_binary_operation(self, layout, device, dtype, op):
         if not op.supports_sparse_layout(layout):
             self.skipTest(f'{layout} is not supported in `{op.name}` OpInfo definition. Skipping!')

         for sample in op.sample_inputs_sparse(layout, device, dtype):
             if validate_sample_input_sparse(op, sample, check_validate=False) is not sample:
                 # that is, the validation returns the sparse sample
                 # wrapped within ErrorInput instance
                 continue
             t_inp, t_args, t_kwargs = sample.input, sample.args, sample.kwargs
             batch_dim = t_inp.dim() - t_inp.dense_dim() - t_inp.sparse_dim()
             result = op.op(t_inp, *t_args, **t_kwargs)

             # Check rop(inp, ...).shape == inp.shape
             self.assertEqual(result.shape, t_inp.shape)

             # Check rop(inp, ...).sparse_dim() == inp.sparse_dim()
             self.assertEqual(result.sparse_dim(), t_inp.sparse_dim())

             # Check rop(inp, ...).dense_dim() == inp.dense_dim()
             self.assertEqual(result.dense_dim(), t_inp.dense_dim())

             # Check invariant rop(inp, ...).to_dense() == rop(inp.to_dense(), ...)
             try:
                 dense = op.op(t_inp.to_dense(), *(t_args[0].to_dense(), *t_args[1:]), **t_kwargs)
             except Exception as msg:
                 # this is strided op issue, so skipping the sample silently here
                 if "\"cpublas_axpy_impl\" not implemented for 'ComplexHalf'" in str(msg):
                     continue
                 raise
             self.assertEqual(result, dense)

     @onlyCPU
     @all_sparse_layouts('layout', include_strided=True)
     @dtypes(torch.double)
     def test_to_sparse_identity(self, device, layout, dtype):
         for dense_dim in range(4):
             x_dense = torch.eye(dense_dim, dtype=dtype, device=device)
             for sparse_dim_in in range(1, dense_dim):
                 x_sparse = x_dense.to_sparse(sparse_dim_in)
                 for sparse_dim_out in range(0, dense_dim):
                     if sparse_dim_out == sparse_dim_in:
                         self.assertTrue(x_sparse.to_sparse(sparse_dim_out).sparse_dim() == sparse_dim_out)
                     else:
                         with self.assertRaisesRegex(
                                 RuntimeError,
                                 r"to_sparse: conversion from Sparse to Sparse with sparse_dim argument !=self.sparse_dim\(\)"
                                 " is not supported"):
                             x_sparse.to_sparse(sparse_dim_out)


     @onlyNativeDeviceTypes
     @suppress_warnings
     @ops(like_fns_with_sparse_support)
     @all_sparse_layouts('layout', include_strided=False)
     def test_like_fns(self, layout, device, dtype, op):

         for sample in op.sample_inputs_sparse(layout, device, dtype):
             t_inp, t_args, t_kwargs = sample.input, sample.args, sample.kwargs
             batch_dim = t_inp.dim() - t_inp.dense_dim() - t_inp.sparse_dim()
             if t_inp.layout in {torch.sparse_bsr, torch.sparse_bsc}:
                 expected_blocksize = t_inp.values().shape[batch_dim + 1:batch_dim + 3]
             else:
                 expected_blocksize = None
             expected_dtype = t_kwargs.get('dtype', dtype)
             expected_device = torch.device(t_kwargs.get('device', device))
             expected_layout = t_kwargs.get('layout', layout)

             result = op.op(t_inp, *t_args, **t_kwargs)

             self.assertEqual(result.dtype, expected_dtype)
             self.assertEqual(result.device.type, expected_device.type)
             self.assertEqual(result.layout, expected_layout)

             if result.layout in {torch.sparse_bsr, torch.sparse_bsc}:
                 result_batch_dim = result.dim() - result.dense_dim() - result.sparse_dim()
                 blocksize = result.values().shape[result_batch_dim + 1:result_batch_dim + 3]
                 self.assertEqual(blocksize, expected_blocksize)

             # Check op(inp).shape == inp.shape
             self.assertEqual(result.shape, t_inp.shape)

             if expected_layout is torch.strided:
                 self.assertEqual(result.sparse_dim(), 0)
                 # Check op(inp, layout=torch.strided).dense_dim() == inp.dim()
                 self.assertEqual(result.dense_dim(), t_inp.dim())
             elif expected_layout is torch.sparse_coo:
                 # Check op(inp, layout=torch.sparse_coo).sparse_dim() == batch_dim + inp.sparse_dim()
                 self.assertEqual(result.sparse_dim(), batch_dim + t_inp.sparse_dim())
                 # Check op(inp, layout=torch.sparse_coo).dense_dim() == inp.dense_dim()
                 self.assertEqual(result.dense_dim(), t_inp.dense_dim())

                 torch._validate_sparse_coo_tensor_args(result._indices(), result._values(), result.shape)
             else:
                 # Check op(inp).sparse_dim() == inp.sparse_dim()
                 self.assertEqual(result.sparse_dim(), t_inp.sparse_dim())
                 # Check op(inp).dense_dim() == inp.dense_dim()
                 self.assertEqual(result.dense_dim(), t_inp.dense_dim())

                 if result.layout in {torch.sparse_csr, torch.sparse_bsr}:
                     compressed_indices, plain_indices = result.crow_indices(), result.col_indices()
                 else:
                     compressed_indices, plain_indices = result.ccol_indices(), result.row_indices()

                 torch._validate_sparse_compressed_tensor_args(compressed_indices, plain_indices, result.values(),
                                                               result.shape, result.layout)

     @all_sparse_layouts('mask_layout', include_strided=False)
     @onlyNativeDeviceTypes
     @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
     def test_sparse_mask(self, mask_layout, device, dtype):
         input_layout = torch.strided
         mask_dtype = torch.bool
         for mask in self.generate_simple_inputs(mask_layout, dtype=mask_dtype, device=device,
                                                 enable_hybrid=False, enable_batch=False):

             x = make_tensor(mask.shape, dtype=dtype, device=device).to_sparse(layout=input_layout)

             result = x.sparse_mask(mask)

             # Check invariant `x.sparse_mask(mask).<indices> == mask.<indices>`
             if mask_layout is torch.sparse_coo:
                 self.assertEqual(result._indices(), mask._indices())
                 ones = torch.sparse_coo_tensor(mask._indices(),
                                                torch.ones_like(mask._values(), dtype=x.dtype),
                                                mask.shape,
                                                is_coalesced=mask.is_coalesced())
             elif mask_layout in {torch.sparse_csr, torch.sparse_bsr}:
                 self.assertEqual(result.crow_indices(), mask.crow_indices())
                 self.assertEqual(result.col_indices(), mask.col_indices())
                 ones = torch.sparse_compressed_tensor(mask.crow_indices(), mask.col_indices(),
                                                       torch.ones_like(mask.values(), dtype=x.dtype),
                                                       mask.shape, layout=mask.layout)
             else:
                 self.assertEqual(result.ccol_indices(), mask.ccol_indices())
                 self.assertEqual(result.row_indices(), mask.row_indices())
                 ones = torch.sparse_compressed_tensor(mask.ccol_indices(), mask.row_indices(),
                                                       torch.ones_like(mask.values(), dtype=x.dtype),
                                                       mask.shape, layout=mask.layout)

             # Check invariant:
             #  x.sparse_mask(mask).to_dense() == x.mul(sparse_xyz_tensor(<mask indices>,
             #                                          ones_like(<mask values>)).to_dense())
             expected = x.mul(ones.to_dense())

             self.assertEqual(result.to_dense(), expected)

             # Check invariant `mask.to_dense().sparse_mask(mask) == mask`
             result = mask.to_dense().sparse_mask(mask)
             self.assertEqual(result, mask)

     @all_sparse_layouts('layout', include_strided=False)
     @parametrize("masked", [subtest(False, name='nonmasked'), subtest(True, name='masked')])
     @parametrize("fast_mode", [subtest(False, name='slow'), subtest(True, name='fast')])
     def test_as_sparse_gradcheck(self, layout, device, masked, fast_mode):
         gradcheck = torch.sparse.as_sparse_gradcheck(torch.autograd.gradcheck)
         sparse_compressed_layouts = {torch.sparse_csr, torch.sparse_csc, torch.sparse_bsr, torch.sparse_bsc}

         def identity(x):
             return x

         for func in (torch.Tensor.to_dense,
                      torch.Tensor.sum,
                      identity,
                      torch.Tensor.to_sparse,
                      torch.Tensor.values,
                      ):
             for x in self.generate_simple_inputs(
                     layout,
                     device=device,
                     dtype=torch.float64,
                     # TODO: fix gh-104868  to enable batched samples:
                     enable_batch=layout not in sparse_compressed_layouts,
                     enable_hybrid=not (
                         layout in sparse_compressed_layouts and (
                             # FIXME: RuntimeError: sparse_mask(): the
                             # number of sparse dimensions in `self`
                             # should match that of the `mask`. Got
                             # `self.sparse_dim() == 3` !=
                             # `mask.sparse_dim() == 2
                             func.__name__ == 'sum'
                             # FIXME: RuntimeError: expected
                             # col_indices to be a contiguous tensor
                             # per batch
                             or func.__name__ == 'to_sparse'
                         ))):
                 if layout is torch.sparse_coo and func.__name__ == 'values':
                     x = x.coalesce()

                 gradcheck(func, x.requires_grad_(True), masked=masked, fast_mode=fast_mode)

     @onlyCPU
     @all_sparse_layouts('layout', include_strided=False)
     @dtypes(torch.double)
     def test_dataloader(self, device, layout, dtype):

         data = list(self.generate_simple_inputs(layout, device=device, dtype=dtype))

         dataset = _SparseDataset(data)
         loader = torch.utils.data.DataLoader(dataset, batch_size=None, num_workers=2)

         loaded_data = list(loader)
         self.assertEqual(data, loaded_data)

     @onlyCPU
     def test_invalid_blocksize(self):
         # Blocksize should be a tuple/list/torch.Size containing two values
         with self.assertRaisesRegex(RuntimeError, ".*blocksize.*, but got 1"):
             torch.randn(1).to_sparse(blocksize=(1,))
         with self.assertRaisesRegex(RuntimeError, ".*blocksize.*, but got 1"):
             torch.randn(1).to_sparse(blocksize=[1])
         with self.assertRaisesRegex(RuntimeError, ".*blocksize.*, but got 1"):
             torch.randn(1).to_sparse(blocksize=torch.Size((1,)))
         with self.assertRaisesRegex(RuntimeError, ".*blocksize.*, but got 3"):
             torch.randn(1).to_sparse(blocksize=(1, 1, 1))
         with self.assertRaisesRegex(RuntimeError, ".*blocksize.*, but got 3"):
             torch.randn(1).to_sparse(blocksize=[1, 1, 1])
         with self.assertRaisesRegex(RuntimeError, ".*blocksize.*, but got 3"):
             torch.randn(1).to_sparse(blocksize=torch.Size((1, 1, 1)))

     @unittest.skipIf(not torch.cuda.is_available(), 'requires cuda')
     @onlyCPU
     @all_sparse_layouts('layout', include_strided=True)
     def test_constructor_pin_memory(self, device, layout):
         """Tests sparse_xyz_tensor(indices, values, pin_memory=True)
         """
         self.assertEqual(device, "cpu")
         for t in self.generate_simple_inputs(
                 layout, device=device, dtype=torch.float64,
                 enable_zero_sized=False,  # pinning zero-sized tensors is a no-op
                 pin_memory=True,
                 enable_batch=False,  # TODO: remove after gh-104868 is resolved
         ):
             if layout is torch.sparse_coo:
                 self.assertTrue(t._indices().is_pinned())
                 self.assertTrue(t._values().is_pinned())
             elif layout in {torch.sparse_csr, torch.sparse_bsr}:
                 self.assertTrue(t.crow_indices().is_pinned())
                 self.assertTrue(t.col_indices().is_pinned())
                 self.assertTrue(t.values().is_pinned())
             elif layout in {torch.sparse_csc, torch.sparse_bsc}:
                 self.assertTrue(t.ccol_indices().is_pinned())
                 self.assertTrue(t.row_indices().is_pinned())
                 self.assertTrue(t.values().is_pinned())
             elif layout is torch.strided:
                 pass
             else:
                 assert 0  # unreachable
             self.assertTrue(t.is_pinned())

     @unittest.skipIf(not torch.cuda.is_available(), 'requires cuda')
     @onlyCPU
     @all_sparse_layouts('layout', include_strided=True)
     def test_method_pin_memory(self, device, layout):
         """Tests sparse_xyz_tensor(indices, values, pin_memory=False).pin_memory()
         """

         for t_ in self.generate_simple_inputs(
                 layout, device=device, dtype=torch.float64,
                 enable_zero_sized=False,  # pinning zero-sized tensors is a no-op
                 pin_memory=False,         # no pinning
                 enable_batch=False,  # TODO: remove after gh-104868 is resolved
         ):
             t = t_.pin_memory()
             self.assertTrue(t.is_pinned())

             # registering a non-pinned tensor with CUDA memory is a
             # clone operation
             self.assertFalse(t_.is_pinned())

             # registering already pinned tensor with CUDA memory is an
             # identity operation:
             t2 = t.pin_memory()
             self.assertTrue(t2 is t)

             if layout is torch.sparse_coo:
                 self.assertTrue(t._indices().is_pinned())
                 self.assertTrue(t._values().is_pinned())
                 self.assertFalse(t_._indices().is_pinned())
                 self.assertFalse(t_._values().is_pinned())
             elif layout in {torch.sparse_csr, torch.sparse_bsr}:
                 self.assertTrue(t.crow_indices().is_pinned())
                 self.assertTrue(t.col_indices().is_pinned())
                 self.assertTrue(t.values().is_pinned())
                 self.assertFalse(t_.crow_indices().is_pinned())
                 self.assertFalse(t_.col_indices().is_pinned())
                 self.assertFalse(t_.values().is_pinned())
             elif layout in {torch.sparse_csc, torch.sparse_bsc}:
                 self.assertTrue(t.ccol_indices().is_pinned())
                 self.assertTrue(t.row_indices().is_pinned())
                 self.assertTrue(t.values().is_pinned())
                 self.assertFalse(t_.ccol_indices().is_pinned())
                 self.assertFalse(t_.row_indices().is_pinned())
                 self.assertFalse(t_.values().is_pinned())
             elif layout is torch.strided:
                 pass
             else:
                 assert 0  # unreachable


     @unittest.skipIf(not torch.cuda.is_available(), 'requires cuda')
     @onlyCPU
     @all_sparse_layouts('layout', include_strided=True)
     def test_constructor_pinned_memory(self, device, layout):
         """Tests sparse_xyz_tensor(indices.pin_memory(device), values.pin_memory(device))
         """
         pin_memory_device = "cuda"
         for t in self.generate_simple_inputs(
                 layout, device=device, dtype=torch.float64,
                 enable_zero_sized=False,     # pinning zero-sized tensors is a no-op
                 pin_memory=None,             # constructor does not specify pin_memory=...
                 members_pin_memory=True,     # indices and values are pinned
                 enable_batch=False,          # TODO: remove after gh-104868 is resolved
         ):
             if layout is torch.sparse_coo:
                 self.assertTrue(t._indices().is_pinned())
                 self.assertTrue(t._values().is_pinned())
             elif layout in {torch.sparse_csr, torch.sparse_bsr}:
                 self.assertTrue(t.crow_indices().is_pinned())
                 self.assertTrue(t.col_indices().is_pinned())
                 self.assertTrue(t.values().is_pinned())
             elif layout in {torch.sparse_csc, torch.sparse_bsc}:
                 self.assertTrue(t.ccol_indices().is_pinned())
                 self.assertTrue(t.row_indices().is_pinned())
                 self.assertTrue(t.values().is_pinned())
             elif layout is torch.strided:
                 pass
             else:
                 assert 0  # unreachable
             self.assertTrue(t.is_pinned())

     @unittest.skipIf(not torch.cuda.is_available(), 'requires cuda')
     @onlyCPU
     @all_sparse_layouts('layout', include_strided=False)
     def test_constructor_mismatched_pinned_memory(self, device, layout):
         """Test the failure to construct sparse tensor from indices and values
         that have different pinning states.
         """
         def generic_constructor(*args, **kwargs):
             if layout in {torch.sparse_csr, torch.sparse_csc, torch.sparse_bsr, torch.sparse_bsc}:
                 kwargs.update(layout=layout)
                 return torch.sparse_compressed_tensor(*args, **kwargs)
             elif layout is torch.sparse_coo:
                 return torch.sparse_coo_tensor(*args, **kwargs)
             else:
                 raise NotImplementedError(layout)

         for args, kwargs in self.generate_simple_inputs(
                 layout, device=device, dtype=torch.float64,
                 enable_zero_sized=False,     # pinning zero-sized tensors is a no-op
                 enable_batch=False,  # TODO: remove after gh-104868 is resolved
                 output_tensor=False):

             # indices are pinned, values is a non-pinned tensor
             args1 = (args[0].pin_memory(), *args[1:])

             # indices are non-pinned, values is a pinned tensor
             args2 = (*args[:-1], args[-1].pin_memory())

             with self.assertRaisesRegex(
                     RuntimeError, r"memory pinning of \w*indices \(=1\) must match memory pinning of values \(=0\)"):
                 generic_constructor(*args1, **kwargs)

             with self.assertRaisesRegex(
                     RuntimeError, r"memory pinning of \w*indices \(=0\) must match memory pinning of values \(=1\)"):
                 generic_constructor(*args2, **kwargs)


 # e.g., TestSparseUnaryUfuncsCPU and TestSparseUnaryUfuncsCUDA
 instantiate_device_type_tests(TestSparseUnaryUfuncs, globals(), except_for='meta')

 instantiate_device_type_tests(TestSparseMaskedReductions, globals(), except_for='meta')

 # e.g., TestSparseCPU and TestSparseCUDA
 instantiate_device_type_tests(TestSparse, globals(), except_for='meta')

 instantiate_device_type_tests(TestSparseAny, globals(), except_for='meta')

 instantiate_parametrized_tests(TestSparseMeta)

 instantiate_parametrized_tests(TestSparseLegacyAndDeprecation)

 if __name__ == '__main__':
     run_tests()