| # Owner(s): ["module: unknown"] |
| |
| import unittest |
| |
| import torch |
| import torch.testing._internal.common_utils as common |
| from torch.testing._internal.common_cuda import ( |
| TEST_CUDA, |
| TEST_MULTIGPU, |
| TEST_NUMBA_CUDA, |
| ) |
| from torch.testing._internal.common_utils import TEST_NUMPY |
| |
| |
| if TEST_NUMPY: |
| import numpy |
| |
| if TEST_NUMBA_CUDA: |
| import numba.cuda |
| |
| |
| class TestNumbaIntegration(common.TestCase): |
| @unittest.skipIf(not TEST_NUMPY, "No numpy") |
| @unittest.skipIf(not TEST_CUDA, "No cuda") |
| def test_cuda_array_interface(self): |
| """torch.Tensor exposes __cuda_array_interface__ for cuda tensors. |
| |
| An object t is considered a cuda-tensor if: |
| hasattr(t, '__cuda_array_interface__') |
| |
| A cuda-tensor provides a tensor description dict: |
| shape: (integer, ...) Tensor shape. |
| strides: (integer, ...) Tensor strides, in bytes. |
| typestr: (str) A numpy-style typestr. |
| data: (int, boolean) A (data_ptr, read-only) tuple. |
version: (int) Version 2
| |
| See: |
| https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html |
| """ |
| |
| types = [ |
| torch.DoubleTensor, |
| torch.FloatTensor, |
| torch.HalfTensor, |
| torch.LongTensor, |
| torch.IntTensor, |
| torch.ShortTensor, |
| torch.CharTensor, |
| torch.ByteTensor, |
| ] |
| dtypes = [ |
| numpy.float64, |
| numpy.float32, |
| numpy.float16, |
| numpy.int64, |
| numpy.int32, |
| numpy.int16, |
| numpy.int8, |
| numpy.uint8, |
| ] |
| for tp, npt in zip(types, dtypes): |
| # CPU tensors do not implement the interface. |
| cput = tp(10) |
| |
| self.assertFalse(hasattr(cput, "__cuda_array_interface__")) |
| self.assertRaises(AttributeError, lambda: cput.__cuda_array_interface__) |
| |
| # Sparse CPU/CUDA tensors do not implement the interface |
| if tp not in (torch.HalfTensor,): |
| indices_t = torch.empty(1, cput.size(0), dtype=torch.long).clamp_(min=0) |
| sparse_t = torch.sparse_coo_tensor(indices_t, cput) |
| |
| self.assertFalse(hasattr(sparse_t, "__cuda_array_interface__")) |
| self.assertRaises( |
| AttributeError, lambda: sparse_t.__cuda_array_interface__ |
| ) |
| |
| sparse_cuda_t = torch.sparse_coo_tensor(indices_t, cput).cuda() |
| |
| self.assertFalse(hasattr(sparse_cuda_t, "__cuda_array_interface__")) |
| self.assertRaises( |
| AttributeError, lambda: sparse_cuda_t.__cuda_array_interface__ |
| ) |
| |
| # CUDA tensors have the attribute and v2 interface |
| cudat = tp(10).cuda() |
| |
| self.assertTrue(hasattr(cudat, "__cuda_array_interface__")) |
| |
| ar_dict = cudat.__cuda_array_interface__ |
| |
| self.assertEqual( |
| set(ar_dict.keys()), {"shape", "strides", "typestr", "data", "version"} |
| ) |
| |
| self.assertEqual(ar_dict["shape"], (10,)) |
self.assertIsNone(ar_dict["strides"])
| # typestr from numpy, cuda-native little-endian |
| self.assertEqual(ar_dict["typestr"], numpy.dtype(npt).newbyteorder("<").str) |
| self.assertEqual(ar_dict["data"], (cudat.data_ptr(), False)) |
| self.assertEqual(ar_dict["version"], 2) |
| |
| @unittest.skipIf(not TEST_CUDA, "No cuda") |
| @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda") |
| def test_array_adaptor(self): |
| """Torch __cuda_array_adaptor__ exposes tensor data to numba.cuda.""" |
| |
| torch_dtypes = [ |
| torch.complex64, |
| torch.complex128, |
| torch.float16, |
| torch.float32, |
| torch.float64, |
| torch.uint8, |
| torch.int8, |
| torch.uint16, |
| torch.int16, |
| torch.uint32, |
| torch.int32, |
| torch.uint64, |
| torch.int64, |
| torch.bool, |
| ] |
| |
| for dt in torch_dtypes: |
# CPU tensors of any dtype do not register as cuda arrays, and
# attempts to convert them raise a TypeError.
| cput = torch.arange(10).to(dt) |
| npt = cput.numpy() |
| |
self.assertFalse(numba.cuda.is_cuda_array(cput))
| with self.assertRaises(TypeError): |
| numba.cuda.as_cuda_array(cput) |
| |
| # Any cuda tensor is a cuda array. |
| cudat = cput.to(device="cuda") |
| self.assertTrue(numba.cuda.is_cuda_array(cudat)) |
| |
| numba_view = numba.cuda.as_cuda_array(cudat) |
| self.assertIsInstance(numba_view, numba.cuda.devicearray.DeviceNDArray) |
| |
| # The reported type of the cuda array matches the numpy type of the cpu tensor. |
| self.assertEqual(numba_view.dtype, npt.dtype) |
| self.assertEqual(numba_view.strides, npt.strides) |
| self.assertEqual(numba_view.shape, cudat.shape) |
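# Like numpy, numba reports strides in bytes: a contiguous 1-D float64
# array of shape (10,) has strides (8,), and its [::2] slice has (16,).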
| |
# For the equality checks below, copy the numba view back to the host
# and then to CUDA; float16 comparisons aren't supported CPU-side.
| |
| # The data is identical in the view. |
| self.assertEqual(cudat, torch.tensor(numba_view.copy_to_host()).to("cuda")) |
| |
| # Writes to the torch.Tensor are reflected in the numba array. |
| cudat[:5] = 11 |
| self.assertEqual(cudat, torch.tensor(numba_view.copy_to_host()).to("cuda")) |
| |
| # Strided tensors are supported. |
| strided_cudat = cudat[::2] |
| strided_npt = cput[::2].numpy() |
| strided_numba_view = numba.cuda.as_cuda_array(strided_cudat) |
| |
| self.assertEqual(strided_numba_view.dtype, strided_npt.dtype) |
| self.assertEqual(strided_numba_view.strides, strided_npt.strides) |
| self.assertEqual(strided_numba_view.shape, strided_cudat.shape) |
| |
| # As of numba 0.40.0 support for strided views is ...limited... |
| # Cannot verify correctness of strided view operations. |
| |
| @unittest.skipIf(not TEST_CUDA, "No cuda") |
| @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda") |
| def test_conversion_errors(self): |
| """Numba properly detects array interface for tensor.Tensor variants.""" |
| |
| # CPU tensors are not cuda arrays. |
| cput = torch.arange(100) |
| |
| self.assertFalse(numba.cuda.is_cuda_array(cput)) |
| with self.assertRaises(TypeError): |
| numba.cuda.as_cuda_array(cput) |
| |
| # Sparse tensors are not cuda arrays, regardless of device. |
| sparset = torch.sparse_coo_tensor(cput[None, :], cput) |
| |
| self.assertFalse(numba.cuda.is_cuda_array(sparset)) |
| with self.assertRaises(TypeError): |
| numba.cuda.as_cuda_array(sparset) |
| |
sparse_cuda_t = sparset.cuda()

self.assertFalse(numba.cuda.is_cuda_array(sparse_cuda_t))
with self.assertRaises(TypeError):
numba.cuda.as_cuda_array(sparse_cuda_t)
| |
| # Device-status overrides gradient status. |
| # CPU+gradient isn't a cuda array. |
| cpu_gradt = torch.zeros(100).requires_grad_(True) |
| |
| self.assertFalse(numba.cuda.is_cuda_array(cpu_gradt)) |
| with self.assertRaises(TypeError): |
| numba.cuda.as_cuda_array(cpu_gradt) |
| |
# CUDA+gradient raises a RuntimeError on both check and conversion.
#
# Note: in Python 2, hasattr swallowed all exceptions, not just
# AttributeError, so hasattr-based interface detection behaved differently there.
| cuda_gradt = torch.zeros(100).requires_grad_(True).cuda() |
| |
# Both the availability check and the conversion raise RuntimeError.
| with self.assertRaises(RuntimeError): |
| numba.cuda.is_cuda_array(cuda_gradt) |
| with self.assertRaises(RuntimeError): |
| numba.cuda.as_cuda_array(cuda_gradt) |
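# If the gradient isn't needed, detaching first should allow the export,
# since only the autograd-tracking tensor is rejected, e.g.:
#   numba.cuda.as_cuda_array(cuda_gradt.detach())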
| |
| @unittest.skipIf(not TEST_CUDA, "No cuda") |
| @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda") |
| @unittest.skipIf(not TEST_MULTIGPU, "No multigpu") |
| def test_active_device(self): |
| """'as_cuda_array' tensor device must match active numba context.""" |
| |
| # Both torch/numba default to device 0 and can interop freely |
| cudat = torch.arange(10, device="cuda") |
| self.assertEqual(cudat.device.index, 0) |
| self.assertIsInstance( |
| numba.cuda.as_cuda_array(cudat), numba.cuda.devicearray.DeviceNDArray |
| ) |
| |
| # Tensors on non-default device raise api error if converted |
| cudat = torch.arange(10, device=torch.device("cuda", 1)) |
| |
| with self.assertRaises(numba.cuda.driver.CudaAPIError): |
| numba.cuda.as_cuda_array(cudat) |
| |
| # but can be converted when switching to the device's context |
| with numba.cuda.devices.gpus[cudat.device.index]: |
| self.assertIsInstance( |
| numba.cuda.as_cuda_array(cudat), numba.cuda.devicearray.DeviceNDArray |
| ) |
| |
| @unittest.skip( |
| "Test is temporary disabled, see https://github.com/pytorch/pytorch/issues/54418" |
| ) |
| @unittest.skipIf(not TEST_NUMPY, "No numpy") |
| @unittest.skipIf(not TEST_CUDA, "No cuda") |
| @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda") |
| def test_from_cuda_array_interface(self): |
| """torch.as_tensor() and torch.tensor() supports the __cuda_array_interface__ protocol. |
| |
| If an object exposes the __cuda_array_interface__, .as_tensor() and .tensor() |
| will use the exposed device memory. |
| |
| See: |
| https://numba.pydata.org/numba-doc/latest/cuda/cuda_array_interface.html |
| """ |
| |
| dtypes = [ |
| numpy.complex64, |
| numpy.complex128, |
| numpy.float64, |
| numpy.float32, |
| numpy.int64, |
| numpy.int32, |
| numpy.int16, |
| numpy.int8, |
| numpy.uint8, |
| ] |
| for dtype in dtypes: |
| numpy_arys = [ |
| numpy.ones((), dtype=dtype), |
| numpy.arange(6).reshape(2, 3).astype(dtype), |
| numpy.arange(6) |
| .reshape(2, 3) |
| .astype(dtype)[1:], # View offset should be ignored |
| numpy.arange(6) |
| .reshape(2, 3) |
.astype(dtype)[:, None],  # changes the strides but remains contiguous
| ] |
| # Zero-copy when using `torch.as_tensor()` |
| for numpy_ary in numpy_arys: |
| numba_ary = numba.cuda.to_device(numpy_ary) |
| torch_ary = torch.as_tensor(numba_ary, device="cuda") |
| self.assertEqual( |
| numba_ary.__cuda_array_interface__, |
| torch_ary.__cuda_array_interface__, |
| ) |
| self.assertEqual( |
| torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary, dtype=dtype) |
| ) |
| |
# Check that `torch_ary` and `numba_ary` point to the same device memory
| torch_ary += 42 |
| self.assertEqual( |
| torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary, dtype=dtype) |
| ) |
| |
# Implicit-copy because `torch_ary` is a CPU tensor
| for numpy_ary in numpy_arys: |
| numba_ary = numba.cuda.to_device(numpy_ary) |
| torch_ary = torch.as_tensor(numba_ary, device="cpu") |
| self.assertEqual( |
| torch_ary.data.numpy(), numpy.asarray(numba_ary, dtype=dtype) |
| ) |
| |
# Check that `torch_ary` and `numba_ary` point to different memory
| torch_ary += 42 |
| self.assertEqual( |
| torch_ary.data.numpy(), numpy.asarray(numba_ary, dtype=dtype) + 42 |
| ) |
| |
| # Explicit-copy when using `torch.tensor()` |
| for numpy_ary in numpy_arys: |
| numba_ary = numba.cuda.to_device(numpy_ary) |
| torch_ary = torch.tensor(numba_ary, device="cuda") |
| self.assertEqual( |
| torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary, dtype=dtype) |
| ) |
| |
# Check that `torch_ary` and `numba_ary` point to different memory
| torch_ary += 42 |
| self.assertEqual( |
| torch_ary.cpu().data.numpy(), |
| numpy.asarray(numba_ary, dtype=dtype) + 42, |
| ) |
| |
| @unittest.skipIf(not TEST_NUMPY, "No numpy") |
| @unittest.skipIf(not TEST_CUDA, "No cuda") |
| @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda") |
| def test_from_cuda_array_interface_inferred_strides(self): |
| """torch.as_tensor(numba_ary) should have correct inferred (contiguous) strides""" |
| # This could, in theory, be combined with test_from_cuda_array_interface but that test |
| # is overly strict: it checks that the exported protocols are exactly the same, which |
| # cannot handle differing exported protocol versions. |
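# For instance, when an exporter sets strides=None for a C-contiguous
# (2, 3) float64 array, the consumer must infer byte strides of (24, 8).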
| dtypes = [ |
| numpy.float64, |
| numpy.float32, |
| numpy.int64, |
| numpy.int32, |
| numpy.int16, |
| numpy.int8, |
| numpy.uint8, |
| ] |
| for dtype in dtypes: |
| numpy_ary = numpy.arange(6).reshape(2, 3).astype(dtype) |
| numba_ary = numba.cuda.to_device(numpy_ary) |
| self.assertTrue(numba_ary.is_c_contiguous()) |
| torch_ary = torch.as_tensor(numba_ary, device="cuda") |
| self.assertTrue(torch_ary.is_contiguous()) |
| |
| @unittest.skip( |
| "Test is temporary disabled, see https://github.com/pytorch/pytorch/issues/54418" |
| ) |
| @unittest.skipIf(not TEST_NUMPY, "No numpy") |
| @unittest.skipIf(not TEST_CUDA, "No cuda") |
| @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda") |
| def test_from_cuda_array_interface_lifetime(self): |
| """torch.as_tensor(obj) tensor grabs a reference to obj so that the lifetime of obj exceeds the tensor""" |
| numba_ary = numba.cuda.to_device(numpy.arange(6)) |
| torch_ary = torch.as_tensor(numba_ary, device="cuda") |
| self.assertEqual( |
| torch_ary.__cuda_array_interface__, numba_ary.__cuda_array_interface__ |
| ) # No copy |
| del numba_ary |
| self.assertEqual( |
| torch_ary.cpu().data.numpy(), numpy.arange(6) |
| ) # `torch_ary` is still alive |
| |
| @unittest.skip( |
| "Test is temporary disabled, see https://github.com/pytorch/pytorch/issues/54418" |
| ) |
| @unittest.skipIf(not TEST_NUMPY, "No numpy") |
| @unittest.skipIf(not TEST_CUDA, "No cuda") |
| @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda") |
| @unittest.skipIf(not TEST_MULTIGPU, "No multigpu") |
| def test_from_cuda_array_interface_active_device(self): |
| """torch.as_tensor() tensor device must match active numba context.""" |
| |
| # Zero-copy: both torch/numba default to device 0 and can interop freely |
| numba_ary = numba.cuda.to_device(numpy.arange(6)) |
| torch_ary = torch.as_tensor(numba_ary, device="cuda") |
| self.assertEqual(torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary)) |
| self.assertEqual( |
| torch_ary.__cuda_array_interface__, numba_ary.__cuda_array_interface__ |
| ) |
| |
# Implicit-copy: when the Numba and Torch devices differ
| numba_ary = numba.cuda.to_device(numpy.arange(6)) |
| torch_ary = torch.as_tensor(numba_ary, device=torch.device("cuda", 1)) |
| self.assertEqual(torch_ary.get_device(), 1) |
| self.assertEqual(torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary)) |
| if1 = torch_ary.__cuda_array_interface__ |
| if2 = numba_ary.__cuda_array_interface__ |
| self.assertNotEqual(if1["data"], if2["data"]) |
| del if1["data"] |
| del if2["data"] |
| self.assertEqual(if1, if2) |
| |
| |
| if __name__ == "__main__": |
| common.run_tests() |