| import torch |
| from pathlib import Path |
| from scipy import sparse |
| import math |
| |
| |
| def to_coo_scipy(x): |
| indices_1 = x._indices().numpy() |
| values_1 = x._values().numpy() |
| return sparse.coo_matrix((values_1, (indices_1[0], indices_1[1])), |
| shape=x.shape) |
| |
| |
| def sparse_grad_output(a, b): |
| c = torch.sparse.mm(a, b) |
| if c.is_sparse: |
| c2 = torch.rand_like(c.to_dense()) |
| return c2.sparse_mask(c.coalesce()) |
| else: |
| return torch.rand_like(c) |
| |
| |
| def read_matrix_params(path): |
| with open(path, 'r') as file: |
| line = file.readline() |
| nrows, ncols, nnz = map(lambda el: int(el), line.split(', ')) |
| return (nrows, ncols), nnz |
| |
| |
| def csr_to_coo(indices, indptr, shape): |
| n_rows, n_cols = shape |
| cols = indices |
| rows = [0] * len(cols) |
| for i in range(n_rows): |
| for j in range(indptr[i], indptr[i + 1]): |
| rows[j] = i |
| return torch.tensor([rows, cols], dtype=torch.long) |
| |
| |
| def load_sparse_matrix(path, device): |
| with open(path, 'r') as file: |
| nrows, ncols, nnz = map(lambda el: int(el), file.readline().split(', ')) |
| index_pointers = map(lambda el: int(el), file.readline().split()) |
| indices = map(lambda el: int(el), file.readline().split()) |
| |
| index_pointers = list(index_pointers) |
| indices = list(indices) |
| data = torch.randn(nnz, dtype=torch.double) |
| shape = (nrows, ncols) |
| return torch.sparse_coo_tensor(csr_to_coo(indices, index_pointers, shape), data, shape, device=device) |
| |
| |
| def gen_vector(path, device): |
| with open(path, 'r') as file: |
| nrows, ncols, nnz = map(lambda el: int(el), file.readline().split(', ')) |
| index_pointers = map(lambda el: int(el), file.readline().split()) |
| indices = map(lambda el: int(el), file.readline().split()) |
| return torch.randn(nrows, dtype=torch.double, device=device) |
| |
| |
| def gen_matrix(path, device): |
| with open(path, 'r') as file: |
| nrows, ncols, nnz = map(lambda el: int(el), file.readline().split(', ')) |
| index_pointers = map(lambda el: int(el), file.readline().split()) |
| indices = map(lambda el: int(el), file.readline().split()) |
| return torch.randn(nrows, ncols, dtype=torch.double, device=device) |
| |
| |
| def load_spmv_dataset(dataset_path, hidden_size, sparsity, device, n_limit=math.inf): |
| """load_spmv_dataset loads a DLMC dataset for a sparse matrix-vector multiplication (SPMV) performance test. |
| Args: |
| dataset_path: |
| path of the dataset from DLMC collection. |
| hidden_size |
| This value allows tensors of varying sizes. |
| sparsity: |
| This value allows tensors of varying sparsities. |
| device: |
| Whether to place the Tensor on a GPU or CPU. |
| n_limit: |
| This value allows a dataset with some limit size. |
| """ |
| current_folder_path = f"{dataset_path}/{sparsity}" |
| path = Path(current_folder_path) |
| files = path.glob('**/*.smtx') |
| print(dataset_path, hidden_size, sparsity) |
| index = 0 |
| x_files, y_files = [], [] |
| for f in files: |
| if index >= n_limit: |
| break |
| print('.', end='') |
| size, nnz = read_matrix_params(f.as_posix()) |
| if size[1] == hidden_size: |
| x_files.append(f.as_posix()) |
| if size[0] == hidden_size: |
| y_files.append(f.as_posix()) |
| index += 1 |
| print() |
| |
| for fx, fy in zip(x_files, y_files): |
| x = load_sparse_matrix(fx, device) |
| y = gen_vector(fy, device) |
| yield (x, y) |
| |
| |
| def load_spmm_dataset(dataset_path, hidden_size, sparsity, spmm_type, device, n_limit=math.inf): |
| """load_spmm_dataset loads a DLMC dataset for a sparse matrix-matrix multiplication (SPMM) performance test. |
| Args: |
| dataset_path: |
| path of the dataset from DLMC collection. |
| hidden_size |
| This value allows tensors of varying sizes. |
| sparsity: |
| This value allows tensors of varying sparsities. |
| spmm_type: |
| This value allows tensors for `sparse@sparse` or `sparse@dense` operations. |
| device: |
| Whether to place the Tensor on a GPU or CPU. |
| n_limit: |
| This value allows a dataset with some limit size. |
| """ |
| current_folder_path = f"{dataset_path}/{sparsity}" |
| path = Path(current_folder_path) |
| files = path.glob('**/*.smtx') |
| print(dataset_path, hidden_size, sparsity) |
| index = 0 |
| x_files, y_files = [], [] |
| for f in files: |
| if index >= n_limit: |
| break |
| print('.', end='') |
| size, nnz = read_matrix_params(f.as_posix()) |
| if size[1] == hidden_size: |
| x_files.append(f.as_posix()) |
| if size[0] == hidden_size: |
| y_files.append(f.as_posix()) |
| index += 1 |
| print() |
| |
| for fx, fy in zip(x_files, y_files): |
| x = load_sparse_matrix(fx, device) |
| y = gen_matrix(fy, device) if spmm_type == 'sparse@dense' else load_sparse_matrix(fy, device) |
| yield (x, y) |
| |
| |
| def load_dlmc_dataset(dataset_path, operation, hidden_size, sparsity, device, requires_grad, n_limit=math.inf): |
| """load_dlmc_dataset loads a DLMC dataset for a matmul performance test. |
| Args: |
| dataset_path: |
| path of the dataset from DLMC collection. |
| operation: |
| This value allows tensors for `sparse@sparse`|`sparse@dense`|`sparse@vector` operations. |
| hidden_size |
| This value allows tensors of varying sizes. |
| sparsity: |
| This value allows tensors of varying sparsities. |
| device: |
| Whether to place the Tensor on a GPU or CPU. |
| requires_grad: |
| Loads the dataset for backward test. |
| n_limit: |
| This value allows a dataset with some limit size. |
| """ |
| if operation == 'sparse@sparse' or operation == "sparse@dense": |
| collection = load_spmm_dataset(dataset_path, hidden_size, sparsity, operation, device, n_limit) |
| elif operation == 'sparse@vector': |
| collection = load_spmv_dataset(dataset_path, hidden_size, sparsity, device, n_limit) |
| scipy_vars = {} |
| backward_vars = {} |
| for x, y in collection: |
| if device == 'cpu': |
| scipy_vars = { |
| "sx": to_coo_scipy(x) if x.is_sparse else x.numpy(), |
| "sy": to_coo_scipy(y) if y.is_sparse else y.numpy(), |
| } |
| if not requires_grad: |
| dx = x.to_dense() if x.is_sparse else x |
| dy = y.to_dense() if y.is_sparse else y |
| else: |
| c = sparse_grad_output(x, y) |
| backward_vars = { |
| "sparse_grad_output": c, |
| "grad_output": c.to_dense() if c.is_sparse else c, |
| } |
| x.requires_grad_(True) |
| y.requires_grad_(True) |
| dx = x.to_dense().detach() if x.is_sparse else x.clone().detach() |
| dy = y.to_dense().detach() if y.is_sparse else y.clone().detach() |
| dx.requires_grad_(True) |
| dy.requires_grad_(True) |
| yield { |
| "x": x, |
| "y": y, |
| "dx": dx, |
| "dy": dy, |
| **scipy_vars, |
| **backward_vars |
| } |