blob: b3e19c101d26263db4be6e74c49e86fe1b8f42bd [file] [log] [blame]
import random
from typing import Any, Tuple
import torch
def transpose_tensors(p_transpose_both: float = 0.05) -> Tuple[bool, bool]:
    """Randomly pick which of two operands should be transposed.

    With weight ``p_transpose_both`` (against ``1 - p_transpose_both``) both
    operands are transposed. Otherwise one of the three remaining layouts
    (left only, right only, neither) is drawn with equal weight.

    Args:
        p_transpose_both: Weight given to the "transpose both" outcome.

    Returns:
        A ``(transpose_left, transpose_right)`` pair of flags.
    """
    both_weights = [p_transpose_both, 1 - p_transpose_both]
    (both,) = random.choices([True, False], both_weights)
    if not both:
        # Left only, right only, or no transpose at all -- equally likely.
        remaining_layouts = [(True, False), (False, True), (False, False)]
        (layout,) = random.choices(remaining_layouts)
        return layout
    return (True, True)
def fits_in_memory(dtype: Any, m: int, k: int, n: int) -> Any:
    """Check whether three matrices of the given sizes fit in the memory budget.

    The footprint is ``dtype.itemsize`` bytes for each element of an
    ``m x k``, a ``k x n`` and an ``m x n`` matrix (presumably the two
    operands and the result of a matmul). It is compared against a quarter
    of the total memory of CUDA device 0.

    Args:
        dtype: Object exposing ``itemsize`` (bytes per element).
        m: Row count of the first and third matrix.
        k: Shared dimension of the first and second matrix.
        n: Column count of the second and third matrix.

    Returns:
        Whether the footprint is strictly below the budget.
    """
    # Only a quarter of the device memory is budgeted because we otherwise
    # sometimes run out of memory -- presumably because inductor creates
    # copies of the tensors for benchmarking.
    memory_budget = torch.cuda.get_device_properties(0).total_memory / 4
    element_count = m * k + k * n + m * n
    return dtype.itemsize * element_count < memory_budget
def get_mm_tensors(
    m: int,
    k: int,
    n: int,
    transpose_left: bool,
    transpose_right: bool,
    dtype_left: Any,
    dtype_right: Any,
) -> Tuple[Any, Any]:
    """Create random operands for an ``(m x k) @ (k x n)`` matrix multiply.

    Each operand is filled by ``torch.randn``. When its transpose flag is
    set, the operand is allocated with its two dimensions swapped and then
    transposed with ``.t()``, so its logical shape is unchanged.

    Args:
        m: Rows of the left operand.
        k: Columns of the left operand and rows of the right operand.
        n: Columns of the right operand.
        transpose_left: Build the left operand as a transposed ``k x m`` tensor.
        transpose_right: Build the right operand as a transposed ``n x k`` tensor.
        dtype_left: dtype passed to ``torch.randn`` for the left operand.
        dtype_right: dtype passed to ``torch.randn`` for the right operand.

    Returns:
        ``(a, b)`` with logical shapes ``(m, k)`` and ``(k, n)``.
    """
    # The left operand is always generated first so that seeded runs draw
    # random values in a fixed order.
    left_shape = (k, m) if transpose_left else (m, k)
    left = torch.randn(*left_shape, dtype=dtype_left)
    if transpose_left:
        left = left.t()

    right_shape = (n, k) if transpose_right else (k, n)
    right = torch.randn(*right_shape, dtype=dtype_right)
    if transpose_right:
        right = right.t()

    return (left, right)
def set_precision(dtype: Any, p_float32_prec_highest: float = 0.8) -> None:
    """Set the global float32 matmul precision for the given dtype.

    For ``torch.float32`` the mode is drawn at random: ``"highest"`` with
    weight ``p_float32_prec_highest`` and ``"high"`` with the remaining
    weight. Every other dtype always gets ``"high"``.

    Args:
        dtype: dtype the upcoming matmul will run with.
        p_float32_prec_highest: Weight of ``"highest"`` for float32 inputs.
    """
    chosen = "high"
    if dtype == torch.float32:
        p_high = 1 - p_float32_prec_highest
        (chosen,) = random.choices(
            ["high", "highest"], [p_high, p_float32_prec_highest]
        )
    torch.set_float32_matmul_precision(chosen)
def get_random_between_pow2(min_power2: int, max_power2: int) -> int:
    """Return a random integer lying strictly between two adjacent powers of two.

    An exponent ``i`` is drawn uniformly from ``[min_power2, max_power2 - 1]``
    and the result is drawn uniformly from ``[2**i + 1, 2**(i + 1) - 1]``, so
    the returned value is never itself a power of two.

    Exponents below 1 are left out of the draw: for ``i <= 0`` there is no
    integer strictly between ``2**i`` and ``2**(i + 1)``, which previously
    made calls such as ``get_random_between_pow2(0, 5)`` fail intermittently.

    Args:
        min_power2: Smallest exponent to consider (inclusive).
        max_power2: Largest exponent bounding the result (exclusive for the
            drawn exponent, i.e. the result is below ``2**max_power2``).

    Returns:
        A non-power-of-two integer ``v`` with
        ``2**max(min_power2, 1) < v < 2**max_power2``.

    Raises:
        ValueError: If no usable exponent remains, i.e.
            ``max(min_power2, 1) > max_power2 - 1``.
    """
    # 2**i + 1 <= 2**(i + 1) - 1 only holds for i >= 1.
    lowest_exponent = max(min_power2, 1)
    highest_exponent = max_power2 - 1
    if lowest_exponent > highest_exponent:
        raise ValueError(
            "no integer lies strictly between adjacent powers of two for "
            f"min_power2={min_power2}, max_power2={max_power2}"
        )
    i = random.randint(lowest_exponent, highest_exponent)
    return random.randint(2**i + 1, 2 ** (i + 1) - 1)