blob: fbc52f3e48569a67091b2526bb5fc8b334a5a9be [file] [log] [blame]
from . import benchmark
class ReduceBench(benchmark.Benchmark):
    """Benchmark a sum reduction over an [M, N, K] tensor.

    An elementwise add is optionally fused ahead of the reduction so the
    fuser has work to combine; ``case`` selects which axes are reduced.
    """

    def __init__(self, mode, device, dtype, case, M, N, K, skip_input_transform):
        super().__init__(mode, device, dtype)
        self.case = case
        self.M = M
        self.N = N
        self.K = K
        self._set_skip_input_transform(skip_input_transform)
        self.inputs = [
            self.randn(
                [M, N, K], device=device, dtype=dtype, requires_grad=self.requires_grad
            )
        ]
        # Map each supported case to the axes of [M, N, K] being reduced.
        case_to_dims = {
            "row": [1, 2],
            "mid": [0, 2],
            "col": [0, 1],
            "full": [0, 1, 2],
        }
        if case not in case_to_dims:
            raise ValueError(f"invalid case: {case}")
        self.dims = case_to_dims[case]

    def forward(self, inputs):
        # Optionally apply a cheap elementwise transform before reducing.
        x = inputs if self.skip_input_transform else self.add(inputs, 0.001)
        return self.sum(x, self.dims)

    def config(self):
        # A full reduction is characterized by total element count alone.
        if self.case == "full":
            return [self.M * self.N * self.K, self._skip_input_transform_str()]
        return [self.M, self.N, self.K, self._skip_input_transform_str()]

    @staticmethod
    def default_configs():
        return [
            # [512, 512, 512],
            [512, 64, 512, "s0"],
        ]

    @staticmethod
    def module():
        return "reduce"

    def memory_workload(self):
        # Forward touches the buffer once; backward adds one more pass.
        # The roofline ("sol") and algorithmic estimates coincide here.
        count = 1 if self.mode == "fwd" else 2
        buffer_size = self.M * self.N * self.K
        return {
            "sol": buffer_size * count,
            "algorithmic": buffer_size * count,
        }

    def _set_skip_input_transform(self, input_str):
        # In the test setting, s1 skips the input transformation and s0 keeps it.
        if input_str == "s1":
            self.skip_input_transform = True
        elif input_str == "s0":
            self.skip_input_transform = False
        else:
            raise ValueError(f"invalid skip_input_transform: {input_str}")

    def _skip_input_transform_str(self):
        return "s1" if self.skip_input_transform else "s0"
class ReduceRowBench(ReduceBench):
    """Reduction over the trailing two axes ("row" case) of [M, N, K]."""

    def __init__(self, mode, device, dtype, M, N, K, skip_input_transform):
        # Pin the parent's case selector to "row".
        super().__init__(mode, device, dtype, "row", M, N, K, skip_input_transform)

    @staticmethod
    def module():
        return "reduce_row"
class ReduceMidBench(ReduceBench):
    """Reduction over the outer and inner axes ("mid" case) of [M, N, K]."""

    def __init__(self, mode, device, dtype, M, N, K, skip_input_transform):
        # Pin the parent's case selector to "mid".
        super().__init__(mode, device, dtype, "mid", M, N, K, skip_input_transform)

    @staticmethod
    def module():
        return "reduce_mid"
class ReduceColBench(ReduceBench):
    """Reduction over the leading two axes ("col" case) of [M, N, K]."""

    def __init__(self, mode, device, dtype, M, N, K, skip_input_transform):
        # Pin the parent's case selector to "col".
        super().__init__(mode, device, dtype, "col", M, N, K, skip_input_transform)

    @staticmethod
    def module():
        return "reduce_col"
class ReduceFullBench(ReduceBench):
    """Full reduction to a scalar, configured by total element count only."""

    def __init__(self, mode, device, dtype, M, skip_input_transform):
        # Collapse N and K to 1 so M alone is the total number of elements.
        super().__init__(mode, device, dtype, "full", M, 1, 1, skip_input_transform)

    def config(self):
        return [self.M * self.N * self.K, self._skip_input_transform_str()]

    @staticmethod
    def default_configs():
        return [[1 << 24, "s1"]]

    @staticmethod
    def module():
        return "reduce_full"
class Reduce2DBench(benchmark.Benchmark):
    """
    A benchmark class to validate 2 dimensional reduction performance.
    Only a simple add is fused to induce the fuser and isolate reduction perf.
    """

    def __init__(self, mode, device, dtype, red_dim, dim0, dim1):
        super().__init__(mode, device, dtype)
        self.red_dim = red_dim
        self.dim0 = dim0
        self.dim1 = dim1
        self.inputs = [
            self.randn(
                [dim0, dim1],
                device=device,
                dtype=dtype,
                requires_grad=self.requires_grad,
            )
        ]
        # Only a 2-D input is modeled, so the axis must be 0 or 1.
        if red_dim not in (0, 1):
            raise ValueError(f"invalid reduction dimension: {red_dim}")

    def forward(self, inputs):
        # A single fused add precedes the reduction.
        shifted = self.add(inputs, 0.001)
        return self.sum(shifted, [self.red_dim])

    def config(self):
        return [self.red_dim, self.dim0, self.dim1]

    @staticmethod
    def default_configs():
        return [[1, 640, 524288]]

    @staticmethod
    def module():
        return "reduce2d"

    @staticmethod
    def input_iterable():
        return True

    def memory_workload(self):
        assert self.mode == "fwd", "Only the forward operation is modeled!"
        # One read of the full input plus one write of the reduced output,
        # whose length is the size of the surviving dimension.
        out_len = self.dim1 if self.red_dim == 0 else self.dim0
        buffer_size = self.dim0 * self.dim1 + out_len
        return {
            "sol": buffer_size,
            "algorithmic": buffer_size,
        }
class Reduce2DInnerBench(Reduce2DBench):
    """2-D reduction over the inner (contiguous) dimension."""

    def __init__(self, mode, device, dtype, dim0, dim1):
        # Fix the reduction axis to 1.
        super().__init__(mode, device, dtype, 1, dim0, dim1)

    @staticmethod
    def default_configs():
        # Reuse the parent's config, dropping the leading red_dim entry.
        return [Reduce2DBench.default_configs()[0][1:]]

    def config(self):
        return super().config()[1:]

    @staticmethod
    def module():
        return "reduce2d_inner"
class Reduce2DOuterBench(Reduce2DBench):
    """2-D reduction over the outer (strided) dimension."""

    def __init__(self, mode, device, dtype, dim0, dim1):
        # Fix the reduction axis to 0.
        super().__init__(mode, device, dtype, 0, dim0, dim1)

    @staticmethod
    def default_configs():
        # Reuse the parent's config, dropping the leading red_dim entry.
        return [Reduce2DBench.default_configs()[0][1:]]

    def config(self):
        return super().config()[1:]

    @staticmethod
    def module():
        return "reduce2d_outer"
# Register the static-shape reduction benchmarks with the framework.
for _bench_cls in (
    ReduceRowBench,
    ReduceMidBench,
    ReduceColBench,
    Reduce2DInnerBench,
    Reduce2DOuterBench,
    ReduceFullBench,
):
    benchmark.register_benchmark_class(_bench_cls)
del _bench_cls
class DynamicReduce2DBench(benchmark.DynamicShape, Reduce2DBench):
    """
    A benchmark class to validate 2 dimensional reduction performance.
    Only a simple add is fused to induce the fuser and isolate reduction perf.
    """

    def __init__(self, mode, device, dtype, red_dim, dim0, dim1):
        # Initialize both bases explicitly: DynamicShape first, then the
        # static 2-D reduction setup with the nominal problem size.
        benchmark.DynamicShape.__init__(self)
        Reduce2DBench.__init__(self, mode, device, dtype, red_dim, dim0, dim1)

    def instantiate_input(self):
        # Draw a fresh shape around the nominal (dim0, dim1) and rebuild
        # the input tensor accordingly.
        d0, d1 = self.rand_shape([self.dim0, self.dim1])
        self.inputs = [
            self.randn(
                [d0, d1],
                device=self.device,
                dtype=self.dtype,
                requires_grad=self.requires_grad,
            )
        ]

    @staticmethod
    def module():
        return "dynamicreduce2d"
class DynamicReduce2DInnerBench(DynamicReduce2DBench):
    """Dynamic-shape variant of the inner-dimension 2-D reduction."""

    def __init__(self, mode, device, dtype, dim0, dim1):
        # Fix the reduction axis to 1.
        super().__init__(mode, device, dtype, 1, dim0, dim1)

    @staticmethod
    def default_configs():
        # Reuse the parent's config, dropping the leading red_dim entry.
        return [DynamicReduce2DBench.default_configs()[0][1:]]

    def config(self):
        return super().config()[1:]

    @staticmethod
    def module():
        return "reduce2d_dynamic_inner"
class DynamicReduce2DOuterBench(DynamicReduce2DBench):
    """Dynamic-shape variant of the outer-dimension 2-D reduction."""

    def __init__(self, mode, device, dtype, dim0, dim1):
        # Fix the reduction axis to 0.
        super().__init__(mode, device, dtype, 0, dim0, dim1)

    @staticmethod
    def default_configs():
        # Reuse the parent's config, dropping the leading red_dim entry.
        return [DynamicReduce2DBench.default_configs()[0][1:]]

    def config(self):
        return super().config()[1:]

    @staticmethod
    def module():
        return "reduce2d_dynamic_outer"
# Register the dynamic-shape variants as well.
for _bench_cls in (DynamicReduce2DInnerBench, DynamicReduce2DOuterBench):
    benchmark.register_benchmark_class(_bench_cls)
del _bench_cls