# blob: ac8f588d2ebf4bb04db902d0867c6fdce85a3d5e [file] [log] [blame]
"""Default set of benchmarks.
Parser notes:
`parse_stmts`:
- Width for the left (Python) column MUST be 40 characters.
- The column separator is " | ", not "|". Whitespace matters.
`GroupedVariants`:
- `Setup` and `Global_Setup` (case insensitive) are reserved keywords
to populate `setup` and `global_setup` for every generated benchmark.
- To set a label for the succeeding block, add `# @YOUR_LABEL` (Python)
or `// @YOUR_LABEL` (C++).
"""
from core.api import GroupedModules, GroupedStmts, GroupedVariants
from core.types import FlatIntermediateDefinition
from core.utils import flatten, parse_stmts
from definitions.setup import Setup
# NOTE: The nested dict keys flatten into benchmark names (via
# `core.utils.flatten`); each leaf pairs a Python snippet with its C++
# equivalent. The `parse_stmts` tables below are whitespace sensitive:
# per the module docstring, the left (Python) column must be exactly
# 40 characters wide and the column separator is " | ".
BENCHMARKS: FlatIntermediateDefinition = flatten({
    "Empty": {
        "no allocation": GroupedStmts(
            r"torch.empty(())",
            r"torch::empty({0});",
        ),
        "with allocation": GroupedStmts(
            r"torch.empty((1,))",
            r"torch::empty({1});",
        ),
        # C++ only: compares the TensorOptions overload against the
        # faithful (fully explicit optional args) overload.
        "overloads": GroupedVariants(
            cpp_block=r"""
                // @Setup
                auto options_empty = c10::TensorOptions();
                auto options_full = c10::TensorOptions().dtype(at::kFloat).device(at::kCPU);
                auto optional_float = c10::make_optional(at::kFloat);
                // @TensorOptions overload
                at::empty({0}, options_empty);
                at::empty({0}, options_full);
                at::empty({0}, at::kFloat); // implicit conversion
                // @Faithful overload
                at::empty({0}, c10::nullopt, c10::nullopt, c10::nullopt, c10::nullopt, c10::nullopt);
                at::empty({0}, at::kFloat, c10::nullopt, c10::nullopt, c10::nullopt, c10::nullopt);
                at::empty({0}, optional_float, c10::nullopt, c10::nullopt, c10::nullopt, c10::nullopt);
            """
        ),
    },
    "Pointwise": {
        "Math": GroupedVariants(*parse_stmts(r"""
            Python                                   | C++
            ---------------------------------------- | ----------------------------------------
            # @setup                                 | // @setup
            torch.manual_seed(138_10_23)             | torch::manual_seed(1381023);
            x = torch.rand((4, 4))                   | auto x = torch::rand({4, 4});
            y_float = torch.ones((4, 4))             | auto y_float = torch::ones({4, 4});
            y_vector = torch.ones((4, 1))            | auto y_vector = torch::ones({4, 1});
            y_int = torch.ones(                      | auto y_int = torch::ones({4, 4}, at::kInt);
                (4, 4), dtype=torch.int32)           |
                                                     |
            # @add                                   | // @add
            x += 1.0                                 | x += 1;
            x += y_float                             | x += y_float;
            x += y_vector                            | x += y_vector;
            x += y_int                               | x += y_int;
            x + y_float                              | x + y_float;
            torch.add(x, y_float)                    | torch::add(x, y_float);
            torch.add(x, y_float, out=x)             | torch::add_out(/*out=*/x, x, y_float);
                                                     |
            # @multiply                              | // @multiply
            x *= 1.0                                 | x *= 1;
            x *= y_float                             | x *= y_float;
            x *= y_vector                            | x *= y_vector;
            x *= y_int                               | x *= y_int;
            x * y_float                              | x * y_float;
            torch.mul(x, y_float)                    | torch::mul(x, y_float);
            torch.mul(x, y_float, out=x)             | torch::mul_out(/*out=*/x, x, y_float);
                                                     |
            # @equality                              | // @equality
            x == y_float                             | x == y_float;
            x == 1.0                                 | x == 1.0;
        """)),
        "Data movement": GroupedVariants(*parse_stmts(r"""
            Python                                   | C++
            ---------------------------------------- | ----------------------------------------
            # @setup                                 | // @setup
            x = torch.ones((4, 4))                   | auto x = torch::ones({4, 4});
            y = torch.ones((4, 4))                   | auto y = torch::ones({4, 4});
            x_t = x.t()                              | auto x_t = x.t();
                                                     |
            # @contiguous (trivial)                  | // @contiguous (trivial)
            x.contiguous()                           | x.contiguous();
                                                     |
            # @contiguous (non-trivial)              | // @contiguous (non-trivial)
            x_t.contiguous()                         | x_t.contiguous();
                                                     |
            # @clone                                 | // @clone
            x.clone()                                | x.clone();
                                                     |
            # @copy_                                 | // @copy_
            x.copy_(y)                               | x.copy_(y);
                                                     |
            # @zero_                                 | // @zero_
            x.zero_()                                | x.zero_();
                                                     |
            # @RNG                                   | // @RNG
            x.uniform_()                             | x.uniform_();
        """)),
    },
    "Reduction": GroupedVariants(*parse_stmts(r"""
        Python                                   | C++
        ---------------------------------------- | ----------------------------------------
        # @setup                                 | // @setup
        x = torch.ones((4, 4))                   | auto x = torch::ones({4, 4});
                                                 |
        # @max                                   | // @max
        x.max()                                  | x.max();
                                                 |
        # @sum                                   | // @sum
        x.sum()                                  | x.sum();
                                                 |
        # @variance                              | // @variance
        x.var(0)                                 | x.var(0);
    """)),
    "Indexing": GroupedVariants(*parse_stmts(r"""
        Python                                   | C++
        ---------------------------------------- | ----------------------------------------
        # @setup                                 | // @setup
                                                 | using namespace torch::indexing;
        torch.manual_seed(6626_10_34)            | torch::manual_seed(66261034);
                                                 |
        x = torch.randn(1, 1, 1)                 | auto x = torch::randn({1, 1, 1});
        y = torch.randn(1, 1, 1)                 | auto y = torch::randn({1, 1, 1});
                                                 |
        # @Tensor-Scalar                         | // @Tensor-Scalar
        x[0] = 1                                 | x.index_put_({0}, 1);
        x[0, 0] = 1                              | x.index_put_({0, 0}, 1);
        x[0, 0, 0] = 1                           | x.index_put_({0, 0, 0}, 1);
                                                 |
        # @Tensor-Scalar (Advanced)              | // @Tensor-Scalar (Advanced)
        x[...] = 1                               | x.index_put_({"..."}, 1);
        x[:] = 1                                 | x.index_put_({Slice(None, None, None)}, 1);
        x[None] = 1                              | x.index_put_({None}, 1);
        x[False] = 1                             | x.index_put_({false}, 1);
        x[True] = 1                              | x.index_put_({true}, 1);
                                                 |
        # @Tensor-Tensor                         | // @Tensor-Tensor
        x[0] = y[0]                              | x.index_put_({0}, y.index({0}));
        x[0, 0] = y[0, 0]                        | x.index_put_({0, 0}, y.index({0, 0}));
        x[0, 0, 0] = y[0, 0, 0]                  | x.index_put_({0, 0, 0}, y.index({0, 0, 0}));
                                                 |
        # @Tensor-Tensor (Advanced)              | // @Tensor-Tensor (Advanced)
        x[...] = y[...]                          | x.index_put_({"..."}, y.index({"..."}));
        x[:] = y[:]                              | x.index_put_({Slice(None, None, None)}, y.index({Slice(None, None, None)}));
        x[None] = y[None]                        | x.index_put_({None}, y.index({None}));
        x[False] = y[False]                      | x.index_put_({false}, y.index({false}));
        x[True] = y[True]                        | x.index_put_({true}, y.index({true}));
    """)),
    "Metadata and views": GroupedVariants(*parse_stmts(r"""
        Python                                   | C++
        ---------------------------------------- | ----------------------------------------
        # @setup                                 | // @setup
        x = torch.ones((4, 4))                   | auto x = torch::ones({4, 4});
                                                 |
        # @size                                  | // @size
        x.size()[0]                              | x.sizes()[0];
                                                 |
        # @stride                                | // @stride
        x.stride(0)                              | x.stride(0);
                                                 |
        # @as_strided                            | // @as_strided
        torch.as_strided(x, (2, 3), (4, 1), 2)   | torch::as_strided(x, {2, 3}, {4, 1}, 2);
                                                 |
        # @select                                | // @select
        x.select(1, 1)                           | x.select(1, 1);
                                                 |
        # @unsqueeze                             | // @unsqueeze
        x.unsqueeze(0)                           | x.unsqueeze(0);
                                                 |
        # @view                                  | // @view
        x.view(-1, 1)                            | x.view({-1, 1});
                                                 |
        # @transpose                             | // @transpose
        x.t()                                    | x.t();
                                                 |
        # @reshape                               | // @reshape
        x.reshape((16, 1))                       | x.reshape({16, 1});
    """)),
    # One benchmark per Module, keyed by the constructor name (the text
    # before the first "(" in the Python constructor call).
    "nn Modules": {
        py_constructor.split("(")[0]: GroupedModules(
            f"model = torch.nn.{py_constructor}",
            f"auto model = torch::nn::{cpp_constructor};",
            setup=setup.value,
            signature="f(x) -> y",
            torchscript=torchscript,
        )
        for setup, torchscript, (py_constructor, cpp_constructor) in (
            (Setup.TRIVIAL_4D, True, ("BatchNorm2d(4)",) * 2),
            (Setup.TRIVIAL_4D, True, ("GroupNorm(2, 4)",) * 2),
            (Setup.TRIVIAL_4D, True, (
                "LayerNorm(4)",
                "LayerNorm(torch::nn::LayerNormOptions({4}))"
            )),
            (Setup.TRIVIAL_3D, True, ("Conv1d(4, 4, 1)",) * 2),
            (Setup.TRIVIAL_4D, True, ("Conv2d(4, 4, 1)",) * 2),
            (Setup.TRIVIAL_4D, True, ("MaxPool2d(2)",) * 2),
            (Setup.TRIVIAL_2D, True, ("ReLU()",) * 2),
            (Setup.TRIVIAL_2D, True, ("Sigmoid()",) * 2),
            (Setup.TRIVIAL_4D, True, ("Linear(4, 2)",) * 2),
            # TODO: LSTM can't be TorchScript'd
            (Setup.TRIVIAL_3D, False, ("LSTM(4, 2)",) * 2),
        )
    },
    "training": {
        "simple": GroupedStmts(
            *parse_stmts(r"""
                Python                                   | C++
                ---------------------------------------- | ----------------------------------------
                a0 = torch.nn.functional.relu(x * w0)    | auto a0 = torch::nn::functional::relu(x * w0);
                y = a0 * w1                              | auto y = a0 * w1;
            """),
            Setup.TRAINING.value,
            num_threads=(1, 2),
            signature=r"f(x, w0, w1) -> y",
            torchscript=True,
            autograd=True,
        ),
        "ensemble": GroupedStmts(
            *parse_stmts(r"""
                Python                                   | C++
                ---------------------------------------- | ----------------------------------------
                a0 = torch.nn.functional.gelu(x * w0)    | auto a0 = torch::nn::functional::gelu(x * w0);
                a1 = torch.nn.functional.prelu(y, w1)    | auto a1 = torch::nn::functional::prelu(y, w1);
                z = torch.nn.functional.normalize(       | auto z = torch::nn::functional::normalize(
                    torch.cat([a0, a1]),                 |     torch::cat({a0, a1}),
                    p=2.0, dim=0,                        |     torch::nn::functional::NormalizeFuncOptions().p(2).dim(0)
                ).dot(w2)                                | ).dot(w2);
            """),
            Setup.TRAINING.value,
            num_threads=(1, 2),
            signature=r"f(x, y, w0, w1, w2) -> z",
            torchscript=True,
            autograd=True,
        ),
    },
    "InferenceMode": GroupedVariants(
        # In general, the mixed input scenario is less common so its
        # perf can be less important than pure inference tensor inputs.
        cpp_block=r"""
            // @Setup
            auto s = torch::ones({3, 3}); // Normal Tensor
            c10::InferenceMode guard;
            auto x = torch::ones({3, 3}); // Inference Tensor
            // @View
            torch::Tensor y = x.view({9});
            // @Inplace
            torch::Tensor y = x.mul_(x);
            // @Mixed
            torch::Tensor y = x + s;
        """
    ),
})