| """Default set of benchmarks. |
| |
| Parser notes: |
| `parse_stmts`: |
| - Width for the left (Python) column MUST be 40 characters. |
| - The column separator is " | ", not "|". Whitespace matters. |
| |
| `GroupedVariants`: |
| - `Setup` and `Global_Setup` (case insensitive) are reserved keywords |
| to populate `setup` and `global_setup` for every generated benchmark. |
| - To set a label for the succeeding block, add `# @YOUR_LABEL` (Python) |
| or `// @YOUR_LABEL` (C++). |
| """ |
| |
| from core.api import GroupedModules, GroupedStmts, GroupedVariants |
| from core.types import FlatIntermediateDefinition |
| from core.utils import flatten, parse_stmts |
| from definitions.setup import Setup |
| |
| |
# Benchmark definitions, keyed by (nested) label. `flatten` collapses the
# nested dicts below into a single-level `FlatIntermediateDefinition`.
#
# Two authoring styles are used (see the module docstring):
#   * `GroupedStmts` / `parse_stmts`: a side-by-side "Python | C++" table in
#     which the left (Python) column MUST be exactly 40 characters wide and
#     the separator is " | " (whitespace matters).
#   * `GroupedVariants`: a single Python or C++ block, split into one
#     benchmark per `# @label` / `// @label` marker; `Setup` /
#     `Global_Setup` sections are reserved and shared by every benchmark
#     generated from that block.
BENCHMARKS: FlatIntermediateDefinition = flatten({
    # Tensor creation via `empty`, the cheapest factory function.
    "Empty": {
        "no allocation": GroupedStmts(
            r"torch.empty(())",
            r"torch::empty({0});",
        ),

        "with allocation": GroupedStmts(
            r"torch.empty((1,))",
            r"torch::empty({1});",
        ),

        # C++ only: compares the TensorOptions-taking overload of
        # `at::empty` against the "faithful" overload that takes each
        # optional argument separately. The `// @Setup` section is shared
        # by every variant below.
        "overloads": GroupedVariants(
            cpp_block=r"""
// @Setup
auto options_empty = c10::TensorOptions();
auto options_full = c10::TensorOptions().dtype(at::kFloat).device(at::kCPU);
auto optional_float = c10::make_optional(at::kFloat);

// @TensorOptions overload
at::empty({0}, options_empty);
at::empty({0}, options_full);
at::empty({0}, at::kFloat); // implicit conversion

// @Faithful overload
at::empty({0}, c10::nullopt, c10::nullopt, c10::nullopt, c10::nullopt, c10::nullopt);
at::empty({0}, at::kFloat, c10::nullopt, c10::nullopt, c10::nullopt, c10::nullopt);
at::empty({0}, optional_float, c10::nullopt, c10::nullopt, c10::nullopt, c10::nullopt);
"""
        ),
    },

    # Element-wise arithmetic (in-place, out-of-place, `out=`, broadcasting
    # against a (4, 1) vector, and int->float type promotion) plus copies.
    "Pointwise": {
        "Math": GroupedVariants(*parse_stmts(r"""
            Python                                   | C++
            ---------------------------------------- | ----------------------------------------
            # @setup                                 | // @setup
            torch.manual_seed(138_10_23)             | torch::manual_seed(1381023);
            x = torch.rand((4, 4))                   | auto x = torch::rand({4, 4});
            y_float = torch.ones((4, 4))             | auto y_float = torch::ones({4, 4});
            y_vector = torch.ones((4, 1))            | auto y_vector = torch::ones({4, 1});
            y_int = torch.ones(                      | auto y_int = torch::ones({4, 4}, at::kInt);
                (4, 4), dtype=torch.int32)           |
                                                     |
            # @add                                   | // @add
            x += 1.0                                 | x += 1;
            x += y_float                             | x += y_float;
            x += y_vector                            | x += y_vector;
            x += y_int                               | x += y_int;
            x + y_float                              | x + y_float;
            torch.add(x, y_float)                    | torch::add(x, y_float);
            torch.add(x, y_float, out=x)             | torch::add_out(/*out=*/x, x, y_float);
                                                     |
            # @multiply                              | // @multiply
            x *= 1.0                                 | x *= 1;
            x *= y_float                             | x *= y_float;
            x *= y_vector                            | x *= y_vector;
            x *= y_int                               | x *= y_int;
            x * y_float                              | x * y_float;
            torch.mul(x, y_float)                    | torch::mul(x, y_float);
            torch.mul(x, y_float, out=x)             | torch::mul_out(/*out=*/x, x, y_float);
                                                     |
            # @equality                              | // @equality
            x == y_float                             | x == y_float;
            x == 1.0                                 | x == 1.0;
        """)),

        # Copies and materialization: trivial vs. non-trivial (transposed
        # source) `contiguous`, plus clone / copy_ / zero_ / in-place RNG.
        "Data movement": GroupedVariants(*parse_stmts(r"""
            Python                                   | C++
            ---------------------------------------- | ----------------------------------------
            # @setup                                 | // @setup
            x = torch.ones((4, 4))                   | auto x = torch::ones({4, 4});
            y = torch.ones((4, 4))                   | auto y = torch::ones({4, 4});
            x_t = x.t()                              | auto x_t = x.t();
                                                     |
            # @contiguous (trivial)                  | // @contiguous (trivial)
            x.contiguous()                           | x.contiguous();
                                                     |
            # @contiguous (non-trivial)              | // @contiguous (non-trivial)
            x_t.contiguous()                         | x_t.contiguous();
                                                     |
            # @clone                                 | // @clone
            x.clone()                                | x.clone();
                                                     |
            # @copy_                                 | // @copy_
            x.copy_(y)                               | x.copy_(y);
                                                     |
            # @zero_                                 | // @zero_
            x.zero_()                                | x.zero_();
                                                     |
            # @RNG                                   | // @RNG
            x.uniform_()                             | x.uniform_();
        """)),
    },

    # Whole-tensor reductions.
    "Reduction": GroupedVariants(*parse_stmts(r"""
        Python                                   | C++
        ---------------------------------------- | ----------------------------------------
        # @setup                                 | // @setup
        x = torch.ones((4, 4))                   | auto x = torch::ones({4, 4});
                                                 |
        # @max                                   | // @max
        x.max()                                  | x.max();
                                                 |
        # @sum                                   | // @sum
        x.sum()                                  | x.sum();
                                                 |
        # @variance                              | // @variance
        x.var(0)                                 | x.var(0);
    """)),

    # Python `__setitem__` / `__getitem__` sugar vs. the explicit C++
    # `index_put_` / `index` calls (torch::indexing). Covers integer,
    # Ellipsis, slice, None, and boolean indices, with scalar and tensor
    # right-hand sides.
    "Indexing": GroupedVariants(*parse_stmts(r"""
        Python                                   | C++
        ---------------------------------------- | ----------------------------------------
        # @setup                                 | // @setup
                                                 | using namespace torch::indexing;
        torch.manual_seed(6626_10_34)            | torch::manual_seed(66261034);
                                                 |
        x = torch.randn(1, 1, 1)                 | auto x = torch::randn({1, 1, 1});
        y = torch.randn(1, 1, 1)                 | auto y = torch::randn({1, 1, 1});
                                                 |
        # @Tensor-Scalar                         | // @Tensor-Scalar
        x[0] = 1                                 | x.index_put_({0}, 1);
        x[0, 0] = 1                              | x.index_put_({0, 0}, 1);
        x[0, 0, 0] = 1                           | x.index_put_({0, 0, 0}, 1);
                                                 |
        # @Tensor-Scalar (Advanced)              | // @Tensor-Scalar (Advanced)
        x[...] = 1                               | x.index_put_({"..."}, 1);
        x[:] = 1                                 | x.index_put_({Slice(None, None, None)}, 1);
        x[None] = 1                              | x.index_put_({None}, 1);
        x[False] = 1                             | x.index_put_({false}, 1);
        x[True] = 1                              | x.index_put_({true}, 1);
                                                 |
        # @Tensor-Tensor                         | // @Tensor-Tensor
        x[0] = y[0]                              | x.index_put_({0}, y.index({0}));
        x[0, 0] = y[0, 0]                        | x.index_put_({0, 0}, y.index({0, 0}));
        x[0, 0, 0] = y[0, 0, 0]                  | x.index_put_({0, 0, 0}, y.index({0, 0, 0}));
                                                 |
        # @Tensor-Tensor (Advanced)              | // @Tensor-Tensor (Advanced)
        x[...] = y[...]                          | x.index_put_({"..."}, y.index({"..."}));
        x[:] = y[:]                              | x.index_put_({Slice(None, None, None)}, y.index({Slice(None, None, None)}));
        x[None] = y[None]                        | x.index_put_({None}, y.index({None}));
        x[False] = y[False]                      | x.index_put_({false}, y.index({false}));
        x[True] = y[True]                        | x.index_put_({true}, y.index({true}));
    """)),

    # Size/stride metadata queries and view creation.
    "Metadata and views": GroupedVariants(*parse_stmts(r"""
        Python                                   | C++
        ---------------------------------------- | ----------------------------------------
        # @setup                                 | // @setup
        x = torch.ones((4, 4))                   | auto x = torch::ones({4, 4});
                                                 |
        # @size                                  | // @size
        x.size()[0]                              | x.sizes()[0];
                                                 |
        # @stride                                | // @stride
        x.stride(0)                              | x.stride(0);
                                                 |
        # @as_strided                            | // @as_strided
        torch.as_strided(x, (2, 3), (4, 1), 2)   | torch::as_strided(x, {2, 3}, {4, 1}, 2);
                                                 |
        # @select                                | // @select
        x.select(1, 1)                           | x.select(1, 1);
                                                 |
        # @unsqueeze                             | // @unsqueeze
        x.unsqueeze(0)                           | x.unsqueeze(0);
                                                 |
        # @view                                  | // @view
        x.view(-1, 1)                            | x.view({-1, 1});
                                                 |
        # @transpose                             | // @transpose
        x.t()                                    | x.t();
                                                 |
        # @reshape                               | // @reshape
        x.reshape((16, 1))                       | x.reshape({16, 1});
    """)),

    # One benchmark per nn module. The dict key is the Python constructor
    # string truncated at "(" (e.g. "BatchNorm2d"); each entry pairs a
    # `Setup` member (input/setup code, see `definitions.setup.Setup`) with
    # a flag for whether the module should also be run under TorchScript.
    "nn Modules": {
        py_constructor.split("(")[0]: GroupedModules(
            f"model = torch.nn.{py_constructor}",
            f"auto model = torch::nn::{cpp_constructor};",
            setup=setup.value,
            signature="f(x) -> y",
            torchscript=torchscript,
        )

        for setup, torchscript, (py_constructor, cpp_constructor) in (
            (Setup.TRIVIAL_4D, True, ("BatchNorm2d(4)",) * 2),
            (Setup.TRIVIAL_4D, True, ("GroupNorm(2, 4)",) * 2),
            # LayerNorm is the one case where the Python and C++
            # constructor strings differ (C++ needs explicit options).
            (Setup.TRIVIAL_4D, True, (
                "LayerNorm(4)",
                "LayerNorm(torch::nn::LayerNormOptions({4}))"
            )),
            (Setup.TRIVIAL_3D, True, ("Conv1d(4, 4, 1)",) * 2),
            (Setup.TRIVIAL_4D, True, ("Conv2d(4, 4, 1)",) * 2),
            (Setup.TRIVIAL_4D, True, ("MaxPool2d(2)",) * 2),
            (Setup.TRIVIAL_2D, True, ("ReLU()",) * 2),
            (Setup.TRIVIAL_2D, True, ("Sigmoid()",) * 2),
            (Setup.TRIVIAL_4D, True, ("Linear(4, 2)",) * 2),

            # TODO: LSTM can't be TorchScript'd
            (Setup.TRIVIAL_3D, False, ("LSTM(4, 2)",) * 2),
        )
    },

    # Small training-style graphs: run with autograd enabled, with a
    # TorchScript variant, at both one and two threads.
    "training": {
        "simple": GroupedStmts(
            *parse_stmts(r"""
                Python                                   | C++
                ---------------------------------------- | ----------------------------------------
                a0 = torch.nn.functional.relu(x * w0)    | auto a0 = torch::nn::functional::relu(x * w0);
                y = a0 * w1                              | auto y = a0 * w1;
            """),
            Setup.TRAINING.value,
            num_threads=(1, 2),
            signature=r"f(x, w0, w1) -> y",
            torchscript=True,
            autograd=True,
        ),

        "ensemble": GroupedStmts(
            *parse_stmts(r"""
                Python                                   | C++
                ---------------------------------------- | ----------------------------------------
                a0 = torch.nn.functional.gelu(x * w0)    | auto a0 = torch::nn::functional::gelu(x * w0);
                a1 = torch.nn.functional.prelu(y, w1)    | auto a1 = torch::nn::functional::prelu(y, w1);
                z = torch.nn.functional.normalize(       | auto z = torch::nn::functional::normalize(
                    torch.cat([a0, a1]),                 |     torch::cat({a0, a1}),
                    p=2.0, dim=0,                        |     torch::nn::functional::NormalizeFuncOptions().p(2).dim(0)
                ).dot(w2)                                | ).dot(w2);
            """),
            Setup.TRAINING.value,
            num_threads=(1, 2),
            signature=r"f(x, y, w0, w1, w2) -> z",
            torchscript=True,
            autograd=True,
        ),
    },

    # C++ only: ops on tensors created under `c10::InferenceMode`. Note
    # that `s` is created BEFORE the guard, so only `x` is an inference
    # tensor; the `@Mixed` variant combines the two.
    "InferenceMode": GroupedVariants(
        # In general, the mixed input scenario is less common so its
        # perf can be less important than pure inference tensor inputs.
        cpp_block=r"""
// @Setup
auto s = torch::ones({3, 3}); // Normal Tensor
c10::InferenceMode guard;
auto x = torch::ones({3, 3}); // Inference Tensor

// @View
torch::Tensor y = x.view({9});

// @Inplace
torch::Tensor y = x.mul_(x);

// @Mixed
torch::Tensor y = x + s;
"""
    ),
})