blob: aba17b98e72019e64ae50bee0f88aafdb8f853e5 [file] [log] [blame] [edit]
// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
#include <xnnpack.h>
#include <xnnpack/subgraph.h>
#include "runtime-tester.h"
#include <gtest/gtest.h>
namespace xnnpack {
// Verifies that an Addition node followed by a Clamp node is fused into a
// single Addition node that absorbs the clamp's activation bounds.
TEST(ADD_THEN_CLAMP, fusion) {
  RuntimeTester tester(4);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  constexpr uint32_t input1_id = 0;
  constexpr uint32_t input2_id = 1;
  constexpr uint32_t addition_out_id = 2;
  constexpr uint32_t output_id = 3;
  tester.AddInputTensorF32({1, 2, 2, 3}, input1_id)
      .AddInputTensorF32({1, 2, 2, 3}, input2_id)
      .AddDynamicTensorF32({1, 2, 2, 3}, addition_out_id)
      .AddOutputTensorF32({1, 2, 2, 3}, output_id)
      .AddAddition(input1_id, input2_id, addition_out_id)
      .AddClamp(clamp_min, clamp_max, addition_out_id, output_id);
  // Reference run with both operators intact.
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  // After fusion only the Addition remains: it carries the clamp bounds,
  // writes directly to the external output, and the Clamp node is invalidated.
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(reference_output, fused_output);
}
// Verifies that AveragePooling2D + Clamp is fused into a single pooling node
// carrying the clamp's activation bounds.
TEST(AVERAGE_POOLING_2D_THEN_CLAMP, fusion) {
  RuntimeTester tester(3);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  constexpr uint32_t input_id = 0;
  constexpr uint32_t pooling_out_id = 1;
  constexpr uint32_t output_id = 2;
  // 2x2 pooling window, stride 1, no padding: 10x10 -> 9x9.
  tester.AddInputTensorF32({1, 10, 10, 3}, input_id)
      .AddDynamicTensorF32({1, 9, 9, 3}, pooling_out_id)
      .AddOutputTensorF32({1, 9, 9, 3}, output_id)
      .AddAveragePooling2D(0, 0, 0, 0, 2, 2, 1, 1, input_id, pooling_out_id)
      .AddClamp(clamp_min, clamp_max, pooling_out_id, output_id);
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  // Only the pooling operator survives; the Clamp node is invalidated.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(reference_output, fused_output);
}
// Verifies that two consecutive Clamp nodes collapse into one: an unbounded
// (+/- infinity) clamp followed by a bounded clamp fuses to a single clamp
// with the tighter bounds.
TEST(CLAMP_THEN_CLAMP, fusion) {
  RuntimeTester tester(3);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  const float inf = std::numeric_limits<float>::infinity();
  constexpr uint32_t input_id = 0;
  constexpr uint32_t first_clamp_out_id = 1;
  constexpr uint32_t output_id = 2;
  tester.AddInputTensorF32({1, 10, 10, 3}, input_id)
      .AddDynamicTensorF32({1, 10, 10, 3}, first_clamp_out_id)
      .AddOutputTensorF32({1, 10, 10, 3}, output_id)
      .AddClamp(-inf, inf, input_id, first_clamp_out_id)
      .AddClamp(clamp_min, clamp_max, first_clamp_out_id, output_id);
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  // One clamp remains, holding the finite bounds; the second is invalidated.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(reference_output, fused_output);
}
// Verifies that a Convolution2D node followed by a Clamp node is fused into a
// single Convolution2D node carrying the clamp's activation bounds.
TEST(CONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  // 3x3 conv, stride 2, padding 1 on each edge: 256x256x3 -> 128x128x32.
  tester
      .AddInputTensorF32({1, 256, 256, 3}, input_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddDynamicTensorF32({1, 128, 128, 32}, intermediate_id)
      .AddOutputTensorF32({1, 128, 128, 32}, output_id)
      .AddConvolution2D(
          ConvolutionParams{
              Padding{1, 1, 1, 1},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          }, input_id, filter_id, bias_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);
  // Reference run with both operators intact.
  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  // After fusion only the convolution remains: it absorbs the clamp bounds,
  // writes directly to the external output, and the Clamp node (node 1) is
  // marked invalid.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
// Verifies that Divide + Clamp is fused into a single Divide node carrying
// the clamp's activation bounds.
TEST(DIVIDE_THEN_CLAMP, fusion) {
  RuntimeTester tester(4);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  constexpr uint32_t input1_id = 0;
  constexpr uint32_t input2_id = 1;
  constexpr uint32_t divide_out_id = 2;
  constexpr uint32_t output_id = 3;
  tester.AddInputTensorF32({1, 2, 2, 3}, input1_id)
      .AddInputTensorF32({1, 2, 2, 3}, input2_id)
      .AddDynamicTensorF32({1, 2, 2, 3}, divide_out_id)
      .AddOutputTensorF32({1, 2, 2, 3}, output_id)
      .AddDivide(input1_id, input2_id, divide_out_id)
      .AddClamp(clamp_min, clamp_max, divide_out_id, output_id);
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  // Only the Divide operator survives; the Clamp node is invalidated.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(reference_output, fused_output);
}
// Verifies that a Deconvolution2D (transposed convolution) node followed by a
// Clamp node is fused into a single Deconvolution2D node carrying the clamp's
// activation bounds.
TEST(DECONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  // 3x3 deconv, 2x upsampling, padding 1: 128x128x3 -> 255x255x32.
  tester
      .AddInputTensorF32({1, 128, 128, 3}, input_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddDynamicTensorF32({1, 255, 255, 32}, intermediate_id)
      .AddOutputTensorF32({1, 255, 255, 32}, output_id)
      .AddDeconvolution2D(
          DeconvolutionParams{
              Padding{1, 1, 1, 1},
              Adjustment{0, 0},
              Kernel{3, 3},
              Upsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32
          }, input_id, filter_id, bias_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);
  // Reference run with both operators intact.
  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  // After fusion only the deconvolution remains: it absorbs the clamp bounds,
  // writes directly to the external output, and the Clamp node is invalidated.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
// Verifies that DepthwiseConvolution2D + Clamp is fused into a single
// depthwise convolution node carrying the clamp's activation bounds.
TEST(DEPTHWISE_CONVOLUTION_2D_THEN_CLAMP, fusion) {
  RuntimeTester tester(5);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  constexpr uint32_t input_id = 0;
  constexpr uint32_t filter_id = 1;
  constexpr uint32_t bias_id = 2;
  constexpr uint32_t conv_out_id = 3;
  constexpr uint32_t output_id = 4;
  // 3x3 depthwise conv, stride 1, padding 1: spatial size is preserved.
  tester.AddInputTensorF32({1, 128, 128, 4}, input_id)
      .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
      .AddDynamicTensorF32({1, 128, 128, 4}, conv_out_id)
      .AddOutputTensorF32({1, 128, 128, 4}, output_id)
      .AddDepthwiseConvolution2D(
          DepthwiseConvolutionParams{
              Padding{1, 1, 1, 1},
              Kernel{3, 3},
              Subsampling{1, 1},
              Dilation{1, 1},
              /*depth_multiplier=*/ 1,
              /*input_channels=*/ 4
          }, input_id, filter_id, bias_id, conv_out_id)
      .AddClamp(clamp_min, clamp_max, conv_out_id, output_id);
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  // Only the depthwise convolution survives; the Clamp node is invalidated.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(reference_output, fused_output);
}
// Verifies that FullyConnected + Clamp is fused into a single fully-connected
// node carrying the clamp's activation bounds.
TEST(FULLY_CONNECTED_2D_THEN_CLAMP, fusion) {
  RuntimeTester tester(5);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  constexpr uint32_t input_id = 0;
  constexpr uint32_t filter_id = 1;
  constexpr uint32_t bias_id = 2;
  constexpr uint32_t fc_out_id = 3;
  constexpr uint32_t output_id = 4;
  // [5, 3] x [7, 3]^T + [7] -> [5, 7].
  tester.AddInputTensorF32({5, 3}, input_id)
      .AddStaticTensorF32({7, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({7}, TensorType::kDense, bias_id)
      .AddDynamicTensorF32({5, 7}, fc_out_id)
      .AddOutputTensorF32({5, 7}, output_id)
      .AddFullyConnected(input_id, filter_id, bias_id, fc_out_id)
      .AddClamp(clamp_min, clamp_max, fc_out_id, output_id);
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  // Only the fully-connected operator survives; the Clamp node is invalidated.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(reference_output, fused_output);
}
// Verifies that Multiply + Clamp is fused into a single Multiply node
// carrying the clamp's activation bounds.
TEST(MULTIPLY_THEN_CLAMP, fusion) {
  RuntimeTester tester(4);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  constexpr uint32_t input1_id = 0;
  constexpr uint32_t input2_id = 1;
  constexpr uint32_t multiply_out_id = 2;
  constexpr uint32_t output_id = 3;
  tester.AddInputTensorF32({1, 2, 2, 3}, input1_id)
      .AddInputTensorF32({1, 2, 2, 3}, input2_id)
      .AddDynamicTensorF32({1, 2, 2, 3}, multiply_out_id)
      .AddOutputTensorF32({1, 2, 2, 3}, output_id)
      .AddMultiply(input1_id, input2_id, multiply_out_id)
      .AddClamp(clamp_min, clamp_max, multiply_out_id, output_id);
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  // Only the Multiply operator survives; the Clamp node is invalidated.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(reference_output, fused_output);
}
// Verifies that MaxPooling2D + Clamp is fused into a single pooling node
// carrying the clamp's activation bounds.
TEST(MAX_POOLING_THEN_CLAMP, fusion) {
  RuntimeTester tester(3);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  constexpr uint32_t input_id = 0;
  constexpr uint32_t pooling_out_id = 1;
  constexpr uint32_t output_id = 2;
  // 2x2 pooling window, stride 1, dilation 1, no padding: 10x10 -> 9x9.
  tester.AddInputTensorF32({1, 10, 10, 3}, input_id)
      .AddDynamicTensorF32({1, 9, 9, 3}, pooling_out_id)
      .AddOutputTensorF32({1, 9, 9, 3}, output_id)
      .AddMaxPooling2D(0, 0, 0, 0, 2, 2, 1, 1, 1, 1, input_id, pooling_out_id)
      .AddClamp(clamp_min, clamp_max, pooling_out_id, output_id);
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  // Only the pooling operator survives; the Clamp node is invalidated.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(reference_output, fused_output);
}
// Verifies that Subtract + Clamp is fused into a single Subtract node
// carrying the clamp's activation bounds.
TEST(SUBTRACT_THEN_CLAMP, fusion) {
  RuntimeTester tester(4);
  const float clamp_min = -0.5f;
  const float clamp_max = 0.5f;
  constexpr uint32_t input1_id = 0;
  constexpr uint32_t input2_id = 1;
  constexpr uint32_t subtract_out_id = 2;
  constexpr uint32_t output_id = 3;
  tester.AddInputTensorF32({1, 2, 2, 3}, input1_id)
      .AddInputTensorF32({1, 2, 2, 3}, input2_id)
      .AddDynamicTensorF32({1, 2, 2, 3}, subtract_out_id)
      .AddOutputTensorF32({1, 2, 2, 3}, output_id)
      .AddSubtract(input1_id, input2_id, subtract_out_id)
      .AddClamp(clamp_min, clamp_max, subtract_out_id, output_id);
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  // Only the Subtract operator survives; the Clamp node is invalidated.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, clamp_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, clamp_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(reference_output, fused_output);
}
// Verifies that a zero-valued ConstantPad restricted to the H/W dimensions is
// folded into the following Convolution2D's input padding, eliminating the
// pad operator.
TEST(CONSTANT_PAD_THEN_CONVOLUTION, fusion) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  // Padding only in H (pre 2 / post 6) and W (pre 4 / post 8); N and C stay 0,
  // which is the precondition for folding the pad into the convolution.
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;
  // Padded input: 254+2+6 = 262 (H), 254+4+8 = 266 (W).
  // NOTE(review): the declared conv output {1, 131, 133, 32} looks one larger
  // per spatial dim than floor((in - 3)/2) + 1 would give for the 3x3/stride-2
  // conv below — presumably the tester/runtime reshapes or tolerates oversized
  // declared shapes; confirm against RuntimeTester.
  tester
      .AddInputTensorF32({1, 254, 254, 3}, input_id)
      .AddDynamicTensorF32({1, 262, 266, 3}, intermediate_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({1, 131, 133, 32}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
      .AddConvolution2D(
          ConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          }, intermediate_id, filter_id, bias_id, output_id);
  // Reference run with both operators intact.
  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  // After fusion the pad node (node 0) is invalidated and the convolution
  // (node 1) carries the H/W pre/post paddings as its own input padding.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_top, 2);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_left, 4);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_right, 8);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_bottom, 6);
  ASSERT_EQ(tester.Node(1)->outputs[0], output_id);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
// A ConstantPad that also pads the batch (N) dimension cannot be folded into
// the following convolution's spatial padding, so both operators must survive
// optimization and the graph must still produce the unfused result.
TEST(CONSTANT_PAD_THEN_CONVOLUTION, not_fused_due_to_non_zero_padding_in_n_dimension) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  // Non-zero pre-padding in the N dimension blocks the fusion.
  size_t pre_paddings[4] = {1, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;
  tester
      .AddInputTensorF32({1, 254, 254, 3}, input_id)
      .AddDynamicTensorF32({2, 262, 266, 3}, intermediate_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({2, 131, 133, 32}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
      .AddConvolution2D(
          ConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          }, intermediate_id, filter_id, bias_id, output_id);
  // Mirror the structure of the CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION
  // "not_fused" tests: run unfused for a reference, then verify the fusion
  // pass leaves both operators in place (RunWithFusion already optimizes, so
  // no separate Optimize() call is needed) and does not change the output.
  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
// A ConstantPad with a non-zero padding value cannot be folded into the
// following convolution (convolution padding is implicitly zero), so both
// operators must survive optimization with unchanged results.
TEST(CONSTANT_PAD_THEN_CONVOLUTION, not_fused_due_to_padding_value_not_zero) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  // The non-zero padding value is what blocks the fusion.
  float padding_value = 1.0f;
  // No N padding is applied here, so the intermediate/output tensors use
  // batch 1 (the original declared batch 2, copy-pasted from the N-padding
  // test), matching the shapes of the fusion test above.
  tester
      .AddInputTensorF32({1, 254, 254, 3}, input_id)
      .AddDynamicTensorF32({1, 262, 266, 3}, intermediate_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({1, 131, 133, 32}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
      .AddConvolution2D(
          ConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          }, intermediate_id, filter_id, bias_id, output_id);
  // Mirror the structure of the CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION
  // "not_fused" tests: run unfused for a reference, then verify the fusion
  // pass leaves both operators in place (RunWithFusion already optimizes, so
  // no separate Optimize() call is needed) and does not change the output.
  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
// Verifies that a zero-valued ConstantPad restricted to the H/W dimensions is
// folded into the following DepthwiseConvolution2D's input padding,
// eliminating the pad operator.
TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION, fusion) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  // Padding only in H (pre 2 / post 6) and W (pre 4 / post 8); N and C stay 0,
  // which is the precondition for folding the pad into the convolution.
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;
  // Padded input: 128+2+6 = 136 (H), 128+4+8 = 140 (W).
  // NOTE(review): the declared output width 140 looks larger than 140-2 = 138
  // for the unpadded 3x3/stride-1 conv below — presumably the tester/runtime
  // reshapes or tolerates oversized declared shapes; confirm against
  // RuntimeTester.
  tester
      .AddInputTensorF32({1, 128, 128, 4}, input_id)
      .AddDynamicTensorF32({1, 136, 140, 4}, intermediate_id)
      .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({1, 134, 140, 4}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, intermediate_id)
      .AddDepthwiseConvolution2D(
          DepthwiseConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{1, 1},
              Dilation{1, 1},
              /*depth_multiplier=*/ 1,
              /*input_channels=*/ 4
          }, intermediate_id, filter_id, bias_id, output_id);
  // Reference run with both operators intact.
  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  // After fusion the pad node (node 0) is invalidated and the depthwise
  // convolution (node 1) carries the H/W pre/post paddings as its own input
  // padding.
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_top, 2);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_left, 4);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_right, 8);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_bottom, 6);
  ASSERT_EQ(tester.Node(1)->outputs[0], output_id);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
// A ConstantPad that also pads the batch (N) dimension cannot be folded into
// the depthwise convolution's spatial padding: both operators must survive
// optimization and the output must match the unfused graph.
TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION, not_fused_due_to_non_zero_padding_in_n_dimension) {
  RuntimeTester tester(5);
  constexpr uint32_t input_id = 0;
  constexpr uint32_t padded_id = 1;
  constexpr uint32_t filter_id = 2;
  constexpr uint32_t bias_id = 3;
  constexpr uint32_t output_id = 4;
  // Non-zero pre-padding in the N dimension blocks the fusion.
  size_t pre_paddings[4] = {1, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  const float padding_value = 0.0f;
  tester.AddInputTensorF32({1, 128, 128, 4}, input_id)
      .AddDynamicTensorF32({2, 136, 140, 4}, padded_id)
      .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({2, 134, 140, 4}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, padded_id)
      .AddDepthwiseConvolution2D(
          DepthwiseConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{1, 1},
              Dilation{1, 1},
              /*depth_multiplier=*/ 1,
              /*input_channels=*/ 4
          }, padded_id, filter_id, bias_id, output_id);
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  // Fusion must be rejected: both operators remain and results are unchanged.
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(reference_output, fused_output);
}
// A ConstantPad with a non-zero padding value cannot be folded into the
// depthwise convolution (its implicit padding is zero): both operators must
// survive optimization and the output must match the unfused graph.
TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION, not_fused_due_to_padding_value_not_zero) {
  RuntimeTester tester(5);
  constexpr uint32_t input_id = 0;
  constexpr uint32_t padded_id = 1;
  constexpr uint32_t filter_id = 2;
  constexpr uint32_t bias_id = 3;
  constexpr uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  // The non-zero padding value is what blocks the fusion.
  const float padding_value = 1.0f;
  tester.AddInputTensorF32({1, 128, 128, 4}, input_id)
      .AddDynamicTensorF32({1, 136, 140, 4}, padded_id)
      .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({1, 134, 140, 4}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id, padded_id)
      .AddDepthwiseConvolution2D(
          DepthwiseConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{1, 1},
              Dilation{1, 1},
              /*depth_multiplier=*/ 1,
              /*input_channels=*/ 4
          }, padded_id, filter_id, bias_id, output_id);
  std::vector<float> reference_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  std::vector<float> fused_output = tester.RunWithFusion<float>();
  // Fusion must be rejected: both operators remain and results are unchanged.
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(reference_output, fused_output);
}
} // namespace xnnpack