#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/Dispatch.h>
#include <ATen/TensorUtils.h>

#include <ATen/native/im2col.h>
#include <ATen/native/im2col_shape_check.h>
#include <c10/util/irange.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/col2im_native.h>
#include <ATen/ops/empty_like.h>
#include <ATen/ops/im2col_native.h>
#endif

// Note [im2col/col2im output padding]
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Our implementations of im2col and col2im take both the input height/width as
// well as a seemingly redundant output height/width. In principle, you could
// compute the output height/width by using the convolution shape formulas. So,
// what's up with that?
//
// The trouble arises when one runs the backward of a transposed convolution
// with output_padding >= stride. (BTW, output_padding is known as adj inside
// THNN.) Let's consider a simple case with kernel=2, dilation=2, stride=1,
// output_padding=1 on a 1x1 input, which produces a 4x4 output:
//
// Input:  X
//
// Output: X.X.
//         ....
//         X.X.
//         ....
//
// If we compute the backward pass by running a standard convolution over this
// output with the same parameters, we end up with a 2x2 grad_input (because
// you can slide the stencil to the right once and down once). But that is all
// out of bounds if you are computing the backward for a 1x1 input.
//
// "Now Edward," you might say, "the real problem is that you set output_padding
// >= stride, surely an error should have been raised in this case." To
// understand why it is useful to handle this case, we have to understand how we
// compute the weight gradient of a convolution. Suppose we have a convolution
// with kernel=2, stride=2 on a 5x5 input. Let us see all the contributions of
// weight[0][0] (which we have labeled w) in the output:
//
// Input:  a.b..    Weight: w.
//         .....            ..
//         c.d..
//         .....
//         .....
//
// Output: [ aw+... bw+... ]
//         [ cw+... dw+... ]
//
// From this diagram, it is easy to see that we can compute the weight gradient
// by performing a *dilated* convolution between the input and the
// output gradients with kernel=2, dilation=2, stride=1. But there's a rub: if
// we do a dilated convolution directly, we'll end up with a 3x3 weight
// gradient, when we clearly wanted a 2x2. So how do we avoid going out
// of bounds? We could add a notion of 'output_padding' for non-transposed
// convolution, but another simple and effective fix is to just accept
// the desired output size directly, and compute only within those bounds.
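//
// As a concrete check (a worked example with the numbers above, nothing the
// code below relies on): the usual convolution output-size formula
//
//   out = (in + 2 * pad - (dilation * (kernel - 1) + 1)) / stride + 1
//
// applied to the 5x5 input with kernel=2, dilation=2, stride=1, pad=0 gives
// (5 + 0 - 3) / 1 + 1 = 3, i.e. a 3x3 result, whereas the weight gradient we
// actually want is 2x2. Accepting the desired output height/width lets the
// kernel stop at those 2x2 bounds instead of deriving 3x3 from the formula.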
//
//
// ALSO do vol2col

namespace at::native {
namespace {

static void col2im_out_cpu_template(
    Tensor& output,
    const Tensor& input_,
    IntArrayRef output_size,
    IntArrayRef kernel_size,
    IntArrayRef dilation,
    IntArrayRef padding,
    IntArrayRef stride) {
  TORCH_CHECK(
      output_size.size() == 2,
      "Expected output_size to have 2 elements, but got size ",
      output_size.size());

  TORCH_CHECK(
      kernel_size.size() == 2,
      "Expected kernel_size to have 2 elements, but got size ",
      kernel_size.size());

  TORCH_CHECK(
      dilation.size() == 2,
      "Expected dilation to have 2 elements, but got size ",
      dilation.size());

  TORCH_CHECK(
      padding.size() == 2,
      "Expected padding to have 2 elements, but got size ",
      padding.size());

  TORCH_CHECK(
      stride.size() == 2,
      "Expected stride to have 2 elements, but got size ",
      stride.size());

  int64_t output_height = output_size[0];
  int64_t output_width = output_size[1];
  int64_t kernel_height = kernel_size[0];
  int64_t kernel_width = kernel_size[1];
  int64_t dilation_height = dilation[0];
  int64_t dilation_width = dilation[1];
  int64_t pad_height = padding[0];
  int64_t pad_width = padding[1];
  int64_t stride_height = stride[0];
  int64_t stride_width = stride[1];

  col2im_shape_check(
      input_,
      Tensor(),
      output_height,
      output_width,
      kernel_height,
      kernel_width,
      dilation_height,
      dilation_width,
      pad_height,
      pad_width,
      stride_height,
      stride_width);

  Tensor input = input_.contiguous();

  bool batched_input = true;
  if (input.dim() == 2) {
    // Force batch
    batched_input = false;
    input = input.view({1, input.size(0), input.size(1)});
  }

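  // The (possibly batched) col2im input is laid out as
  // (batch_size, n_output_plane * kernel_height * kernel_width, n_blocks),
  // so dividing out the kernel footprint recovers the number of output
  // channels.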
  int64_t batch_size = input.size(0);
  int64_t n_input_plane = input.size(1);
  int64_t n_output_plane = n_input_plane / (kernel_width * kernel_height);

  output.resize_({batch_size, n_output_plane, output_height, output_width});

  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kBFloat16, kHalf,
      input.scalar_type(), "col2im_out_cpu", [&] {
        Tensor input_n = Tensor();
        Tensor output_n = Tensor();

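        // height_col/width_col are the number of sliding-block positions along
        // each spatial dimension, obtained from the image size via the
        // standard convolution output-size formula.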
        int64_t height_col = (output_height + 2 * pad_height -
                              (dilation_height * (kernel_height - 1) + 1)) /
                stride_height +
            1;
        int64_t width_col = (output_width + 2 * pad_width -
                             (dilation_width * (kernel_width - 1) + 1)) /
                stride_width +
            1;

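        // Fold each sample of the batch independently; col2im accumulates the
        // contribution of every sliding block into the corresponding output
        // image, summing where blocks overlap.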
        for (const auto elt : c10::irange(batch_size)) {
          input_n = input.select(0, elt);
          output_n = output.select(0, elt);

          col2im<scalar_t>(
              input_n.const_data_ptr<scalar_t>(),
              n_output_plane,
              output_height,
              output_width,
              height_col,
              width_col,
              kernel_height,
              kernel_width,
              pad_height,
              pad_width,
              stride_height,
              stride_width,
              dilation_height,
              dilation_width,
              output_n.mutable_data_ptr<scalar_t>());
        }

        if (!batched_input) {
          output.resize_({n_output_plane, output_height, output_width});
        }
      });
}

} // namespace

Tensor& col2im_out_cpu(const Tensor& input,
    IntArrayRef output_size,
    IntArrayRef kernel_size,
    IntArrayRef dilation,
    IntArrayRef padding,
    IntArrayRef stride,
    Tensor& output) {
  col2im_out_cpu_template(
      output, input, output_size, kernel_size, dilation, padding, stride);
  return output;
}

Tensor col2im_cpu(
    const Tensor& input,
    IntArrayRef output_size,
    IntArrayRef kernel_size,
    IntArrayRef dilation,
    IntArrayRef padding,
    IntArrayRef stride) {
  Tensor output = at::empty_like(input, LEGACY_CONTIGUOUS_MEMORY_FORMAT);

  col2im_out_cpu_template(
      output, input, output_size, kernel_size, dilation, padding, stride);
  return output;
}
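
// A minimal usage sketch (illustrative only; the concrete 3-channel 4x4 image
// and 2x2 kernel are assumptions for the example, not something this file
// requires): unfolding that image with kernel 2x2, stride 1, no padding and no
// dilation via im2col yields a (3*2*2) x 9 matrix `cols`, and
//
//   at::col2im(cols, /*output_size=*/{4, 4}, /*kernel_size=*/{2, 2},
//              /*dilation=*/{1, 1}, /*padding=*/{0, 0}, /*stride=*/{1, 1});
//
// folds it back to a 3x4x4 image. Because overlapping block positions are
// summed, col2im(im2col(x)) is not the identity unless every pixel is covered
// exactly once.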

} // namespace at::native