blob: 263c96b68572efd42ce837aae055ee71a089e094 [file] [log] [blame]
#include "caffe2/operators/unsafe_coalesce.h"
namespace caffe2 {
// Schema for UnsafeCoalesce: declares the operator's arity constraint, which
// input/output pairs may share storage, and its user-facing documentation.
OPERATOR_SCHEMA(UnsafeCoalesce)
    // Arity check: there must be exactly one more output than inputs
    // (per the doc below: N aliased outputs plus the single coalesced blob).
    .NumInputsOutputs([](int inputs, int outputs) {
      return inputs + 1 == outputs;
    })
    // In-place is permitted only between an input and the output that has
    // the same index.
    .AllowInplace([](int input, int output) { return input == output; })
    .SetDoc(R"DOC(
Coalesce the N inputs into N outputs and a single coalesced output blob.
This allows operations that operate over multiple small kernels (e.g.
biases in a deep CNN) to be coalesced into a single larger operation,
amortizing the kernel launch overhead, synchronization costs for
distributed computation, etc.
The operator:
- computes the total size of the coalesced blob by summing the input sizes
- allocates the coalesced output blob as the total size
- copies the input vectors into the coalesced blob, at the correct offset.
- aliases each Output(i) to point into the coalesced blob, at the corresponding offset for Input(i).
This is 'unsafe' as the output vectors are aliased, so use with
caution.
)DOC");
// Register the CPUContext instantiation of UnsafeCoalesceOp under the
// operator name UnsafeCoalesce.
REGISTER_CPU_OPERATOR(UnsafeCoalesce, UnsafeCoalesceOp<CPUContext>);
} // namespace caffe2