#ifndef LSTM_OP_H_
#define LSTM_OP_H_

#include <algorithm>
#include <sstream>
#include <unordered_map>
#include <vector>
#include "caffe2/core/blob_serialization.h"
#include "caffe2/core/export_caffe2_op_to_c10.h"
#include <c10/util/irange.h>
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"
#include "lstm_utils.h"

C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(LSTMOp);

namespace caffe2 {
namespace {

using t_tuple = std::tuple<Tensor, Tensor>;

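// Weights and (optional) biases for a single LSTM cell, plus the CPU context
// used to evaluate its affine transforms. Tensors are deep-copied on
// construction and assignment via copy_ctor.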
struct CellParams {
  CellParams(
      const Tensor& _w_ih,
      const Tensor& _w_hh,
      const Tensor& _b_ih,
      const Tensor& _b_hh,
      CPUContext* _context) {
    initParams(_w_ih, _w_hh, _b_ih, _b_hh, _context);
  }

  CellParams(const CellParams& rhs) {
    initParams(rhs.w_ih, rhs.w_hh, rhs.b_ih, rhs.b_hh, rhs.context);
  }

  CellParams& operator=(const CellParams& rhs) {
    initParams(rhs.w_ih, rhs.w_hh, rhs.b_ih, rhs.b_hh, rhs.context);
    return *this;
  }

  void initParams(
      const Tensor& _w_ih,
      const Tensor& _w_hh,
      const Tensor& _b_ih,
      const Tensor& _b_hh,
      CPUContext* _context) {
    w_ih = copy_ctor(_w_ih);
    w_hh = copy_ctor(_w_hh);
    b_ih = copy_ctor(_b_ih);
    b_hh = copy_ctor(_b_hh);
    context = _context;
  }

  Tensor w_ih;
  Tensor w_hh;
  Tensor b_ih; /* optional */
  Tensor b_hh; /* optional */
  CPUContext* context;

  Tensor linear_ih(const Tensor& input) const {
    return linear(input, w_ih, b_ih, context);
  }
  Tensor linear_hh(const Tensor& h) const {
    return linear(h, w_hh, b_hh, context);
  }
};

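// A single LSTM time step. The concatenated gate pre-activations are chunked
// into four pieces and combined as
//   i = sigmoid(g0), f = sigmoid(g1), g = tanh(g2), o = sigmoid(g3)
//   cy = f * cx + i * g
//   hy = o * tanh(cy)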
struct LSTMCell {
  explicit LSTMCell(CPUContext* context) : context_(context) {}
  t_tuple operator()(
      const Tensor& input,
      const t_tuple& hidden,
      const CellParams& params) const {
    const auto& hx = std::get<0>(hidden);
    const auto& cx = std::get<1>(hidden);
    auto linear_ih = params.linear_ih(input);
    auto linear_hh = params.linear_hh(hx);
    auto gates = add(linear_ih, linear_hh, context_);
    auto chunked_gates = chunk(gates, 4, 1, context_);
    auto ingate = sigmoid(chunked_gates[0]);
    auto forgetgate = sigmoid(chunked_gates[1]);
    auto cellgate = tanh(chunked_gates[2], context_);
    auto outgate = sigmoid(chunked_gates[3]);

    auto cy =
        add(mul(forgetgate, cx, context_),
            mul(ingate, cellgate, context_),
            context_);
    auto hy = mul(outgate, tanh(cy, context_), context_);
    return std::make_tuple(std::move(hy), std::move(cy));
  }
  CPUContext* context_;
};

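// Pairs the output sequence of a layer with the layer's final hidden state.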
template <typename output_type, typename hidden_type>
struct LayerOutput {
  output_type outputs;
  hidden_type final_hidden;

  LayerOutput(const output_type& _outputs, const hidden_type& _hidden) {
    outputs = copy_ctor(_outputs);
    final_hidden = copy_ctor(_hidden);
  }
};

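// Abstract interface for a recurrent layer applied to a whole sequence.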
template <typename hidden_type, typename param_type>
struct Layer {
  using output_type = LayerOutput<Tensor, hidden_type>;
  virtual ~Layer() {}
  virtual output_type operator()(
      const Tensor& input,
      const hidden_type& input_hidden,
      const param_type& params) const = 0;
};

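// Unidirectional LSTM layer: unrolls LSTMCell over the time dimension
// (dim 0 of the input) and stacks the per-step hidden states as the output.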
struct FullLSTMLayer : Layer<t_tuple, CellParams> {
  FullLSTMLayer(LSTMCell& cell, CPUContext* context)
      : cell_(cell), context_(context) {}

  LayerOutput<std::vector<Tensor>, t_tuple> operator()(
      const std::vector<Tensor>& step_inputs,
      const std::tuple<Tensor, Tensor>& input_hidden,
      const CellParams& params) const {
    std::vector<Tensor> step_outputs;
    auto hidden = copy_ctor(input_hidden);

    for (const auto i : c10::irange(step_inputs.size())) {
      hidden = cell_(step_inputs[i], hidden, params);
      step_outputs.push_back(copy_ctor(std::get<0>(hidden)));
    }

    return {step_outputs, hidden};
  }

  LayerOutput<Tensor, t_tuple> operator()(
      const Tensor& inputs,
      const std::tuple<Tensor, Tensor>& input_hidden,
      const CellParams& params) const override {
    auto unstacked_output =
        (*this)(unbind(inputs, 0, context_), input_hidden, params);
    return {stack(unstacked_output.outputs, 0, context_),
            unstacked_output.final_hidden};
  }
  LSTMCell cell_;
  CPUContext* context_;
};

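// Bidirectional LSTM layer: runs a forward pass and a time-reversed pass
// through FullLSTMLayer and concatenates the two output sequences along the
// last (feature) dimension.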
struct FullBidirectionalLSTMLayer
    : Layer<std::pair<t_tuple, t_tuple>, std::pair<CellParams, CellParams>> {
  using bidir_hidden_type = std::pair<t_tuple, t_tuple>;
  using param_type = std::pair<CellParams, CellParams>;
  using output_type = LayerOutput<Tensor, bidir_hidden_type>;

  FullBidirectionalLSTMLayer(LSTMCell& cell, CPUContext* context)
      : layer_(cell, context), context_(context) {}

  output_type operator()(
      const Tensor& input,
      const bidir_hidden_type& input_hidden,
      const param_type& params) const override {
    std::vector<Tensor> outputs;
    auto step_inputs = unbind(input, 0, context_);
    auto fw_result = layer_(step_inputs, input_hidden.first, params.first);
    auto fw_output = stack(fw_result.outputs, 0, context_);
    outputs.push_back(copy_ctor(fw_output));
    auto rev_step_inputs = reverse(std::move(step_inputs));
    auto rev_result =
        layer_(rev_step_inputs, input_hidden.second, params.second);
    std::reverse(rev_result.outputs.begin(), rev_result.outputs.end());
    auto rev_output = stack(rev_result.outputs, 0, context_);
    outputs.push_back(copy_ctor(rev_output));
    return {cat(outputs, fw_output.dim() - 1, context_),
            std::make_pair(
                std::move(fw_result.final_hidden),
                std::move(rev_result.final_hidden))};
  }

  inline std::vector<Tensor> reverse(std::vector<Tensor>&& x) const {
    std::reverse(x.begin(), x.end());
    return std::move(x);
  }

 private:
  FullLSTMLayer layer_;
  CPUContext* context_;
};

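// Applies `num_layers` stacked layers, feeding each layer's output sequence
// as the next layer's input, and collects every layer's final hidden state.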
template <typename hidden_type, typename weight_type>
LayerOutput<Tensor, std::vector<hidden_type>> apply_layer_stack(
    const Layer<hidden_type, weight_type>& layer,
    const Tensor& input,
    const std::vector<hidden_type>& hiddens,
    const std::vector<weight_type>& weights,
    int64_t num_layers) {
  CAFFE_ENFORCE(
      num_layers == static_cast<int64_t>(hiddens.size()),
      "Number of hidden states must match num_layers in stacked_rnn");
  CAFFE_ENFORCE(
      num_layers == static_cast<int64_t>(weights.size()),
      "Number of weight sets must match num_layers in stacked_rnn");

  auto layer_input = input.UnsafeSharedInstance();
  auto hidden_it = hiddens.begin();
  auto weight_it = weights.begin();
  std::vector<hidden_type> final_hiddens(num_layers);
  for (const auto l : c10::irange(num_layers)) {
    auto layer_output = layer(layer_input, *(hidden_it++), *(weight_it++));
    final_hiddens.at(l) = std::move(layer_output.final_hidden);
    layer_input = std::move(layer_output.outputs);
  }
  return {layer_input, final_hiddens};
}

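// Multi-layer (optionally bidirectional) LSTM inference over a full input
// sequence. Returns (output sequence, stacked final hidden states hy,
// stacked final cell states cy).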
std::tuple<Tensor, Tensor, Tensor> _lstm_impl(
    const Tensor& input,
    const std::vector<CellParams>& params,
    const Tensor& hx,
    const Tensor& cx,
    int64_t num_layers,
    bool bidirectional,
    CPUContext* context) {
  using stack_output = LayerOutput<Tensor, std::vector<t_tuple>>;
  auto layer_hx = unbind(hx, 0, context);
  auto layer_cx = unbind(cx, 0, context);
  int64_t total_layers = layer_hx.size();
  std::vector<std::tuple<Tensor, Tensor>> hiddens;
  hiddens.reserve(total_layers);
  for (const auto i : c10::irange(total_layers)) {
    hiddens.emplace_back(std::move(layer_hx[i]), std::move(layer_cx[i]));
  }
  LSTMCell cell(context);
  std::shared_ptr<stack_output> stack_output_ptr;
  if (bidirectional) {
    auto bidir_result = apply_layer_stack(
        FullBidirectionalLSTMLayer{cell, context},
        input,
        pair_vec(hiddens),
        pair_vec(params),
        num_layers);
    stack_output_ptr.reset(new stack_output(
        bidir_result.outputs,
        unpair_vec(std::move(bidir_result.final_hidden))));
  } else {
    auto result = apply_layer_stack(
        FullLSTMLayer{cell, context}, input, hiddens, params, num_layers);
    stack_output_ptr = std::make_shared<stack_output>(std::move(result));
  }

  std::vector<Tensor> hy, cy;
  hy.reserve(total_layers);
  cy.reserve(total_layers);
  for (auto& hidden : stack_output_ptr->final_hidden) {
    hy.push_back(std::move(std::get<0>(hidden)));
    cy.push_back(std::move(std::get<1>(hidden)));
  }
  return std::make_tuple(
      std::move(stack_output_ptr->outputs),
      stack(hy, 0, context),
      stack(cy, 0, context));
}

// Parses a flat list of parameter tensors into per-cell CellParams. With
// biases, tensors are grouped as (w_ih, w_hh, b_ih, b_hh) per cell; without
// biases, as (w_ih, w_hh), with undefined tensors used for the biases.
std::vector<CellParams> gather_params(
    const std::vector<Tensor>& params,
    bool has_biases,
    CPUContext* context) {
  Tensor undefined;
  std::vector<CellParams> result;
  if (has_biases) {
    CAFFE_ENFORCE_EQ(
        params.size() % 4,
        0,
        "Expected a multiple of 4 LSTM parameter tensors "
        "(w_ih, w_hh, b_ih, b_hh per cell)");
    for (size_t i = 0; i < params.size(); i += 4) {
      result.emplace_back(
          params[i], params[i + 1], params[i + 2], params[i + 3], context);
    }
  } else {
    CAFFE_ENFORCE_EQ(
        params.size() % 2,
        0,
        "Expected a multiple of 2 LSTM parameter tensors "
        "(w_ih, w_hh per cell)");
    for (size_t i = 0; i < params.size(); i += 2) {
      result.emplace_back(
          params[i], params[i + 1], undefined, undefined, context);
    }
  }
  return result;
}

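// Inference-only LSTM operator for CPU. Configured through the "num_layers",
// "bidirectional", "has_biases" and "batch_first" arguments; RunOnDevice() is
// declared here and defined in the operator's implementation file.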
class InferenceLSTMOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit InferenceLSTMOp(Args&&... args)
      : Operator(std::forward<Args>(args)...),
        num_layers_(this->template GetSingleArgument<int64_t>("num_layers", 1)),
        bidirectional_(
            this->template GetSingleArgument<bool>("bidirectional", false)),
        has_biases_(this->template GetSingleArgument<bool>("has_biases", true)),
        batch_first_(
            this->template GetSingleArgument<bool>("batch_first", false)) {}

  bool RunOnDevice() override;

 protected:
  int64_t num_layers_;
  bool bidirectional_;
  bool has_biases_;
  bool batch_first_;
};

} // namespace
} // namespace caffe2
#endif // LSTM_OP_H_