| // Copyright 2015 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // unpack.h: unpacking the result blocks computed by compute.h, |
| // storing them into the destination matrix. |
| |
| #ifndef GEMMLOWP_INTERNAL_UNPACK_H_ |
| #define GEMMLOWP_INTERNAL_UNPACK_H_ |
| |
| #include "internal/allocator.h" |
| #include "internal/block_params.h" |
| #include "internal/pack.h" |
| |
| #ifdef GEMMLOWP_NEON |
| #include "internal/unpack_neon.h" |
| #endif |
| |
| namespace gemmlowp { |
| |
| class PackedResultInt32 { |
| Allocator* allocator_; |
| Allocator::Handle matrix_handle_; |
| const BlockParams& block_params_; |
| |
| public: |
| PackedResultInt32(Allocator* _allocator, const BlockParams& _block_params) |
| : allocator_(_allocator), block_params_(_block_params) { |
| matrix_handle_ = allocator_->Reserve<std::int32_t>(block_params_.l2_rows * |
| block_params_.l2_cols); |
| } |
| |
| ~PackedResultInt32() {} |
| |
| MatrixMap<std::int32_t, MapOrder::ColMajor> Map() { |
| return MatrixMap<std::int32_t, MapOrder::ColMajor>( |
| allocator_->GetPointer<std::int32_t>(matrix_handle_), |
| block_params_.l2_rows, block_params_.l2_cols, block_params_.l2_rows); |
| } |
| |
| MatrixMap<const std::int32_t, MapOrder::ColMajor> Map() const { |
| return MatrixMap<const std::int32_t, MapOrder::ColMajor>( |
| allocator_->GetPointer<const std::int32_t>(matrix_handle_), |
| block_params_.l2_rows, block_params_.l2_cols, block_params_.l2_rows); |
| } |
| }; |
| |
| template <typename ResultBlockType, typename PackedResult, |
| typename KernelLhsFormat, typename KernelRhsFormat> |
| void UnpackResultImpl(ResultBlockType* dst, const PackedResult& src, |
| const PackedSideBlock<KernelLhsFormat>& packed_lhs, |
| const PackedSideBlock<KernelRhsFormat>& packed_rhs, |
| int depth, std::int32_t result_offset, |
| std::int32_t result_mult_int, std::int32_t result_shift) { |
| std::int32_t rank0update = packed_lhs.rank_one_update_multiplier() * |
| packed_rhs.rank_one_update_multiplier() * depth; |
| // No top-level blocking in the depth dimension at the moment. |
| // Too much loss of precision. |
| for (int c = 0; c < dst->cols(); c++) { |
| for (int r = 0; r < dst->rows(); r++) { |
| std::int32_t q = *src.data(r, c); |
| q += packed_lhs.rank_one_update()[r] + packed_rhs.rank_one_update()[c] + |
| rank0update; |
| q = ((q + result_offset) * result_mult_int + (1 << (result_shift - 1))) >> |
| result_shift; |
| (*dst)(r, c) = q > 255 ? 255 : q < 0 ? 0 : q; |
| } |
| } |
| } |
| |
| template <typename ResultBlockType, typename PackedResult, |
| typename KernelLhsFormat, typename KernelRhsFormat> |
| void UnpackResult(ResultBlockType* dst, const PackedResult& src, |
| const PackedSideBlock<KernelLhsFormat>& packed_lhs, |
| const PackedSideBlock<KernelRhsFormat>& packed_rhs, int depth, |
| std::int32_t result_offset, std::int32_t result_mult_int, |
| std::int32_t result_shift) { |
| ScopedProfilingLabel label("unpack"); |
| #ifdef GEMMLOWP_NEON |
| UnpackResultImplNEON(dst, src.Map(), packed_lhs, packed_rhs, depth, |
| result_offset, result_mult_int, result_shift); |
| #else |
| UnpackResultImpl(dst, src.Map(), packed_lhs, packed_rhs, depth, result_offset, |
| result_mult_int, result_shift); |
| #endif |
| } |
| |
| } // namespace gemmlowp |
| |
| #endif // GEMMLOWP_INTERNAL_UNPACK_H_ |