// internal/unpack.h - platform/external/gemmlowp - Git at Google

 // Copyright 2015 Google Inc. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 // unpack.h: unpacking the result blocks computed by compute.h,
 // storing them into the destination matrix.

 #ifndef GEMMLOWP_INTERNAL_UNPACK_H_
 #define GEMMLOWP_INTERNAL_UNPACK_H_

 #include "internal/allocator.h"
 #include "internal/block_params.h"
 #include "internal/pack.h"

 #ifdef GEMMLOWP_NEON
 #include "internal/unpack_neon.h"
 #endif

 namespace gemmlowp {

 // Block of std::int32_t results whose storage is reserved from an Allocator.
 //
 // The constructor reserves room for l2_rows * l2_cols entries (both read from
 // the BlockParams) and keeps the returned handle. Only a reference to the
 // BlockParams is stored, so the object passed to the constructor must outlive
 // this one.
 class PackedResultInt32 {
  public:
   PackedResultInt32(Allocator* _allocator, const BlockParams& _block_params)
       : allocator_(_allocator), block_params_(_block_params) {
     const auto entry_count = block_params_.l2_rows * block_params_.l2_cols;
     matrix_handle_ = allocator_->Reserve<std::int32_t>(entry_count);
   }

   ~PackedResultInt32() {}

   // Returns a mutable column-major map over the reserved entries, built from
   // (pointer, l2_rows, l2_cols, l2_rows). The last argument is presumably the
   // stride between columns -- confirm against MatrixMap's constructor.
   MatrixMap<std::int32_t, MapOrder::ColMajor> Map() {
     typedef MatrixMap<std::int32_t, MapOrder::ColMajor> ResultMap;
     auto storage = allocator_->GetPointer<std::int32_t>(matrix_handle_);
     const auto rows = block_params_.l2_rows;
     const auto cols = block_params_.l2_cols;
     return ResultMap(storage, rows, cols, rows);
   }

   // Read-only counterpart of Map(): same dimensions, const element type.
   MatrixMap<const std::int32_t, MapOrder::ColMajor> Map() const {
     typedef MatrixMap<const std::int32_t, MapOrder::ColMajor> ConstResultMap;
     auto storage = allocator_->GetPointer<const std::int32_t>(matrix_handle_);
     const auto rows = block_params_.l2_rows;
     const auto cols = block_params_.l2_cols;
     return ConstResultMap(storage, rows, cols, rows);
   }

  private:
   // Allocator the storage was reserved from; also used to resolve the handle.
   Allocator* allocator_;
   // Handle returned by Allocator::Reserve for the result entries.
   Allocator::Handle matrix_handle_;
   // Source of the l2_rows / l2_cols dimensions (held by reference).
   const BlockParams& block_params_;
 };

 // Generic (non-SIMD) unpacking of an int32 result block into |dst|.
 //
 // For every entry (r, c) of |dst| this computes
 //
 //   q  = *src.data(r, c)
 //      + packed_lhs.rank_one_update()[r]
 //      + packed_rhs.rank_one_update()[c]
 //      + rank0update
 //   q  = ((q + result_offset) * result_mult_int + rounding) >> result_shift
 //   dst(r, c) = q clamped to [0, 255]
 //
 // where rank0update is the product of both rank_one_update_multiplier()
 // values and |depth|, and rounding is 2^(result_shift - 1), or 0 when
 // result_shift < 1.
 //
 // Parameters:
 //   dst             destination block; its rows()/cols() drive the loops.
 //   src             accumulators, read through src.data(r, c).
 //   packed_lhs/rhs  provide the per-row / per-column correction terms.
 //   depth           scales the constant correction term.
 //   result_offset, result_mult_int, result_shift
 //                   offset, integer multiplier and right shift applied to
 //                   each corrected accumulator. result_shift must be
 //                   non-negative.
 template <typename ResultBlockType, typename PackedResult,
           typename KernelLhsFormat, typename KernelRhsFormat>
 void UnpackResultImpl(ResultBlockType* dst, const PackedResult& src,
                       const PackedSideBlock<KernelLhsFormat>& packed_lhs,
                       const PackedSideBlock<KernelRhsFormat>& packed_rhs,
                       int depth, std::int32_t result_offset,
                       std::int32_t result_mult_int, std::int32_t result_shift) {
   const std::int32_t rank0update = packed_lhs.rank_one_update_multiplier() *
                                    packed_rhs.rank_one_update_multiplier() *
                                    depth;
   // Round-to-nearest term for the final right shift. The expression
   // `1 << (result_shift - 1)` has undefined behavior when result_shift is 0
   // (negative shift count), so no rounding term is added in that case; it is
   // also loop-invariant, hence computed once here.
   const std::int32_t rounding_term =
       (result_shift < 1) ? 0 : (1 << (result_shift - 1));
   // No top-level blocking in the depth dimension at the moment.
   // Too much loss of precision.
   for (int c = 0; c < dst->cols(); c++) {
     for (int r = 0; r < dst->rows(); r++) {
       std::int32_t q = *src.data(r, c);
       q += packed_lhs.rank_one_update()[r] + packed_rhs.rank_one_update()[c] +
            rank0update;
       q = ((q + result_offset) * result_mult_int + rounding_term) >>
           result_shift;
       // Saturate to the [0, 255] range before storing.
       (*dst)(r, c) = q > 255 ? 255 : (q < 0 ? 0 : q);
     }
   }
 }

 // Entry point for unpacking a result block.
 //
 // Creates a ScopedProfilingLabel named "unpack" that lives for the duration
 // of the call, maps |src| via src.Map(), and forwards the map together with
 // all remaining arguments, unchanged, to UnpackResultImplNEON when
 // GEMMLOWP_NEON is defined and to UnpackResultImpl otherwise.
 template <typename ResultBlockType, typename PackedResult,
           typename KernelLhsFormat, typename KernelRhsFormat>
 void UnpackResult(ResultBlockType* dst, const PackedResult& src,
                   const PackedSideBlock<KernelLhsFormat>& packed_lhs,
                   const PackedSideBlock<KernelRhsFormat>& packed_rhs, int depth,
                   std::int32_t result_offset, std::int32_t result_mult_int,
                   std::int32_t result_shift) {
   ScopedProfilingLabel label("unpack");
   // Map the packed result once; both implementations consume the same view.
   const auto& mapped_src = src.Map();
 #ifdef GEMMLOWP_NEON
   UnpackResultImplNEON(dst, mapped_src, packed_lhs, packed_rhs, depth,
                        result_offset, result_mult_int, result_shift);
 #else
   UnpackResultImpl(dst, mapped_src, packed_lhs, packed_rhs, depth,
                    result_offset, result_mult_int, result_shift);
 #endif
 }

 }  // namespace gemmlowp

 #endif  // GEMMLOWP_INTERNAL_UNPACK_H_
	// Copyright 2015 Google Inc. All Rights Reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	// unpack.h: unpacking the result blocks computed by compute.h,
	// storing them into the destination matrix.

	#ifndef GEMMLOWP_INTERNAL_UNPACK_H_
	#define GEMMLOWP_INTERNAL_UNPACK_H_

	#include "internal/allocator.h"
	#include "internal/block_params.h"
	#include "internal/pack.h"

	#ifdef GEMMLOWP_NEON
	#include "internal/unpack_neon.h"
	#endif

	namespace gemmlowp {

	// Block of std::int32_t results whose storage is reserved from an Allocator.
	//
	// The constructor reserves room for l2_rows * l2_cols entries (both read from
	// the BlockParams) and keeps the returned handle. Only a reference to the
	// BlockParams is stored, so the object passed to the constructor must outlive
	// this one.
	class PackedResultInt32 {
	 public:
	  PackedResultInt32(Allocator* _allocator, const BlockParams& _block_params)
	      : allocator_(_allocator), block_params_(_block_params) {
	    const auto entry_count = block_params_.l2_rows * block_params_.l2_cols;
	    matrix_handle_ = allocator_->Reserve<std::int32_t>(entry_count);
	  }

	  ~PackedResultInt32() {}

	  // Returns a mutable column-major map over the reserved entries, built from
	  // (pointer, l2_rows, l2_cols, l2_rows). The last argument is presumably the
	  // stride between columns -- confirm against MatrixMap's constructor.
	  MatrixMap<std::int32_t, MapOrder::ColMajor> Map() {
	    typedef MatrixMap<std::int32_t, MapOrder::ColMajor> ResultMap;
	    auto storage = allocator_->GetPointer<std::int32_t>(matrix_handle_);
	    const auto rows = block_params_.l2_rows;
	    const auto cols = block_params_.l2_cols;
	    return ResultMap(storage, rows, cols, rows);
	  }

	  // Read-only counterpart of Map(): same dimensions, const element type.
	  MatrixMap<const std::int32_t, MapOrder::ColMajor> Map() const {
	    typedef MatrixMap<const std::int32_t, MapOrder::ColMajor> ConstResultMap;
	    auto storage = allocator_->GetPointer<const std::int32_t>(matrix_handle_);
	    const auto rows = block_params_.l2_rows;
	    const auto cols = block_params_.l2_cols;
	    return ConstResultMap(storage, rows, cols, rows);
	  }

	 private:
	  // Allocator the storage was reserved from; also used to resolve the handle.
	  Allocator* allocator_;
	  // Handle returned by Allocator::Reserve for the result entries.
	  Allocator::Handle matrix_handle_;
	  // Source of the l2_rows / l2_cols dimensions (held by reference).
	  const BlockParams& block_params_;
	};

	// Generic (non-SIMD) unpacking of an int32 result block into |dst|.
	//
	// For every entry (r, c) of |dst| this computes
	//
	//   q  = *src.data(r, c)
	//      + packed_lhs.rank_one_update()[r]
	//      + packed_rhs.rank_one_update()[c]
	//      + rank0update
	//   q  = ((q + result_offset) * result_mult_int + rounding) >> result_shift
	//   dst(r, c) = q clamped to [0, 255]
	//
	// where rank0update is the product of both rank_one_update_multiplier()
	// values and |depth|, and rounding is 2^(result_shift - 1), or 0 when
	// result_shift < 1.
	//
	// Parameters:
	//   dst             destination block; its rows()/cols() drive the loops.
	//   src             accumulators, read through src.data(r, c).
	//   packed_lhs/rhs  provide the per-row / per-column correction terms.
	//   depth           scales the constant correction term.
	//   result_offset, result_mult_int, result_shift
	//                   offset, integer multiplier and right shift applied to
	//                   each corrected accumulator. result_shift must be
	//                   non-negative.
	template <typename ResultBlockType, typename PackedResult,
	          typename KernelLhsFormat, typename KernelRhsFormat>
	void UnpackResultImpl(ResultBlockType* dst, const PackedResult& src,
	                      const PackedSideBlock<KernelLhsFormat>& packed_lhs,
	                      const PackedSideBlock<KernelRhsFormat>& packed_rhs,
	                      int depth, std::int32_t result_offset,
	                      std::int32_t result_mult_int, std::int32_t result_shift) {
	  const std::int32_t rank0update = packed_lhs.rank_one_update_multiplier() *
	                                   packed_rhs.rank_one_update_multiplier() *
	                                   depth;
	  // Round-to-nearest term for the final right shift. The expression
	  // `1 << (result_shift - 1)` has undefined behavior when result_shift is 0
	  // (negative shift count), so no rounding term is added in that case; it is
	  // also loop-invariant, hence computed once here.
	  const std::int32_t rounding_term =
	      (result_shift < 1) ? 0 : (1 << (result_shift - 1));
	  // No top-level blocking in the depth dimension at the moment.
	  // Too much loss of precision.
	  for (int c = 0; c < dst->cols(); c++) {
	    for (int r = 0; r < dst->rows(); r++) {
	      std::int32_t q = *src.data(r, c);
	      q += packed_lhs.rank_one_update()[r] + packed_rhs.rank_one_update()[c] +
	           rank0update;
	      q = ((q + result_offset) * result_mult_int + rounding_term) >>
	          result_shift;
	      // Saturate to the [0, 255] range before storing.
	      (*dst)(r, c) = q > 255 ? 255 : (q < 0 ? 0 : q);
	    }
	  }
	}

	// Entry point for unpacking a result block.
	//
	// Creates a ScopedProfilingLabel named "unpack" that lives for the duration
	// of the call, maps |src| via src.Map(), and forwards the map together with
	// all remaining arguments, unchanged, to UnpackResultImplNEON when
	// GEMMLOWP_NEON is defined and to UnpackResultImpl otherwise.
	template <typename ResultBlockType, typename PackedResult,
	          typename KernelLhsFormat, typename KernelRhsFormat>
	void UnpackResult(ResultBlockType* dst, const PackedResult& src,
	                  const PackedSideBlock<KernelLhsFormat>& packed_lhs,
	                  const PackedSideBlock<KernelRhsFormat>& packed_rhs, int depth,
	                  std::int32_t result_offset, std::int32_t result_mult_int,
	                  std::int32_t result_shift) {
	  ScopedProfilingLabel label("unpack");
	  // Map the packed result once; both implementations consume the same view.
	  const auto& mapped_src = src.Map();
	#ifdef GEMMLOWP_NEON
	  UnpackResultImplNEON(dst, mapped_src, packed_lhs, packed_rhs, depth,
	                       result_offset, result_mult_int, result_shift);
	#else
	  UnpackResultImpl(dst, mapped_src, packed_lhs, packed_rhs, depth,
	                   result_offset, result_mult_int, result_shift);
	#endif
	}

	} // namespace gemmlowp

	#endif // GEMMLOWP_INTERNAL_UNPACK_H_