common/cpu_operations/Multinomial.cpp - platform/packages/modules/NeuralNetworks - Git at Google

 /*
  * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #define LOG_TAG "Operations"

 #include "Multinomial.h"

 #include <algorithm>
 #include <limits>
 #include <vector>

 #include "CpuExecutor.h"
 #include "Tracing.h"

 #ifdef NN_INCLUDE_CPU_IMPLEMENTATION
 #include <tensorflow/lite/kernels/internal/tensor_utils.h>

 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wunused-parameter"
 #pragma clang diagnostic ignored "-Winvalid-partial-specialization"
 #include <unsupported/Eigen/CXX11/Tensor>
 #pragma clang diagnostic pop

 #include "CpuOperationUtils.h"
 #include "guarded_philox_random.h"
 #include "philox_random.h"
 #include "simple_philox.h"
 #endif  // NN_INCLUDE_CPU_IMPLEMENTATION

 namespace android {
 namespace nn {

 namespace {

 template <typename T>
 inline T* GetBuffer(RunTimeOperandInfo* operand) {
     return reinterpret_cast<T*>(operand->buffer);
 }

 template <typename T>
 inline const T* GetBuffer(const RunTimeOperandInfo* operand) {
     return reinterpret_cast<const T*>(operand->buffer);
 }

 }  // namespace

 // Caches the operands this operation works on: the input tensor, the scalar
 // sample count (read eagerly into sample_count_), the random seeds tensor and
 // the output tensor.
 Multinomial::Multinomial(const Operation& operation, RunTimeOperandInfo* operands) {
     NNTRACE_TRANS("Multinomial::Multinomial");

     // Tensor operands are kept as pointers into `operands`.
     input_ = GetInput(operation, operands, kInputTensor);
     random_seeds_ = GetInput(operation, operands, kRandomSeedsTensor);
     output_ = GetOutput(operation, operands, kOutputTensor);

     // The sample count is a scalar, so its value is copied out right away.
     const RunTimeOperandInfo* sampleCountOperand =
             GetInput(operation, operands, kSampleCountParam);
     sample_count_ = getScalarData<int>(*sampleCountOperand);
 }

 bool Multinomial::Prepare(const Operation& operation, RunTimeOperandInfo* operands,
                           Shape* outputShape) {
     NNTRACE_TRANS("Multinomial::Prepare");
     NN_CHECK_EQ(NumInputsWithValues(operation, operands), 3);
     NN_CHECK_EQ(NumOutputs(operation), 1);

     const RunTimeOperandInfo* input = GetInput(operation, operands, Multinomial::kInputTensor);
     const Shape& inputShape = input->shape();

     const uint32_t batch_size = SizeOfDimension(input, 0);
     const uint32_t sample_count =
             getScalarData<int>(*GetInput(operation, operands, kSampleCountParam));

     outputShape->type = OperandType::TENSOR_INT32;
     outputShape->dimensions = {batch_size, sample_count};
     outputShape->offset = inputShape.offset;
     outputShape->scale = inputShape.scale;

     return true;
 }

 bool Multinomial::Eval() {
     NNTRACE_COMP("Multinomial::Eval");
     switch (input_->type) {
         case OperandType::TENSOR_FLOAT16: {
             std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
             convertFloat16ToFloat32(GetBuffer<_Float16>(input_), &inputDataFloat32);
             EvalFloat32(inputDataFloat32.data());
             break;
         }
         case OperandType::TENSOR_FLOAT32: {
             EvalFloat32(GetBuffer<float>(input_));
             break;
         }
         default: {
             LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
             return false;
         }
     }
     return true;
 }

 // Draws sample_count_ class indices for every batch row and writes them to
 // output_ as int32 values, sample_count_ consecutive entries per row.
 //
 // inputData holds batch_size rows of class_size floats (dimensions 0 and 1 of
 // input_). Each finite entry x contributes exp(x - row_max) of probability
 // mass; non-finite entries contribute none. A sample is drawn by scaling a
 // uniform random double by the row's total mass and locating it in the
 // running sum of masses.
 void Multinomial::EvalFloat32(const float* inputData) {
     const uint32_t batch_size = SizeOfDimension(input_, 0);
     const uint32_t class_size = SizeOfDimension(input_, 1);

     tensorflow::GuardedPhiloxRandom random_generator;
     const int32_t* seeds = GetBuffer<int32_t>(random_seeds_);
     random_generator.Init(seeds[0], seeds[1]);

     // PhiloxRandom produces results as 4 32-bit integers, so round the per-row
     // sample count up to a multiple of 4. The CPU operation uses 64-bit double
     // values, so two results per sample. All of this is done in 64-bit
     // arithmetic so that large batch/sample counts cannot wrap around.
     const int64_t sample_count_aligned =
             (static_cast<int64_t>(sample_count_) + 3) / 4 * 4 * 2;
     auto random_generator_reserved = random_generator.ReserveRandomOutputs(
             static_cast<int64_t>(batch_size) * sample_count_aligned, 256);
     tensorflow::random::SimplePhilox simple_philox(&random_generator_reserved);

     // Running sum of probability mass per class. Every entry is rewritten for
     // each row, so one allocation is shared by all rows.
     std::vector<double> cdf(class_size);

     for (uint64_t b = 0; b < batch_size; ++b) {
         const float* input_ptr_batch = inputData + b * class_size;

         // Largest finite entry of the row; subtracting it below keeps every
         // exponent argument <= 0.
         float max = std::numeric_limits<float>::lowest();
         for (uint64_t j = 0; j < class_size; ++j) {
             if (Eigen::numext::isfinite(input_ptr_batch[j])) {
                 max = std::max(max, input_ptr_batch[j]);
             }
         }
         const double batch_max = static_cast<double>(max);

         double total = 0;
         for (uint64_t j = 0; j < class_size; ++j) {
             if (Eigen::numext::isfinite(input_ptr_batch[j])) {
                 total += exp(static_cast<double>(input_ptr_batch[j]) - batch_max);
             }
             cdf[j] = total;
         }

         auto* output_ptr_batch = GetBuffer<int32_t>(output_) + b * sample_count_;
         for (uint64_t j = 0; j < static_cast<uint64_t>(sample_count_); ++j) {
             const double target = simple_philox.RandDouble() * total;
             const auto found_iter = std::upper_bound(cdf.begin(), cdf.end(), target);
             int64_t index = std::distance(cdf.begin(), found_iter);
             // upper_bound yields end() when no entry exceeds target, e.g. when
             // the row has no finite value and total is 0. Clamp so the emitted
             // index always names an existing class.
             if (found_iter == cdf.end() && class_size > 0) {
                 index = static_cast<int64_t>(class_size) - 1;
             }
             output_ptr_batch[j] = static_cast<int32_t>(index);
         }
     }
 }

 }  // namespace nn
 }  // namespace android
	/*
	* Copyright (C) 2018 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#define LOG_TAG "Operations"

	#include "Multinomial.h"

	#include <algorithm>
	#include <limits>
	#include <vector>

	#include "CpuExecutor.h"
	#include "Tracing.h"

	#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
	#include <tensorflow/lite/kernels/internal/tensor_utils.h>

	#pragma clang diagnostic push
	#pragma clang diagnostic ignored "-Wunused-parameter"
	#pragma clang diagnostic ignored "-Winvalid-partial-specialization"
	#include <unsupported/Eigen/CXX11/Tensor>
	#pragma clang diagnostic pop

	#include "CpuOperationUtils.h"
	#include "guarded_philox_random.h"
	#include "philox_random.h"
	#include "simple_philox.h"
	#endif // NN_INCLUDE_CPU_IMPLEMENTATION

	namespace android {
	namespace nn {

	namespace {

	template <typename T>
	inline T* GetBuffer(RunTimeOperandInfo* operand) {
	return reinterpret_cast<T*>(operand->buffer);
	}

	template <typename T>
	inline const T* GetBuffer(const RunTimeOperandInfo* operand) {
	return reinterpret_cast<const T*>(operand->buffer);
	}

	} // namespace

	// Caches the operands this operation works on: the input tensor, the scalar
	// sample count (read eagerly into sample_count_), the random seeds tensor and
	// the output tensor.
	Multinomial::Multinomial(const Operation& operation, RunTimeOperandInfo* operands) {
	    NNTRACE_TRANS("Multinomial::Multinomial");

	    // Tensor operands are kept as pointers into `operands`.
	    input_ = GetInput(operation, operands, kInputTensor);
	    random_seeds_ = GetInput(operation, operands, kRandomSeedsTensor);
	    output_ = GetOutput(operation, operands, kOutputTensor);

	    // The sample count is a scalar, so its value is copied out right away.
	    const RunTimeOperandInfo* sampleCountOperand =
	            GetInput(operation, operands, kSampleCountParam);
	    sample_count_ = getScalarData<int>(*sampleCountOperand);
	}

	bool Multinomial::Prepare(const Operation& operation, RunTimeOperandInfo* operands,
	Shape* outputShape) {
	NNTRACE_TRANS("Multinomial::Prepare");
	NN_CHECK_EQ(NumInputsWithValues(operation, operands), 3);
	NN_CHECK_EQ(NumOutputs(operation), 1);

	const RunTimeOperandInfo* input = GetInput(operation, operands, Multinomial::kInputTensor);
	const Shape& inputShape = input->shape();

	const uint32_t batch_size = SizeOfDimension(input, 0);
	const uint32_t sample_count =
	getScalarData<int>(*GetInput(operation, operands, kSampleCountParam));

	outputShape->type = OperandType::TENSOR_INT32;
	outputShape->dimensions = {batch_size, sample_count};
	outputShape->offset = inputShape.offset;
	outputShape->scale = inputShape.scale;

	return true;
	}

	bool Multinomial::Eval() {
	NNTRACE_COMP("Multinomial::Eval");
	switch (input_->type) {
	case OperandType::TENSOR_FLOAT16: {
	std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
	convertFloat16ToFloat32(GetBuffer<_Float16>(input_), &inputDataFloat32);
	EvalFloat32(inputDataFloat32.data());
	break;
	}
	case OperandType::TENSOR_FLOAT32: {
	EvalFloat32(GetBuffer<float>(input_));
	break;
	}
	default: {
	LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
	return false;
	}
	}
	return true;
	}

	// Draws sample_count_ class indices for every batch row and writes them to
	// output_ as int32 values, sample_count_ consecutive entries per row.
	//
	// inputData holds batch_size rows of class_size floats (dimensions 0 and 1 of
	// input_). Each finite entry x contributes exp(x - row_max) of probability
	// mass; non-finite entries contribute none. A sample is drawn by scaling a
	// uniform random double by the row's total mass and locating it in the
	// running sum of masses.
	void Multinomial::EvalFloat32(const float* inputData) {
	    const uint32_t batch_size = SizeOfDimension(input_, 0);
	    const uint32_t class_size = SizeOfDimension(input_, 1);

	    tensorflow::GuardedPhiloxRandom random_generator;
	    const int32_t* seeds = GetBuffer<int32_t>(random_seeds_);
	    random_generator.Init(seeds[0], seeds[1]);

	    // PhiloxRandom produces results as 4 32-bit integers, so round the per-row
	    // sample count up to a multiple of 4. The CPU operation uses 64-bit double
	    // values, so two results per sample. All of this is done in 64-bit
	    // arithmetic so that large batch/sample counts cannot wrap around.
	    const int64_t sample_count_aligned =
	            (static_cast<int64_t>(sample_count_) + 3) / 4 * 4 * 2;
	    auto random_generator_reserved = random_generator.ReserveRandomOutputs(
	            static_cast<int64_t>(batch_size) * sample_count_aligned, 256);
	    tensorflow::random::SimplePhilox simple_philox(&random_generator_reserved);

	    // Running sum of probability mass per class. Every entry is rewritten for
	    // each row, so one allocation is shared by all rows.
	    std::vector<double> cdf(class_size);

	    for (uint64_t b = 0; b < batch_size; ++b) {
	        const float* input_ptr_batch = inputData + b * class_size;

	        // Largest finite entry of the row; subtracting it below keeps every
	        // exponent argument <= 0.
	        float max = std::numeric_limits<float>::lowest();
	        for (uint64_t j = 0; j < class_size; ++j) {
	            if (Eigen::numext::isfinite(input_ptr_batch[j])) {
	                max = std::max(max, input_ptr_batch[j]);
	            }
	        }
	        const double batch_max = static_cast<double>(max);

	        double total = 0;
	        for (uint64_t j = 0; j < class_size; ++j) {
	            if (Eigen::numext::isfinite(input_ptr_batch[j])) {
	                total += exp(static_cast<double>(input_ptr_batch[j]) - batch_max);
	            }
	            cdf[j] = total;
	        }

	        auto* output_ptr_batch = GetBuffer<int32_t>(output_) + b * sample_count_;
	        for (uint64_t j = 0; j < static_cast<uint64_t>(sample_count_); ++j) {
	            const double target = simple_philox.RandDouble() * total;
	            const auto found_iter = std::upper_bound(cdf.begin(), cdf.end(), target);
	            int64_t index = std::distance(cdf.begin(), found_iter);
	            // upper_bound yields end() when no entry exceeds target, e.g. when
	            // the row has no finite value and total is 0. Clamp so the emitted
	            // index always names an existing class.
	            if (found_iter == cdf.end() && class_size > 0) {
	                index = static_cast<int64_t>(class_size) - 1;
	            }
	            output_ptr_batch[j] = static_cast<int32_t>(index);
	        }
	    }
	}

	} // namespace nn
	} // namespace android