| |
| #ifndef CAFFE2_IMAGE_IMAGE_INPUT_OP_H_ |
| #define CAFFE2_IMAGE_IMAGE_INPUT_OP_H_ |
| |
| #include <opencv2/opencv.hpp> |
| |
| #include <algorithm> |
| #include <iostream> |
| |
| #include "c10/core/thread_pool.h" |
| #include <c10/util/irange.h> |
| #include "caffe2/core/common.h" |
| #include "caffe2/core/db.h" |
| #include "caffe2/image/transform_gpu.h" |
| #include "caffe2/operators/prefetch_op.h" |
| #include "caffe2/proto/caffe2_legacy.pb.h" |
| #include "caffe2/utils/cast.h" |
| #include "caffe2/utils/math.h" |
| |
| namespace caffe2 { |
| |
| class CUDAContext; |
| |
| template <class Context> |
| class ImageInputOp final : public PrefetchOperator<Context> { |
| // SINGLE_LABEL: single integer label for multi-class classification |
| // MULTI_LABEL_SPARSE: sparse active label indices for multi-label |
| // classification |
| // MULTI_LABEL_DENSE: dense label embedding vector for label embedding |
| // regression |
| // MULTI_LABEL_WEIGHTED_SPARSE: sparse active label indices with per-label |
| // weights for multi-label classification |
| // SINGLE_LABEL_WEIGHTED: single integer label for multi-class |
| // classification with weighted sampling |
| // EMBEDDING_LABEL: an array of floating-point numbers representing a dense |
| // embedding; useful for model distillation |
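| // |
| // For example, with num_labels = 5: |
| //   SINGLE_LABEL: the label proto holds one value, e.g. 3 |
| //   MULTI_LABEL_SPARSE: indices {1, 3} expand to the dense row |
| //     {0, 1, 0, 1, 0} |
| //   MULTI_LABEL_WEIGHTED_SPARSE: indices {1, 3} with weights {0.2, 0.8} |
| //     expand to {0, 0.2, 0, 0.8, 0} |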
| enum LABEL_TYPE { |
| SINGLE_LABEL = 0, |
| MULTI_LABEL_SPARSE = 1, |
| MULTI_LABEL_DENSE = 2, |
| MULTI_LABEL_WEIGHTED_SPARSE = 3, |
| SINGLE_LABEL_WEIGHTED = 4, |
| EMBEDDING_LABEL = 5, |
| }; |
| |
| // INCEPTION_STYLE: random crop covering 8% - 100% of the image area, with |
| // aspect ratio in [3/4, 4/3]. Reference: the GoogLeNet paper. |
| enum SCALE_JITTER_TYPE { |
| NO_SCALE_JITTER = 0, |
| INCEPTION_STYLE = 1 |
| // TODO(zyan3): ResNet-style random scale jitter |
| }; |
| |
| public: |
| using OperatorBase::OutputSize; |
| using PrefetchOperator<Context>::context_; |
| using PrefetchOperator<Context>::prefetch_thread_; |
| explicit ImageInputOp(const OperatorDef& operator_def, Workspace* ws); |
| ~ImageInputOp() { |
| PrefetchOperator<Context>::Finalize(); |
| } |
| |
| bool Prefetch() override; |
| bool CopyPrefetched() override; |
| |
| private: |
| struct BoundingBox { |
| bool valid; |
| int ymin; |
| int xmin; |
| int height; |
| int width; |
| }; |
| |
| // Structure to store per-image information. |
| // This can be modified by the DecodeAnd* methods, so it needs |
| // to be privatized per launch. |
| struct PerImageArg { BoundingBox bounding_params; }; |
| |
| bool GetImageAndLabelAndInfoFromDBValue( |
| const string& value, |
| cv::Mat* img, |
| PerImageArg& info, |
| int item_id, |
| std::mt19937* randgen); |
| void DecodeAndTransform( |
| const std::string& value, |
| float* image_data, |
| int item_id, |
| const int channels, |
| std::size_t thread_index); |
| void DecodeAndTransposeOnly( |
| const std::string& value, |
| uint8_t* image_data, |
| int item_id, |
| const int channels, |
| std::size_t thread_index); |
| bool ApplyTransformOnGPU( |
| const std::vector<std::int64_t>& dims, |
| const c10::Device& type); |
| |
| unique_ptr<db::DBReader> owned_reader_; |
| const db::DBReader* reader_; |
| Tensor prefetched_image_; |
| Tensor prefetched_label_; |
| vector<Tensor> prefetched_additional_outputs_; |
| Tensor prefetched_image_on_device_; |
| Tensor prefetched_label_on_device_; |
| vector<Tensor> prefetched_additional_outputs_on_device_; |
| // Default parameters for images |
| PerImageArg default_arg_; |
| int batch_size_; |
| LABEL_TYPE label_type_; |
| int num_labels_; |
| |
| bool color_; |
| bool color_jitter_; |
| float img_saturation_; |
| float img_brightness_; |
| float img_contrast_; |
| bool color_lighting_; |
| float color_lighting_std_; |
| std::vector<std::vector<float>> color_lighting_eigvecs_; |
| std::vector<float> color_lighting_eigvals_; |
| SCALE_JITTER_TYPE scale_jitter_type_; |
| int scale_; |
| // Minsize is similar to scale except that it will only |
| // force the image to scale up if it is too small. In other words, |
| // it ensures that both dimensions of the image are at least minsize_ |
| int minsize_; |
| bool warp_; |
| int crop_; |
| std::vector<float> mean_; |
| std::vector<float> std_; |
| Tensor mean_gpu_; |
| Tensor std_gpu_; |
| bool mirror_; |
| bool is_test_; |
| bool use_caffe_datum_; |
| bool gpu_transform_; |
| bool mean_std_copied_ = false; |
| |
| // thread pool for parse + decode |
| int num_decode_threads_; |
| int additional_inputs_offset_; |
| int additional_inputs_count_; |
| std::vector<int> additional_output_sizes_; |
| std::shared_ptr<TaskThreadPool> thread_pool_; |
| |
| // Output type for GPU transform path |
| TensorProto_DataType output_type_; |
| |
| // random minsize |
| vector<int> random_scale_; |
| bool random_scaling_; |
| |
| // Working variables |
| std::vector<std::mt19937> randgen_per_thread_; |
| |
| // number of exceptions produced by opencv while reading image data |
| std::atomic<long> num_decode_errors_in_batch_{0}; |
| // tolerated ratio of opencv decode exceptions per batch |
| float max_decode_error_ratio_; |
| }; |
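| // A minimal construction sketch (illustrative only: the op type string and |
| // blob names below are assumptions, not taken from this header): |
| // |
| //   OperatorDef def; |
| //   def.set_type("ImageInput"); |
| //   def.add_input("db_reader");  // a DBReader blob created elsewhere |
| //   def.add_output("data");      // NHWC images, uint8 or float |
| //   def.add_output("label"); |
| //   // required arguments: batch_size > 0, crop > 0, and exactly one of |
| //   // scale / minsize (see the enforcement in the constructor below) |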
| |
| template <class Context> |
| ImageInputOp<Context>::ImageInputOp( |
| const OperatorDef& operator_def, |
| Workspace* ws) |
| : PrefetchOperator<Context>(operator_def, ws), |
| reader_(nullptr), |
| batch_size_( |
| OperatorBase::template GetSingleArgument<int>("batch_size", 0)), |
| label_type_(static_cast<LABEL_TYPE>( |
| OperatorBase::template GetSingleArgument<int>("label_type", 0))), |
| num_labels_( |
| OperatorBase::template GetSingleArgument<int>("num_labels", 0)), |
| color_(OperatorBase::template GetSingleArgument<int>("color", 1)), |
| color_jitter_( |
| OperatorBase::template GetSingleArgument<int>("color_jitter", 0)), |
| img_saturation_(OperatorBase::template GetSingleArgument<float>( |
| "img_saturation", |
| 0.4)), |
| img_brightness_(OperatorBase::template GetSingleArgument<float>( |
| "img_brightness", |
| 0.4)), |
| img_contrast_( |
| OperatorBase::template GetSingleArgument<float>("img_contrast", 0.4)), |
| color_lighting_( |
| OperatorBase::template GetSingleArgument<int>("color_lighting", 0)), |
| color_lighting_std_(OperatorBase::template GetSingleArgument<float>( |
| "color_lighting_std", |
| 0.1)), |
| scale_jitter_type_(static_cast<SCALE_JITTER_TYPE>( |
| OperatorBase::template GetSingleArgument<int>( |
| "scale_jitter_type", |
| 0))), |
| scale_(OperatorBase::template GetSingleArgument<int>("scale", -1)), |
| minsize_(OperatorBase::template GetSingleArgument<int>("minsize", -1)), |
| warp_(OperatorBase::template GetSingleArgument<int>("warp", 0)), |
| crop_(OperatorBase::template GetSingleArgument<int>("crop", -1)), |
| mirror_(OperatorBase::template GetSingleArgument<int>("mirror", 0)), |
| is_test_(OperatorBase::template GetSingleArgument<int>( |
| OpSchema::Arg_IsTest, |
| 0)), |
| use_caffe_datum_( |
| OperatorBase::template GetSingleArgument<int>("use_caffe_datum", 0)), |
| gpu_transform_(OperatorBase::template GetSingleArgument<int>( |
| "use_gpu_transform", |
| 0)), |
| num_decode_threads_( |
| OperatorBase::template GetSingleArgument<int>("decode_threads", 4)), |
| additional_output_sizes_( |
| OperatorBase::template GetRepeatedArgument<int>("output_sizes", {})), |
| thread_pool_(std::make_shared<TaskThreadPool>(num_decode_threads_)), |
| // output type only supported with CUDA and use_gpu_transform for now |
| output_type_( |
| cast::GetCastDataType(ArgumentHelper(operator_def), "output_type")), |
| random_scale_(OperatorBase::template GetRepeatedArgument<int>( |
| "random_scale", |
| {-1, -1})), |
| max_decode_error_ratio_(OperatorBase::template GetSingleArgument<float>( |
| "max_decode_error_ratio", |
| 1.0)) { |
| if ((random_scale_[0] == -1) || (random_scale_[1] == -1)) { |
| random_scaling_ = false; |
| } else { |
| random_scaling_ = true; |
| minsize_ = random_scale_[0]; |
| } |
| |
| mean_ = OperatorBase::template GetRepeatedArgument<float>( |
| "mean_per_channel", |
| {OperatorBase::template GetSingleArgument<float>("mean", 0.)}); |
| |
| std_ = OperatorBase::template GetRepeatedArgument<float>( |
| "std_per_channel", |
| {OperatorBase::template GetSingleArgument<float>("std", 1.)}); |
| |
| if (additional_output_sizes_.size() == 0) { |
| additional_output_sizes_ = std::vector<int>(OutputSize() - 2, 1); |
| } else { |
| CAFFE_ENFORCE( |
| additional_output_sizes_.size() == OutputSize() - 2, |
| "If the output sizes are specified, they must be specified for all " |
| "additional outputs"); |
| } |
| additional_inputs_count_ = OutputSize() - 2; |
| |
| default_arg_.bounding_params = { |
| false, |
| OperatorBase::template GetSingleArgument<int>("bounding_ymin", -1), |
| OperatorBase::template GetSingleArgument<int>("bounding_xmin", -1), |
| OperatorBase::template GetSingleArgument<int>("bounding_height", -1), |
| OperatorBase::template GetSingleArgument<int>("bounding_width", -1), |
| }; |
| |
| if (operator_def.input_size() == 0) { |
| LOG(ERROR) << "You are using an old ImageInputOp format that creates " |
| "a local db reader. Consider moving to the new style " |
| "that takes in a DBReader blob instead."; |
| string db_name = OperatorBase::template GetSingleArgument<string>("db", ""); |
| CAFFE_ENFORCE_GT(db_name.size(), 0, "Must specify a db name."); |
| owned_reader_.reset(new db::DBReader( |
| OperatorBase::template GetSingleArgument<string>("db_type", "leveldb"), |
| db_name)); |
| reader_ = owned_reader_.get(); |
| } |
| |
| // hard-coded PCA eigenvectors and eigenvalues, based on RGB channel order |
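| // These are the ImageNet PCA values popularized by Krizhevsky et al. |
| // (AlexNet), with the eigenvectors pre-scaled by 255. |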
| color_lighting_eigvecs_.push_back( |
| std::vector<float>{-144.7125f, 183.396f, 102.2295f}); |
| color_lighting_eigvecs_.push_back( |
| std::vector<float>{-148.104f, -1.1475f, -207.57f}); |
| color_lighting_eigvecs_.push_back( |
| std::vector<float>{-148.818f, -177.174f, 107.1765f}); |
| |
| color_lighting_eigvals_ = std::vector<float>{0.2175f, 0.0188f, 0.0045f}; |
| |
| CAFFE_ENFORCE_GT(batch_size_, 0, "Batch size must be positive."); |
| if (use_caffe_datum_) { |
| CAFFE_ENFORCE( |
| label_type_ == SINGLE_LABEL || label_type_ == SINGLE_LABEL_WEIGHTED, |
| "Caffe datum only supports single integer label"); |
| } |
| if (label_type_ != SINGLE_LABEL && label_type_ != SINGLE_LABEL_WEIGHTED) { |
| CAFFE_ENFORCE_GT( |
| num_labels_, |
| 0, |
| "Number of labels must be set for using either sparse label indices or dense label embedding."); |
| } |
| if (label_type_ == MULTI_LABEL_WEIGHTED_SPARSE || |
| label_type_ == SINGLE_LABEL_WEIGHTED) { |
| additional_inputs_offset_ = 3; |
| } else { |
| additional_inputs_offset_ = 2; |
| } |
| CAFFE_ENFORCE( |
| (scale_ > 0) != (minsize_ > 0), |
| "Must provide one and only one of scaling or minsize"); |
| CAFFE_ENFORCE_GT(crop_, 0, "Must provide the cropping value."); |
| CAFFE_ENFORCE_GE( |
| scale_ > 0 ? scale_ : minsize_, |
| crop_, |
| "The scale/minsize value must be no smaller than the crop value."); |
| |
| CAFFE_ENFORCE_EQ( |
| mean_.size(), |
| std_.size(), |
| "The mean and std. dev vectors must be of the same size."); |
| CAFFE_ENFORCE( |
| mean_.size() == 1 || mean_.size() == 3, |
| "The mean and std. dev vectors must be of size 1 or 3"); |
| CAFFE_ENFORCE( |
| !use_caffe_datum_ || OutputSize() == 2, |
| "There can only be 2 outputs if the Caffe datum format is used"); |
| |
| CAFFE_ENFORCE( |
| random_scale_.size() == 2, "Must provide [scale_min, scale_max]"); |
| CAFFE_ENFORCE_GE( |
| random_scale_[1], |
| random_scale_[0], |
| "random scale must provide a range [min, max]"); |
| |
| if (default_arg_.bounding_params.ymin < 0 || |
| default_arg_.bounding_params.xmin < 0 || |
| default_arg_.bounding_params.height < 0 || |
| default_arg_.bounding_params.width < 0) { |
| default_arg_.bounding_params.valid = false; |
| } else { |
| default_arg_.bounding_params.valid = true; |
| } |
| |
| if (mean_.size() == 1) { |
| // We are going to extend to 3 using the first value |
| mean_.resize(3, mean_[0]); |
| std_.resize(3, std_[0]); |
| } |
| |
| LOG(INFO) << "Creating an image input op with the following setting: "; |
| LOG(INFO) << " Using " << num_decode_threads_ << " CPU threads;"; |
| if (gpu_transform_) { |
| LOG(INFO) << " Performing transformation on GPU"; |
| } |
| LOG(INFO) << " Outputting in batches of " << batch_size_ << " images;"; |
| LOG(INFO) << " Treating input image as " |
| << (color_ ? "color " : "grayscale ") << "image;"; |
| if (default_arg_.bounding_params.valid) { |
| LOG(INFO) << " Applying a default bounding box of Y [" |
| << default_arg_.bounding_params.ymin << "; " |
| << default_arg_.bounding_params.ymin + |
| default_arg_.bounding_params.height |
| << ") x X [" << default_arg_.bounding_params.xmin << "; " |
| << default_arg_.bounding_params.xmin + |
| default_arg_.bounding_params.width |
| << ")"; |
| } |
| if (scale_ > 0 && !random_scaling_) { |
| LOG(INFO) << " Scaling image to " << scale_ |
| << (warp_ ? " with " : " without ") << "warping;"; |
| } else { |
| if (random_scaling_) { |
| // randomly set minsize_ for each image |
| LOG(INFO) << " Randomly scaling shortest side between " |
| << random_scale_[0] << " and " << random_scale_[1]; |
| } else { |
| // Here, minsize_ > 0 |
| LOG(INFO) << " Ensuring minimum image size of " << minsize_ |
| << (warp_ ? " with " : " without ") << "warping;"; |
| } |
| } |
| LOG(INFO) << " " << (is_test_ ? "Central" : "Random") |
| << " cropping image to " << crop_ |
| << (mirror_ ? " with " : " without ") << "random mirroring;"; |
| LOG(INFO) << "Label Type: " << label_type_; |
| LOG(INFO) << "Num Labels: " << num_labels_; |
| |
| auto mit = mean_.begin(); |
| auto sit = std_.begin(); |
| |
| for (int i = 0; mit != mean_.end() && sit != std_.end(); ++mit, ++sit, ++i) { |
| LOG(INFO) << " Default [Channel " << i << "] Subtract mean " << *mit |
| << " and divide by std " << *sit << "."; |
| // We will actually use the inverse of std, so invert it here. |
| *sit = 1.f / *sit; |
| } |
| LOG(INFO) << " Outputting images as " |
| << OperatorBase::template GetSingleArgument<string>( |
| "output_type", "unknown") |
| << "."; |
| |
| std::mt19937 meta_randgen(time(nullptr)); |
| for (const auto i : c10::irange(num_decode_threads_)) { |
| randgen_per_thread_.emplace_back(meta_randgen()); |
| } |
| ReinitializeTensor( |
| &prefetched_image_, |
| {int64_t(batch_size_), |
| int64_t(crop_), |
| int64_t(crop_), |
| int64_t(color_ ? 3 : 1)}, |
| at::dtype<uint8_t>().device(CPU)); |
| std::vector<int64_t> sizes; |
| if (label_type_ != SINGLE_LABEL && label_type_ != SINGLE_LABEL_WEIGHTED) { |
| sizes = std::vector<int64_t>{int64_t(batch_size_), int64_t(num_labels_)}; |
| } else { |
| sizes = std::vector<int64_t>{batch_size_}; |
| } |
| // The data type for prefetched_label_ is not actually known here; it is |
| // fixed on the first Prefetch() call, based on the label proto's data type. |
| ReinitializeTensor(&prefetched_label_, sizes, at::dtype<int>().device(CPU)); |
| |
| for (const auto i : c10::irange(additional_output_sizes_.size())) { |
| prefetched_additional_outputs_on_device_.emplace_back(); |
| prefetched_additional_outputs_.emplace_back(); |
| } |
| } |
| |
| // Inception-style scale jittering |
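| // e.g. for a 640x480 input (area 307200), drawing an area fraction of 0.5 |
| // and an aspect ratio of 4/3 yields a 452x339 crop |
| // (floor(sqrt(153600 * 4/3)) x floor(sqrt(153600 * 3/4))), which is then |
| // resized to crop x crop. |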
| template <class Context> |
| bool RandomSizedCropping(cv::Mat* img, const int crop, std::mt19937* randgen) { |
| cv::Mat scaled_img; |
| bool inception_scale_jitter = false; |
| int im_height = img->rows, im_width = img->cols; |
| int area = im_height * im_width; |
| std::uniform_real_distribution<> area_dis(0.08, 1.0); |
| std::uniform_real_distribution<> aspect_ratio_dis(3.0 / 4.0, 4.0 / 3.0); |
| |
| cv::Mat cropping; |
| for (const auto i : c10::irange(10)) { |
| int target_area = int(ceil(area_dis(*randgen) * area)); |
| float aspect_ratio = aspect_ratio_dis(*randgen); |
| int nh = floor(std::sqrt(((float)target_area / aspect_ratio))); |
| int nw = floor(std::sqrt(((float)target_area * aspect_ratio))); |
| if (nh >= 1 && nh <= im_height && nw >= 1 && nw <= im_width) { |
| int height_offset = |
| std::uniform_int_distribution<>(0, im_height - nh)(*randgen); |
| int width_offset = |
| std::uniform_int_distribution<>(0, im_width - nw)(*randgen); |
| cv::Rect ROI(width_offset, height_offset, nw, nh); |
| cropping = (*img)(ROI); |
| cv::resize( |
| cropping, scaled_img, cv::Size(crop, crop), 0, 0, cv::INTER_AREA); |
| *img = scaled_img; |
| inception_scale_jitter = true; |
| break; |
| } |
| } |
| return inception_scale_jitter; |
| } |
| |
| template <class Context> |
| bool ImageInputOp<Context>::GetImageAndLabelAndInfoFromDBValue( |
| const string& value, |
| cv::Mat* img, |
| PerImageArg& info, |
| int item_id, |
| std::mt19937* randgen) { |
| // |
| // We recommend running with --caffe2_use_fatal_for_enforce=1 when using |
| // ImageInputOp, since this function runs on a worker thread and exceptions |
| // thrown by CAFFE_ENFORCE would otherwise be silently dropped by the |
| // thread-pool workers. |
| // |
| cv::Mat src; |
| |
| // Use the default information for images |
| info = default_arg_; |
| if (use_caffe_datum_) { |
| // The input is a caffe datum format. |
| CaffeDatum datum; |
| CAFFE_ENFORCE(datum.ParseFromString(value)); |
| |
| prefetched_label_.mutable_data<int>()[item_id] = datum.label(); |
| if (datum.encoded()) { |
| // encoded image in datum. |
| // count the number of exceptions from opencv imdecode |
| try { |
| src = cv::imdecode( |
| cv::Mat( |
| 1, |
| datum.data().size(), |
| CV_8UC1, |
| const_cast<char*>(datum.data().data())), |
| color_ ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE); |
| if (src.rows == 0 || src.cols == 0) { |
| num_decode_errors_in_batch_++; |
| src = cv::Mat::zeros(cv::Size(224, 224), CV_8UC3); |
| } |
| } catch (cv::Exception& e) { |
| num_decode_errors_in_batch_++; |
| src = cv::Mat::zeros(cv::Size(224, 224), CV_8UC3); |
| } |
| } else { |
| // Raw image in datum. |
| CAFFE_ENFORCE(datum.channels() == 3 || datum.channels() == 1); |
| |
| int src_c = datum.channels(); |
| src.create( |
| datum.height(), datum.width(), (src_c == 3) ? CV_8UC3 : CV_8UC1); |
| |
| if (src_c == 1) { |
| memcpy(src.ptr<uchar>(0), datum.data().data(), datum.data().size()); |
| } else { |
| // Datum stores things in CHW order, let's do HWC for images to make |
| // things more consistent with conventional image storage. |
| for (const auto c : c10::irange(3)) { |
| const char* datum_buffer = |
| datum.data().data() + datum.height() * datum.width() * c; |
| uchar* ptr = src.ptr<uchar>(0) + c; |
| for (const auto h : c10::irange(datum.height())) { |
| for (const auto w : c10::irange(datum.width())) { |
| *ptr = *(datum_buffer++); |
| ptr += 3; |
| } |
| } |
| } |
| } |
| } |
| } else { |
| // The input is in caffe2 format. |
| TensorProtos protos; |
| CAFFE_ENFORCE(protos.ParseFromString(value)); |
| const TensorProto& image_proto = protos.protos(0); |
| const TensorProto& label_proto = protos.protos(1); |
| // collect the additional output protos to handle below |
| vector<TensorProto> additional_output_protos; |
| int start = additional_inputs_offset_; |
| int end = start + additional_inputs_count_; |
| for (const auto i : c10::irange(start, end)) { |
| additional_output_protos.push_back(protos.protos(i)); |
| } |
| |
| if (protos.protos_size() == end + 1) { |
| // We have bounding box information |
| const TensorProto& bounding_proto = protos.protos(end); |
| TORCH_DCHECK_EQ(bounding_proto.data_type(), TensorProto::INT32); |
| TORCH_DCHECK_EQ(bounding_proto.int32_data_size(), 4); |
| info.bounding_params.valid = true; |
| info.bounding_params.ymin = bounding_proto.int32_data(0); |
| info.bounding_params.xmin = bounding_proto.int32_data(1); |
| info.bounding_params.height = bounding_proto.int32_data(2); |
| info.bounding_params.width = bounding_proto.int32_data(3); |
| } |
| |
| if (image_proto.data_type() == TensorProto::STRING) { |
| // encoded image string. |
| TORCH_DCHECK_EQ(image_proto.string_data_size(), 1); |
| const string& encoded_image_str = image_proto.string_data(0); |
| int encoded_size = encoded_image_str.size(); |
| // We use a cv::Mat to wrap the encoded str so we do not need a copy. |
| // count the number of exceptions from opencv imdecode |
| try { |
| src = cv::imdecode( |
| cv::Mat( |
| 1, |
| &encoded_size, |
| CV_8UC1, |
| const_cast<char*>(encoded_image_str.data())), |
| color_ ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE); |
| if (src.rows == 0 || src.cols == 0) { |
| num_decode_errors_in_batch_++; |
| src = cv::Mat::zeros(cv::Size(224, 224), CV_8UC3); |
| } |
| } catch (cv::Exception& e) { |
| num_decode_errors_in_batch_++; |
| src = cv::Mat::zeros(cv::Size(224, 224), CV_8UC3); |
| } |
| } else if (image_proto.data_type() == TensorProto::BYTE) { |
| // raw image content. |
| int src_c = (image_proto.dims_size() == 3) ? image_proto.dims(2) : 1; |
| CAFFE_ENFORCE(src_c == 3 || src_c == 1); |
| |
| src.create( |
| image_proto.dims(0), |
| image_proto.dims(1), |
| (src_c == 3) ? CV_8UC3 : CV_8UC1); |
| memcpy( |
| src.ptr<uchar>(0), |
| image_proto.byte_data().data(), |
| image_proto.byte_data().size()); |
| } else { |
| LOG(FATAL) << "Unknown image data type."; |
| } |
| |
| // TODO: if image decoding was unsuccessful, set label to 0 |
| if (label_proto.data_type() == TensorProto::FLOAT) { |
| if (label_type_ == SINGLE_LABEL || label_type_ == SINGLE_LABEL_WEIGHTED) { |
| TORCH_DCHECK_EQ(label_proto.float_data_size(), 1); |
| prefetched_label_.mutable_data<float>()[item_id] = |
| label_proto.float_data(0); |
| } else if (label_type_ == MULTI_LABEL_SPARSE) { |
| float* label_data = |
| prefetched_label_.mutable_data<float>() + item_id * num_labels_; |
| memset(label_data, 0, sizeof(float) * num_labels_); |
| for (const auto i : c10::irange(label_proto.float_data_size())) { |
| label_data[(int)label_proto.float_data(i)] = 1.0; |
| } |
| } else if (label_type_ == MULTI_LABEL_WEIGHTED_SPARSE) { |
| const TensorProto& weight_proto = protos.protos(2); |
| float* label_data = |
| prefetched_label_.mutable_data<float>() + item_id * num_labels_; |
| memset(label_data, 0, sizeof(float) * num_labels_); |
| for (const auto i : c10::irange(label_proto.float_data_size())) { |
| label_data[(int)label_proto.float_data(i)] = |
| weight_proto.float_data(i); |
| } |
| } else if ( |
| label_type_ == MULTI_LABEL_DENSE || label_type_ == EMBEDDING_LABEL) { |
| CAFFE_ENFORCE(label_proto.float_data_size() == num_labels_); |
| float* label_data = |
| prefetched_label_.mutable_data<float>() + item_id * num_labels_; |
| for (const auto i : c10::irange(label_proto.float_data_size())) { |
| label_data[i] = label_proto.float_data(i); |
| } |
| } else { |
| LOG(ERROR) << "Unknown label type:" << label_type_; |
| } |
| } else if (label_proto.data_type() == TensorProto::INT32) { |
| if (label_type_ == SINGLE_LABEL || label_type_ == SINGLE_LABEL_WEIGHTED) { |
| TORCH_DCHECK_EQ(label_proto.int32_data_size(), 1); |
| prefetched_label_.mutable_data<int>()[item_id] = |
| label_proto.int32_data(0); |
| } else if (label_type_ == MULTI_LABEL_SPARSE) { |
| int* label_data = |
| prefetched_label_.mutable_data<int>() + item_id * num_labels_; |
| memset(label_data, 0, sizeof(int) * num_labels_); |
| for (const auto i : c10::irange(label_proto.int32_data_size())) { |
| label_data[label_proto.int32_data(i)] = 1; |
| } |
| } else if (label_type_ == MULTI_LABEL_WEIGHTED_SPARSE) { |
| const TensorProto& weight_proto = protos.protos(2); |
| float* label_data = |
| prefetched_label_.mutable_data<float>() + item_id * num_labels_; |
| memset(label_data, 0, sizeof(float) * num_labels_); |
| for (const auto i : c10::irange(label_proto.int32_data_size())) { |
| label_data[label_proto.int32_data(i)] = weight_proto.float_data(i); |
| } |
| } else if ( |
| label_type_ == MULTI_LABEL_DENSE || label_type_ == EMBEDDING_LABEL) { |
| CAFFE_ENFORCE(label_proto.int32_data_size() == num_labels_); |
| int* label_data = |
| prefetched_label_.mutable_data<int>() + item_id * num_labels_; |
| for (const auto i : c10::irange(label_proto.int32_data_size())) { |
| label_data[i] = label_proto.int32_data(i); |
| } |
| } else { |
| LOG(ERROR) << "Unknown label type:" << label_type_; |
| } |
| } else { |
| LOG(FATAL) << "Unsupported label data type."; |
| } |
| |
| for (const auto i : c10::irange(additional_output_protos.size())) { |
| auto additional_output_proto = additional_output_protos[i]; |
| if (additional_output_proto.data_type() == TensorProto::FLOAT) { |
| float* additional_output = |
| prefetched_additional_outputs_[i].template mutable_data<float>() + |
| item_id * additional_output_proto.float_data_size(); |
| |
| for (const auto j : c10::irange(additional_output_proto.float_data_size())) { |
| additional_output[j] = additional_output_proto.float_data(j); |
| } |
| } else if (additional_output_proto.data_type() == TensorProto::INT32) { |
| int* additional_output = |
| prefetched_additional_outputs_[i].template mutable_data<int>() + |
| item_id * additional_output_proto.int32_data_size(); |
| |
| for (const auto j : c10::irange(additional_output_proto.int32_data_size())) { |
| additional_output[j] = additional_output_proto.int32_data(j); |
| } |
| } else if (additional_output_proto.data_type() == TensorProto::INT64) { |
| int64_t* additional_output = |
| prefetched_additional_outputs_[i].template mutable_data<int64_t>() + |
| item_id * additional_output_proto.int64_data_size(); |
| |
| for (const auto j : c10::irange(additional_output_proto.int64_data_size())) { |
| additional_output[j] = additional_output_proto.int64_data(j); |
| } |
| } else if (additional_output_proto.data_type() == TensorProto::UINT8) { |
| uint8_t* additional_output = |
| prefetched_additional_outputs_[i].template mutable_data<uint8_t>() + |
| item_id * additional_output_proto.int32_data_size(); |
| |
| for (const auto j : c10::irange(additional_output_proto.int32_data_size())) { |
| additional_output[j] = |
| static_cast<uint8_t>(additional_output_proto.int32_data(j)); |
| } |
| } else { |
| LOG(FATAL) << "Unsupported output type."; |
| } |
| } |
| } |
| |
| // |
| // convert the source to the color format requested by the op |
| // |
| int out_c = color_ ? 3 : 1; |
| if (out_c == src.channels()) { |
| *img = src; |
| } else { |
| cv::cvtColor( |
| src, *img, (out_c == 1) ? cv::COLOR_BGR2GRAY : cv::COLOR_GRAY2BGR); |
| } |
| |
| // Note(Yangqing): I believe that the mat should be created continuous. |
| CAFFE_ENFORCE(img->isContinuous()); |
| |
| // Sanity check now that we decoded everything |
| |
| // Ensure that the bounding box is legit |
| if (info.bounding_params.valid && |
| (src.rows < info.bounding_params.ymin + info.bounding_params.height || |
| src.cols < info.bounding_params.xmin + info.bounding_params.width)) { |
| info.bounding_params.valid = false; |
| } |
| |
| // Apply the bounding box if requested |
| if (info.bounding_params.valid) { |
| // If we reach here, we know the parameters are sane |
| cv::Rect bounding_box( |
| info.bounding_params.xmin, |
| info.bounding_params.ymin, |
| info.bounding_params.width, |
| info.bounding_params.height); |
| *img = (*img)(bounding_box); |
| |
| /* |
| LOG(INFO) << "Did bounding with ymin:" |
| << info.bounding_params.ymin << " xmin:" << |
| info.bounding_params.xmin |
| << " height:" << info.bounding_params.height |
| << " width:" << info.bounding_params.width << "\n"; |
| LOG(INFO) << "Bounded matrix: " << img; |
| */ |
| } else { |
| // LOG(INFO) << "No bounding\n"; |
| } |
| |
| cv::Mat scaled_img; |
| bool inception_scale_jitter = false; |
| if (scale_jitter_type_ == INCEPTION_STYLE) { |
| if (!is_test_) { |
| // Inception-style scale jittering is only used for training |
| inception_scale_jitter = |
| RandomSizedCropping<Context>(img, crop_, randgen); |
| // if a random crop is still not found, do simple random cropping later |
| } |
| } |
| |
| if ((scale_jitter_type_ == NO_SCALE_JITTER) || |
| (scale_jitter_type_ == INCEPTION_STYLE && !inception_scale_jitter)) { |
| int scaled_width, scaled_height; |
| int scale_to_use = scale_ > 0 ? scale_ : minsize_; |
| |
| // set the random minsize |
| if (random_scaling_) { |
| scale_to_use = std::uniform_int_distribution<>( |
| random_scale_[0], random_scale_[1])(*randgen); |
| } |
| |
| if (warp_) { |
| scaled_width = scale_to_use; |
| scaled_height = scale_to_use; |
| } else if (img->rows > img->cols) { |
| scaled_width = scale_to_use; |
| scaled_height = static_cast<float>(img->rows) * scale_to_use / img->cols; |
| } else { |
| scaled_height = scale_to_use; |
| scaled_width = static_cast<float>(img->cols) * scale_to_use / img->rows; |
| } |
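| // e.g. with minsize_ = 256 and a 300x200 input, scale_to_use = 256 and the |
| // image is rescaled to 384x256, making the shorter side exactly 256 |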
| if ((scale_ > 0 && |
| (scaled_height != img->rows || scaled_width != img->cols)) || |
| (scaled_height > img->rows || scaled_width > img->cols)) { |
| // We rescale in all cases if we are using scale_ |
| // but only to make the image bigger if using minsize_ |
| /* |
| LOG(INFO) << "Scaling to " << scaled_width << " x " << scaled_height |
| << " From " << img->cols << " x " << img->rows; |
| */ |
| cv::resize( |
| *img, |
| scaled_img, |
| cv::Size(scaled_width, scaled_height), |
| 0, |
| 0, |
| cv::INTER_AREA); |
| *img = scaled_img; |
| } |
| } |
| |
| // TODO(Yangqing): return false if any error happens. |
| return true; |
| } |
| |
| // assume HWC order and color channels BGR |
| template <class Context> |
| void Saturation( |
| float* img, |
| const int img_size, |
| const float alpha_rand, |
| std::mt19937* randgen) { |
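| // Blend each pixel toward its own grayscale value: |
| //   out = alpha * in + (1 - alpha) * gray, alpha ~ U(1 - a, 1 + a) |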
| float alpha = 1.0f + |
| std::uniform_real_distribution<float>(-alpha_rand, alpha_rand)(*randgen); |
| // BGR to grayscale weights: R -> 0.299, G -> 0.587, B -> 0.114 |
| int p = 0; |
| for (const auto h : c10::irange(img_size)) { |
| for (const auto w : c10::irange(img_size)) { |
| float gray_color = img[3 * p] * 0.114f + img[3 * p + 1] * 0.587f + |
| img[3 * p + 2] * 0.299f; |
| for (const auto c : c10::irange(3)) { |
| img[3 * p + c] = img[3 * p + c] * alpha + gray_color * (1.0f - alpha); |
| } |
| p++; |
| } |
| } |
| } |
| |
| // assume HWC order and color channels BGR |
| template <class Context> |
| void Brightness( |
| float* img, |
| const int img_size, |
| const float alpha_rand, |
| std::mt19937* randgen) { |
| float alpha = 1.0f + |
| std::uniform_real_distribution<float>(-alpha_rand, alpha_rand)(*randgen); |
| int p = 0; |
| for (const auto h : c10::irange(img_size)) { |
| for (const auto w : c10::irange(img_size)) { |
| for (const auto c : c10::irange(3)) { |
| img[p++] *= alpha; |
| } |
| } |
| } |
| } |
| |
| // assume HWC order and color channels BGR |
| template <class Context> |
| void Contrast( |
| float* img, |
| const int img_size, |
| const float alpha_rand, |
| std::mt19937* randgen) { |
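| // Blend each pixel toward the image-wide mean gray value: |
| //   out = alpha * in + (1 - alpha) * gray_mean, alpha ~ U(1 - a, 1 + a) |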
| float gray_mean = 0; |
| int p = 0; |
| for (const auto h : c10::irange(img_size)) { |
| for (const auto w : c10::irange(img_size)) { |
| // BGR to grayscale weights: R -> 0.299, G -> 0.587, B -> 0.114 |
| gray_mean += img[3 * p] * 0.114f + img[3 * p + 1] * 0.587f + |
| img[3 * p + 2] * 0.299f; |
| p++; |
| } |
| } |
| gray_mean /= (img_size * img_size); |
| |
| float alpha = 1.0f + |
| std::uniform_real_distribution<float>(-alpha_rand, alpha_rand)(*randgen); |
| p = 0; |
| for (const auto h : c10::irange(img_size)) { |
| for (const auto w : c10::irange(img_size)) { |
| for (const auto c : c10::irange(3)) { |
| img[p] = img[p] * alpha + gray_mean * (1.0f - alpha); |
| p++; |
| } |
| } |
| } |
| } |
| |
| // assume HWC order and color channels BGR |
| template <class Context> |
| void ColorJitter( |
| float* img, |
| const int img_size, |
| const float saturation, |
| const float brightness, |
| const float contrast, |
| std::mt19937* randgen) { |
| std::vector<int> jitter_order{0, 1, 2}; |
| // obtain a time-based seed to shuffle the augmentation order: |
| unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); |
| std::shuffle( |
| jitter_order.begin(), |
| jitter_order.end(), |
| std::default_random_engine(seed)); |
| |
| for (const auto i : c10::irange(3)) { |
| if (jitter_order[i] == 0) { |
| Saturation<Context>(img, img_size, saturation, randgen); |
| } else if (jitter_order[i] == 1) { |
| Brightness<Context>(img, img_size, brightness, randgen); |
| } else { |
| Contrast<Context>(img, img_size, contrast, randgen); |
| } |
| } |
| } |
| |
| // assume HWC order and color channels BGR |
| template <class Context> |
| void ColorLighting( |
| float* img, |
| const int img_size, |
| const float alpha_std, |
| const std::vector<std::vector<float>>& eigvecs, |
| const std::vector<float>& eigvals, |
| std::mt19937* randgen) { |
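| // AlexNet-style PCA lighting augmentation: add V * (lambda * alpha), taken |
| // elementwise, to every pixel, where the columns of V are the RGB |
| // covariance eigenvectors, lambda the eigenvalues, and alpha ~ N(0, |
| // alpha_std) is drawn once per image. |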
| std::normal_distribution<float> d(0, alpha_std); |
| std::vector<float> alphas(3); |
| for (const auto i : c10::irange(3)) { |
| alphas[i] = d(*randgen); |
| } |
| |
| std::vector<float> delta_rgb(3, 0.0); |
| for (const auto i : c10::irange(3)) { |
| for (const auto j : c10::irange(3)) { |
| delta_rgb[i] += eigvecs[i][j] * eigvals[j] * alphas[j]; |
| } |
| } |
| |
| int p = 0; |
| for (const auto h : c10::irange(img_size)) { |
| for (const auto w : c10::irange(img_size)) { |
| for (const auto c : c10::irange(3)) { |
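| // img is stored BGR while delta_rgb is in RGB order, hence index 2 - c |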
| img[p++] += delta_rgb[2 - c]; |
| } |
| } |
| } |
| } |
| |
| // assume HWC order and color channels BGR |
| // mean subtraction and scaling. |
| template <class Context> |
| void ColorNormalization( |
| float* img, |
| const int img_size, |
| const int channels, |
| const std::vector<float>& mean, |
| const std::vector<float>& std) { |
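| // note: std is expected to hold reciprocals (1 / stddev); the constructor |
| // inverts std_ up front so this loop can multiply instead of divide. |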
| int p = 0; |
| for (const auto h : c10::irange(img_size)) { |
| for (const auto w : c10::irange(img_size)) { |
| for (const auto c : c10::irange(channels)) { |
| img[p] = (img[p] - mean[c]) * std[c]; |
| p++; |
| } |
| } |
| } |
| } |
| |
| // Factored out image transformation |
| template <class Context> |
| void TransformImage( |
| const cv::Mat& scaled_img, |
| const int channels, |
| float* image_data, |
| const bool color_jitter, |
| const float saturation, |
| const float brightness, |
| const float contrast, |
| const bool color_lighting, |
| const float color_lighting_std, |
| const std::vector<std::vector<float>>& color_lighting_eigvecs, |
| const std::vector<float>& color_lighting_eigvals, |
| const int crop, |
| const bool mirror, |
| const std::vector<float>& mean, |
| const std::vector<float>& std, |
| std::mt19937* randgen, |
| std::bernoulli_distribution* mirror_this_image, |
| bool is_test = false) { |
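| // The transform pipeline: center (test) or random (train) crop, optional |
| // horizontal mirroring, optional color jitter / PCA lighting (training |
| // only, 3-channel images), then per-channel mean/std normalization. |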
| CAFFE_ENFORCE_GE( |
| scaled_img.rows, crop, "Image height must be at least the crop size."); |
| CAFFE_ENFORCE_GE( |
| scaled_img.cols, crop, "Image width must be at least the crop size."); |
| |
| // find the cropped region, and copy it to the destination matrix |
| int width_offset, height_offset; |
| if (is_test) { |
| width_offset = (scaled_img.cols - crop) / 2; |
| height_offset = (scaled_img.rows - crop) / 2; |
| } else { |
| width_offset = |
| std::uniform_int_distribution<>(0, scaled_img.cols - crop)(*randgen); |
| height_offset = |
| std::uniform_int_distribution<>(0, scaled_img.rows - crop)(*randgen); |
| } |
| |
| float* image_data_ptr = image_data; |
| if (!is_test && mirror && (*mirror_this_image)(*randgen)) { |
| // Copy mirrored image. |
| for (int h = height_offset; h < height_offset + crop; ++h) { |
| for (int w = width_offset + crop - 1; w >= width_offset; --w) { |
| const uint8_t* cv_data = scaled_img.ptr(h) + w * channels; |
| for (const auto c : c10::irange(channels)) { |
| *(image_data_ptr++) = static_cast<float>(cv_data[c]); |
| } |
| } |
| } |
| } else { |
| // Copy normally. |
| for (int h = height_offset; h < height_offset + crop; ++h) { |
| for (int w = width_offset; w < width_offset + crop; ++w) { |
| const uint8_t* cv_data = scaled_img.ptr(h) + w * channels; |
| for (const auto c : c10::irange(channels)) { |
| *(image_data_ptr++) = static_cast<float>(cv_data[c]); |
| } |
| } |
| } |
| } |
| |
| if (color_jitter && channels == 3 && !is_test) { |
| ColorJitter<Context>( |
| image_data, crop, saturation, brightness, contrast, randgen); |
| } |
| if (color_lighting && channels == 3 && !is_test) { |
| ColorLighting<Context>( |
| image_data, |
| crop, |
| color_lighting_std, |
| color_lighting_eigvecs, |
| color_lighting_eigvals, |
| randgen); |
| } |
| |
| // Color normalization |
| // Mean subtraction and scaling. |
| ColorNormalization<Context>(image_data, crop, channels, mean, std); |
| } |
| |
| // Only crop / transpose the image, |
| // leaving it in uint8_t data type. |
| template <class Context> |
| void CropTransposeImage( |
| const cv::Mat& scaled_img, |
| const int channels, |
| uint8_t* cropped_data, |
| const int crop, |
| const bool mirror, |
| std::mt19937* randgen, |
| std::bernoulli_distribution* mirror_this_image, |
| bool is_test = false) { |
| CAFFE_ENFORCE_GE( |
| scaled_img.rows, crop, "Image height must be at least the crop size."); |
| CAFFE_ENFORCE_GE( |
| scaled_img.cols, crop, "Image width must be at least the crop size."); |
| |
| // find the cropped region, and copy it to the destination matrix |
| int width_offset, height_offset; |
| if (is_test) { |
| width_offset = (scaled_img.cols - crop) / 2; |
| height_offset = (scaled_img.rows - crop) / 2; |
| } else { |
| width_offset = |
| std::uniform_int_distribution<>(0, scaled_img.cols - crop)(*randgen); |
| height_offset = |
| std::uniform_int_distribution<>(0, scaled_img.rows - crop)(*randgen); |
| } |
| |
| if (mirror && (*mirror_this_image)(*randgen)) { |
| // Copy mirrored image. |
| for (int h = height_offset; h < height_offset + crop; ++h) { |
| for (int w = width_offset + crop - 1; w >= width_offset; --w) { |
| const uint8_t* cv_data = scaled_img.ptr(h) + w * channels; |
| for (const auto c : c10::irange(channels)) { |
| *(cropped_data++) = cv_data[c]; |
| } |
| } |
| } |
| } else { |
| // Copy normally. |
| for (int h = height_offset; h < height_offset + crop; ++h) { |
| for (int w = width_offset; w < width_offset + crop; ++w) { |
| const uint8_t* cv_data = scaled_img.ptr(h) + w * channels; |
| for (const auto c : c10::irange(channels)) { |
| *(cropped_data++) = cv_data[c]; |
| } |
| } |
| } |
| } |
| } |
| |
| // Parse datum, decode image, perform transform |
| // Intended as entry point for binding to thread pool |
| template <class Context> |
| void ImageInputOp<Context>::DecodeAndTransform( |
| const std::string& value, |
| float* image_data, |
| int item_id, |
| const int channels, |
| std::size_t thread_index) { |
| CAFFE_ENFORCE((int)thread_index < num_decode_threads_); |
| |
| std::bernoulli_distribution mirror_this_image(0.5f); |
| std::mt19937* randgen = &(randgen_per_thread_[thread_index]); |
| |
| cv::Mat img; |
| // Decode the image |
| PerImageArg info; |
| CHECK( |
| GetImageAndLabelAndInfoFromDBValue(value, &img, info, item_id, randgen)); |
| // Factor out the image transformation |
| TransformImage<Context>( |
| img, |
| channels, |
| image_data, |
| color_jitter_, |
| img_saturation_, |
| img_brightness_, |
| img_contrast_, |
| color_lighting_, |
| color_lighting_std_, |
| color_lighting_eigvecs_, |
| color_lighting_eigvals_, |
| crop_, |
| mirror_, |
| mean_, |
| std_, |
| randgen, |
| &mirror_this_image, |
| is_test_); |
| } |
| |
| template <class Context> |
| void ImageInputOp<Context>::DecodeAndTransposeOnly( |
| const std::string& value, |
| uint8_t* image_data, |
| int item_id, |
| const int channels, |
| std::size_t thread_index) { |
| CAFFE_ENFORCE((int)thread_index < num_decode_threads_); |
| |
| std::bernoulli_distribution mirror_this_image(0.5f); |
| std::mt19937* randgen = &(randgen_per_thread_[thread_index]); |
| |
| cv::Mat img; |
| // Decode the image |
| PerImageArg info; |
| CHECK( |
| GetImageAndLabelAndInfoFromDBValue(value, &img, info, item_id, randgen)); |
| |
| // Factor out the image transformation |
| CropTransposeImage<Context>( |
| img, |
| channels, |
| image_data, |
| crop_, |
| mirror_, |
| randgen, |
| &mirror_this_image, |
| is_test_); |
| } |
| |
| template <class Context> |
| bool ImageInputOp<Context>::Prefetch() { |
| if (!owned_reader_.get()) { |
| // if we are not owning the reader, we will get the reader pointer from |
| // input. Otherwise the constructor should have already set the reader |
| // pointer. |
| reader_ = &OperatorBase::Input<db::DBReader>(0); |
| } |
| const int channels = color_ ? 3 : 1; |
| // Call mutable_data() once to allocate the underlying memory. |
| if (gpu_transform_) { |
| // we'll transfer up in uint8, then convert later |
| prefetched_image_.mutable_data<uint8_t>(); |
| } else { |
| prefetched_image_.mutable_data<float>(); |
| } |
| |
| prefetched_label_.mutable_data<int>(); |
| // Prefetching handled with a thread pool of "decode_threads" threads. |
| |
| for (const auto item_id : c10::irange(batch_size_)) { |
| std::string key, value; |
| cv::Mat img; |
| |
| // read data |
| reader_->Read(&key, &value); |
| |
| // determine label type based on first item |
| if (item_id == 0) { |
| if (use_caffe_datum_) { |
| prefetched_label_.mutable_data<int>(); |
| } else { |
| TensorProtos protos; |
| CAFFE_ENFORCE(protos.ParseFromString(value)); |
| TensorProto_DataType labeldt = protos.protos(1).data_type(); |
| if (labeldt == TensorProto::INT32) { |
| prefetched_label_.mutable_data<int>(); |
| } else if (labeldt == TensorProto::FLOAT) { |
| prefetched_label_.mutable_data<float>(); |
| } else { |
| LOG(FATAL) << "Unsupported label type."; |
| } |
| |
| for (const auto i : c10::irange(additional_inputs_count_)) { |
| int index = additional_inputs_offset_ + i; |
| TensorProto additional_output_proto = protos.protos(index); |
| auto sizes = |
| std::vector<int64_t>({batch_size_, additional_output_sizes_[i]}); |
| if (additional_output_proto.data_type() == TensorProto::FLOAT) { |
| prefetched_additional_outputs_[i] = |
| caffe2::empty(sizes, at::dtype<float>().device(CPU)); |
| } else if ( |
| additional_output_proto.data_type() == TensorProto::INT32) { |
| prefetched_additional_outputs_[i] = |
| caffe2::empty(sizes, at::dtype<int>().device(CPU)); |
| } else if ( |
| additional_output_proto.data_type() == TensorProto::INT64) { |
| prefetched_additional_outputs_[i] = |
| caffe2::empty(sizes, at::dtype<int64_t>().device(CPU)); |
| } else if ( |
| additional_output_proto.data_type() == TensorProto::UINT8) { |
| prefetched_additional_outputs_[i] = |
| caffe2::empty(sizes, at::dtype<uint8_t>().device(CPU)); |
| } else { |
| LOG(FATAL) << "Unsupported output type."; |
| } |
| } |
| } |
| } |
| |
| // launch into thread pool for processing |
| // TODO: support color jitter and color lighting in gpu_transform |
| if (gpu_transform_) { |
| // output of decode will still be uint8 |
| uint8_t* image_data = prefetched_image_.mutable_data<uint8_t>() + |
| crop_ * crop_ * channels * item_id; |
| thread_pool_->runTaskWithID(std::bind( |
| &ImageInputOp<Context>::DecodeAndTransposeOnly, |
| this, |
| std::string(value), |
| image_data, |
| item_id, |
| channels, |
| std::placeholders::_1)); |
| } else { |
| float* image_data = prefetched_image_.mutable_data<float>() + |
| crop_ * crop_ * channels * item_id; |
| thread_pool_->runTaskWithID(std::bind( |
| &ImageInputOp<Context>::DecodeAndTransform, |
| this, |
| std::string(value), |
| image_data, |
| item_id, |
| channels, |
| std::placeholders::_1)); |
| } |
| } |
| thread_pool_->waitWorkComplete(); |
| |
| // tolerate at most a max_decode_error_ratio fraction of opencv imdecode |
| // failures per batch before raising a runtime exception |
| if ((float)num_decode_errors_in_batch_ / batch_size_ > |
| max_decode_error_ratio_) { |
| throw std::runtime_error( |
| "max_decode_error_ratio exceeded " + |
| c10::to_string(max_decode_error_ratio_)); |
| } |
| |
| // If the context is not CPUContext, we will need to do a copy in the |
| // prefetch function as well. |
| auto device = at::device(Context::GetDeviceType()); |
| if (!std::is_same<Context, CPUContext>::value) { |
| // do sync copies |
| ReinitializeAndCopyFrom( |
| &prefetched_image_on_device_, device, prefetched_image_); |
| ReinitializeAndCopyFrom( |
| &prefetched_label_on_device_, device, prefetched_label_); |
| |
| for (const auto i : c10::irange(prefetched_additional_outputs_on_device_.size())) { |
| ReinitializeAndCopyFrom( |
| &prefetched_additional_outputs_on_device_[i], |
| device, |
| prefetched_additional_outputs_[i]); |
| } |
| } |
| |
| num_decode_errors_in_batch_ = 0; |
| |
| return true; |
| } |
| |
| template <class Context> |
| bool ImageInputOp<Context>::CopyPrefetched() { |
| auto type = Device(Context::GetDeviceType()); |
| auto options = at::device(type); |
| |
| // Note(jiayq): The if statement below should be optimized away by the |
| // compiler since std::is_same is a constexpr. |
| if (std::is_same<Context, CPUContext>::value) { |
| OperatorBase::OutputTensorCopyFrom( |
| 0, options, prefetched_image_, /* async */ true); |
| OperatorBase::OutputTensorCopyFrom( |
| 1, options, prefetched_label_, /* async */ true); |
| |
| for (const auto i : c10::irange(2, OutputSize())) { |
| OperatorBase::OutputTensorCopyFrom( |
| i, options, prefetched_additional_outputs_[i - 2], /* async */ true); |
| } |
| } else { |
| // TODO: support color jitter and color lighting in gpu_transform |
| if (gpu_transform_) { |
| if (!mean_std_copied_) { |
| ReinitializeTensor( |
| &mean_gpu_, |
| {static_cast<int64_t>(mean_.size())}, |
| at::dtype<float>().device(Context::GetDeviceType())); |
| ReinitializeTensor( |
| &std_gpu_, |
| {static_cast<int64_t>(std_.size())}, |
| at::dtype<float>().device(Context::GetDeviceType())); |
| |
| context_.template CopyFromCPU<float>( |
| mean_.size(), |
| mean_.data(), |
| mean_gpu_.template mutable_data<float>()); |
| context_.template CopyFromCPU<float>( |
| std_.size(), std_.data(), std_gpu_.template mutable_data<float>()); |
| mean_std_copied_ = true; |
| } |
| const auto& X = prefetched_image_on_device_; |
| // data comes in as NHWC |
| const int N = X.dim32(0), C = X.dim32(3), H = X.dim32(1), W = X.dim32(2); |
| // data goes out as NCHW |
| auto dims = std::vector<int64_t>{N, C, H, W}; |
| if (!ApplyTransformOnGPU(dims, type)) { |
| return false; |
| } |
| |
| } else { |
| OperatorBase::OutputTensorCopyFrom( |
| 0, type, prefetched_image_on_device_, /* async */ true); |
| } |
| OperatorBase::OutputTensorCopyFrom( |
| 1, type, prefetched_label_on_device_, /* async */ true); |
| |
| for (const auto i : c10::irange(2, OutputSize())) { |
| OperatorBase::OutputTensorCopyFrom( |
| i, |
| type, |
| prefetched_additional_outputs_on_device_[i - 2], |
| /* async */ true); |
| } |
| } |
| return true; |
| } |
| } // namespace caffe2 |
| |
| #endif // CAFFE2_IMAGE_IMAGE_INPUT_OP_H_ |