| #include <caffe2/core/logging.h> |
| #include <caffe2/video/video_io.h> |
| #include <algorithm> |
| #include <random> |
| #include <string> |
| |
| namespace caffe2 { |
| |
| void ClipTransformRGB( |
| const unsigned char* buffer_rgb, |
| const int crop_size, |
| const int length_rgb, |
| const int channels_rgb, |
| const int sampling_rate_rgb, |
| const int height, |
| const int width, |
| const int h_off, |
| const int w_off, |
| const bool mirror_me, |
| const std::vector<float>& mean_rgb, |
| const std::vector<float>& inv_std_rgb, |
| float* transformed_clip) { |
| // The order of output dimensions is C, L, H, W |
| int orig_index, tran_index; |
| for (int c = 0; c < channels_rgb; ++c) { |
| for (int l = 0; l < length_rgb; ++l) { |
| int orig_index_l = l * sampling_rate_rgb * height * width * channels_rgb; |
| int tran_index_l = (c * length_rgb + l) * crop_size; |
| |
| for (int h = 0; h < crop_size; ++h) { |
| int orig_index_h = orig_index_l + (h + h_off) * width * channels_rgb; |
| int tran_index_h = (tran_index_l + h) * crop_size; |
| |
| for (int w = 0; w < crop_size; ++w) { |
| orig_index = orig_index_h + (w + w_off) * channels_rgb + c; |
| |
| // mirror the frame |
| if (mirror_me) { |
| tran_index = tran_index_h + (crop_size - 1 - w); |
| } else { |
| tran_index = tran_index_h + w; |
| } |
| |
| // normalize and transform the clip |
| transformed_clip[tran_index] = |
| (buffer_rgb[orig_index] - mean_rgb[c]) * inv_std_rgb[c]; |
| } |
| } |
| } |
| } |
| } |
| |
| void ClipTransformOpticalFlow( |
| const unsigned char* buffer_rgb, |
| const int crop_size, |
| const int length_of, |
| const int channels_of, |
| const int sampling_rate_of, |
| const int height, |
| const int width, |
| const cv::Rect& rect, |
| const int channels_rgb, |
| const bool mirror_me, |
| const int flow_alg_type, |
| const int flow_data_type, |
| const int frame_gap_of, |
| const bool do_flow_aggregation, |
| const std::vector<float>& mean_of, |
| const std::vector<float>& inv_std_of, |
| float* transformed_clip) { |
| const int frame_size = crop_size * crop_size; |
| const int channel_size_flow = length_of * frame_size; |
| |
| // for get the mean and std of the input data |
| bool extract_statistics = false; |
| static std::vector<double> mean_static(channels_of, 0.f); |
| static std::vector<double> std_static(channels_of, 0.f); |
| static long long count = 0; |
| cv::Scalar mean_img, std_img; |
| |
| for (int l = 0; l < length_of; l++) { |
| // get the grayscale frames |
| std::vector<cv::Mat> grays, rgbs; |
| int step_size = do_flow_aggregation ? 1 : frame_gap_of; |
| for (int j = 0; j <= frame_gap_of; j += step_size) { |
| // get the current frame |
| const unsigned char* curr_frame = buffer_rgb + |
| (l * sampling_rate_of + j) * height * width * channels_rgb; |
| cv::Mat img = cv::Mat::zeros(height, width, CV_8UC3); |
| memcpy( |
| img.data, |
| curr_frame, |
| height * width * channels_rgb * sizeof(unsigned char)); |
| |
| // crop and mirror the frame |
| cv::Mat img_cropped = img(rect); |
| if (mirror_me) { |
| cv::flip(img_cropped, img_cropped, 1); |
| } |
| |
| cv::Mat gray; |
| cv::cvtColor(img_cropped, gray, cv::COLOR_RGB2GRAY); |
| grays.push_back(gray); |
| rgbs.push_back(img_cropped); |
| } |
| |
| cv::Mat first_gray, first_rgb; |
| cv::Mat flow = cv::Mat::zeros(crop_size, crop_size, CV_32FC2); |
| MultiFrameOpticalFlowExtractor(grays, flow_alg_type, flow); |
| |
| std::vector<cv::Mat> imgs; |
| cv::split(flow, imgs); |
| // save the 2-channel optical flow first |
| int c = 0; |
| for (; c < 2; c++) { |
| if (extract_statistics) { |
| cv::meanStdDev(imgs[c], mean_img, std_img); |
| mean_static[c] += mean_img[0]; |
| std_static[c] += std_img[0]; |
| } |
| |
| imgs[c] -= mean_of[c]; |
| imgs[c] *= inv_std_of[c]; |
| memcpy( |
| transformed_clip + c * channel_size_flow + l * frame_size, |
| imgs[c].data, |
| frame_size * sizeof(float)); |
| } |
| |
| cv::Mat mag; |
| std::vector<cv::Mat> chans; |
| // augment the optical flow with more channels |
| switch (flow_data_type) { |
| case FlowDataType::Flow2C: |
| // nothing to do if we only need two channels |
| break; |
| |
| case FlowDataType::Flow3C: |
| // use magnitude as the third channel |
| mag = cv::abs(imgs[0]) + cv::abs(imgs[1]); |
| if (extract_statistics) { |
| cv::meanStdDev(mag, mean_img, std_img); |
| mean_static[c] += mean_img[0]; |
| std_static[c] += std_img[0]; |
| } |
| |
| mag -= mean_of[c]; |
| mag *= inv_std_of[c]; |
| memcpy( |
| transformed_clip + c * channel_size_flow + l * frame_size, |
| mag.data, |
| frame_size * sizeof(float)); |
| break; |
| |
| case FlowDataType::FlowWithGray: |
| // add grayscale image as the third channel |
| grays[0].convertTo(first_gray, CV_32FC1); |
| if (extract_statistics) { |
| cv::meanStdDev(first_gray, mean_img, std_img); |
| mean_static[c] += mean_img[0]; |
| std_static[c] += std_img[0]; |
| } |
| |
| first_gray -= mean_of[c]; |
| first_gray *= inv_std_of[c]; |
| memcpy( |
| transformed_clip + c * channel_size_flow + l * frame_size, |
| first_gray.data, |
| frame_size * sizeof(float)); |
| break; |
| |
| case FlowDataType::FlowWithRGB: |
| // add all three rgb channels |
| rgbs[0].convertTo(first_rgb, CV_32FC3); |
| cv::split(first_rgb, chans); |
| for (; c < channels_of; c++) { |
| if (extract_statistics) { |
| cv::meanStdDev(chans[c - 2], mean_img, std_img); |
| mean_static[c] += mean_img[0]; |
| std_static[c] += std_img[0]; |
| } |
| |
| chans[c - 2] -= mean_of[c]; |
| chans[c - 2] *= inv_std_of[c]; |
| memcpy( |
| transformed_clip + c * channel_size_flow + l * frame_size, |
| chans[c - 2].data, |
| frame_size * sizeof(float)); |
| } |
| break; |
| |
| default: |
| LOG(ERROR) << "Unsupported optical flow data type " << flow_data_type; |
| break; |
| } |
| |
| if (extract_statistics) { |
| count++; |
| if (count % 1000 == 1) { |
| for (int i = 0; i < channels_of; i++) { |
| LOG(INFO) << i |
| << "-th channel mean: " << mean_static[i] / float(count) |
| << " std: " << std_static[i] / float(count); |
| } |
| } |
| } |
| } |
| } |
| |
| } // namespace caffe2 |