iamf/cli/demixing_module.cc - platform/external/iamf_tools - Git at Google

 /*
  * Copyright (c) 2023, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 3-Clause Clear License
  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
  * License was not distributed with this source code in the LICENSE file, you
  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
  * Alliance for Open Media Patent License 1.0 was not distributed with this
  * source code in the PATENTS file, you can obtain it at
  * www.aomedia.org/license/patent.
  */
 #include "iamf/cli/demixing_module.h"

 #include <algorithm>
 #include <climits>
 #include <cmath>
 #include <cstddef>
 #include <cstdint>
 #include <list>
 #include <utility>
 #include <variant>
 #include <vector>

 #include "absl/container/flat_hash_map.h"
 #include "absl/container/flat_hash_set.h"
 #include "absl/log/check.h"
 #include "absl/log/log.h"
 #include "absl/status/status.h"
 #include "absl/status/statusor.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
 #include "iamf/cli/audio_element_with_data.h"
 #include "iamf/cli/audio_frame_decoder.h"
 #include "iamf/cli/audio_frame_with_data.h"
 #include "iamf/cli/channel_label.h"
 #include "iamf/cli/cli_util.h"
 #include "iamf/common/utils/macros.h"
 #include "iamf/common/utils/numeric_utils.h"
 #include "iamf/obu/audio_element.h"
 #include "iamf/obu/audio_frame.h"
 #include "iamf/obu/demixing_info_parameter_data.h"
 #include "iamf/obu/types.h"

 namespace iamf_tools {

 namespace {

 using enum ChannelLabel::Label;

 using DemixingMetadataForAudioElementId =
     DemixingModule::DemixingMetadataForAudioElementId;

 // Down-mixes the 7.x surround channels to 5.x.
 //
 // Reads `kL7`, `kR7`, `kLss7`, `kLrs7`, `kRss7` and `kRrs7` from
 // `label_to_samples` and writes `kL5`, `kR5`, `kLs5` and `kRs5`:
 //   L5 = L7, R5 = R7,
 //   Ls5 = alpha * Lss7 + beta * Lrs7,
 //   Rs5 = alpha * Rss7 + beta * Rrs7.
 //
 // Returns `InvalidArgumentError` if an input channel is missing, or if
 // `kLrs7`, `kRss7` or `kRrs7` holds fewer samples than `kLss7`.
 absl::Status S7ToS5DownMixer(const DownMixingParams& down_mixing_params,
                              LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "S7 to S5";

   // Check input to perform this down-mixing exist.
   const auto has_label = [&label_to_samples](ChannelLabel::Label label) {
     return label_to_samples.find(label) != label_to_samples.end();
   };
   if (!has_label(kL7) || !has_label(kR7) || !has_label(kLss7) ||
       !has_label(kLrs7) || !has_label(kRss7) || !has_label(kRrs7)) {
     return absl::InvalidArgumentError("Missing some input channels");
   }

   const auto& l7_samples = label_to_samples[kL7];
   const auto& lss7_samples = label_to_samples[kLss7];
   const auto& lrs7_samples = label_to_samples[kLrs7];
   const auto& r7_samples = label_to_samples[kR7];
   const auto& rss7_samples = label_to_samples[kRss7];
   const auto& rrs7_samples = label_to_samples[kRrs7];

   // The mixing loop runs over the length of Lss7; every other vector indexed
   // in that loop must be at least as long to avoid out-of-bounds access.
   const size_t num_ticks = lss7_samples.size();
   if (lrs7_samples.size() < num_ticks || rss7_samples.size() < num_ticks ||
       rrs7_samples.size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& l5_samples = label_to_samples[kL5];
   auto& r5_samples = label_to_samples[kR5];
   auto& ls5_samples = label_to_samples[kLs5];
   auto& rs5_samples = label_to_samples[kRs5];

   // Directly copy L7/R7 to L5/R5, because they are the same.
   l5_samples = l7_samples;
   r5_samples = r7_samples;

   // Handle Ls5 and Rs5.
   ls5_samples.resize(num_ticks);
   rs5_samples.resize(rss7_samples.size());
   for (size_t i = 0; i < num_ticks; ++i) {
     ls5_samples[i] = down_mixing_params.alpha * lss7_samples[i] +
                      down_mixing_params.beta * lrs7_samples[i];
     rs5_samples[i] = down_mixing_params.alpha * rss7_samples[i] +
                      down_mixing_params.beta * rrs7_samples[i];
   }

   return absl::OkStatus();
 }

 // Reconstructs the 7.x surround channels from 5.x.
 //
 // Reads L5, Ls5, Lss7, R5, Rs5 and Rss7 (each either the original label or
 // its demixed counterpart, via `FindSamplesOrDemixedSamples()`) and writes
 // `kDemixedL7`, `kDemixedR7`, `kDemixedLrs7` and `kDemixedRrs7`:
 //   L7 = L5, R7 = R5,
 //   Lrs7 = (Ls5 - alpha * Lss7) / beta,
 //   Rrs7 = (Rs5 - alpha * Rss7) / beta.
 //
 // Returns the lookup error if an input is missing, or `InvalidArgumentError`
 // if Ls5, Lss7, Rs5 or Rss7 holds fewer samples than L5.
 absl::Status S5ToS7Demixer(const DownMixingParams& down_mixing_params,
                            LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "S5 to S7";

   const std::vector<InternalSampleType>* l5_samples = nullptr;
   const std::vector<InternalSampleType>* ls5_samples = nullptr;
   const std::vector<InternalSampleType>* lss7_samples = nullptr;
   const std::vector<InternalSampleType>* r5_samples = nullptr;
   const std::vector<InternalSampleType>* rs5_samples = nullptr;
   const std::vector<InternalSampleType>* rss7_samples = nullptr;
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kL5, label_to_samples, &l5_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kLs5, label_to_samples, &ls5_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kLss7, label_to_samples, &lss7_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kR5, label_to_samples, &r5_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kRs5, label_to_samples, &rs5_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kRss7, label_to_samples, &rss7_samples));

   // The demixing loop runs over the length of L5; every vector indexed in that
   // loop must be at least as long to avoid out-of-bounds reads.
   const size_t num_ticks = l5_samples->size();
   if (ls5_samples->size() < num_ticks || lss7_samples->size() < num_ticks ||
       rs5_samples->size() < num_ticks || rss7_samples->size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& l7_samples = label_to_samples[kDemixedL7];
   auto& r7_samples = label_to_samples[kDemixedR7];
   auto& lrs7_samples = label_to_samples[kDemixedLrs7];
   auto& rrs7_samples = label_to_samples[kDemixedRrs7];

   // Directly copy L5/R5 to L7/R7, because they are the same.
   l7_samples = *l5_samples;
   r7_samples = *r5_samples;

   // Handle Lrs7 and Rrs7.
   lrs7_samples.resize(num_ticks, 0.0);
   rrs7_samples.resize(num_ticks, 0.0);
   for (size_t i = 0; i < num_ticks; ++i) {
     lrs7_samples[i] =
         ((*ls5_samples)[i] - down_mixing_params.alpha * (*lss7_samples)[i]) /
         down_mixing_params.beta;
     rrs7_samples[i] =
         ((*rs5_samples)[i] - down_mixing_params.alpha * (*rss7_samples)[i]) /
         down_mixing_params.beta;
   }

   return absl::OkStatus();
 }

 // Down-mixes the 5.x surround channels to 3.x.
 //
 // Reads `kL5`, `kLs5`, `kR5` and `kRs5` from `label_to_samples` and writes
 // `kL3` and `kR3`:
 //   L3 = L5 + delta * Ls5,
 //   R3 = R5 + delta * Rs5.
 //
 // Returns `InvalidArgumentError` if an input channel is missing, or if `kLs5`,
 // `kR5` or `kRs5` holds fewer samples than `kL5`.
 absl::Status S5ToS3DownMixer(const DownMixingParams& down_mixing_params,
                              LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "S5 to S3";

   // Check input to perform this down-mixing exist.
   const auto has_label = [&label_to_samples](ChannelLabel::Label label) {
     return label_to_samples.find(label) != label_to_samples.end();
   };
   if (!has_label(kL5) || !has_label(kLs5) || !has_label(kR5) ||
       !has_label(kRs5)) {
     return absl::InvalidArgumentError("Missing some input channels");
   }

   const auto& l5_samples = label_to_samples[kL5];
   const auto& ls5_samples = label_to_samples[kLs5];
   const auto& r5_samples = label_to_samples[kR5];
   const auto& rs5_samples = label_to_samples[kRs5];

   // The mixing loop runs over the length of L5; every other vector indexed in
   // that loop must be at least as long to avoid out-of-bounds access.
   const size_t num_ticks = l5_samples.size();
   if (ls5_samples.size() < num_ticks || r5_samples.size() < num_ticks ||
       rs5_samples.size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& l3_samples = label_to_samples[kL3];
   auto& r3_samples = label_to_samples[kR3];
   l3_samples.resize(num_ticks);
   r3_samples.resize(r5_samples.size());
   for (size_t i = 0; i < num_ticks; ++i) {
     l3_samples[i] = l5_samples[i] + down_mixing_params.delta * ls5_samples[i];
     r3_samples[i] = r5_samples[i] + down_mixing_params.delta * rs5_samples[i];
   }

   return absl::OkStatus();
 }

 // Reconstructs the 5.x surround channels from 3.x.
 //
 // Reads L3, L5, R3 and R5 (each either the original label or its demixed
 // counterpart, via `FindSamplesOrDemixedSamples()`) and writes `kDemixedLs5`
 // and `kDemixedRs5`:
 //   Ls5 = (L3 - L5) / delta,
 //   Rs5 = (R3 - R5) / delta.
 //
 // Returns the lookup error if an input is missing, or `InvalidArgumentError`
 // if L5, R3 or R5 holds fewer samples than L3.
 absl::Status S3ToS5Demixer(const DownMixingParams& down_mixing_params,
                            LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "S3 to S5";

   const std::vector<InternalSampleType>* l3_samples = nullptr;
   const std::vector<InternalSampleType>* l5_samples = nullptr;
   const std::vector<InternalSampleType>* r3_samples = nullptr;
   const std::vector<InternalSampleType>* r5_samples = nullptr;
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kL3, label_to_samples, &l3_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kL5, label_to_samples, &l5_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kR3, label_to_samples, &r3_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kR5, label_to_samples, &r5_samples));

   // The demixing loop runs over the length of L3; every other vector indexed
   // in that loop must be at least as long to avoid out-of-bounds reads.
   const size_t num_ticks = l3_samples->size();
   if (l5_samples->size() < num_ticks || r3_samples->size() < num_ticks ||
       r5_samples->size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& ls5_samples = label_to_samples[kDemixedLs5];
   auto& rs5_samples = label_to_samples[kDemixedRs5];

   ls5_samples.resize(num_ticks, 0.0);
   rs5_samples.resize(num_ticks, 0.0);
   for (size_t i = 0; i < num_ticks; ++i) {
     ls5_samples[i] =
         ((*l3_samples)[i] - (*l5_samples)[i]) / down_mixing_params.delta;
     rs5_samples[i] =
         ((*r3_samples)[i] - (*r5_samples)[i]) / down_mixing_params.delta;
   }

   return absl::OkStatus();
 }

 // Down-mixes the 3.x surround channels to stereo.
 //
 // Reads `kL3`, `kR3` and `kCentre` from `label_to_samples` and writes `kL2`
 // and `kR2`:
 //   L2 = L3 + 0.707 * C,
 //   R2 = R3 + 0.707 * C.
 // The down-mixing parameters are not used.
 //
 // Returns `InvalidArgumentError` if an input channel is missing, or if `kR3`
 // or `kCentre` holds fewer samples than `kL3`.
 absl::Status S3ToS2DownMixer(const DownMixingParams& /*down_mixing_params*/,
                              LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "S3 to S2";

   // Check input to perform this down-mixing exist.
   const auto has_label = [&label_to_samples](ChannelLabel::Label label) {
     return label_to_samples.find(label) != label_to_samples.end();
   };
   if (!has_label(kL3) || !has_label(kR3) || !has_label(kCentre)) {
     return absl::InvalidArgumentError("Missing some input channels");
   }

   const auto& l3_samples = label_to_samples[kL3];
   const auto& r3_samples = label_to_samples[kR3];
   const auto& c_samples = label_to_samples[kCentre];

   // The mixing loop runs over the length of L3; every other vector indexed in
   // that loop must be at least as long to avoid out-of-bounds access.
   const size_t num_ticks = l3_samples.size();
   if (r3_samples.size() < num_ticks || c_samples.size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& l2_samples = label_to_samples[kL2];
   auto& r2_samples = label_to_samples[kR2];
   l2_samples.resize(num_ticks);
   r2_samples.resize(r3_samples.size());
   for (size_t i = 0; i < num_ticks; ++i) {
     l2_samples[i] = l3_samples[i] + 0.707 * c_samples[i];
     r2_samples[i] = r3_samples[i] + 0.707 * c_samples[i];
   }

   return absl::OkStatus();
 }

 // Reconstructs the 3.x left/right channels from stereo and centre.
 //
 // Reads L2, R2 and C (each either the original label or its demixed
 // counterpart, via `FindSamplesOrDemixedSamples()`) and writes `kDemixedL3`
 // and `kDemixedR3`:
 //   L3 = L2 - 0.707 * C,
 //   R3 = R2 - 0.707 * C.
 // The down-mixing parameters are not used.
 //
 // Returns the lookup error if an input is missing, or `InvalidArgumentError`
 // if L2 or R2 holds fewer samples than C.
 absl::Status S2ToS3Demixer(const DownMixingParams& /*down_mixing_params*/,
                            LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "S2 to S3";

   const std::vector<InternalSampleType>* l2_samples = nullptr;
   const std::vector<InternalSampleType>* r2_samples = nullptr;
   const std::vector<InternalSampleType>* c_samples = nullptr;
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kL2, label_to_samples, &l2_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kR2, label_to_samples, &r2_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kCentre, label_to_samples, &c_samples));

   // The demixing loop runs over the length of C; L2 and R2 must be at least as
   // long to avoid out-of-bounds reads.
   const size_t num_ticks = c_samples->size();
   if (l2_samples->size() < num_ticks || r2_samples->size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& l3_samples = label_to_samples[kDemixedL3];
   auto& r3_samples = label_to_samples[kDemixedR3];

   l3_samples.resize(num_ticks, 0.0);
   r3_samples.resize(num_ticks, 0.0);
   for (size_t i = 0; i < num_ticks; ++i) {
     l3_samples[i] = ((*l2_samples)[i] - 0.707 * (*c_samples)[i]);
     r3_samples[i] = ((*r2_samples)[i] - 0.707 * (*c_samples)[i]);
   }

   return absl::OkStatus();
 }

 // Down-mixes stereo to mono.
 //
 // Reads `kL2` and `kR2` from `label_to_samples` and writes `kMono`:
 //   Mono = 0.5 * (L2 + R2).
 // The down-mixing parameters are not used.
 //
 // Returns `UnknownError` if an input channel is missing, or
 // `InvalidArgumentError` if `kR2` holds fewer samples than `kL2`.
 absl::Status S2ToS1DownMixer(const DownMixingParams& /*down_mixing_params*/,
                              LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "S2 to S1";

   // Check input to perform this down-mixing exist.
   if (label_to_samples.find(kL2) == label_to_samples.end() ||
       label_to_samples.find(kR2) == label_to_samples.end()) {
     return absl::UnknownError("Missing some input channels");
   }

   const auto& l2_samples = label_to_samples[kL2];
   const auto& r2_samples = label_to_samples[kR2];

   // The mixing loop runs over the length of L2; R2 must be at least as long to
   // avoid out-of-bounds reads.
   const size_t num_ticks = l2_samples.size();
   if (r2_samples.size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& mono_samples = label_to_samples[kMono];
   mono_samples.resize(num_ticks);
   for (size_t i = 0; i < num_ticks; ++i) {
     mono_samples[i] = 0.5 * (l2_samples[i] + r2_samples[i]);
   }

   return absl::OkStatus();
 }

 // Reconstructs the stereo right channel from mono and the left channel.
 //
 // Reads L2 and Mono (each either the original label or its demixed
 // counterpart, via `FindSamplesOrDemixedSamples()`) and writes `kDemixedR2`:
 //   R2 = 2.0 * Mono - L2.
 // The down-mixing parameters are not used.
 //
 // Returns the lookup error if an input is missing, or `InvalidArgumentError`
 // if L2 holds fewer samples than Mono.
 absl::Status S1ToS2Demixer(const DownMixingParams& /*down_mixing_params*/,
                            LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "S1 to S2";

   const std::vector<InternalSampleType>* l2_samples = nullptr;
   const std::vector<InternalSampleType>* mono_samples = nullptr;
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kL2, label_to_samples, &l2_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kMono, label_to_samples, &mono_samples));

   // The demixing loop runs over the length of Mono; L2 must be at least as
   // long to avoid out-of-bounds reads.
   const size_t num_ticks = mono_samples->size();
   if (l2_samples->size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& r2_samples = label_to_samples[kDemixedR2];
   r2_samples.resize(num_ticks, 0.0);
   for (size_t i = 0; i < num_ticks; ++i) {
     r2_samples[i] = 2.0 * (*mono_samples)[i] - (*l2_samples)[i];
   }

   return absl::OkStatus();
 }

 // Down-mixes the four top channels to two.
 //
 // Reads `kLtf4`, `kLtb4`, `kRtf4` and `kRtb4` from `label_to_samples` and
 // writes `kLtf2` and `kRtf2`:
 //   Ltf2 = Ltf4 + gamma * Ltb4,
 //   Rtf2 = Rtf4 + gamma * Rtb4.
 //
 // Returns `UnknownError` if an input channel is missing, or
 // `InvalidArgumentError` if `kLtb4`, `kRtf4` or `kRtb4` holds fewer samples
 // than `kLtf4`.
 absl::Status T4ToT2DownMixer(const DownMixingParams& down_mixing_params,
                              LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "T4 to T2";

   // Check input to perform this down-mixing exist.
   const auto has_label = [&label_to_samples](ChannelLabel::Label label) {
     return label_to_samples.find(label) != label_to_samples.end();
   };
   if (!has_label(kLtf4) || !has_label(kLtb4) || !has_label(kRtf4) ||
       !has_label(kRtb4)) {
     return absl::UnknownError("Missing some input channels");
   }

   const auto& ltf4_samples = label_to_samples[kLtf4];
   const auto& ltb4_samples = label_to_samples[kLtb4];
   const auto& rtf4_samples = label_to_samples[kRtf4];
   const auto& rtb4_samples = label_to_samples[kRtb4];

   // The mixing loop runs over the length of Ltf4; every other vector indexed
   // in that loop must be at least as long to avoid out-of-bounds access.
   const size_t num_ticks = ltf4_samples.size();
   if (ltb4_samples.size() < num_ticks || rtf4_samples.size() < num_ticks ||
       rtb4_samples.size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& ltf2_samples = label_to_samples[kLtf2];
   auto& rtf2_samples = label_to_samples[kRtf2];
   ltf2_samples.resize(num_ticks);
   rtf2_samples.resize(rtf4_samples.size());
   for (size_t i = 0; i < num_ticks; ++i) {
     ltf2_samples[i] =
         ltf4_samples[i] + down_mixing_params.gamma * ltb4_samples[i];
     rtf2_samples[i] =
         rtf4_samples[i] + down_mixing_params.gamma * rtb4_samples[i];
   }

   return absl::OkStatus();
 }

 // Reconstructs the top-back channels from the two- and four-channel top
 // fronts.
 //
 // Reads Ltf2, Ltf4, Rtf2 and Rtf4 (each either the original label or its
 // demixed counterpart, via `FindSamplesOrDemixedSamples()`) and writes
 // `kDemixedLtb4` and `kDemixedRtb4`:
 //   Ltb4 = (Ltf2 - Ltf4) / gamma,
 //   Rtb4 = (Rtf2 - Rtf4) / gamma.
 //
 // Returns the lookup error if an input is missing, or `InvalidArgumentError`
 // if Ltf4, Rtf2 or Rtf4 holds fewer samples than Ltf2.
 absl::Status T2ToT4Demixer(const DownMixingParams& down_mixing_params,
                            LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "T2 to T4";

   const std::vector<InternalSampleType>* ltf2_samples = nullptr;
   const std::vector<InternalSampleType>* ltf4_samples = nullptr;
   const std::vector<InternalSampleType>* rtf2_samples = nullptr;
   const std::vector<InternalSampleType>* rtf4_samples = nullptr;
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kLtf2, label_to_samples, &ltf2_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kLtf4, label_to_samples, &ltf4_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kRtf2, label_to_samples, &rtf2_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kRtf4, label_to_samples, &rtf4_samples));

   // The demixing loop runs over the length of Ltf2; every other vector indexed
   // in that loop must be at least as long to avoid out-of-bounds reads.
   const size_t num_ticks = ltf2_samples->size();
   if (ltf4_samples->size() < num_ticks || rtf2_samples->size() < num_ticks ||
       rtf4_samples->size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& ltb4_samples = label_to_samples[kDemixedLtb4];
   auto& rtb4_samples = label_to_samples[kDemixedRtb4];
   ltb4_samples.resize(num_ticks, 0.0);
   rtb4_samples.resize(num_ticks, 0.0);
   for (size_t i = 0; i < num_ticks; ++i) {
     ltb4_samples[i] =
         ((*ltf2_samples)[i] - (*ltf4_samples)[i]) / down_mixing_params.gamma;
     rtb4_samples[i] =
         ((*rtf2_samples)[i] - (*rtf4_samples)[i]) / down_mixing_params.gamma;
   }

   return absl::OkStatus();
 }

 // Down-mixes the two top channels into the "TF2" top-front pair.
 //
 // Reads `kLtf2`, `kLs5`, `kRtf2` and `kRs5` from `label_to_samples` and writes
 // `kLtf3` and `kRtf3`:
 //   Ltf3 = Ltf2 + w * delta * Ls5,
 //   Rtf3 = Rtf2 + w * delta * Rs5.
 //
 // Returns `UnknownError` if an input channel is missing, or
 // `InvalidArgumentError` if `kLs5`, `kRtf2` or `kRs5` holds fewer samples than
 // `kLtf2`.
 absl::Status T2ToTf2DownMixer(const DownMixingParams& down_mixing_params,
                               LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "T2 to TF2";

   // Check input to perform this down-mixing exist.
   const auto has_label = [&label_to_samples](ChannelLabel::Label label) {
     return label_to_samples.find(label) != label_to_samples.end();
   };
   if (!has_label(kLtf2) || !has_label(kLs5) || !has_label(kRtf2) ||
       !has_label(kRs5)) {
     return absl::UnknownError("Missing some input channels");
   }

   const auto& ltf2_samples = label_to_samples[kLtf2];
   const auto& ls5_samples = label_to_samples[kLs5];
   const auto& rtf2_samples = label_to_samples[kRtf2];
   const auto& rs5_samples = label_to_samples[kRs5];

   // The mixing loop runs over the length of Ltf2; every other vector indexed
   // in that loop must be at least as long to avoid out-of-bounds access.
   const size_t num_ticks = ltf2_samples.size();
   if (ls5_samples.size() < num_ticks || rtf2_samples.size() < num_ticks ||
       rs5_samples.size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& ltf3_samples = label_to_samples[kLtf3];
   auto& rtf3_samples = label_to_samples[kRtf3];
   ltf3_samples.resize(num_ticks);
   rtf3_samples.resize(rtf2_samples.size());
   for (size_t i = 0; i < num_ticks; ++i) {
     ltf3_samples[i] = ltf2_samples[i] + down_mixing_params.w *
                                             down_mixing_params.delta *
                                             ls5_samples[i];
     rtf3_samples[i] = rtf2_samples[i] + down_mixing_params.w *
                                             down_mixing_params.delta *
                                             rs5_samples[i];
   }

   return absl::OkStatus();
 }

 // Reconstructs the two top channels from the "TF2" top-front pair.
 //
 // Reads Ltf3, L3, L5, Rtf3, R3 and R5 (each either the original label or its
 // demixed counterpart, via `FindSamplesOrDemixedSamples()`) and writes
 // `kDemixedLtf2` and `kDemixedRtf2`:
 //   Ltf2 = Ltf3 - w * (L3 - L5),
 //   Rtf2 = Rtf3 - w * (R3 - R5).
 //
 // Returns the lookup error if an input is missing, or `InvalidArgumentError`
 // if any other input holds fewer samples than Ltf3.
 absl::Status Tf2ToT2Demixer(const DownMixingParams& down_mixing_params,
                             LabelSamplesMap& label_to_samples) {
   LOG_FIRST_N(INFO, 1) << "TF2 to T2";

   const std::vector<InternalSampleType>* ltf3_samples = nullptr;
   const std::vector<InternalSampleType>* l3_samples = nullptr;
   const std::vector<InternalSampleType>* l5_samples = nullptr;
   const std::vector<InternalSampleType>* rtf3_samples = nullptr;
   const std::vector<InternalSampleType>* r3_samples = nullptr;
   const std::vector<InternalSampleType>* r5_samples = nullptr;
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kLtf3, label_to_samples, &ltf3_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kL3, label_to_samples, &l3_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kL5, label_to_samples, &l5_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kRtf3, label_to_samples, &rtf3_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kR3, label_to_samples, &r3_samples));
   RETURN_IF_NOT_OK(DemixingModule::FindSamplesOrDemixedSamples(
       kR5, label_to_samples, &r5_samples));

   // The demixing loop runs over the length of Ltf3; every other vector indexed
   // in that loop must be at least as long to avoid out-of-bounds reads.
   const size_t num_ticks = ltf3_samples->size();
   if (l3_samples->size() < num_ticks || l5_samples->size() < num_ticks ||
       rtf3_samples->size() < num_ticks || r3_samples->size() < num_ticks ||
       r5_samples->size() < num_ticks) {
     return absl::InvalidArgumentError(
         "Input channels have inconsistent numbers of samples");
   }

   auto& ltf2_samples = label_to_samples[kDemixedLtf2];
   auto& rtf2_samples = label_to_samples[kDemixedRtf2];
   ltf2_samples.resize(num_ticks, 0.0);
   rtf2_samples.resize(num_ticks, 0.0);
   for (size_t i = 0; i < num_ticks; ++i) {
     ltf2_samples[i] =
         (*ltf3_samples)[i] -
         down_mixing_params.w * ((*l3_samples)[i] - (*l5_samples)[i]);
     rtf2_samples[i] =
         (*rtf3_samples)[i] -
         down_mixing_params.w * ((*r3_samples)[i] - (*r5_samples)[i]);
   }

   return absl::OkStatus();
 }

 // Helper to fill in the fields of `DemixingMetadataForAudioElementId`.
 //
 // Copies `substream_id_to_labels` and `label_to_output_gain` into
 // `demixing_metadata`, then appends the chain of surround and height
 // down-mixers needed to go from the input layout (derived from
 // `labels_to_demix`) down to the lowest layout found in the substreams.
 // Matching demixers are stored in the reverse order of their down-mixers,
 // with all surround demixers ahead of the height demixers.
 //
 // Returns `UnknownError` if `demixing_metadata` already holds down-mixers or
 // demixers; `OkStatus` otherwise.
 absl::Status FillRequiredDemixingMetadata(
     const absl::flat_hash_set<ChannelLabel::Label>& labels_to_demix,
     const SubstreamIdLabelsMap& substream_id_to_labels,
     const LabelGainMap& label_to_output_gain,
     DemixingMetadataForAudioElementId& demixing_metadata) {
   auto& down_mixers = demixing_metadata.down_mixers;
   auto& demixers = demixing_metadata.demixers;

   if (!down_mixers.empty() || !demixers.empty()) {
     return absl::UnknownError(
         "`FillRequiredDemixingMetadata()` should only be called once per Audio "
         "Element ID");
   }
   demixing_metadata.substream_id_to_labels = substream_id_to_labels;
   demixing_metadata.label_to_output_gain = label_to_output_gain;

   // Scans every substream for the first candidate label (highest number
   // first) that would lower the running minimum. `candidates` must be ordered
   // from the highest number to the lowest; the scan stops early once the
   // lowest possible number has been reached. Returns `INT_MAX` when no
   // candidate label is present in any substream.
   using LabelAndNumber = std::pair<ChannelLabel::Label, int>;
   const auto find_lowest_output_number =
       [&demixing_metadata](const std::vector<LabelAndNumber>& candidates) {
         int lowest_number = INT_MAX;
         for (const auto& [substream_id, labels] :
              demixing_metadata.substream_id_to_labels) {
           for (const auto& [candidate_label, candidate_number] : candidates) {
             const bool is_present =
                 std::find(labels.begin(), labels.end(), candidate_label) !=
                 labels.end();
             if (is_present && lowest_number > candidate_number) {
               lowest_number = candidate_number;
               break;
             }
           }
           if (lowest_number == candidates.back().second) {
             // This is the lowest possible value, abort.
             break;
           }
         }
         return lowest_number;
       };

   // Find the input surround number.
   int input_surround_number = 0;
   if (labels_to_demix.contains(kL7)) {
     input_surround_number = 7;
   } else if (labels_to_demix.contains(kL5)) {
     input_surround_number = 5;
   } else if (labels_to_demix.contains(kL3)) {
     input_surround_number = 3;
   } else if (labels_to_demix.contains(kL2)) {
     input_surround_number = 2;
   } else if (labels_to_demix.contains(kMono)) {
     input_surround_number = 1;
   }

   // Find the lowest output surround number.
   const int output_lowest_surround_number = find_lowest_output_number(
       {{kL7, 7}, {kL5, 5}, {kL3, 3}, {kL2, 2}, {kMono, 1}});
   LOG(INFO) << "Surround down-mixers from S" << input_surround_number << " to S"
             << output_lowest_surround_number << " needed:";
   for (int surround_number = input_surround_number;
        surround_number > output_lowest_surround_number; --surround_number) {
     switch (surround_number) {
       case 7:
         down_mixers.push_back(S7ToS5DownMixer);
         LOG(INFO) << "  S7ToS5DownMixer added";
         demixers.push_front(S5ToS7Demixer);
         LOG(INFO) << "  S5ToS7Demixer added";
         break;
       case 5:
         down_mixers.push_back(S5ToS3DownMixer);
         LOG(INFO) << "  S5ToS3DownMixer added";
         demixers.push_front(S3ToS5Demixer);
         LOG(INFO) << "  S3ToS5Demixer added";
         break;
       case 3:
         down_mixers.push_back(S3ToS2DownMixer);
         LOG(INFO) << "  S3ToS2DownMixer added";
         demixers.push_front(S2ToS3Demixer);
         LOG(INFO) << "  S2ToS3Demixer added";
         break;
       case 2:
         down_mixers.push_back(S2ToS1DownMixer);
         LOG(INFO) << "  S2ToS1DownMixer added";
         demixers.push_front(S1ToS2Demixer);
         LOG(INFO) << "  S1ToS2Demixer added";
         break;
       default:
         // Surround numbers 6 and 4 have no dedicated down-mixer.
         break;
     }
   }

   // Find the input height number. Artificially defining the height number of
   // "TF2" as 1.
   int input_height_number = 0;
   if (labels_to_demix.contains(kLtf4)) {
     input_height_number = 4;
   } else if (labels_to_demix.contains(kLtf2)) {
     input_height_number = 2;
   } else if (labels_to_demix.contains(kLtf3)) {
     input_height_number = 1;
   }

   // Find the lowest output height number.
   const int output_lowest_height_number =
       find_lowest_output_number({{kLtf4, 4}, {kLtf2, 2}, {kLtf3, 1}});

   // Maps a height number to a readable name for logging. Any value other than
   // 4, 2 or 1 (for example 0 or `INT_MAX`) means no height channel was found.
   const auto height_name = [](int height_number) -> const char* {
     switch (height_number) {
       case 4:
         return "T4";
       case 2:
         return "T2";
       case 1:
         return "TF3";
       default:
         return "none";
     }
   };

   // Collect demixers in a separate list first and append the list to the
   // output later. Height demixers need to be in reverse order as height
   // down-mixers but should go after the surround demixers.
   LOG(INFO) << "Height down-mixers from " << height_name(input_height_number)
             << " to " << height_name(output_lowest_height_number)
             << " needed:";
   std::list<Demixer> height_demixers;
   for (int height_number = input_height_number;
        height_number > output_lowest_height_number; --height_number) {
     switch (height_number) {
       case 4:
         down_mixers.push_back(T4ToT2DownMixer);
         LOG(INFO) << "  T4ToT2DownMixer added";
         height_demixers.push_front(T2ToT4Demixer);
         LOG(INFO) << "  T2ToT4Demixer added";
         break;
       case 2:
         down_mixers.push_back(T2ToTf2DownMixer);
         LOG(INFO) << "  T2ToTf2DownMixer added";
         height_demixers.push_front(Tf2ToT2Demixer);
         LOG(INFO) << "  Tf2ToT2Demixer added";
         break;
       default:
         // Height number 3 has no dedicated down-mixer.
         break;
     }
   }
   demixers.splice(demixers.end(), height_demixers);

   return absl::OkStatus();
 }

 // Copies the end timestamp, trimming counts and down-mixing parameters of an
 // `AudioFrameWithData` into `labeled_frame`. The trimming counts come from the
 // header of the frame's OBU.
 void ConfigureLabeledFrame(const AudioFrameWithData& audio_frame,
                            LabeledFrame& labeled_frame) {
   const auto& obu_header = audio_frame.obu.header_;

   labeled_frame.demixing_params = audio_frame.down_mixing_params;
   labeled_frame.samples_to_trim_at_start =
       obu_header.num_samples_to_trim_at_start;
   labeled_frame.samples_to_trim_at_end = obu_header.num_samples_to_trim_at_end;
   labeled_frame.end_timestamp = audio_frame.end_timestamp;
 }

 void ConfigureLabeledFrame(const DecodedAudioFrame& decoded_audio_frame,
                            LabeledFrame& labeled_decoded_frame) {
   labeled_decoded_frame.end_timestamp = decoded_audio_frame.end_timestamp;
   labeled_decoded_frame.samples_to_trim_at_end =
       decoded_audio_frame.samples_to_trim_at_end;
   labeled_decoded_frame.samples_to_trim_at_start =
       decoded_audio_frame.samples_to_trim_at_start;
   labeled_decoded_frame.demixing_params =
       decoded_audio_frame.down_mixing_params;
 }

 // Returns the substream ID reported by the OBU of an `AudioFrameWithData`.
 uint32_t GetSubstreamId(const AudioFrameWithData& audio_frame_with_data) {
   const uint32_t substream_id = audio_frame_with_data.obu.GetSubstreamId();
   return substream_id;
 }

 // Returns the substream ID stored directly on a `DecodedAudioFrame`.
 uint32_t GetSubstreamId(const DecodedAudioFrame& audio_frame_with_data) {
   const uint32_t substream_id = audio_frame_with_data.substream_id;
   return substream_id;
 }

 // Returns a pointer to the PCM samples of an `AudioFrameWithData`, or
 // `nullptr` when the optional `pcm_samples` field holds no value.
 const std::vector<std::vector<int32_t>>* GetSamples(
     const AudioFrameWithData& audio_frame_with_data) {
   const auto& optional_pcm_samples = audio_frame_with_data.pcm_samples;
   return optional_pcm_samples.has_value() ? &optional_pcm_samples.value()
                                           : nullptr;
 }

 // Returns a pointer to the decoded samples of a `DecodedAudioFrame`. Never
 // returns `nullptr`.
 const std::vector<std::vector<int32_t>>* GetSamples(
     const DecodedAudioFrame& audio_frame_with_data) {
   const auto* decoded_samples = &audio_frame_with_data.decoded_samples;
   return decoded_samples;
 }

 // Overload for frames that are not a `DecodedAudioFrame`: nothing is
 // forwarded, both arguments are ignored and the call always succeeds.
 absl::Status PassThroughReconGainData(const AudioFrameWithData&,
                                       LabeledFrame&) {
   return absl::OkStatus();
 }

 // Forwards recon gain information from a decoded frame to its labeled frame.
 //
 // When the frame references an audio element whose config is a
 // `ScalableChannelLayoutConfig`, the loudspeaker layout of every channel audio
 // layer and the frame's recon gain parameter data are copied into
 // `labeled_decoded_frame`. Otherwise nothing is copied. Always returns
 // `OkStatus`.
 absl::Status PassThroughReconGainData(
     const DecodedAudioFrame& decoded_audio_frame,
     LabeledFrame& labeled_decoded_frame) {
   const auto* audio_element = decoded_audio_frame.audio_element_with_data;
   if (audio_element == nullptr) {
     LOG(INFO) << "No audio element with data found, thus layer info is "
                  "inaccessible.";
     return absl::OkStatus();
   }

   const auto* scalable_config =
       std::get_if<ScalableChannelLayoutConfig>(&audio_element->obu.config_);
   if (scalable_config == nullptr) {
     // Only logged for frames starting at timestamp zero.
     LOG_IF(INFO, decoded_audio_frame.start_timestamp == 0)
         << "No scalable channel layout config found, thus recon gain info is "
            "not necessary.";
     return absl::OkStatus();
   }

   // Rebuild the per-layer layout list from scratch.
   const auto& layer_configs = scalable_config->channel_audio_layer_configs;
   auto& layouts = labeled_decoded_frame.loudspeaker_layout_per_layer;
   layouts.clear();
   layouts.reserve(layer_configs.size());
   for (const auto& layer_config : layer_configs) {
     layouts.push_back(layer_config.loudspeaker_layout);
   }

   labeled_decoded_frame.recon_gain_info_parameter_data =
       decoded_audio_frame.recon_gain_info_parameter_data;
   return absl::OkStatus();
 }

 // TODO(b/377553811): Unify `AudioFrameWithData` and `DecodedAudioFrame`.
 //
 // Converts the samples of every frame that belongs to one audio element into
 // normalized floating-point samples keyed by channel label.
 //
 // Frames whose substream ID is not in `substream_id_to_labels` are skipped.
 // For the others, channel `c` of each tick is stored under the `c`-th label
 // of the substream, and the frame's timing, trimming, down-mixing and (for
 // decoded frames) recon gain metadata is copied into `labeled_frame`.
 //
 // Returns an error if the frames do not share a start timestamp (as reported
 // by `CompareTimestamps()`), and `InvalidArgumentError` if a frame has no
 // samples available or a tick has fewer channels than the substream has
 // labels.
 template <typename T>
 absl::Status StoreSamplesForAudioElementId(
     const std::list<T>& audio_frames_or_decoded_audio_frames,
     const SubstreamIdLabelsMap& substream_id_to_labels,
     LabeledFrame& labeled_frame) {
   if (audio_frames_or_decoded_audio_frames.empty()) {
     return absl::OkStatus();
   }
   const int32_t common_start_timestamp =
       audio_frames_or_decoded_audio_frames.begin()->start_timestamp;

   for (const auto& audio_frame : audio_frames_or_decoded_audio_frames) {
     const auto substream_id_labels_iter =
         substream_id_to_labels.find(GetSubstreamId(audio_frame));
     if (substream_id_labels_iter == substream_id_to_labels.end()) {
       // This audio frame might belong to a different audio element; skip it.
       continue;
     }

     // Validate that the frames are all aligned in time.
     RETURN_IF_NOT_OK(CompareTimestamps(common_start_timestamp,
                                        audio_frame.start_timestamp,
                                        "In StoreSamplesForAudioElementId(): "));

     const auto& labels = substream_id_labels_iter->second;
     // Samples and frame metadata are only touched when the substream has at
     // least one label.
     if (!labels.empty()) {
       const auto* input_samples = GetSamples(audio_frame);
       if (input_samples == nullptr) {
         return absl::InvalidArgumentError(
             "Input samples are not available for down-mixing.");
       }

       // Every tick must carry one sample per label; otherwise indexing by
       // channel below would read out of bounds.
       const size_t num_channels = labels.size();
       for (const auto& tick : *input_samples) {
         if (tick.size() < num_channels) {
           return absl::InvalidArgumentError(absl::StrCat(
               "Expected at least ", num_channels,
               " channels per tick, but found ", tick.size()));
         }
       }

       ConfigureLabeledFrame(audio_frame, labeled_frame);

       const size_t num_ticks = input_samples->size();
       size_t channel_index = 0;
       for (const auto& label : labels) {
         auto& samples = labeled_frame.label_to_samples[label];
         samples.resize(num_ticks, 0);
         for (size_t t = 0; t < num_ticks; ++t) {
           samples[t] = Int32ToNormalizedFloatingPoint<InternalSampleType>(
               (*input_samples)[t][channel_index]);
         }
         ++channel_index;
       }
     }
     RETURN_IF_NOT_OK(PassThroughReconGainData(audio_frame, labeled_frame));
   }

   return absl::OkStatus();
 }

 // Runs every demixer, in list order, on the samples of `labeled_frame` using
 // the frame's own demixing parameters. Stops at and returns the first
 // non-OK status.
 absl::Status ApplyDemixers(const std::list<Demixer>& demixers,
                            LabeledFrame& labeled_frame) {
   const auto& demixing_params = labeled_frame.demixing_params;
   auto& label_to_samples = labeled_frame.label_to_samples;

   for (const Demixer& apply_demixer : demixers) {
     RETURN_IF_NOT_OK(apply_demixer(demixing_params, label_to_samples));
   }
   return absl::OkStatus();
 }

 // Looks up the demixing metadata registered for `audio_element_id`.
 //
 // On success `demixing_metadata` points at the entry inside
 // `audio_element_id_to_demixing_metadata`. Returns `InvalidArgumentError` and
 // leaves `demixing_metadata` untouched when the ID is not in the map.
 absl::Status GetDemixerMetadata(
     const DecodedUleb128 audio_element_id,
     const absl::flat_hash_map<DecodedUleb128,
                               DemixingMetadataForAudioElementId>&
         audio_element_id_to_demixing_metadata,
     const DemixingMetadataForAudioElementId*& demixing_metadata) {
   const auto iter =
       audio_element_id_to_demixing_metadata.find(audio_element_id);
   if (iter == audio_element_id_to_demixing_metadata.end()) {
     return absl::InvalidArgumentError(absl::StrCat(
         "Demixing metadata for Audio Element ID= ", audio_element_id,
         " not found"));
   }
   demixing_metadata = &iter->second;
   return absl::OkStatus();
 }

 // Returns the set of channel labels that need to be reconstructed for `obu`.
 //
 // For a channel-based element the labels are looked up from the loudspeaker
 // layout of the last (highest) channel audio layer. A scene-based element
 // yields an empty set. Returns `InvalidArgumentError` if a channel-based
 // element does not hold a scalable channel layout config or has no layers,
 // and `UnimplementedError` for any other element type.
 absl::StatusOr<absl::flat_hash_set<ChannelLabel::Label>>
 LookupLabelsToReconstruct(const AudioElementObu& obu) {
   switch (obu.GetAudioElementType()) {
     using enum AudioElementObu::AudioElementType;
     case kAudioElementChannelBased: {
       // Guard against a config variant that does not match the declared
       // element type instead of letting `std::get` fail.
       const auto* layout_config =
           std::get_if<ScalableChannelLayoutConfig>(&obu.config_);
       if (layout_config == nullptr) {
         return absl::InvalidArgumentError(absl::StrCat(
             "Expected a scalable channel layout config for channel-based "
             "Audio Element ID= ",
             obu.GetAudioElementId()));
       }
       const auto& channel_audio_layer_configs =
           layout_config->channel_audio_layer_configs;
       if (channel_audio_layer_configs.empty()) {
         return absl::InvalidArgumentError(absl::StrCat(
             "Expected non-empty channel audio layer configs for Audio "
             "Element ID= ",
             obu.GetAudioElementId()));
       }

       // Reconstruct the highest layer.
       return ChannelLabel::
           LookupLabelsToReconstructFromScalableLoudspeakerLayout(
               channel_audio_layer_configs.back().loudspeaker_layout,
               channel_audio_layer_configs.back().expanded_loudspeaker_layout);
     }
     case kAudioElementSceneBased:
       // OK. Ambisonics does not have any channels to be reconstructed.
       return absl::flat_hash_set<ChannelLabel::Label>{};
     default:
       return absl::UnimplementedError(absl::StrCat(
           "Unsupported audio element type= ", obu.GetAudioElementType()));
   }
 }
 // Logs the frame size of the channels in the labeled frame of
 // `audio_element_id`, prefixed by `log_prefix`. Does nothing when the ID has
 // no labeled frame. The statement is logged through `LOG_FIRST_N(INFO, 1)`.
 void LogForAudioElementId(absl::string_view log_prefix,
                           DecodedUleb128 audio_element_id,
                           const IdLabeledFrameMap& id_to_labeled_frame) {
   const auto labeled_frame_iter = id_to_labeled_frame.find(audio_element_id);
   if (labeled_frame_iter == id_to_labeled_frame.end()) {
     return;
   }

   const auto& label_to_samples = labeled_frame_iter->second.label_to_samples;
   for (const auto& label_and_samples : label_to_samples) {
     LOG_FIRST_N(INFO, 1) << "  Channel " << label_and_samples.first << ":\t"
                          << log_prefix << " frame size= "
                          << label_and_samples.second.size() << ".";
   }
 }

 }  // namespace

 // Finds the samples stored under `label`, falling back to the samples stored
 // under its demixed counterpart (as given by `ChannelLabel::GetDemixedLabel`).
 //
 // On success `*samples` points at the vector inside `label_to_samples`. On
 // any failure `*samples` is set to `nullptr`. Returns the status of
 // `GetDemixedLabel()` if `label` is absent and has no demixed counterpart, or
 // `UnknownError` if neither label is present in the map.
 absl::Status DemixingModule::FindSamplesOrDemixedSamples(
     ChannelLabel::Label label, const LabelSamplesMap& label_to_samples,
     const std::vector<InternalSampleType>** samples) {
   // Give the output a defined value on every error path.
   *samples = nullptr;

   if (const auto iter = label_to_samples.find(label);
       iter != label_to_samples.end()) {
     *samples = &iter->second;
     return absl::OkStatus();
   }

   const auto demixed_label = ChannelLabel::GetDemixedLabel(label);
   if (!demixed_label.ok()) {
     return demixed_label.status();
   }
   if (const auto iter = label_to_samples.find(*demixed_label);
       iter != label_to_samples.end()) {
     *samples = &iter->second;
     return absl::OkStatus();
   }

   return absl::UnknownError(
       absl::StrCat("Channel ", label, " or ", *demixed_label, " not found"));
 }

 // Builds a `DemixingModule` in `kDownMixingAndReconstruction` mode.
 //
 // For every (audio element ID, config) pair in `id_to_config_map`, the
 // demixing metadata is populated by `FillRequiredDemixingMetadata` from the
 // config's user labels, substream-to-label mapping and output gains.
 //
 // Returns the first non-OK status reported by `FillRequiredDemixingMetadata`,
 // otherwise the constructed module.
 absl::StatusOr<DemixingModule>
 DemixingModule::CreateForDownMixingAndReconstruction(
     const absl::flat_hash_map<
         DecodedUleb128, DownmixingAndReconstructionConfig>&& id_to_config_map) {
   absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
       metadata_by_id;

   for (const auto& entry : id_to_config_map) {
     const auto& config = entry.second;
     // `operator[]` creates the (default-constructed) slot to be filled in.
     auto& metadata = metadata_by_id[entry.first];
     RETURN_IF_NOT_OK(FillRequiredDemixingMetadata(
         config.user_labels, config.substream_id_to_labels,
         config.label_to_output_gain, metadata));
   }

   return DemixingModule(DemixingMode::kDownMixingAndReconstruction,
                         std::move(metadata_by_id));
 }

 // Builds a `DemixingModule` in `kReconstruction` mode.
 //
 // For each audio element, the labels to reconstruct are derived from its OBU
 // via `LookupLabelsToReconstruct`, and the demixing metadata is populated by
 // `FillRequiredDemixingMetadata`. Any down-mixers produced along the way are
 // discarded, so the resulting module holds none.
 //
 // Returns the first non-OK status from either helper, otherwise the
 // constructed module.
 absl::StatusOr<DemixingModule> DemixingModule::CreateForReconstruction(
     const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
         audio_elements) {
   absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
       metadata_by_id;

   for (const auto& element_entry : audio_elements) {
     const auto& audio_element_id = element_entry.first;
     const auto& audio_element = element_entry.second;

     const auto labels_to_reconstruct =
         LookupLabelsToReconstruct(audio_element.obu);
     if (!labels_to_reconstruct.ok()) {
       return labels_to_reconstruct.status();
     }

     const auto insertion = metadata_by_id.emplace(
         audio_element_id, DemixingMetadataForAudioElementId());
     CHECK(insertion.second)
         << "The target map was initially empty, iterating over "
            "`audio_elements` cannot produce a duplicate key.";

     auto& metadata = insertion.first->second;
     RETURN_IF_NOT_OK(FillRequiredDemixingMetadata(
         *labels_to_reconstruct, audio_element.substream_id_to_labels,
         audio_element.label_to_output_gain, metadata));
     // Reconstruction mode never down-mixes; drop whatever was filled in.
     metadata.down_mixers.clear();
   }

   return DemixingModule(DemixingMode::kReconstruction,
                         std::move(metadata_by_id));
 }

 // Down-mixes the input samples and queues the result on each substream.
 //
 // Steps:
 //   1. Runs every down-mixer registered for `audio_element_id` on
 //      `input_label_to_samples` (the map is modified in place).
 //   2. For each substream of the audio element, gathers the samples of its
 //      channel labels, converts them to `int32_t`, and appends one
 //      vector-per-time-tick to both `samples_obu` and `samples_encode` of the
 //      matching `SubstreamData`. The samples appended to `samples_encode` are
 //      additionally divided by the linear output gain of the channel
 //      (`10^(gain / 20)`; 1.0 when no gain is registered for the label).
 //
 // The number of time ticks is taken from the first entry of
 // `input_label_to_samples` after down-mixing (zero when the map is empty).
 //
 // Errors:
 //   - Any non-OK status from `GetDemixerMetadata`, a down-mixer,
 //     `NormalizedFloatingPointToInt32` or `ClipDoubleToInt32`.
 //   - `UnknownError` when a required channel label has no samples, or when
 //     `substream_id_to_substream_data` lacks an entry for a substream.
 //   - `InvalidArgumentError` when a required channel holds fewer samples than
 //     the number of time ticks.
 // On error, substreams processed before the failure keep the samples that
 // were already appended.
 absl::Status DemixingModule::DownMixSamplesToSubstreams(
     DecodedUleb128 audio_element_id, const DownMixingParams& down_mixing_params,
     LabelSamplesMap& input_label_to_samples,
     absl::flat_hash_map<uint32_t, SubstreamData>&
         substream_id_to_substream_data) const {
   const DemixingMetadataForAudioElementId* demixing_metadata = nullptr;
   RETURN_IF_NOT_OK(GetDemixerMetadata(audio_element_id,
                                       audio_element_id_to_demixing_metadata_,
                                       demixing_metadata));

   // First perform all the down mixing.
   for (const auto& down_mixer : demixing_metadata->down_mixers) {
     RETURN_IF_NOT_OK(down_mixer(down_mixing_params, input_label_to_samples));
   }

   // Dereferencing `begin()` of an empty map is undefined; treat it as having
   // no time ticks. Missing labels are still reported below.
   const size_t num_time_ticks =
       input_label_to_samples.empty()
           ? 0
           : input_label_to_samples.begin()->second.size();

   for (const auto& [substream_id, output_channel_labels] :
        demixing_metadata->substream_id_to_labels) {
     // One or two channels.
     const size_t num_channels = output_channel_labels.size();
     std::vector<std::vector<int32_t>> substream_samples(
         num_time_ticks, std::vector<int32_t>(num_channels, 0));
     // Output gains to be applied to the (one or two) channels. Defaults to
     // unity when no gain is registered for a label.
     std::vector<double> output_gains_linear(num_channels, 1.0);
     size_t channel_index = 0;
     for (const auto& output_channel_label : output_channel_labels) {
       const auto iter = input_label_to_samples.find(output_channel_label);
       if (iter == input_label_to_samples.end()) {
         return absl::UnknownError(absl::StrCat(
             "Samples do not exist for channel: ", output_channel_label));
       }
       const auto& input_samples = iter->second;
       // Guard the indexed reads below against a channel that is shorter than
       // the reference channel used to compute `num_time_ticks`.
       if (input_samples.size() < num_time_ticks) {
         return absl::InvalidArgumentError(absl::StrCat(
             "Channel ", output_channel_label, " has ", input_samples.size(),
             " samples, expected at least ", num_time_ticks));
       }
       for (size_t t = 0; t < num_time_ticks; ++t) {
         RETURN_IF_NOT_OK(NormalizedFloatingPointToInt32(
             input_samples[t], substream_samples[t][channel_index]));
       }

       // Compute and store the linear output gains.
       const auto gain_iter =
           demixing_metadata->label_to_output_gain.find(output_channel_label);
       if (gain_iter != demixing_metadata->label_to_output_gain.end()) {
         output_gains_linear[channel_index] =
             std::pow(10.0, gain_iter->second / 20.0);
       }

       ++channel_index;
     }

     // Find the `SubstreamData` with this `substream_id`.
     const auto substream_data_iter =
         substream_id_to_substream_data.find(substream_id);
     if (substream_data_iter == substream_id_to_substream_data.end()) {
       return absl::UnknownError(absl::StrCat(
           "Failed to find substream data for substream ID= ", substream_id));
     }
     auto& substream_data = substream_data_iter->second;

     // Add all down mixed samples to both queues.
     for (const auto& channel_samples : substream_samples) {
       substream_data.samples_obu.push_back(channel_samples);

       // Apply output gains to the samples going to the encoder.
       std::vector<int32_t> attenuated_channel_samples(channel_samples.size());
       for (size_t i = 0; i < channel_samples.size(); ++i) {
         // Intermediate computation is a `double`. But both `channel_samples`
         // and `attenuated_channel_samples` are `int32_t`.
         const double attenuated_sample =
             static_cast<double>(channel_samples[i]) / output_gains_linear[i];
         RETURN_IF_NOT_OK(ClipDoubleToInt32(attenuated_sample,
                                            attenuated_channel_samples[i]));
       }
       // The local vector is not used again; hand it over without copying.
       substream_data.samples_encode.push_back(
           std::move(attenuated_channel_samples));
     }
   }

   return absl::OkStatus();
 }

 // TODO(b/288240600): Down-mix audio samples in a standalone function too.
 //
 // Demixes the original (pre-encoding) audio frames for every audio element
 // known to this module.
 //
 // For each audio element, the samples in `audio_frames` are gathered by label
 // via `StoreSamplesForAudioElementId`; when any were found, the element's
 // demixers are applied and the labeled frame is stored in the result under the
 // audio element ID. Audio elements without samples are omitted.
 //
 // Returns `FailedPreconditionError` when the module was created in
 // reconstruction mode, or the first non-OK status from the helpers.
 absl::StatusOr<IdLabeledFrameMap> DemixingModule::DemixOriginalAudioSamples(
     const std::list<AudioFrameWithData>& audio_frames) const {
   if (demixing_mode_ == DemixingMode::kReconstruction) {
     return absl::FailedPreconditionError(
         "Demixing original audio samples is not available in reconstruction "
         "mode.");
   }

   IdLabeledFrameMap result;
   for (const auto& entry : audio_element_id_to_demixing_metadata_) {
     const auto& audio_element_id = entry.first;
     const auto& metadata = entry.second;

     // Collect the original samples belonging to this audio element.
     LabeledFrame frame;
     RETURN_IF_NOT_OK(StoreSamplesForAudioElementId(
         audio_frames, metadata.substream_id_to_labels, frame));

     const bool has_samples = !frame.label_to_samples.empty();
     if (has_samples) {
       RETURN_IF_NOT_OK(ApplyDemixers(metadata.demixers, frame));
       result[audio_element_id] = std::move(frame);
     }

     LogForAudioElementId("Original", audio_element_id, result);
   }

   return result;
 }

 // Demixes decoded audio frames for every audio element known to this module.
 //
 // For each audio element, the samples in `decoded_audio_frames` are gathered
 // by label via `StoreSamplesForAudioElementId`; when any were found, the
 // element's demixers are applied and the labeled frame is stored in the result
 // under the audio element ID. Audio elements without samples are omitted.
 //
 // Returns the first non-OK status from the helpers, otherwise the map of
 // labeled frames.
 absl::StatusOr<IdLabeledFrameMap> DemixingModule::DemixDecodedAudioSamples(
     const std::list<DecodedAudioFrame>& decoded_audio_frames) const {
   IdLabeledFrameMap result;
   for (const auto& entry : audio_element_id_to_demixing_metadata_) {
     const auto& audio_element_id = entry.first;
     const auto& metadata = entry.second;

     // Collect the decoded samples belonging to this audio element.
     LabeledFrame decoded_frame;
     RETURN_IF_NOT_OK(StoreSamplesForAudioElementId(
         decoded_audio_frames, metadata.substream_id_to_labels, decoded_frame));

     const bool has_samples = !decoded_frame.label_to_samples.empty();
     if (has_samples) {
       RETURN_IF_NOT_OK(ApplyDemixers(metadata.demixers, decoded_frame));
       result[audio_element_id] = std::move(decoded_frame);
     }

     LogForAudioElementId("Decoded", audio_element_id, result);
   }

   return result;
 }

 // Exposes the down-mixers registered for `audio_element_id`.
 //
 // On success, `down_mixers` is set to point at the list held in this module's
 // metadata (no copy is made). Propagates the status of `GetDemixerMetadata`
 // when the lookup fails.
 absl::Status DemixingModule::GetDownMixers(
     DecodedUleb128 audio_element_id,
     const std::list<Demixer>*& down_mixers) const {
   const DemixingMetadataForAudioElementId* metadata = nullptr;
   RETURN_IF_NOT_OK(GetDemixerMetadata(
       audio_element_id, audio_element_id_to_demixing_metadata_, metadata));

   down_mixers = &(metadata->down_mixers);
   return absl::OkStatus();
 }

 // Exposes the demixers registered for `audio_element_id`.
 //
 // On success, `demixers` is set to point at the list held in this module's
 // metadata (no copy is made). Propagates the status of `GetDemixerMetadata`
 // when the lookup fails.
 absl::Status DemixingModule::GetDemixers(
     DecodedUleb128 audio_element_id,
     const std::list<Demixer>*& demixers) const {
   const DemixingMetadataForAudioElementId* metadata = nullptr;
   RETURN_IF_NOT_OK(GetDemixerMetadata(
       audio_element_id, audio_element_id_to_demixing_metadata_, metadata));

   demixers = &(metadata->demixers);
   return absl::OkStatus();
 }

 }  // namespace iamf_tools