// iamf/cli/demixing_module.h - platform/external/iamf_tools - Git at Google

 /*
  * Copyright (c) 2023, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 3-Clause Clear License
  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
  * License was not distributed with this source code in the LICENSE file, you
  * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
  * Alliance for Open Media Patent License 1.0 was not distributed with this
  * source code in the PATENTS file, you can obtain it at
  * www.aomedia.org/license/patent.
  */

 #ifndef CLI_DEMIXING_MODULE_H_
 #define CLI_DEMIXING_MODULE_H_

 #include <cstdint>
 #include <deque>
 #include <list>
 #include <utility>
 #include <vector>

 #include "absl/container/flat_hash_map.h"
 #include "absl/container/flat_hash_set.h"
 #include "absl/container/node_hash_map.h"
 #include "absl/status/status.h"
 #include "absl/status/statusor.h"
 #include "iamf/cli/audio_element_with_data.h"
 #include "iamf/cli/audio_frame_decoder.h"
 #include "iamf/cli/audio_frame_with_data.h"
 #include "iamf/cli/channel_label.h"
 #include "iamf/obu/audio_element.h"
 #include "iamf/obu/demixing_info_parameter_data.h"
 #include "iamf/obu/recon_gain_info_parameter_data.h"
 #include "iamf/obu/types.h"

 namespace iamf_tools {

 // Sample buffers, output gains and trimming counts for a single substream.
 //
 // The scalar fields carry default member initializers so that a
 // default-initialized `SubstreamData` never exposes indeterminate values. The
 // struct remains an aggregate (C++14 and later), so existing brace
 // initialization keeps working.
 struct SubstreamData {
   uint32_t substream_id = 0;

   // Samples arranged in a FIFO queue with a vector of channels. There can only
   // be one or two channels. Includes "virtual" samples that are output from the
   // encoder, but are not passed to the encoder.
   std::deque<std::vector<int32_t>> samples_obu;
   // Samples to pass to encoder.
   std::deque<std::vector<int32_t>> samples_encode;
   // One or two elements; corresponding to the output gain to be applied to
   // each channel.
   std::vector<double> output_gains_linear;
   uint32_t num_samples_to_trim_at_end = 0;
   uint32_t num_samples_to_trim_at_start = 0;
 };

 // Associates each channel label with one frame of samples for that channel.
 using LabelSamplesMap =
     absl::node_hash_map<ChannelLabel::Label, std::vector<InternalSampleType>>;

 // One frame of per-channel samples, keyed by channel label, together with the
 // trimming counts and parameter data carried alongside it.
 //
 // The scalar fields carry default member initializers so that a
 // default-initialized `LabeledFrame` never exposes indeterminate values. The
 // struct remains an aggregate (C++14 and later).
 struct LabeledFrame {
   int32_t end_timestamp = 0;
   uint32_t samples_to_trim_at_end = 0;
   uint32_t samples_to_trim_at_start = 0;
   // Samples for each channel label.
   LabelSamplesMap label_to_samples;
   DownMixingParams demixing_params;
   ReconGainInfoParameterData recon_gain_info_parameter_data;
   // Vector of length `num_layers`. Only populated for scalable channel audio.
   std::vector<ChannelAudioLayerConfig::LoudspeakerLayout>
       loudspeaker_layout_per_layer;
 };

 // Associates each audio element ID with its `LabeledFrame`.
 using IdLabeledFrameMap = absl::flat_hash_map<DecodedUleb128, LabeledFrame>;

 // Function-pointer type used for both demixers and down-mixers: receives the
 // down-mixing parameters and a mutable label-to-samples map, returns a status.
 using Demixer = absl::Status (*)(const DownMixingParams&, LabelSamplesMap&);

 /*!\brief Manages data and processing to down-mix and demix audio elements.
  *
  * This class relates to the "Element Reconstructor" as used in the IAMF
  * specifications. "An Element Reconstructor re-assembles the Audio Elements by
  * combining the Channel Group(s) guided by Descriptors and Parameter
  * Substream(s)." This class does not apply the reconstruction gain, so
  * additional post processing is needed to finish audio element reconstruction.
  *
  * Down-mixers are used to down-mix the input channels to the substream
  * channels. Typically there are down-mixers for scalable channel audio
  * elements with more than one layer. Down-mixers are created according to
  * https://aomediacodec.github.io/iamf/#iamfgeneration-scalablechannelaudio-downmixmechanism
  *
  * Demixers are used to recreate the original audio from the substreams.
  * Demixers are created according to
  * https://aomediacodec.github.io/iamf/#processing-scalablechannelaudio.
  *
  * Instances are obtained through the static `CreateFor...` factory functions;
  * the constructor is private.
  */
 class DemixingModule {
  public:
   /*!\brief Metadata held for one audio element ID.
    *
    * Holds the demixers and down-mixers for the audio element together with
    * information about its channels.
    */
   struct DemixingMetadataForAudioElementId {
     // Demixers for this audio element.
     std::list<Demixer> demixers;
     // Down-mixers for this audio element.
     std::list<Demixer> down_mixers;
     // Channel labels associated with each substream ID.
     SubstreamIdLabelsMap substream_id_to_labels;
     // Output gain associated with each channel label.
     LabelGainMap label_to_output_gain;
   };

   /*!\brief Per-audio-element input to `CreateForDownMixingAndReconstruction`.
    */
   struct DownmixingAndReconstructionConfig {
     // Labels provided by the user.
     absl::flat_hash_set<ChannelLabel::Label> user_labels;
     // Taken from the corresponding `AudioElementWithData`.
     SubstreamIdLabelsMap substream_id_to_labels;
     // Taken from the corresponding `AudioElementWithData`.
     LabelGainMap label_to_output_gain;
   };

   /*!\brief Creates a `DemixingModule` for down-mixing and reconstruction.
    *
    * This is most useful from the context of an encoder. For example, to encode
    * a scalable channel audio element with two layers, the input channels are
    * down-mixed according to various rules in the spec.
    *
    * Initializes metadata for each input audio element ID. The metadata includes
    * information about the channels and the specific down-mixers and demixers
    * needed for that audio element.
    *
    * NOTE(review): the parameter is a `const` rvalue reference, so it binds only
    * to rvalues yet cannot be moved from; confirm whether `const&` was
    * intended.
    *
    * \param id_to_config_map Map of Audio Element IDs to
    *        `DownmixingAndReconstructionConfig`, which contains the
    *        user-provided labels and the `substream_id_to_labels` and
    *        `label_to_output_gain` from the corresponding
    *        `AudioElementWithData`.
    * \return `DemixingModule` on success. A specific status on failure.
    */
   static absl::StatusOr<DemixingModule> CreateForDownMixingAndReconstruction(
       const absl::flat_hash_map<DecodedUleb128,
                                 DownmixingAndReconstructionConfig>&&
           id_to_config_map);

   /*!\brief Creates a `DemixingModule` for reconstruction (demixing).
    *
    * This is most useful from the context of a decoder. For example, to decode
    * a scalable channel audio element with two layers, the substreams are
    * demixed according to various rules in the spec.
    *
    * Initializes metadata for each input audio element ID. The metadata includes
    * information about the channels and the specific down-mixers and demixers
    * needed for that audio element.
    *
    * \param audio_elements Audio elements.
    * \return `DemixingModule` on success. A specific status on failure.
    */
   static absl::StatusOr<DemixingModule> CreateForReconstruction(
       const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
           audio_elements);

   /*!\brief Searches the input map for the target samples or demixed samples.
    *
    * \param label Label of the channel (or its demixed version) to search for.
    * \param label_to_samples Map of label to samples to search.
    * \param samples Output argument for the samples if found.
    * \return `absl::OkStatus()` on success. `absl::UnknownError()` if the search
    *         failed.
    */
   static absl::Status FindSamplesOrDemixedSamples(
       ChannelLabel::Label label, const LabelSamplesMap& label_to_samples,
       const std::vector<InternalSampleType>** samples);

   /*!\brief Down-mixes samples of input channels to substreams.
    *
    * \param audio_element_id Audio Element ID of these substreams.
    * \param down_mixing_params Down mixing parameters to use. Ignored when
    *        there is no associated down-mixer.
    * \param input_label_to_samples Samples in input channels organized by the
    *        channel labels. Passed by non-const reference.
    * \param substream_id_to_substream_data Mapping from substream IDs to
    *        substream data. Passed by non-const reference.
    * \return `absl::OkStatus()` on success. A specific status on failure.
    */
   absl::Status DownMixSamplesToSubstreams(
       DecodedUleb128 audio_element_id,
       const DownMixingParams& down_mixing_params,
       LabelSamplesMap& input_label_to_samples,
       absl::flat_hash_map<uint32_t, SubstreamData>&
           substream_id_to_substream_data) const;

   /*!\brief Demix original audio samples.
    *
    * This is most useful when the original (before lossy codec) samples are
    * known, such as when encoding original audio.
    *
    * \param audio_frames Audio Frames.
    * \return Output data structure for samples, or a specific status on failure.
    */
   absl::StatusOr<IdLabeledFrameMap> DemixOriginalAudioSamples(
       const std::list<AudioFrameWithData>& audio_frames) const;

   /*!\brief Demix decoded audio samples.
    *
    * This is most useful when the decoded (after lossy codec) samples are
    * known, such as when decoding an IA Sequence, or when analyzing the effect
    * of a lossy codec to determine appropriate recon gain values.
    *
    * \param decoded_audio_frame Decoded Audio Frames.
    * \return Output data structure for samples, or a specific status on failure.
    */
   absl::StatusOr<IdLabeledFrameMap> DemixDecodedAudioSamples(
       const std::list<DecodedAudioFrame>& decoded_audio_frame) const;

   /*!\brief Gets the down-mixers associated with an Audio Element ID.
    *
    * \param audio_element_id Audio Element ID
    * \param down_mixers Output pointer to the list of down-mixers.
    * \return `absl::OkStatus()` on success. A specific status on failure.
    */
   absl::Status GetDownMixers(DecodedUleb128 audio_element_id,
                              const std::list<Demixer>*& down_mixers) const;

   /*!\brief Gets the demixers associated with an Audio Element ID.
    *
    * \param audio_element_id Audio Element ID
    * \param demixers Output pointer to the list of demixers.
    * \return `absl::OkStatus()` on success. A specific status on failure.
    */
   absl::Status GetDemixers(DecodedUleb128 audio_element_id,
                            const std::list<Demixer>*& demixers) const;

  private:
   // Operating mode; the enumerators mirror the names of the two factory
   // functions.
   enum class DemixingMode { kDownMixingAndReconstruction, kReconstruction };

   /*!\brief Private constructor.
    *
    * For use with `CreateForDownMixingAndReconstruction` and
    * `CreateForReconstruction`.
    *
    * \param demixing_mode Mode of the class.
    * \param audio_element_id_to_demixing_metadata Mapping from audio element ID
    *        to demixing metadata. Moved into the corresponding member.
    */
   DemixingModule(
       DemixingMode demixing_mode,
       absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>&&
           audio_element_id_to_demixing_metadata)
       : demixing_mode_(demixing_mode),
         audio_element_id_to_demixing_metadata_(
             std::move(audio_element_id_to_demixing_metadata)) {}

   // Mode passed to the constructor.
   DemixingMode demixing_mode_;

   // Mapping from audio element ID to demixing metadata; fixed at construction.
   // NOTE(review): because this member is `const`, the implicit move
   // constructor copies the map and the implicit assignment operators are
   // deleted; confirm this is intended.
   const absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
       audio_element_id_to_demixing_metadata_;
 };

 }  // namespace iamf_tools

 #endif  // CLI_DEMIXING_MODULE_H_
	/*
	* Copyright (c) 2023, Alliance for Open Media. All rights reserved
	*
	* This source code is subject to the terms of the BSD 3-Clause Clear License
	* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
	* License was not distributed with this source code in the LICENSE file, you
	* can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
	* Alliance for Open Media Patent License 1.0 was not distributed with this
	* source code in the PATENTS file, you can obtain it at
	* www.aomedia.org/license/patent.
	*/

	#ifndef CLI_DEMIXING_MODULE_H_
	#define CLI_DEMIXING_MODULE_H_

	#include <cstdint>
	#include <deque>
	#include <list>
	#include <utility>
	#include <vector>

	#include "absl/container/flat_hash_map.h"
	#include "absl/container/flat_hash_set.h"
	#include "absl/container/node_hash_map.h"
	#include "absl/status/status.h"
	#include "absl/status/statusor.h"
	#include "iamf/cli/audio_element_with_data.h"
	#include "iamf/cli/audio_frame_decoder.h"
	#include "iamf/cli/audio_frame_with_data.h"
	#include "iamf/cli/channel_label.h"
	#include "iamf/obu/audio_element.h"
	#include "iamf/obu/demixing_info_parameter_data.h"
	#include "iamf/obu/recon_gain_info_parameter_data.h"
	#include "iamf/obu/types.h"

	namespace iamf_tools {

	// Sample buffers, output gains and trimming counts for a single substream.
	//
	// The scalar fields carry default member initializers so that a
	// default-initialized `SubstreamData` never exposes indeterminate values. The
	// struct remains an aggregate (C++14 and later), so existing brace
	// initialization keeps working.
	struct SubstreamData {
	  uint32_t substream_id = 0;

	  // Samples arranged in a FIFO queue with a vector of channels. There can only
	  // be one or two channels. Includes "virtual" samples that are output from the
	  // encoder, but are not passed to the encoder.
	  std::deque<std::vector<int32_t>> samples_obu;
	  // Samples to pass to encoder.
	  std::deque<std::vector<int32_t>> samples_encode;
	  // One or two elements; corresponding to the output gain to be applied to
	  // each channel.
	  std::vector<double> output_gains_linear;
	  uint32_t num_samples_to_trim_at_end = 0;
	  uint32_t num_samples_to_trim_at_start = 0;
	};

	// Associates each channel label with one frame of samples for that channel.
	using LabelSamplesMap =
	    absl::node_hash_map<ChannelLabel::Label, std::vector<InternalSampleType>>;

	// One frame of per-channel samples, keyed by channel label, together with the
	// trimming counts and parameter data carried alongside it.
	//
	// The scalar fields carry default member initializers so that a
	// default-initialized `LabeledFrame` never exposes indeterminate values. The
	// struct remains an aggregate (C++14 and later).
	struct LabeledFrame {
	  int32_t end_timestamp = 0;
	  uint32_t samples_to_trim_at_end = 0;
	  uint32_t samples_to_trim_at_start = 0;
	  // Samples for each channel label.
	  LabelSamplesMap label_to_samples;
	  DownMixingParams demixing_params;
	  ReconGainInfoParameterData recon_gain_info_parameter_data;
	  // Vector of length `num_layers`. Only populated for scalable channel audio.
	  std::vector<ChannelAudioLayerConfig::LoudspeakerLayout>
	      loudspeaker_layout_per_layer;
	};

	// Associates each audio element ID with its `LabeledFrame`.
	using IdLabeledFrameMap = absl::flat_hash_map<DecodedUleb128, LabeledFrame>;

	// Function-pointer type used for both demixers and down-mixers: receives the
	// down-mixing parameters and a mutable label-to-samples map, returns a status.
	using Demixer = absl::Status (*)(const DownMixingParams&, LabelSamplesMap&);

	/*!\brief Manages data and processing to down-mix and demix audio elements.
	*
	* This class relates to the "Element Reconstructor" as used in the IAMF
	* specifications. "An Element Reconstructor re-assembles the Audio Elements by
	* combining the Channel Group(s) guided by Descriptors and Parameter
	* Substream(s)." This class does not apply the reconstruction gain, so
	* additional post processing is needed to finish audio element reconstruction.
	*
	* Down-mixers are used to down-mix the input channels to the substream
	* channels. Typically there are down-mixers for scalable channel audio
	* elements with more than one layer. Down-mixers are created according to
	* https://aomediacodec.github.io/iamf/#iamfgeneration-scalablechannelaudio-downmixmechanism
	*
	* Demixers are used to recreate the original audio from the substreams.
	* Demixers are created according to
	* https://aomediacodec.github.io/iamf/#processing-scalablechannelaudio.
	*
	* Instances are obtained through the static `CreateFor...` factory functions;
	* the constructor is private.
	*/
	class DemixingModule {
	public:
	/*!\brief Metadata held for one audio element ID.
	*
	* Holds the demixers and down-mixers for the audio element together with
	* information about its channels.
	*/
	struct DemixingMetadataForAudioElementId {
	// Demixers for this audio element.
	std::list<Demixer> demixers;
	// Down-mixers for this audio element.
	std::list<Demixer> down_mixers;
	// Channel labels associated with each substream ID.
	SubstreamIdLabelsMap substream_id_to_labels;
	// Output gain associated with each channel label.
	LabelGainMap label_to_output_gain;
	};

	/*!\brief Per-audio-element input to `CreateForDownMixingAndReconstruction`.
	*/
	struct DownmixingAndReconstructionConfig {
	// Labels provided by the user.
	absl::flat_hash_set<ChannelLabel::Label> user_labels;
	// Taken from the corresponding `AudioElementWithData`.
	SubstreamIdLabelsMap substream_id_to_labels;
	// Taken from the corresponding `AudioElementWithData`.
	LabelGainMap label_to_output_gain;
	};

	/*!\brief Creates a `DemixingModule` for down-mixing and reconstruction.
	*
	* This is most useful from the context of an encoder. For example, to encode
	* a scalable channel audio element with two layers, the input channels are
	* down-mixed according to various rules in the spec.
	*
	* Initializes metadata for each input audio element ID. The metadata includes
	* information about the channels and the specific down-mixers and demixers
	* needed for that audio element.
	*
	* NOTE(review): the parameter is a `const` rvalue reference, so it binds only
	* to rvalues yet cannot be moved from; confirm whether `const&` was
	* intended.
	*
	* \param id_to_config_map Map of Audio Element IDs to
	* `DownmixingAndReconstructionConfig`, which contains the
	* user-provided labels and the `substream_id_to_labels` and
	* `label_to_output_gain` from the corresponding
	* `AudioElementWithData`.
	* \return `DemixingModule` on success. A specific status on failure.
	*/
	static absl::StatusOr<DemixingModule> CreateForDownMixingAndReconstruction(
	const absl::flat_hash_map<DecodedUleb128,
	DownmixingAndReconstructionConfig>&&
	id_to_config_map);

	/*!\brief Creates a `DemixingModule` for reconstruction (demixing).
	*
	* This is most useful from the context of a decoder. For example, to decode
	* a scalable channel audio element with two layers, the substreams are
	* demixed according to various rules in the spec.
	*
	* Initializes metadata for each input audio element ID. The metadata includes
	* information about the channels and the specific down-mixers and demixers
	* needed for that audio element.
	*
	* \param audio_elements Audio elements.
	* \return `DemixingModule` on success. A specific status on failure.
	*/
	static absl::StatusOr<DemixingModule> CreateForReconstruction(
	const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
	audio_elements);

	/*!\brief Searches the input map for the target samples or demixed samples.
	*
	* \param label Label of the channel (or its demixed version) to search for.
	* \param label_to_samples Map of label to samples to search.
	* \param samples Output argument for the samples if found.
	* \return `absl::OkStatus()` on success. `absl::UnknownError()` if the search
	* failed.
	*/
	static absl::Status FindSamplesOrDemixedSamples(
	ChannelLabel::Label label, const LabelSamplesMap& label_to_samples,
	const std::vector<InternalSampleType>** samples);

	/*!\brief Down-mixes samples of input channels to substreams.
	*
	* \param audio_element_id Audio Element ID of these substreams.
	* \param down_mixing_params Down mixing parameters to use. Ignored when
	* there is no associated down-mixer.
	* \param input_label_to_samples Samples in input channels organized by the
	* channel labels. Passed by non-const reference.
	* \param substream_id_to_substream_data Mapping from substream IDs to
	* substream data. Passed by non-const reference.
	* \return `absl::OkStatus()` on success. A specific status on failure.
	*/
	absl::Status DownMixSamplesToSubstreams(
	DecodedUleb128 audio_element_id,
	const DownMixingParams& down_mixing_params,
	LabelSamplesMap& input_label_to_samples,
	absl::flat_hash_map<uint32_t, SubstreamData>&
	substream_id_to_substream_data) const;

	/*!\brief Demix original audio samples.
	*
	* This is most useful when the original (before lossy codec) samples are
	* known, such as when encoding original audio.
	*
	* \param audio_frames Audio Frames.
	* \return Output data structure for samples, or a specific status on failure.
	*/
	absl::StatusOr<IdLabeledFrameMap> DemixOriginalAudioSamples(
	const std::list<AudioFrameWithData>& audio_frames) const;

	/*!\brief Demix decoded audio samples.
	*
	* This is most useful when the decoded (after lossy codec) samples are
	* known, such as when decoding an IA Sequence, or when analyzing the effect
	* of a lossy codec to determine appropriate recon gain values.
	*
	* \param decoded_audio_frame Decoded Audio Frames.
	* \return Output data structure for samples, or a specific status on failure.
	*/
	absl::StatusOr<IdLabeledFrameMap> DemixDecodedAudioSamples(
	const std::list<DecodedAudioFrame>& decoded_audio_frame) const;

	/*!\brief Gets the down-mixers associated with an Audio Element ID.
	*
	* \param audio_element_id Audio Element ID
	* \param down_mixers Output pointer to the list of down-mixers.
	* \return `absl::OkStatus()` on success. A specific status on failure.
	*/
	absl::Status GetDownMixers(DecodedUleb128 audio_element_id,
	const std::list<Demixer>*& down_mixers) const;

	/*!\brief Gets the demixers associated with an Audio Element ID.
	*
	* \param audio_element_id Audio Element ID
	* \param demixers Output pointer to the list of demixers.
	* \return `absl::OkStatus()` on success. A specific status on failure.
	*/
	absl::Status GetDemixers(DecodedUleb128 audio_element_id,
	const std::list<Demixer>*& demixers) const;

	private:
	// Operating mode; the enumerators mirror the names of the two factory
	// functions.
	enum class DemixingMode { kDownMixingAndReconstruction, kReconstruction };

	/*!\brief Private constructor.
	*
	* For use with `CreateForDownMixingAndReconstruction` and
	* `CreateForReconstruction`.
	*
	* \param demixing_mode Mode of the class.
	* \param audio_element_id_to_demixing_metadata Mapping from audio element ID
	* to demixing metadata. Moved into the corresponding member.
	*/
	DemixingModule(
	DemixingMode demixing_mode,
	absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>&&
	audio_element_id_to_demixing_metadata)
	: demixing_mode_(demixing_mode),
	audio_element_id_to_demixing_metadata_(
	std::move(audio_element_id_to_demixing_metadata)) {}

	// Mode passed to the constructor.
	DemixingMode demixing_mode_;

	// Mapping from audio element ID to demixing metadata; fixed at construction.
	// NOTE(review): because this member is `const`, the implicit move
	// constructor copies the map and the implicit assignment operators are
	// deleted; confirm this is intended.
	const absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
	audio_element_id_to_demixing_metadata_;
	};

	} // namespace iamf_tools

	#endif // CLI_DEMIXING_MODULE_H_