// blob: aa9b1f55a708c05b6b202cff61eec41b0db16f02 [file] [log] [blame]
/*
* Copyright (c) 2023, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear License
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
* License was not distributed with this source code in the LICENSE file, you
* can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
* Alliance for Open Media Patent License 1.0 was not distributed with this
* source code in the PATENTS file, you can obtain it at
* www.aomedia.org/license/patent.
*/
#ifndef CLI_DEMIXING_MODULE_H_
#define CLI_DEMIXING_MODULE_H_
#include <cstdint>
#include <deque>
#include <list>
#include <utility>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/container/node_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_decoder.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/channel_label.h"
#include "iamf/obu/audio_element.h"
#include "iamf/obu/demixing_info_parameter_data.h"
#include "iamf/obu/recon_gain_info_parameter_data.h"
#include "iamf/obu/types.h"
namespace iamf_tools {
/*!\brief Sample queues, output gains and trimming counts for one substream.
 *
 * All scalar members carry default initializers so that a default-constructed
 * instance never exposes indeterminate values. The struct remains an
 * aggregate, so existing brace-initialization keeps working.
 */
struct SubstreamData {
  // Identifier of the substream; zero until set by the owner.
  uint32_t substream_id = 0;

  // Samples arranged in a FIFO queue with a vector of channels. There can only
  // be one or two channels. Includes "virtual" samples that are output from the
  // encoder, but are not passed to the encoder.
  std::deque<std::vector<int32_t>> samples_obu;

  // Samples to pass to encoder.
  std::deque<std::vector<int32_t>> samples_encode;

  // One or two elements; corresponding to the output gain to be applied to
  // each channel.
  std::vector<double> output_gains_linear;

  // Trimming counts; zero (no trimming) until set by the owner.
  uint32_t num_samples_to_trim_at_end = 0;
  uint32_t num_samples_to_trim_at_start = 0;
};
// Associates each channel label with the samples of one frame for that
// channel.
using LabelSamplesMap =
    absl::node_hash_map<ChannelLabel::Label, std::vector<InternalSampleType>>;
/*!\brief One frame of samples keyed by channel label, plus its parameters.
 *
 * The scalar members carry default initializers so that a default-constructed
 * frame never exposes indeterminate values. The struct remains an aggregate,
 * so existing brace-initialization keeps working.
 */
struct LabeledFrame {
  // Timestamp at the end of the frame; zero until set by the producer.
  int32_t end_timestamp = 0;

  // Trimming counts for this frame; zero (no trimming) until set.
  uint32_t samples_to_trim_at_end = 0;
  uint32_t samples_to_trim_at_start = 0;

  // Samples of this frame, organized by channel label.
  LabelSamplesMap label_to_samples;

  // Parameter data carried along with the frame.
  DownMixingParams demixing_params;
  ReconGainInfoParameterData recon_gain_info_parameter_data;

  // Vector of length `num_layers`. Only populated for scalable channel audio.
  std::vector<ChannelAudioLayerConfig::LoudspeakerLayout>
      loudspeaker_layout_per_layer;
};
// Associates each audio element ID with its `LabeledFrame`.
using IdLabeledFrameMap = absl::flat_hash_map<DecodedUleb128, LabeledFrame>;
// Pointer to a function that receives down-mixing parameters and a mutable
// `LabelSamplesMap`, and reports its outcome as an `absl::Status`.
using Demixer = absl::Status (*)(const DownMixingParams&, LabelSamplesMap&);
/*!\brief Manages data and processing to down-mix and demix audio elements.
*
* This class relates to the "Element Reconstructor" as used in the IAMF
* specifications. "An Element Reconstructor re-assembles the Audio Elements by
* combining the Channel Group(s) guided by Descriptors and Parameter
* Substream(s)." This class does not apply the reconstruction gain, so
* additional post processing is needed to finish audio element reconstruction.
*
* Down-mixers are used to down-mix the input channels to the substream
* channels. Typically there are down-mixers for scalable channel audio
* elements with more than one layer. Down-mixers are created according to
* https://aomediacodec.github.io/iamf/#iamfgeneration-scalablechannelaudio-downmixmechanism
*
* Demixers are used to recreate the original audio from the substreams.
* Demixers are created according to
* https://aomediacodec.github.io/iamf/#processing-scalablechannelaudio.
*/
class DemixingModule {
 public:
  /*!\brief Metadata stored per audio element ID.
   *
   * Holds the demixers and down-mixers (function pointers of type `Demixer`)
   * together with the substream-to-label and label-to-gain maps.
   */
  struct DemixingMetadataForAudioElementId {
    std::list<Demixer> demixers;
    std::list<Demixer> down_mixers;
    SubstreamIdLabelsMap substream_id_to_labels;
    LabelGainMap label_to_output_gain;
  };

  /*!\brief Input configuration for `CreateForDownMixingAndReconstruction`.
   *
   * Contains the user-provided labels and the `substream_id_to_labels` and
   * `label_to_output_gain` from the corresponding `AudioElementWithData`.
   */
  struct DownmixingAndReconstructionConfig {
    absl::flat_hash_set<ChannelLabel::Label> user_labels;
    SubstreamIdLabelsMap substream_id_to_labels;
    LabelGainMap label_to_output_gain;
  };

  /*!\brief Creates a `DemixingModule` for down-mixing and reconstruction.
   *
   * This is most useful from the context of an encoder. For example, to encode
   * a scalable channel audio element with two layers, the input channels are
   * down-mixed according to various rules in the spec.
   *
   * Initializes metadata for each input audio element ID. The metadata includes
   * information about the channels and the specific down-mixers and demixers
   * needed for that audio element.
   *
   * NOTE(review): the parameter is a `const` rvalue reference, so it binds
   * only to rvalues yet cannot be moved from. It is left as is because the
   * definition, which is not part of this header, must match the declaration.
   *
   * \param id_to_config_map Map of Audio Element IDs to
   *        `DownmixingAndReconstructionConfig`, which contains the
   *        user-provided labels and the `substream_id_to_labels` and
   *        `label_to_output_gain` from the corresponding
   *        `AudioElementWithData`.
   * \return The created `DemixingModule` on success. A specific status on
   *         failure.
   */
  static absl::StatusOr<DemixingModule> CreateForDownMixingAndReconstruction(
      const absl::flat_hash_map<DecodedUleb128,
                                DownmixingAndReconstructionConfig>&&
          id_to_config_map);

  /*!\brief Creates a `DemixingModule` for reconstruction (demixing).
   *
   * This is most useful from the context of a decoder. For example, to decode
   * a scalable channel audio element with two layers, the substreams are
   * demixed according to various rules in the spec.
   *
   * Initializes metadata for each input audio element ID. The metadata includes
   * information about the channels and the specific down-mixers and demixers
   * needed for that audio element.
   *
   * \param audio_elements Audio elements.
   * \return The created `DemixingModule` on success. A specific status on
   *         failure.
   */
  static absl::StatusOr<DemixingModule> CreateForReconstruction(
      const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
          audio_elements);

  /*!\brief Searches the input map for the target samples or demixed samples.
   *
   * \param label Label of the channel (or its demixed version) to search for.
   * \param label_to_samples Map of label to samples to search.
   * \param samples Output argument for the samples if found.
   * \return `absl::OkStatus()` on success. `absl::UnknownError()` if the search
   *         failed.
   */
  static absl::Status FindSamplesOrDemixedSamples(
      ChannelLabel::Label label, const LabelSamplesMap& label_to_samples,
      const std::vector<InternalSampleType>** samples);

  /*!\brief Down-mixes samples of input channels to substreams.
   *
   * \param audio_element_id Audio Element ID of these substreams.
   * \param down_mixing_params Down mixing parameters to use. Ignored when
   *        there is no associated down-mixer.
   * \param input_label_to_samples Samples in input channels organized by the
   *        channel labels.
   * \param substream_id_to_substream_data Mapping from substream IDs to
   *        substream data.
   * \return `absl::OkStatus()` on success. A specific status on failure.
   */
  absl::Status DownMixSamplesToSubstreams(
      DecodedUleb128 audio_element_id,
      const DownMixingParams& down_mixing_params,
      LabelSamplesMap& input_label_to_samples,
      absl::flat_hash_map<uint32_t, SubstreamData>&
          substream_id_to_substream_data) const;

  /*!\brief Demix original audio samples.
   *
   * This is most useful when the original (before lossy codec) samples are
   * known, such as when encoding original audio.
   *
   * \param audio_frames Audio Frames.
   * \return Output data structure for samples, or a specific status on failure.
   */
  absl::StatusOr<IdLabeledFrameMap> DemixOriginalAudioSamples(
      const std::list<AudioFrameWithData>& audio_frames) const;

  /*!\brief Demix decoded audio samples.
   *
   * This is most useful when the decoded (after lossy codec) samples are
   * known, such as when decoding an IA Sequence, or when analyzing the effect
   * of a lossy codec to determine appropriate recon gain values.
   *
   * \param decoded_audio_frames Decoded Audio Frames.
   * \return Output data structure for samples, or a specific status on failure.
   */
  absl::StatusOr<IdLabeledFrameMap> DemixDecodedAudioSamples(
      const std::list<DecodedAudioFrame>& decoded_audio_frames) const;

  /*!\brief Gets the down-mixers associated with an Audio Element ID.
   *
   * \param audio_element_id Audio Element ID
   * \param down_mixers Output pointer to the list of down-mixers.
   * \return `absl::OkStatus()` on success. A specific status on failure.
   */
  absl::Status GetDownMixers(DecodedUleb128 audio_element_id,
                             const std::list<Demixer>*& down_mixers) const;

  /*!\brief Gets the demixers associated with an Audio Element ID.
   *
   * \param audio_element_id Audio Element ID
   * \param demixers Output pointer to the list of demixers.
   * \return `absl::OkStatus()` on success. A specific status on failure.
   */
  absl::Status GetDemixers(DecodedUleb128 audio_element_id,
                           const std::list<Demixer>*& demixers) const;

 private:
  enum class DemixingMode { kDownMixingAndReconstruction, kReconstruction };

  /*!\brief Private constructor.
   *
   * For use with `CreateForDownMixingAndReconstruction` and
   * `CreateForReconstruction`.
   *
   * \param demixing_mode Mode of the class.
   * \param audio_element_id_to_demixing_metadata Mapping from audio element ID
   *        to demixing metadata.
   */
  DemixingModule(
      DemixingMode demixing_mode,
      absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>&&
          audio_element_id_to_demixing_metadata)
      : demixing_mode_(demixing_mode),
        audio_element_id_to_demixing_metadata_(
            std::move(audio_element_id_to_demixing_metadata)) {}

  // Mode this instance was constructed with.
  DemixingMode demixing_mode_;

  // Mapping from audio element ID to demixing metadata.
  //
  // Deliberately not declared `const`: a `const` data member would force the
  // implicit move constructor to copy the whole map (and would delete
  // assignment), while the factory functions hand this class out by value
  // inside an `absl::StatusOr`. Every non-static member function is `const`,
  // so the map is still never modified after construction.
  absl::flat_hash_map<DecodedUleb128, DemixingMetadataForAudioElementId>
      audio_element_id_to_demixing_metadata_;
};
} // namespace iamf_tools
#endif // CLI_DEMIXING_MODULE_H_