blob: cc607e33be9d69f9d6f978e914aa709947367966 [file] [log] [blame]
/*
* Copyright (c) 2024, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear License
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
* License was not distributed with this source code in the LICENSE file, you
* can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
* Alliance for Open Media Patent License 1.0 was not distributed with this
* source code in the PATENTS file, you can obtain it at
* www.aomedia.org/license/patent.
*/
#ifndef CLI_OBU_PROCESSOR_H_
#define CLI_OBU_PROCESSOR_H_
#include <cstdint>
#include <list>
#include <memory>
#include <optional>
#include <type_traits>
#include <utility>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/types/span.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_decoder.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/demixing_module.h"
#include "iamf/cli/global_timing_module.h"
#include "iamf/cli/parameter_block_with_data.h"
#include "iamf/cli/parameters_manager.h"
#include "iamf/cli/rendering_mix_presentation_finalizer.h"
#include "iamf/common/read_bit_buffer.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/ia_sequence_header.h"
#include "iamf/obu/mix_presentation.h"
#include "iamf/obu/param_definition_variant.h"
#include "iamf/obu/temporal_delimiter.h"
#include "iamf/obu/types.h"
namespace iamf_tools {
class ObuProcessor {
public:
/*!\brief Processes the Descriptor OBUs of an IA Sequence.
*
* If insufficient data to process all descriptor OBUs is provided, a failing
* status will be returned. `insufficient_data` will be set to true, the
* read_bit_buffer will not be consumed, and the output parameters will not be
* populated. A user should call this function again after providing more
* data within the read_bit_buffer.
*
* \param is_exhaustive_and_exact Whether the bitstream provided is meant to
* include all descriptor OBUs and no other data. This should only be
* set to true if the user knows the exact boundaries of their set of
* descriptor OBUs.
* \param read_bit_buffer Buffer containing a portion of an iamf bitstream
* containing a sequence of OBUs. The buffer will be consumed up to the
* end of the descriptor OBUs if processing is successful.
* \param output_sequence_header IA sequence header processed from the
* bitstream.
* \param output_codec_config_obus Map of Codec Config OBUs processed from the
* bitstream.
* \param output_audio_elements_with_data Map of Audio Elements and metadata
* processed from the bitstream.
* \param output_mix_presentation_obus List of Mix Presentation OBUs processed
* from the bitstream.
* \param insufficient_data Whether the bitstream provided is insufficient to
* process all descriptor OBUs.
* \return `absl::OkStatus()` if the process is successful. A specific status
* on failure.
*/
[[deprecated(
"Remove when all tests are ported. Use the non-static version instead.")]]
static absl::Status ProcessDescriptorObus(
bool is_exhaustive_and_exact, ReadBitBuffer& read_bit_buffer,
IASequenceHeaderObu& output_sequence_header,
absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
output_codec_config_obus,
absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
output_audio_elements_with_data,
std::list<MixPresentationObu>& output_mix_presentation_obus,
bool& insufficient_data);
// TODO(b/330732117): Remove this function and use the non-static version.
/*!\brief Processes one Temporal Unit OBU of an IA Sequence.
*
* This function should only be called after successfully calling
* ProcessDescriptorObus. Output audio frames and parameter blocks are
* ordered by timestamps first and then by IDs.
*
* \param audio_elements_with_data Map containing the audio elements that
* were present in the descriptor OBUs, keyed by audio element ID.
* \param codec_config_obus Map containing the codec configs that were
* present in the descriptor OBUs, keyed by codec config ID.
* \param substream_id_to_audio_element Mapping from substream IDs to the
* audio elements that they belong to.
* \param param_definition_variants Map containing the param definitions that
* were present in the descriptor OBUs, keyed by parameter ID.
* \param parameters_manager Manager of parameters.
* \param read_bit_buffer Buffer reader that reads the IAMF bitstream.
* \param global_timing_module Module to keep track of the timing of audio
* frames and parameters.
* \param output_audio_frame_with_data Output Audio Frame with the requisite
* data.
* \param output_parameter_block_with_data Output parameter Block with the
* requisite data.
* \param output_temporal_delimiter Output temporal deilimiter OBU.
* \param continue_processing Whether the processing should be continued.
* \return `absl::OkStatus()` if the process is successful. A specific status
* on failure.
*/
static absl::Status ProcessTemporalUnitObu(
const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
audio_elements_with_data,
const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
codec_config_obus,
const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>&
substream_id_to_audio_element,
const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>&
param_definition_variants,
ParametersManager& parameters_manager, ReadBitBuffer& read_bit_buffer,
GlobalTimingModule& global_timing_module,
std::optional<AudioFrameWithData>& output_audio_frame_with_data,
std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
bool& continue_processing);
/*!\brief Creates the OBU processor.
*
* Creation succeeds only if the descriptor OBUs are successfully processed.
*
* \param is_exhaustive_and_exact Whether the bitstream provided is meant to
* include all descriptor OBUs and no other data. This should only be
* set to true if the user knows the exact boundaries of their set of
* descriptor OBUs.
* \param read_bit_buffer Pointer to the read bit buffer that reads the IAMF
* bitstream.
* \param output_insufficient_data True iff the bitstream provided is
* insufficient to process all descriptor OBUs and there is no other
* error.
* \return std::unique_ptr<ObuProcessor> on success. `nullptr` on failure.
*/
static std::unique_ptr<ObuProcessor> Create(bool is_exhaustive_and_exact,
ReadBitBuffer* read_bit_buffer,
bool& output_insufficient_data);
/*!\brief Move constructor. */
ObuProcessor(ObuProcessor&& obu_processor) = delete;
/*!\brief Creates the OBU processor for rendering.
*
* Creation succeeds only if the descriptor OBUs are successfully processed
* and all rendering modules are successfully initialized.
*
* \param desired_layout Specifies the desired layout that will be used to
* render the audio, if available in the mix presentations. If not
* available, the first layout in the first mix presentation will be
* used.
* \param sample_processor_factory Factory to create post processors.
* \param is_exhaustive_and_exact Whether the bitstream provided is meant to
* include all descriptor OBUs and no other data. This should only be
* set to true if the user knows the exact boundaries of their set of
* descriptor OBUs.
* \param read_bit_buffer Pointer to the read bit buffer that reads the IAMF
* bitstream.
* \param output_layout The layout that will be used to render the audio. This
* is the same as `desired_layout` if it is available in the mix
* presentations, otherwise a default layout is used.
* \param output_insufficient_data True iff the bitstream provided is
* insufficient to process all descriptor OBUs and there is no other
* error.
* \return Pointer to an ObuProcessor on success. `nullptr` on failure.
*/
static std::unique_ptr<ObuProcessor> CreateForRendering(
const Layout& desired_layout,
const RenderingMixPresentationFinalizer::SampleProcessorFactory&
sample_processor_factory,
bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer,
Layout& output_layout, bool& output_insufficient_data);
/*!\brief Gets the sample rate of the output audio.
*
* \return Sample rate of the output audio, or a specific error code on
* failure.
*/
absl::StatusOr<uint32_t> GetOutputSampleRate() const;
/*!\brief Gets the frame size of the output audio.
*
* Useful to determine the maximum number of samples per
* `RenderTemporalUnitAndMeasureLoudness` call.
*
* \return Number of samples in per frame of the output audio, or a specific
* specific error code on failure.
*/
absl::StatusOr<uint32_t> GetOutputFrameSize() const;
// TODO(b/381072155): Consider removing this one in favor of
// `ProcessTemporalUnit()`, which outputs all OBUs
// belonging the whole temporal unit.
/*!\brief Processes one Temporal Unit OBU from the stored IA Sequence.
*
* `Initialize()` must be called first to ready the input bitstream.
*
* \param output_audio_frame_with_data Output Audio Frame with the requisite
* data.
* \param output_parameter_block_with_data Output Parameter Block with the
* requisite data.
* \param output_temporal_delimiter Output temporal deilimiter OBU.
* \param continue_processing Whether the processing should be continued.
* \return `absl::OkStatus()` if the process is successful. A specific status
* on failure.
*/
absl::Status ProcessTemporalUnitObu(
std::optional<AudioFrameWithData>& output_audio_frame_with_data,
std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
bool& continue_processing);
struct OutputTemporalUnit {
std::list<AudioFrameWithData> output_audio_frames;
std::list<ParameterBlockWithData> output_parameter_blocks;
InternalTimestamp output_timestamp;
};
// TODO(b/379819959): Also handle Temporal Delimiter OBUs.
/*!\brief Processes all OBUs from a Temporal Unit from the stored IA Sequence.
*
* \param eos_is_end_of_sequence Whether reaching the end of the stream
* should be considered as the end of the sequence, and therefore the
* end of the temporal unit.
* \param output_temporal_unit Contains the data from the temporal unit that
* is processed.
* \param continue_processing Whether the processing should be continued.
* \return `absl::OkStatus()` if the process is successful. A specific status
* on failure.
*/
absl::Status ProcessTemporalUnit(
bool eos_is_end_of_sequence,
std::optional<OutputTemporalUnit>& output_temporal_unit,
bool& continue_processing);
/*!\brief Renders a temporal unit and measures loudness.
*
* `InitializeForRendering()` must be called before calling this.
*
* \param timestamp Timestamp of this temporal unit. Used to verify that
* the input OBUs actually belong to the same temporal unit.
* \param audio_frames_with_data Audio Frames with the requisite data.
* \param parameter_blocks_with_data Parameter Blocks with the requisite data.
* \param output_rendered_pcm_samples Output rendered PCM samples. These
* should be used immediately after this function is called; they will
* be invalidated after the next call to
* `RenderTemporalUnitAndMeasureLoudness()`, as well as after the
* `ObuProcessor` is destroyed.
* \return `absl::OkStatus()` if the process is successful. A specific status
* on failure.
*/
absl::Status RenderTemporalUnitAndMeasureLoudness(
InternalTimestamp timestamp,
const std::list<AudioFrameWithData>& audio_frames,
const std::list<ParameterBlockWithData>& parameter_blocks,
absl::Span<const std::vector<int32_t>>& output_rendered_pcm_samples);
IASequenceHeaderObu ia_sequence_header_;
absl::flat_hash_map<DecodedUleb128, CodecConfigObu> codec_config_obus_ = {};
absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements_ =
{};
std::list<MixPresentationObu> mix_presentations_ = {};
private:
/*!\brief Private constructor used only by Create() and CreateForRendering().
*
* \param read_bit_buffer Pointer to the read bit buffer that reads the IAMF
* bitstream.
* \return ObuProcessor instance.
*/
explicit ObuProcessor(ReadBitBuffer* buffer) : read_bit_buffer_(buffer) {}
/*!\brief Performs internal initialization of the OBU processor.
*
* Only used by Create() and CreateForRendering().
*
* \param is_exhaustive_and_exact Whether the bitstream provided is meant to
* include all descriptor OBUs and no other data. This should only be
* set to true if the user knows the exact boundaries of their set of
* descriptor OBUs.
\param output_insufficient_data True iff the bitstream provided is
* insufficient to process all descriptor OBUs and there is no other
* error.
* \return `absl::OkStatus()` if initialization is successful. A specific
* status on failure.
*/
absl::Status InitializeInternal(bool is_exhaustive_and_exact,
bool& output_insufficient_data);
/*!\brief Initializes the OBU processor for rendering.
*
* Must be called after `Initialize()` is called.
*
* \param desired_layout Specifies the layout that will be used to render the
* audio, if available.
* \param sample_processor_factory Factory to create post processors.
* \param output_layout The layout that will be used to render the audio. This
* is the same as `desired_layout` if it is available, otherwise a
* default layout is used.
* \return `absl::OkStatus()` if the process is successful. A specific status
* on failure.
*/
absl::Status InitializeForRendering(
const Layout& desired_layout,
const RenderingMixPresentationFinalizer::SampleProcessorFactory&
sample_processor_factory,
Layout& output_layout);
struct DecodingLayoutInfo {
DecodedUleb128 mix_presentation_id;
int sub_mix_index;
int layout_index;
};
struct TemporalUnitData {
std::list<ParameterBlockWithData> parameter_blocks;
std::list<AudioFrameWithData> audio_frames;
std::optional<TemporalDelimiterObu> temporal_delimiter;
std::optional<int32_t> timestamp;
bool Empty() const {
return parameter_blocks.empty() && audio_frames.empty();
}
void Clear() {
audio_frames.clear();
parameter_blocks.clear();
temporal_delimiter.reset();
timestamp.reset();
}
template <class T>
static void AddDataToCorrectTemporalUnit(
TemporalUnitData& current_temporal_unit,
TemporalUnitData& next_temporal_unit, T&& obu_with_data) {
const auto new_timestamp = obu_with_data.start_timestamp;
if (!current_temporal_unit.timestamp.has_value()) {
current_temporal_unit.timestamp = new_timestamp;
}
if (*current_temporal_unit.timestamp == new_timestamp) {
current_temporal_unit.GetList<T>().push_back(
std::forward<T>(obu_with_data));
} else {
next_temporal_unit.GetList<T>().push_back(
std::forward<T>(obu_with_data));
next_temporal_unit.timestamp = new_timestamp;
}
}
private:
template <class T>
std::list<T>& GetList() {
if constexpr (std::is_same_v<T, ParameterBlockWithData>) {
return parameter_blocks;
} else if constexpr (std::is_same_v<T, AudioFrameWithData>) {
return audio_frames;
}
};
};
std::optional<uint32_t> output_sample_rate_;
std::optional<uint32_t> output_frame_size_;
absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>
param_definition_variants_;
absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>
substream_id_to_audio_element_;
std::unique_ptr<GlobalTimingModule> global_timing_module_;
std::optional<ParametersManager> parameters_manager_;
ReadBitBuffer* read_bit_buffer_;
// Contains target layout information for rendering.
DecodingLayoutInfo decoding_layout_info_;
// Cached data when processing temporal units.
TemporalUnitData current_temporal_unit_;
TemporalUnitData next_temporal_unit_;
std::list<DecodedAudioFrame> decoded_frames_for_temporal_unit_;
// Modules used for rendering.
std::optional<AudioFrameDecoder> audio_frame_decoder_;
std::optional<DemixingModule> demixing_module_;
std::optional<RenderingMixPresentationFinalizer> mix_presentation_finalizer_;
};
} // namespace iamf_tools
#endif // CLI_OBU_PROCESSOR_H_