blob: 4274c3bd9bf4761fced43e661e6a2f11ed7d5547 [file] [log] [blame]
/*
* Copyright (c) 2024, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear License
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
* License was not distributed with this source code in the LICENSE file, you
* can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
* Alliance for Open Media Patent License 1.0 was not distributed with this
* source code in the PATENTS file, you can obtain it at
* www.aomedia.org/license/patent.
*/
#ifndef CLI_IAMF_ENCODER_H_
#define CLI_IAMF_ENCODER_H_
#include <cstdint>
#include <list>
#include <memory>
#include <optional>
#include <utility>
#include <vector>

#include "absl/base/nullability.h"
#include "absl/container/flat_hash_map.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_decoder.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/channel_label.h"
#include "iamf/cli/demixing_module.h"
#include "iamf/cli/global_timing_module.h"
#include "iamf/cli/loudness_calculator_factory_base.h"
#include "iamf/cli/parameter_block_with_data.h"
#include "iamf/cli/parameters_manager.h"
#include "iamf/cli/proto/test_vector_metadata.pb.h"
#include "iamf/cli/proto/user_metadata.pb.h"
#include "iamf/cli/proto_conversion/proto_to_obu/audio_frame_generator.h"
#include "iamf/cli/proto_conversion/proto_to_obu/parameter_block_generator.h"
#include "iamf/cli/renderer_factory.h"
#include "iamf/cli/rendering_mix_presentation_finalizer.h"
#include "iamf/obu/arbitrary_obu.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/ia_sequence_header.h"
#include "iamf/obu/mix_presentation.h"
#include "iamf/obu/param_definition_variant.h"
#include "iamf/obu/types.h"
namespace iamf_tools {
/*!\brief A class that encodes an IA Sequence and generates OBUs.
*
* Descriptor OBUs are generated once at the beginning, and data OBUs are
* generated iteratively for each temporal unit (TU). The use pattern of this
* class is:
* // Call factory function.
* absl::StatusOr<IamfEncoder> encoder = IamfEncoder::Create(...);
* if(!encoder.ok()) {
* // Handle error.
* }
*
* while (encoder->GeneratingDataObus()) {
* // Prepare for the next temporal unit; clear state of the previous TU.
* encoder->BeginTemporalUnit();
*
* // For all audio elements and labels corresponding to this temporal unit:
* for each audio element: {
* for each channel label from the current element {
* encoder->AddSamples(audio_element_id, label, samples);
* }
* }
*
* // When all samples (for all temporal units) are added:
* if (done_receiving_all_audio) {
* encoder->FinalizeAddSamples();
* }
*
* // For all parameter block metadata corresponding to this temporal unit:
* encoder->AddParameterBlockMetadata(...);
*
* // Get OBUs for next encoded temporal unit.
* encoder->OutputTemporalUnit(...);
* }
* // Get the final mix presentation OBUs, with measured loudness information.
* auto mix_presentation_obus = encoder->GetFinalizedMixPresentationObus();
*
* Note that the timestamps corresponding to `AddSamples()` and
* `AddParameterBlockMetadata()` might differ from those of the output OBUs
* obtained in `OutputTemporalUnit()`, because some codecs introduce a frame of
* delay. We thus distinguish the concepts of input and output timestamps
* (`input_timestamp` and `output_timestamp`) in the code below.
*/
class IamfEncoder {
 public:
  /*!\brief Creates an `IamfEncoder`.
   *
   * \param user_metadata User metadata that describes the IAMF stream.
   * \param renderer_factory Factory that creates the renderers used when
   *        measuring loudness. May be null.
   * \param loudness_calculator_factory Factory that creates the loudness
   *        calculators which measure the loudness of the output layouts. May
   *        be null.
   * \param sample_processor_factory Factory that creates the processors used
   *        after rendering.
   * \param ia_sequence_header_obu The generated IA Sequence Header OBU.
   * \param codec_config_obus The generated Codec Config OBUs, keyed by Codec
   *        Config ID.
   * \param audio_elements The generated audio element OBUs with data, keyed
   *        by Audio Element ID.
   * \param preliminary_mix_presentation_obus The preliminary Mix Presentation
   *        OBUs. Their loudness metadata is almost certainly incorrect if
   *        they are used directly; best practice is to replace them with the
   *        result of `GetFinalizedMixPresentationObus()` once all data OBUs
   *        have been generated.
   * \param arbitrary_obus The generated Arbitrary OBUs.
   * \return The created encoder if successful. A specific status on failure.
   */
  static absl::StatusOr<IamfEncoder> Create(
      const iamf_tools_cli_proto::UserMetadata& user_metadata,
      absl::Nullable<const RendererFactoryBase*> renderer_factory,
      absl::Nullable<const LoudnessCalculatorFactoryBase*>
          loudness_calculator_factory,
      const RenderingMixPresentationFinalizer::SampleProcessorFactory&
          sample_processor_factory,
      std::optional<IASequenceHeaderObu>& ia_sequence_header_obu,
      absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
      absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
      std::list<MixPresentationObu>& preliminary_mix_presentation_obus,
      std::list<ArbitraryObu>& arbitrary_obus);

  /*!\brief Reports whether data OBUs are still being generated.
   *
   * \return True while the encoder is still generating data OBUs.
   */
  bool GeneratingDataObus() const;

  /*!\brief Resets per-temporal-unit state, e.g. the accumulated samples, so
   *        that the next temporal unit can be processed.
   */
  void BeginTemporalUnit();

  /*!\brief Retrieves the input timestamp of the current data OBU generation
   *        iteration.
   *
   * \param input_timestamp Output argument receiving the input timestamp.
   * \return `absl::OkStatus()` if successful. A specific status on failure.
   */
  absl::Status GetInputTimestamp(int32_t& input_timestamp);

  /*!\brief Adds audio samples that belong to the same temporal unit.
   *
   * Calling this after `FinalizeAddSamples()` is discouraged but permitted;
   * samples added that late are ignored and never encoded.
   *
   * \param audio_element_id ID of the audio element receiving the samples.
   * \param label Channel label receiving the samples.
   * \param samples Audio samples to add.
   */
  void AddSamples(DecodedUleb128 audio_element_id, ChannelLabel::Label label,
                  const std::vector<InternalSampleType>& samples);

  /*!\brief Marks the end of sample input.
   *
   * Signals the underlying codecs to flush all remaining samples and to trim
   * samples from the end.
   */
  void FinalizeAddSamples();

  /*!\brief Adds parameter block metadata that belongs to the same temporal
   *        unit.
   *
   * \param parameter_block_metadata Parameter block metadata to add.
   * \return `absl::OkStatus()` if successful. A specific status on failure.
   */
  absl::Status AddParameterBlockMetadata(
      const iamf_tools_cli_proto::ParameterBlockObuMetadata&
          parameter_block_metadata);

  /*!\brief Outputs the data OBUs of one temporal unit.
   *
   * \param audio_frames Output list of the audio frames generated for this
   *        temporal unit.
   * \param parameter_blocks Output list of the parameter blocks generated for
   *        this temporal unit.
   * \return `absl::OkStatus()` if successful. A specific status on failure.
   */
  absl::Status OutputTemporalUnit(
      std::list<AudioFrameWithData>& audio_frames,
      std::list<ParameterBlockWithData>& parameter_blocks);

  /*!\brief Returns the finalized Mix Presentation OBUs.
   *
   * Mix Presentation OBUs carry loudness information, which can only be known
   * once all data OBUs have been generated.
   *
   * Must be called only once, and only after all data OBUs are generated,
   * i.e. after `GeneratingDataObus()` returns false.
   *
   * \return Finalized Mix Presentation OBUs. A specific status on failure.
   */
  absl::StatusOr<std::list<MixPresentationObu>>
  GetFinalizedMixPresentationObus();

 private:
  /*!\brief Private constructor; defined inline right after the class.
   *
   * Takes ownership of the arguments passed as `std::unique_ptr` or by rvalue
   * reference; `demixing_module` is copied. Some arguments are wrapped in
   * unique pointers to ensure pointer or reference stability after move.
   *
   * \param validate_user_loudness Whether to validate the user-provided
   *        loudness.
   * \param param_definition_variants Parameter definitions for the IA
   *        Sequence.
   * \param parameter_block_generator Parameter block generator.
   * \param parameters_manager Manager to support internal querying of
   *        parameters.
   * \param demixing_module Module to demix audio elements.
   * \param audio_frame_generator Audio frame generator.
   * \param audio_frame_decoder Decodes the original audio frames, to
   *        facilitate recon gain computation.
   * \param global_timing_module Manages global timing information.
   * \param mix_presentation_finalizer Module to render the output layouts and
   *        measure their loudness.
   */
  IamfEncoder(
      bool validate_user_loudness,
      std::unique_ptr<
          absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>>
          param_definition_variants,
      ParameterBlockGenerator&& parameter_block_generator,
      std::unique_ptr<ParametersManager> parameters_manager,
      const DemixingModule& demixing_module,
      std::unique_ptr<AudioFrameGenerator> audio_frame_generator,
      AudioFrameDecoder&& audio_frame_decoder,
      std::unique_ptr<GlobalTimingModule> global_timing_module,
      RenderingMixPresentationFinalizer&& mix_presentation_finalizer);

  // NOTE: data members are initialized in the order they are declared below;
  // keep this order in sync with the constructor's initializer list.

  // Whether to validate the user-provided loudness.
  const bool validate_user_loudness_;

  // Parameter definitions, keyed by parameter ID. The parameter block
  // generator holds a reference to this map, so it lives behind a
  // `std::unique_ptr` to keep that reference stable after a move.
  absl::Nonnull<std::unique_ptr<
      const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>>>
      param_definition_variants_;

  // Parameter blocks generated within a single iteration, saved per type.
  std::list<ParameterBlockWithData> temp_mix_gain_parameter_blocks_;
  std::list<ParameterBlockWithData> temp_demixing_parameter_blocks_;
  std::list<ParameterBlockWithData> temp_recon_gain_parameter_blocks_;

  // Labeled samples added within the same iteration, cached per Audio Element
  // ID.
  absl::flat_hash_map<DecodedUleb128, LabelSamplesMap> id_to_labeled_samples_;

  // Set once `FinalizeAddSamples()` has been called.
  bool add_samples_finalized_ = false;

  // Generators and modules used while iteratively generating data OBUs. Those
  // held in a `unique_ptr` are held that way for reference stability after
  // move.
  ParameterBlockGenerator parameter_block_generator_;
  absl::Nonnull<std::unique_ptr<ParametersManager>> parameters_manager_;
  const DemixingModule demixing_module_;
  absl::Nonnull<std::unique_ptr<AudioFrameGenerator>> audio_frame_generator_;
  AudioFrameDecoder audio_frame_decoder_;
  absl::Nonnull<std::unique_ptr<GlobalTimingModule>> global_timing_module_;

  // Renders the output layouts and measures their loudness.
  RenderingMixPresentationFinalizer mix_presentation_finalizer_;
};

// Stores every argument in its corresponding member: owning arguments are
// moved, `demixing_module` is copied. The remaining members keep their
// default initialization.
inline IamfEncoder::IamfEncoder(
    bool validate_user_loudness,
    std::unique_ptr<absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>>
        param_definition_variants,
    ParameterBlockGenerator&& parameter_block_generator,
    std::unique_ptr<ParametersManager> parameters_manager,
    const DemixingModule& demixing_module,
    std::unique_ptr<AudioFrameGenerator> audio_frame_generator,
    AudioFrameDecoder&& audio_frame_decoder,
    std::unique_ptr<GlobalTimingModule> global_timing_module,
    RenderingMixPresentationFinalizer&& mix_presentation_finalizer)
    : validate_user_loudness_(validate_user_loudness),
      param_definition_variants_(std::move(param_definition_variants)),
      parameter_block_generator_(std::move(parameter_block_generator)),
      parameters_manager_(std::move(parameters_manager)),
      demixing_module_(demixing_module),
      audio_frame_generator_(std::move(audio_frame_generator)),
      audio_frame_decoder_(std::move(audio_frame_decoder)),
      global_timing_module_(std::move(global_timing_module)),
      mix_presentation_finalizer_(std::move(mix_presentation_finalizer)) {}
} // namespace iamf_tools
#endif // CLI_IAMF_ENCODER_H_