blob: 9fce5fa6483b9d11c4d9477b108efcc3803ac46e [file] [log] [blame]
/*
* Copyright (c) 2023, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear License
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
* License was not distributed with this source code in the LICENSE file, you
* can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
* Alliance for Open Media Patent License 1.0 was not distributed with this
* source code in the PATENTS file, you can obtain it at
* www.aomedia.org/license/patent.
*/
#include "iamf/cli/rendering_mix_presentation_finalizer.h"
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <list>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/base/nullability.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/functional/any_invocable.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/cli_util.h"
#include "iamf/cli/demixing_module.h"
#include "iamf/cli/loudness_calculator_base.h"
#include "iamf/cli/loudness_calculator_factory_base.h"
#include "iamf/cli/parameter_block_with_data.h"
#include "iamf/cli/renderer/audio_element_renderer_base.h"
#include "iamf/cli/renderer_factory.h"
#include "iamf/cli/sample_processor_base.h"
#include "iamf/common/utils/macros.h"
#include "iamf/common/utils/numeric_utils.h"
#include "iamf/common/utils/sample_processing_utils.h"
#include "iamf/common/utils/validation_utils.h"
#include "iamf/obu/audio_element.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/mix_presentation.h"
#include "iamf/obu/param_definitions.h"
#include "iamf/obu/parameter_block.h"
#include "iamf/obu/types.h"
namespace iamf_tools {
namespace {
using LayoutRenderingMetadata =
RenderingMixPresentationFinalizer::LayoutRenderingMetadata;
using SubmixRenderingMetadata =
RenderingMixPresentationFinalizer::SubmixRenderingMetadata;
bool CanRenderAnyLayout(
const std::vector<SubmixRenderingMetadata>& rendering_metadata) {
for (auto& submix_rendering_metadata : rendering_metadata) {
for (auto& layout_rendering_metadata :
submix_rendering_metadata.layout_rendering_metadata) {
if (layout_rendering_metadata.can_render) {
return true;
}
}
}
return false;
}
absl::Status CollectAudioElementsInSubMix(
const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
const std::vector<SubMixAudioElement>& sub_mix_audio_elements,
std::vector<const AudioElementWithData*>& audio_elements_in_sub_mix) {
audio_elements_in_sub_mix.reserve(sub_mix_audio_elements.size());
for (const auto& audio_element : sub_mix_audio_elements) {
auto iter = audio_elements.find(audio_element.audio_element_id);
if (iter == audio_elements.end()) {
return absl::InvalidArgumentError(absl::StrCat(
"Audio Element with ID= ", audio_element.audio_element_id,
" not found"));
}
audio_elements_in_sub_mix.push_back(&iter->second);
}
return absl::OkStatus();
}
absl::Status GetCommonCodecConfigPropertiesFromAudioElementIds(
const std::vector<const AudioElementWithData*>& audio_elements_in_sub_mix,
uint32_t& common_sample_rate, uint8_t& common_bit_depth,
uint32_t& common_num_samples_per_frame, bool& requires_resampling) {
absl::flat_hash_set<uint32_t> sample_rates;
absl::flat_hash_set<uint32_t> num_samples_per_frame;
absl::flat_hash_set<uint8_t> bit_depths;
// Get all the bit-depths and sample_rates from each Audio Element.
for (const auto* audio_element : audio_elements_in_sub_mix) {
num_samples_per_frame.insert(
audio_element->codec_config->GetNumSamplesPerFrame());
sample_rates.insert(audio_element->codec_config->GetOutputSampleRate());
bit_depths.insert(
audio_element->codec_config->GetBitDepthToMeasureLoudness());
}
RETURN_IF_NOT_OK(GetCommonSampleRateAndBitDepth(
sample_rates, bit_depths, common_sample_rate, common_bit_depth,
requires_resampling));
if (num_samples_per_frame.size() != 1) {
return absl::InvalidArgumentError(
"Audio elements in a submix must have the same number of samples per "
"frame.");
}
common_num_samples_per_frame = *num_samples_per_frame.begin();
return absl::OkStatus();
}
using AudioElementRenderingMetadata =
RenderingMixPresentationFinalizer::AudioElementRenderingMetadata;
absl::Status InitializeRenderingMetadata(
const RendererFactoryBase& renderer_factory,
const std::vector<const AudioElementWithData*>& audio_elements_in_sub_mix,
const std::vector<SubMixAudioElement>& sub_mix_audio_elements,
const Layout& loudness_layout, const uint32_t common_sample_rate,
std::vector<AudioElementRenderingMetadata>& rendering_metadata_array) {
rendering_metadata_array.resize(audio_elements_in_sub_mix.size());
for (int i = 0; i < audio_elements_in_sub_mix.size(); i++) {
const auto& sub_mix_audio_element = *audio_elements_in_sub_mix[i];
auto& rendering_metadata = rendering_metadata_array[i];
rendering_metadata.audio_element = &(sub_mix_audio_element.obu);
rendering_metadata.codec_config = sub_mix_audio_element.codec_config;
rendering_metadata.renderer = renderer_factory.CreateRendererForLayout(
sub_mix_audio_element.obu.audio_substream_ids_,
sub_mix_audio_element.substream_id_to_labels,
rendering_metadata.audio_element->GetAudioElementType(),
sub_mix_audio_element.obu.config_,
sub_mix_audio_elements[i].rendering_config, loudness_layout,
static_cast<size_t>(
rendering_metadata.codec_config->GetNumSamplesPerFrame()));
if (rendering_metadata.renderer == nullptr) {
return absl::UnknownError("Unable to create renderer.");
}
const uint32_t output_sample_rate =
sub_mix_audio_element.codec_config->GetOutputSampleRate();
if (common_sample_rate != output_sample_rate) {
// Theoretically, we would have to resample this audio element to the
// common sample rate. However, as of IAMF v1.1.0, the spec forbids
// multiple Codec Config OBUs. This case is not possible to occur with a
// single Codec Config OBU.
return absl::UnimplementedError(
absl::StrCat("OBUs with different sample rates not supported yet: (",
common_sample_rate, " != ", output_sample_rate, ")."));
}
}
return absl::OkStatus();
}
absl::Status FlushUntilNonEmptyOrTimeout(
AudioElementRendererBase& audio_element_renderer,
std::vector<InternalSampleType>& rendered_samples) {
static const int kMaxNumTries = 500;
for (int i = 0; i < kMaxNumTries; i++) {
RETURN_IF_NOT_OK(audio_element_renderer.Flush(rendered_samples));
if (!rendered_samples.empty()) {
// Usually samples will be ready right away. So avoid sleeping.
return absl::OkStatus();
}
absl::SleepFor(absl::Milliseconds(10));
}
return absl::DeadlineExceededError("Timed out waiting for samples.");
}
absl::Status RenderLabeledFrameToLayout(
const LabeledFrame& labeled_frame,
const AudioElementRenderingMetadata& rendering_metadata,
std::vector<InternalSampleType>& rendered_samples) {
const auto num_time_ticks =
rendering_metadata.renderer->RenderLabeledFrame(labeled_frame);
if (!num_time_ticks.ok()) {
return num_time_ticks.status();
} else if (*num_time_ticks >
static_cast<size_t>(
rendering_metadata.codec_config->GetNumSamplesPerFrame())) {
return absl::InvalidArgumentError("Too many samples in this frame");
} else if (*num_time_ticks == 0) {
// This was an empty frame.
return absl::OkStatus();
}
return FlushUntilNonEmptyOrTimeout(*rendering_metadata.renderer,
rendered_samples);
}
absl::Status GetParameterBlockLinearMixGainsPerTick(
uint32_t common_sample_rate, const ParameterBlockWithData& parameter_block,
const MixGainParamDefinition& mix_gain,
std::vector<float>& linear_mix_gain_per_tick) {
if (mix_gain.parameter_rate_ != common_sample_rate) {
// TODO(b/283281856): Support resampling parameter blocks.
return absl::UnimplementedError(
"Parameter blocks that require resampling are not supported yet.");
}
const int16_t default_mix_gain = mix_gain.default_mix_gain_;
// Initialize to the default gain value.
std::fill(linear_mix_gain_per_tick.begin(), linear_mix_gain_per_tick.end(),
std::pow(10.0f, Q7_8ToFloat(default_mix_gain) / 20.0f));
InternalTimestamp cur_tick = parameter_block.start_timestamp;
// Process as many ticks as possible until all are found or the parameter
// block ends.
while (cur_tick < parameter_block.end_timestamp &&
(cur_tick - parameter_block.start_timestamp) <
linear_mix_gain_per_tick.size()) {
RETURN_IF_NOT_OK(parameter_block.obu->GetLinearMixGain(
cur_tick - parameter_block.start_timestamp,
linear_mix_gain_per_tick[cur_tick - parameter_block.start_timestamp]));
cur_tick++;
}
return absl::OkStatus();
}
// Fills in the output `mix_gains` with the gain in Q7.8 format to apply at each
// tick.
// TODO(b/288073842): Consider improving computational efficiency instead of
// searching through all parameter blocks for each frame.
// TODO(b/379961928): Remove this function once the new
// `GetParameterBlockLinearMixGainsPerTick()` is in use.
absl::Status GetParameterBlockLinearMixGainsPerTick(
uint32_t common_sample_rate, InternalTimestamp start_timestamp,
InternalTimestamp end_timestamp,
const std::list<ParameterBlockWithData>& parameter_blocks,
const MixGainParamDefinition& mix_gain,
std::vector<float>& linear_mix_gain_per_tick) {
if (mix_gain.parameter_rate_ != common_sample_rate) {
// TODO(b/283281856): Support resampling parameter blocks.
return absl::UnimplementedError(
"Parameter blocks that require resampling are not supported yet.");
}
const auto parameter_id = mix_gain.parameter_id_;
const int16_t default_mix_gain = mix_gain.default_mix_gain_;
// Initialize to the default gain value.
std::fill(linear_mix_gain_per_tick.begin(), linear_mix_gain_per_tick.end(),
std::pow(10.0f, Q7_8ToFloat(default_mix_gain) / 20.0f));
InternalTimestamp cur_tick = start_timestamp;
// Find the mix gain at each tick. May terminate early if there are samples to
// trim at the end.
while (cur_tick < end_timestamp &&
(cur_tick - start_timestamp) < linear_mix_gain_per_tick.size()) {
// Find the parameter block that this tick occurs during.
const auto parameter_block_iter = std::find_if(
parameter_blocks.begin(), parameter_blocks.end(),
[cur_tick, parameter_id](const auto& parameter_block) {
return parameter_block.obu->parameter_id_ == parameter_id &&
parameter_block.start_timestamp <= cur_tick &&
cur_tick < parameter_block.end_timestamp;
});
if (parameter_block_iter == parameter_blocks.end()) {
// Default mix gain will be used for this frame. Logic elsewhere validates
// the rest of the audio frames have consistent coverage.
break;
}
// Process as many ticks as possible until all are found or the parameter
// block ends.
while (cur_tick < end_timestamp &&
cur_tick < parameter_block_iter->end_timestamp &&
(cur_tick - start_timestamp) < linear_mix_gain_per_tick.size()) {
RETURN_IF_NOT_OK(parameter_block_iter->obu->GetLinearMixGain(
cur_tick - parameter_block_iter->start_timestamp,
linear_mix_gain_per_tick[cur_tick - start_timestamp]));
cur_tick++;
}
}
return absl::OkStatus();
}
absl::Status GetAndApplyMixGain( // NOLINT
uint32_t common_sample_rate, const ParameterBlockWithData& parameter_block,
const MixGainParamDefinition& mix_gain, int32_t num_channels,
std::vector<InternalSampleType>& rendered_samples) {
if (rendered_samples.size() % num_channels != 0) {
return absl::InvalidArgumentError(absl::StrCat(
"Expected an integer number of interlaced channels. "
"renderered_samples.size()= ",
rendered_samples.size(), ", num_channels= ", num_channels));
}
// Get the mix gain on a per tick basis from the parameter block.
std::vector<float> linear_mix_gain_per_tick(rendered_samples.size() /
num_channels);
RETURN_IF_NOT_OK(GetParameterBlockLinearMixGainsPerTick(
common_sample_rate, parameter_block, mix_gain, linear_mix_gain_per_tick));
if (!linear_mix_gain_per_tick.empty()) {
LOG_FIRST_N(INFO, 6) << " First tick in this frame has gain: "
<< linear_mix_gain_per_tick.front();
}
for (int tick = 0; tick < linear_mix_gain_per_tick.size(); tick++) {
for (int channel = 0; channel < num_channels; channel++) {
// Apply the same mix gain to all `num_channels` associated with this
// tick.
rendered_samples[tick * num_channels + channel] *=
linear_mix_gain_per_tick[tick];
}
}
return absl::OkStatus();
}
// TODO(b/379961928): Remove once the new GetAndApplyMixGain is in use.
absl::Status GetAndApplyMixGain(
uint32_t common_sample_rate, InternalTimestamp start_timestamp,
InternalTimestamp end_timestamp,
const std::list<ParameterBlockWithData>& parameter_blocks,
const MixGainParamDefinition& mix_gain, int32_t num_channels,
std::vector<float>& linear_mix_gain_per_tick,
std::vector<InternalSampleType>& rendered_samples) {
if (rendered_samples.size() % num_channels != 0) {
return absl::InvalidArgumentError(absl::StrCat(
"Expected an integer number of interlaced channels. "
"renderered_samples.size()= ",
rendered_samples.size(), ", num_channels= ", num_channels));
}
// Get the mix gain on a per tick basis from the parameter block.
linear_mix_gain_per_tick.resize(rendered_samples.size() / num_channels, 0.0f);
RETURN_IF_NOT_OK(GetParameterBlockLinearMixGainsPerTick(
common_sample_rate, start_timestamp, end_timestamp, parameter_blocks,
mix_gain, linear_mix_gain_per_tick));
if (!linear_mix_gain_per_tick.empty()) {
LOG_FIRST_N(INFO, 6) << " First tick in this frame has gain: "
<< linear_mix_gain_per_tick.front();
}
for (int tick = 0; tick < linear_mix_gain_per_tick.size(); tick++) {
for (int channel = 0; channel < num_channels; channel++) {
// Apply the same mix gain to all `num_channels` associated with this
// tick.
rendered_samples[tick * num_channels + channel] *=
linear_mix_gain_per_tick[tick];
}
}
return absl::OkStatus();
}
absl::Status MixAudioElements(
std::vector<std::vector<InternalSampleType>>& rendered_audio_elements,
std::vector<InternalSampleType>& rendered_samples) {
const size_t num_samples = rendered_audio_elements.empty()
? 0
: rendered_audio_elements.front().size();
rendered_samples.reserve(num_samples);
for (const auto& rendered_audio_element : rendered_audio_elements) {
if (rendered_audio_element.size() != num_samples) {
return absl::UnknownError(
"Expected all frames to have the same number of samples.");
}
}
for (int i = 0; i < num_samples; i++) {
InternalSampleType mixed_sample = 0;
// Sum all audio elements for this tick.
for (const auto& rendered_audio_element : rendered_audio_elements) {
mixed_sample += rendered_audio_element[i];
}
// Push the clipped result.
rendered_samples.push_back(mixed_sample);
}
return absl::OkStatus();
}
// Fills in `valid_rendered_samples` which is a view backed by
// `rendered_samples` of the ticks actually rendered.
absl::Status RenderAllFramesForLayout(
int32_t num_channels,
const std::vector<SubMixAudioElement> sub_mix_audio_elements,
const MixGainParamDefinition& output_mix_gain,
const IdLabeledFrameMap& id_to_labeled_frame,
const std::vector<AudioElementRenderingMetadata>& rendering_metadata_array,
InternalTimestamp start_timestamp, InternalTimestamp end_timestamp,
const std::list<ParameterBlockWithData>& parameter_blocks,
const uint32_t common_sample_rate,
std::vector<std::vector<int32_t>>& rendered_samples,
absl::Span<const std::vector<int32_t>>& valid_rendered_samples) {
// Each audio element rendered individually with `element_mix_gain` applied.
std::vector<std::vector<InternalSampleType>> rendered_audio_elements(
sub_mix_audio_elements.size());
std::vector<float> linear_mix_gain_per_tick;
for (int i = 0; i < sub_mix_audio_elements.size(); i++) {
const SubMixAudioElement& sub_mix_audio_element = sub_mix_audio_elements[i];
const auto audio_element_id = sub_mix_audio_element.audio_element_id;
const auto& rendering_metadata = rendering_metadata_array[i];
if (id_to_labeled_frame.find(audio_element_id) !=
id_to_labeled_frame.end()) {
const auto& labeled_frame = id_to_labeled_frame.at(audio_element_id);
// Render the frame to the specified `loudness_layout` and apply element
// mix gain.
RETURN_IF_NOT_OK(RenderLabeledFrameToLayout(
labeled_frame, rendering_metadata, rendered_audio_elements[i]));
}
RETURN_IF_NOT_OK(GetAndApplyMixGain(
common_sample_rate, start_timestamp, end_timestamp, parameter_blocks,
sub_mix_audio_element.element_mix_gain, num_channels,
linear_mix_gain_per_tick, rendered_audio_elements[i]));
}
// Mix the audio elements.
std::vector<InternalSampleType> rendered_samples_internal;
RETURN_IF_NOT_OK(
MixAudioElements(rendered_audio_elements, rendered_samples_internal));
LOG_FIRST_N(INFO, 1) << " Applying output_mix_gain.default_mix_gain= "
<< output_mix_gain.default_mix_gain_;
RETURN_IF_NOT_OK(
GetAndApplyMixGain(common_sample_rate, start_timestamp, end_timestamp,
parameter_blocks, output_mix_gain, num_channels,
linear_mix_gain_per_tick, rendered_samples_internal));
// Convert the rendered samples to int32, clipping if needed.
size_t num_ticks = 0;
RETURN_IF_NOT_OK(ConvertInterleavedToTimeChannel(
absl::MakeConstSpan(rendered_samples_internal), num_channels,
absl::AnyInvocable<absl::Status(InternalSampleType, int32_t&) const>(
NormalizedFloatingPointToInt32<InternalSampleType>),
rendered_samples, num_ticks));
valid_rendered_samples =
absl::MakeConstSpan(rendered_samples).first(num_ticks);
return absl::OkStatus();
}
absl::Status ValidateUserLoudness(const LoudnessInfo& user_loudness,
const uint32_t mix_presentation_id,
const int sub_mix_index,
const int layout_index,
const LoudnessInfo& output_loudness,
bool& loudness_matches_user_data) {
const std::string mix_presentation_sub_mix_layout_index =
absl::StrCat("Mix Presentation(ID ", mix_presentation_id, ")->sub_mixes[",
sub_mix_index, "]->layouts[", layout_index, "]: ");
if (output_loudness.integrated_loudness !=
user_loudness.integrated_loudness) {
LOG(ERROR) << mix_presentation_sub_mix_layout_index
<< "Computed integrated loudness different from "
<< "user specification: " << output_loudness.integrated_loudness
<< " vs " << user_loudness.integrated_loudness;
loudness_matches_user_data = false;
}
if (output_loudness.digital_peak != user_loudness.digital_peak) {
LOG(ERROR) << mix_presentation_sub_mix_layout_index
<< "Computed digital peak different from "
<< "user specification: " << output_loudness.digital_peak
<< " vs " << user_loudness.digital_peak;
loudness_matches_user_data = false;
}
if (output_loudness.info_type & LoudnessInfo::kTruePeak) {
if (output_loudness.true_peak != user_loudness.true_peak) {
LOG(ERROR) << mix_presentation_sub_mix_layout_index
<< "Computed true peak different from "
<< "user specification: " << output_loudness.true_peak
<< " vs " << user_loudness.true_peak;
loudness_matches_user_data = false;
}
}
// Anchored loudness and layout extension are copied from the user input
// and do not need to be validated.
return absl::OkStatus();
}
// Calculates the loudness of the rendered samples. These rendered samples are
// for a specific timestamp for a given submix and layout. If
// `validate_loudness` is true, then the user provided loudness values are
// validated against the computed values.
absl::Status UpdateLoudnessInfoForLayout(
bool validate_loudness, const LoudnessInfo& input_loudness,
const uint32_t mix_presentation_id, const int sub_mix_index,
const int layout_index, bool& loudness_matches_user_data,
std::unique_ptr<LoudnessCalculatorBase> loudness_calculator,
LoudnessInfo& output_calculated_loudness) {
// Copy the final loudness values back to the output OBU.
auto calculated_loudness_info = loudness_calculator->QueryLoudness();
if (!calculated_loudness_info.ok()) {
return calculated_loudness_info.status();
}
if (validate_loudness) {
// Validate any user provided loudness values match computed values.
RETURN_IF_NOT_OK(ValidateUserLoudness(
input_loudness, mix_presentation_id, sub_mix_index, layout_index,
*calculated_loudness_info, loudness_matches_user_data));
}
output_calculated_loudness = *calculated_loudness_info;
return absl::OkStatus();
}
// Generates rendering metadata for all layouts within a submix. This includes
// optionally creating a sample processor and/or a loudness calculator for each
// layout.
absl::Status GenerateRenderingMetadataForLayouts(
const RendererFactoryBase& renderer_factory,
const LoudnessCalculatorFactoryBase* loudness_calculator_factory,
const RenderingMixPresentationFinalizer::SampleProcessorFactory&
sample_processor_factory,
const DecodedUleb128 mix_presentation_id,
const MixPresentationSubMix& sub_mix, int sub_mix_index,
const std::vector<const AudioElementWithData*>& audio_elements_in_sub_mix,
uint32_t common_sample_rate, uint8_t rendering_bit_depth,
uint32_t common_num_samples_per_frame,
std::vector<LayoutRenderingMetadata>& output_layout_rendering_metadata) {
output_layout_rendering_metadata.resize(sub_mix.layouts.size());
for (int layout_index = 0; layout_index < sub_mix.layouts.size();
layout_index++) {
LayoutRenderingMetadata& layout_rendering_metadata =
output_layout_rendering_metadata[layout_index];
const auto& layout = sub_mix.layouts[layout_index];
int32_t num_channels = 0;
auto can_render_status = MixPresentationObu::GetNumChannelsFromLayout(
layout.loudness_layout, num_channels);
layout_rendering_metadata.num_channels = num_channels;
can_render_status.Update(InitializeRenderingMetadata(
renderer_factory, audio_elements_in_sub_mix, sub_mix.audio_elements,
layout.loudness_layout, common_sample_rate,
layout_rendering_metadata.audio_element_rendering_metadata));
if (!can_render_status.ok()) {
layout_rendering_metadata.can_render = false;
continue;
} else {
layout_rendering_metadata.can_render = true;
}
if (loudness_calculator_factory != nullptr) {
// Optionally create a loudness calculator.
layout_rendering_metadata.loudness_calculator =
loudness_calculator_factory->CreateLoudnessCalculator(
layout, common_num_samples_per_frame, common_sample_rate,
rendering_bit_depth);
}
// Optionally create a post-processor.
layout_rendering_metadata.sample_processor = sample_processor_factory(
mix_presentation_id, sub_mix_index, layout_index,
layout.loudness_layout, num_channels, common_sample_rate,
rendering_bit_depth, common_num_samples_per_frame);
// Pre-allocate a buffer to store a frame's worth of rendered samples.
layout_rendering_metadata.rendered_samples.resize(
common_num_samples_per_frame, std::vector<int32_t>(num_channels, 0));
}
return absl::OkStatus();
}
// We generate one rendering metadata object for each submix. Once this
// metadata is generated, we will loop through it to render all submixes
// for a given timestamp. Within a submix, there can be many different audio
// elements and layouts that need to be rendered as well. Not all of these
// need to be rendered; only the ones that either have a wav writer or a
// loudness calculator.
absl::Status GenerateRenderingMetadataForSubmixes(
const RendererFactoryBase& renderer_factory,
absl::Nullable<const LoudnessCalculatorFactoryBase*>
loudness_calculator_factory,
const RenderingMixPresentationFinalizer::SampleProcessorFactory&
sample_processor_factory,
const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
const MixPresentationObu& mix_presentation_obu,
std::vector<SubmixRenderingMetadata>& output_rendering_metadata) {
const auto mix_presentation_id = mix_presentation_obu.GetMixPresentationId();
output_rendering_metadata.resize(mix_presentation_obu.sub_mixes_.size());
for (int sub_mix_index = 0;
sub_mix_index < mix_presentation_obu.sub_mixes_.size();
++sub_mix_index) {
SubmixRenderingMetadata& submix_rendering_metadata =
output_rendering_metadata[sub_mix_index];
const auto& sub_mix = mix_presentation_obu.sub_mixes_[sub_mix_index];
// Pointers to audio elements in this sub mix; useful later.
std::vector<const AudioElementWithData*> audio_elements_in_sub_mix;
RETURN_IF_NOT_OK(CollectAudioElementsInSubMix(
audio_elements, sub_mix.audio_elements, audio_elements_in_sub_mix));
submix_rendering_metadata.audio_elements_in_sub_mix =
sub_mix.audio_elements;
submix_rendering_metadata.mix_gain =
std::make_unique<MixGainParamDefinition>(sub_mix.output_mix_gain);
// Data common to all audio elements and layouts.
bool requires_resampling;
uint32_t common_num_samples_per_frame;
uint8_t rendering_bit_depth;
RETURN_IF_NOT_OK(GetCommonCodecConfigPropertiesFromAudioElementIds(
audio_elements_in_sub_mix, submix_rendering_metadata.common_sample_rate,
rendering_bit_depth, common_num_samples_per_frame,
requires_resampling));
if (requires_resampling) {
// Detected multiple Codec Config OBUs with different sample rates or
// bit-depths. As of IAMF v1.1.0, multiple Codec Config OBUs in the same
// IA sequence are never permitted. The spec implies we would have to
// resample to a common sample rate and/or bit-depth.
return absl::UnimplementedError(
"This implementation does not support mixing Codec Config OBUs with "
"different sample rates or bit-depths.");
}
RETURN_IF_NOT_OK(GenerateRenderingMetadataForLayouts(
renderer_factory, loudness_calculator_factory, sample_processor_factory,
mix_presentation_id, sub_mix, sub_mix_index, audio_elements_in_sub_mix,
submix_rendering_metadata.common_sample_rate, rendering_bit_depth,
common_num_samples_per_frame,
submix_rendering_metadata.layout_rendering_metadata));
}
return absl::OkStatus();
}
absl::Status FlushPostProcessors(
std::vector<SubmixRenderingMetadata>& rendering_metadata) {
for (auto& submix_rendering_metadata : rendering_metadata) {
for (auto& layout_rendering_metadata :
submix_rendering_metadata.layout_rendering_metadata) {
if (layout_rendering_metadata.sample_processor != nullptr) {
RETURN_IF_NOT_OK(layout_rendering_metadata.sample_processor->Flush());
}
}
}
return absl::OkStatus();
}
absl::Status FillLoudnessForMixPresentation(
bool validate_loudness,
std::vector<SubmixRenderingMetadata>& rendering_metadata,
MixPresentationObu& mix_presentation_obu) {
bool loudness_matches_user_data = true;
int submix_index = 0;
for (auto& submix_rendering_metadata : rendering_metadata) {
int layout_index = 0;
for (auto& layout_rendering_metadata :
submix_rendering_metadata.layout_rendering_metadata) {
if (layout_rendering_metadata.loudness_calculator != nullptr) {
RETURN_IF_NOT_OK(UpdateLoudnessInfoForLayout(
validate_loudness,
mix_presentation_obu.sub_mixes_[submix_index]
.layouts[layout_index]
.loudness,
mix_presentation_obu.GetMixPresentationId(), submix_index,
layout_index, loudness_matches_user_data,
std::move(layout_rendering_metadata.loudness_calculator),
mix_presentation_obu.sub_mixes_[submix_index]
.layouts[layout_index]
.loudness));
}
layout_index++;
}
submix_index++;
}
if (!loudness_matches_user_data) {
return absl::InvalidArgumentError("Loudness does not match user data.");
}
return absl::OkStatus();
}
// Renders all submixes, layouts, and audio elements for a temporal unit. It
// then optionally writes the rendered samples to a wav file and/or calculates
// the loudness of the rendered samples.
absl::Status RenderWriteAndCalculateLoudnessForTemporalUnit(
const IdLabeledFrameMap& id_to_labeled_frame,
InternalTimestamp start_timestamp, InternalTimestamp end_timestamp,
const std::list<ParameterBlockWithData>& parameter_blocks,
std::vector<SubmixRenderingMetadata>& rendering_metadata) {
for (auto& submix_rendering_metadata : rendering_metadata) {
for (auto& layout_rendering_metadata :
submix_rendering_metadata.layout_rendering_metadata) {
if (!layout_rendering_metadata.can_render) {
continue;
}
if (submix_rendering_metadata.mix_gain == nullptr) {
return absl::InvalidArgumentError("Submix mix gain is null");
}
RETURN_IF_NOT_OK(RenderAllFramesForLayout(
layout_rendering_metadata.num_channels,
submix_rendering_metadata.audio_elements_in_sub_mix,
*submix_rendering_metadata.mix_gain, id_to_labeled_frame,
layout_rendering_metadata.audio_element_rendering_metadata,
start_timestamp, end_timestamp, parameter_blocks,
submix_rendering_metadata.common_sample_rate,
layout_rendering_metadata.rendered_samples,
layout_rendering_metadata.valid_rendered_samples));
// Calculate loudness based on the original rendered samples; we do not
// know what post-processing the end user will have.
if (layout_rendering_metadata.loudness_calculator != nullptr) {
RETURN_IF_NOT_OK(
layout_rendering_metadata.loudness_calculator
->AccumulateLoudnessForSamples(
layout_rendering_metadata.valid_rendered_samples));
}
// Perform any post-processing.
if (layout_rendering_metadata.sample_processor != nullptr) {
RETURN_IF_NOT_OK(layout_rendering_metadata.sample_processor->PushFrame(
layout_rendering_metadata.valid_rendered_samples));
}
}
}
return absl::OkStatus();
}
absl::StatusOr<const LayoutRenderingMetadata*>
GetRenderedSamplesAndPostProcessor(
const absl::flat_hash_map<DecodedUleb128,
std::vector<SubmixRenderingMetadata>>&
mix_presentation_id_to_sub_mix_rendering_metadata,
DecodedUleb128 mix_presentation_id, size_t sub_mix_index,
size_t layout_index) {
// Lookup the requested layout in the requested mix presentation.
const auto sub_mix_rendering_metadata_it =
mix_presentation_id_to_sub_mix_rendering_metadata.find(
mix_presentation_id);
const auto mix_presentation_id_error_message =
absl::StrCat(" Mix Presentation ID ", mix_presentation_id);
if (sub_mix_rendering_metadata_it ==
mix_presentation_id_to_sub_mix_rendering_metadata.end()) {
return absl::NotFoundError(
absl::StrCat(mix_presentation_id_error_message,
" not found in rendering metadata."));
}
// Validate the sub mix and layout are in bounds, then retrieve it.
const auto& [unused_mix_presentation_id, sub_mix_rendering_metadatas] =
*sub_mix_rendering_metadata_it;
RETURN_IF_NOT_OK(Validate(
sub_mix_index, std::less<size_t>(), sub_mix_rendering_metadatas.size(),
absl::StrCat(mix_presentation_id_error_message, " sub_mix_index <")));
RETURN_IF_NOT_OK(Validate(
layout_index, std::less<size_t>(),
sub_mix_rendering_metadatas[sub_mix_index]
.layout_rendering_metadata.size(),
absl::StrCat(mix_presentation_id_error_message, " layout_index <")));
return &sub_mix_rendering_metadatas[sub_mix_index]
.layout_rendering_metadata[layout_index];
}
} // namespace
absl::StatusOr<RenderingMixPresentationFinalizer>
RenderingMixPresentationFinalizer::Create(
absl::Nullable<const RendererFactoryBase*> renderer_factory,
absl::Nullable<const LoudnessCalculatorFactoryBase*>
loudness_calculator_factory,
const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
const SampleProcessorFactory& sample_processor_factory,
const std::list<MixPresentationObu>& mix_presentation_obus) {
const bool rendering_enabled = renderer_factory != nullptr;
if (!rendering_enabled) {
LOG(INFO) << "Rendering is safely disabled.";
}
if (loudness_calculator_factory == nullptr) {
LOG(INFO) << "Loudness calculator factory is null so loudness will not be "
"calculated.";
}
absl::flat_hash_map<DecodedUleb128, std::vector<SubmixRenderingMetadata>>
mix_presentation_id_to_rendering_metadata;
std::list<MixPresentationObu> mix_presentation_obus_to_render;
for (const auto& mix_presentation_obu : mix_presentation_obus) {
// Copy all mix presentation OBUs, so they can be echoed back, even when
// rendering is disabled.
mix_presentation_obus_to_render.emplace_back(mix_presentation_obu);
// Fill in rendering metadata if rendering is enabled, and at least one
// layout can be rendered.
if (rendering_enabled) {
std::vector<SubmixRenderingMetadata> temp_sub_mix_rendering_metadata;
RETURN_IF_NOT_OK(GenerateRenderingMetadataForSubmixes(
*renderer_factory, loudness_calculator_factory,
sample_processor_factory, audio_elements, mix_presentation_obu,
temp_sub_mix_rendering_metadata));
if (CanRenderAnyLayout(temp_sub_mix_rendering_metadata)) {
mix_presentation_id_to_rendering_metadata.emplace(
mix_presentation_obu.GetMixPresentationId(),
std::move(temp_sub_mix_rendering_metadata));
}
}
}
return RenderingMixPresentationFinalizer(
std::move(mix_presentation_id_to_rendering_metadata),
std::move(mix_presentation_obus_to_render));
}
absl::Status RenderingMixPresentationFinalizer::PushTemporalUnit(
const IdLabeledFrameMap& id_to_labeled_frame,
InternalTimestamp start_timestamp, InternalTimestamp end_timestamp,
const std::list<ParameterBlockWithData>& parameter_blocks) {
switch (state_) {
case kAcceptingTemporalUnits:
// Ok to push.
break;
case kFinalizePushTemporalUnitCalled:
return absl::FailedPreconditionError(
"PushTemporalUnit() should not be called after "
"FinalizePushingTemporalUnits() has been called.");
case kFlushedFinalizedMixPresentationObus:
return absl::FailedPreconditionError(
"PushTemporalUnit() should not be called after "
"GetFinalizedMixPresentationOBUs() has been called.");
}
for (auto& [mix_presentation_ids, sub_mix_rendering_metadata] :
mix_presentation_id_to_sub_mix_rendering_metadata_) {
RETURN_IF_NOT_OK(RenderWriteAndCalculateLoudnessForTemporalUnit(
id_to_labeled_frame, start_timestamp, end_timestamp, parameter_blocks,
sub_mix_rendering_metadata));
}
return absl::OkStatus();
}
absl::StatusOr<absl::Span<const std::vector<int32_t>>>
RenderingMixPresentationFinalizer::GetPostProcessedSamplesAsSpan(
DecodedUleb128 mix_presentation_id, size_t sub_mix_index,
size_t layout_index) const {
const auto layout_rendering_metadata = GetRenderedSamplesAndPostProcessor(
mix_presentation_id_to_sub_mix_rendering_metadata_, mix_presentation_id,
sub_mix_index, layout_index);
if (!layout_rendering_metadata.ok()) {
return layout_rendering_metadata.status();
}
// `absl::StatusOr<const T*> cannot hold a nullptr.
CHECK_NE(*layout_rendering_metadata, nullptr);
// Prioritize returning the post-processed samples if a post-processor is
// available. Otherwise, return the rendered samples.
return (*layout_rendering_metadata)->sample_processor != nullptr
? (*layout_rendering_metadata)
->sample_processor->GetOutputSamplesAsSpan()
: (*layout_rendering_metadata)->valid_rendered_samples;
}
absl::Status RenderingMixPresentationFinalizer::FinalizePushingTemporalUnits() {
switch (state_) {
case kAcceptingTemporalUnits:
state_ = kFinalizePushTemporalUnitCalled;
break;
case kFinalizePushTemporalUnitCalled:
case kFlushedFinalizedMixPresentationObus:
return absl::FailedPreconditionError(
"FinalizePushingTemporalUnits() should not be called twice.");
}
for (auto& [mix_presentation_id, sub_mix_rendering_metadata] :
mix_presentation_id_to_sub_mix_rendering_metadata_) {
RETURN_IF_NOT_OK(FlushPostProcessors(sub_mix_rendering_metadata));
}
return absl::OkStatus();
}
absl::StatusOr<std::list<MixPresentationObu>>
RenderingMixPresentationFinalizer::GetFinalizedMixPresentationObus(
bool validate_loudness) {
switch (state_) {
case kAcceptingTemporalUnits:
return absl::FailedPreconditionError(
"FinalizePushingTemporalUnits() should be called before "
"GetFinalizedMixPresentationOBUs().");
case kFinalizePushTemporalUnitCalled:
// Ok to finalize.
break;
case kFlushedFinalizedMixPresentationObus:
return absl::FailedPreconditionError(
"GetFinalizedMixPresentationOBUs() should not be called twice.");
}
// Finalize the OBUs in place.
for (auto& mix_presentation_obu : mix_presentation_obus_) {
const auto sub_mix_rendering_metadata_it =
mix_presentation_id_to_sub_mix_rendering_metadata_.find(
mix_presentation_obu.GetMixPresentationId());
if (sub_mix_rendering_metadata_it ==
mix_presentation_id_to_sub_mix_rendering_metadata_.end()) {
LOG(INFO) << "Rendering was disabled for Mix Presentation ID= "
<< mix_presentation_obu.GetMixPresentationId()
<< " echoing the input OBU.";
continue;
}
RETURN_IF_NOT_OK(FillLoudnessForMixPresentation(
validate_loudness, sub_mix_rendering_metadata_it->second,
mix_presentation_obu));
mix_presentation_obu.PrintObu();
}
// Flush the finalized OBUs and mark that this class should not use them
// again.
state_ = kFlushedFinalizedMixPresentationObus;
return std::move(mix_presentation_obus_);
}
} // namespace iamf_tools