// blob: 5863447e8dce85c1c7dfc1c6793b09cf4e5d6e78 (source-viewer header residue)
/*
* Copyright (c) 2023, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear License
* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
* License was not distributed with this source code in the LICENSE file, you
* can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the
* Alliance for Open Media Patent License 1.0 was not distributed with this
* source code in the PATENTS file, you can obtain it at
* www.aomedia.org/license/patent.
*/
#include "iamf/cli/encoder_main_lib.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <list>
#include <memory>
#include <optional>
#include <string>
#include <system_error>
#include "absl/container/flat_hash_map.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/demixing_module.h"
#include "iamf/cli/iamf_components.h"
#include "iamf/cli/iamf_encoder.h"
#include "iamf/cli/obu_sequencer_base.h"
#include "iamf/cli/parameter_block_partitioner.h"
#include "iamf/cli/parameter_block_with_data.h"
#include "iamf/cli/proto/temporal_delimiter.pb.h"
#include "iamf/cli/proto/test_vector_metadata.pb.h"
#include "iamf/cli/proto/user_metadata.pb.h"
#include "iamf/cli/wav_sample_provider.h"
#include "iamf/cli/wav_writer.h"
#include "iamf/common/utils/macros.h"
#include "iamf/obu/arbitrary_obu.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/ia_sequence_header.h"
#include "iamf/obu/mix_presentation.h"
#include "iamf/obu/types.h"
#include "src/google/protobuf/repeated_ptr_field.h"
namespace iamf_tools {
namespace {
using iamf_tools_cli_proto::ParameterBlockObuMetadata;
using iamf_tools_cli_proto::UserMetadata;
// Re-partitions every parameter block in `user_metadata` in place.
//
// The partition duration is obtained from
// `ParameterBlockPartitioner::FindPartitionDuration()` using the primary
// profile of the first IA sequence header and the first codec config. Each
// existing parameter block is then passed through
// `ParameterBlockPartitioner::PartitionFrameAligned()`, and the results replace
// the original `parameter_block_metadata` entries.
//
// Returns `absl::InvalidArgumentError` when there is no IA sequence header
// metadata or no codec config metadata, and forwards any error from the
// partitioner. On such an error `user_metadata` has not been modified.
absl::Status PartitionParameterMetadata(UserMetadata& user_metadata) {
  const bool has_sequence_header =
      !user_metadata.ia_sequence_header_metadata().empty();
  const bool has_codec_config = !user_metadata.codec_config_metadata().empty();
  if (!(has_sequence_header && has_codec_config)) {
    return absl::InvalidArgumentError(
        "Determining the partition duration requires at least one "
        "`ia_sequence_header_metadata` and one `codec_config_metadata`");
  }

  uint32_t duration_per_partition = 0;
  RETURN_IF_NOT_OK(ParameterBlockPartitioner::FindPartitionDuration(
      user_metadata.ia_sequence_header_metadata(0).primary_profile(),
      user_metadata.codec_config_metadata(0), duration_per_partition));

  // Accumulate the partitioned blocks on the side; the originals are still
  // being read while this list is filled.
  std::list<ParameterBlockObuMetadata> repartitioned_blocks;
  const auto& original_blocks = user_metadata.parameter_block_metadata();
  for (auto block_iter = original_blocks.begin();
       block_iter != original_blocks.end(); ++block_iter) {
    RETURN_IF_NOT_OK(ParameterBlockPartitioner::PartitionFrameAligned(
        duration_per_partition, *block_iter, repartitioned_blocks));
  }

  // Swap the partitioned blocks in for the originals.
  user_metadata.clear_parameter_block_metadata();
  for (const ParameterBlockObuMetadata& block : repartitioned_blocks) {
    auto* destination = user_metadata.add_parameter_block_metadata();
    *destination = block;
  }
  return absl::OkStatus();
}
// Parameter block metadata grouped by start timestamp: each key is a start
// timestamp and each value lists the metadata that shares it.
using TimeParameterBlockMetadataMap =
    absl::flat_hash_map<int32_t, std::list<ParameterBlockObuMetadata>>;
// Groups `parameter_block_metadata` by start timestamp.
//
// Every entry is copied, in input order, onto the end of the list stored under
// its `start_timestamp()` in `time_parameter_block_metadata`. Entries already
// present in the output map are kept. Always returns `absl::OkStatus()`.
absl::Status OrganizeParameterBlockMetadata(
    const google::protobuf::RepeatedPtrField<ParameterBlockObuMetadata>&
        parameter_block_metadata,
    TimeParameterBlockMetadataMap& time_parameter_block_metadata) {
  for (auto entry = parameter_block_metadata.begin();
       entry != parameter_block_metadata.end(); ++entry) {
    auto& blocks_at_timestamp =
        time_parameter_block_metadata[entry->start_timestamp()];
    blocks_at_timestamp.push_back(*entry);
  }
  return absl::OkStatus();
}
// Reads frames for every audio element from `wav_sample_provider`.
//
// For each key of `audio_elements`, `WavSampleProvider::ReadFrames()` is
// called with the slot of `id_to_labeled_samples` for that ID (created on
// demand) and with `no_more_real_samples`. The same flag is handed to every
// call. Stops at, and returns, the first error reported by `ReadFrames()`.
absl::Status CollectLabeledSamplesForAudioElements(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements,
    WavSampleProvider& wav_sample_provider,
    absl::flat_hash_map<DecodedUleb128, LabelSamplesMap>& id_to_labeled_samples,
    bool& no_more_real_samples) {
  for (auto element = audio_elements.begin(); element != audio_elements.end();
       ++element) {
    const auto& element_id = element->first;
    auto& samples_for_element = id_to_labeled_samples[element_id];
    RETURN_IF_NOT_OK(wav_sample_provider.ReadFrames(
        element_id, samples_for_element, no_more_real_samples));
  }
  return absl::OkStatus();
}
void PrintAudioFrames(const std::list<AudioFrameWithData>& audio_frames) {
// Print the first, last, and any audio frames with `trimming_status_flag`
// set.
int i = 0;
for (const auto& audio_frame_with_data : audio_frames) {
if (i == 0 || i == audio_frames.size() - 1 ||
audio_frame_with_data.obu.header_.obu_trimming_status_flag) {
LOG(INFO) << "Audio Frame OBU[" << i << "]";
audio_frame_with_data.obu.PrintObu();
LOG(INFO) << " audio frame.start_timestamp= "
<< audio_frame_with_data.start_timestamp;
LOG(INFO) << " audio frame.end_timestamp= "
<< audio_frame_with_data.end_timestamp;
}
i++;
}
}
// Ensures `output_directory` exists, creating it and any missing parents.
//
// Nothing is created, and `absl::OkStatus()` is returned, when the path is
// empty, is already a directory, or is a character special file.
// NOTE(review): the character-file case presumably allows device paths such as
// `/dev/null` as the output location - confirm.
//
// Returns `absl::UnknownError` with the path and the reason when the directory
// cannot be created.
absl::Status CreateOutputDirectory(const std::string& output_directory) {
  if (output_directory.empty()) {
    return absl::OkStatus();
  }

  // The `std::error_code` overloads are used throughout so that filesystem
  // problems surface as a status instead of a `std::filesystem_error`.
  std::error_code error_code;
  if (std::filesystem::is_directory(output_directory, error_code) ||
      std::filesystem::is_character_file(output_directory, error_code)) {
    return absl::OkStatus();
  }

  // `create_directories` returns false both on failure and when the directory
  // already exists (for example if it was created concurrently), so success is
  // decided by the error code and by the directory being present afterwards.
  error_code.clear();
  std::filesystem::create_directories(output_directory, error_code);
  if (!error_code &&
      std::filesystem::is_directory(output_directory, error_code)) {
    return absl::OkStatus();
  }

  const std::string reason =
      error_code ? error_code.message()
                 : std::string("path exists but is not a directory");
  return absl::UnknownError(
      absl::StrCat("Failed to create output directory = ", output_directory,
                   " (", reason, ")"));
}
// Drives `iamf_encoder` until it stops generating data OBUs, collecting the
// results.
//
// Each loop iteration handles one temporal unit:
//   1. Reads samples for every audio element from the WAV sample provider and
//      adds them to the encoder.
//   2. Finalizes sample input once the provider reports no more real samples.
//   3. Adds any parameter block metadata whose start timestamp equals the
//      encoder's current input timestamp.
//   4. Asks the encoder to output the temporal unit and appends the produced
//      audio frames and parameter blocks to the output lists.
//
// Args:
//   user_metadata: Source of the audio frame and parameter block metadata.
//   input_wav_directory: Directory passed to `WavSampleProvider::Create()`.
//   iamf_encoder: Encoder to feed and to pull temporal units from.
//   audio_elements: Audio elements to read samples for.
//   audio_frames: Output list; generated audio frames are appended.
//   parameter_blocks: Output list; generated parameter blocks are appended.
//
// Returns the first error from the sample provider or the encoder, otherwise
// `absl::OkStatus()`.
absl::Status GenerateTemporalUnitObus(
    const UserMetadata& user_metadata, const std::string& input_wav_directory,
    IamfEncoder& iamf_encoder,
    absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements,
    std::list<AudioFrameWithData>& audio_frames,
    std::list<ParameterBlockWithData>& parameter_blocks) {
  auto wav_sample_provider =
      WavSampleProvider::Create(user_metadata.audio_frame_metadata(),
                                input_wav_directory, audio_elements);
  if (!wav_sample_provider.ok()) {
    return wav_sample_provider.status();
  }

  // Parameter blocks, grouped by the timestamp at which they start.
  TimeParameterBlockMetadataMap time_parameter_block_metadata;
  RETURN_IF_NOT_OK(OrganizeParameterBlockMetadata(
      user_metadata.parameter_block_metadata(), time_parameter_block_metadata));

  // TODO(b/329375123): Make two while loops that run on two threads: one for
  //                    adding samples and parameter block metadata, and one for
  //                    outputting OBUs.
  int data_obus_iteration = 0;  // Just for logging purposes.
  while (iamf_encoder.GeneratingDataObus()) {
    LOG_EVERY_N_SEC(INFO, 5)
        << "\n\n============================= Generating Data OBUs Iter #"
        << data_obus_iteration << " =============================\n";
    // Incremented outside the log statement: operands streamed into
    // `LOG_EVERY_N_SEC` are only evaluated when the statement actually logs,
    // so an increment inside it would count log lines rather than iterations.
    ++data_obus_iteration;

    iamf_encoder.BeginTemporalUnit();

    int32_t input_timestamp = 0;
    RETURN_IF_NOT_OK(iamf_encoder.GetInputTimestamp(input_timestamp));

    // Add audio samples.
    absl::flat_hash_map<DecodedUleb128, LabelSamplesMap> id_to_labeled_samples;
    bool no_more_real_samples = false;
    RETURN_IF_NOT_OK(CollectLabeledSamplesForAudioElements(
        audio_elements, *wav_sample_provider, id_to_labeled_samples,
        no_more_real_samples));
    for (const auto& [audio_element_id, labeled_samples] :
         id_to_labeled_samples) {
      for (const auto& [channel_label, samples] : labeled_samples) {
        iamf_encoder.AddSamples(audio_element_id, channel_label, samples);
      }
    }

    // In this program we always use up all samples from a WAV file, so we
    // call `IamfEncoder::FinalizeAddSamples()` only when there are no more
    // real samples. In other applications, the user may decide to stop adding
    // audio samples based on other criteria.
    if (no_more_real_samples) {
      iamf_encoder.FinalizeAddSamples();
    }

    // Add parameter block metadata that starts at this timestamp. `find()` is
    // used instead of `operator[]` so timestamps without metadata do not
    // insert empty lists into the map.
    const auto metadata_at_timestamp =
        time_parameter_block_metadata.find(input_timestamp);
    if (metadata_at_timestamp != time_parameter_block_metadata.end()) {
      for (const auto& metadata : metadata_at_timestamp->second) {
        RETURN_IF_NOT_OK(iamf_encoder.AddParameterBlockMetadata(metadata));
      }
    }

    std::list<AudioFrameWithData> temp_audio_frames;
    std::list<ParameterBlockWithData> temp_parameter_blocks;
    RETURN_IF_NOT_OK(iamf_encoder.OutputTemporalUnit(temp_audio_frames,
                                                     temp_parameter_blocks));

    if (temp_audio_frames.empty()) {
      // Some audio codecs will only output an encoded frame after the next
      // frame "pushes" the old one out. So we wait till the next iteration to
      // retrieve it.
      LOG(INFO) << "No audio frame generated in this iteration; continue.";
      continue;
    }

    audio_frames.splice(audio_frames.end(), temp_audio_frames);
    parameter_blocks.splice(parameter_blocks.end(), temp_parameter_blocks);
  }
  LOG(INFO) << "\n============================= END of Generating Data OBUs"
            << " =============================\n\n";
  PrintAudioFrames(audio_frames);

  return absl::OkStatus();
}
// TODO(b/390392510): Update control of output wav file bit-depth.
//
// Converts a requested wav file bit-depth into an optional override.
//
// A request of 0 means "no override" and yields `std::nullopt`. Any other
// request is limited to the range [16, 32] and returned as a `uint8_t`.
std::optional<uint8_t> GetOverrideBitDepth(uint32_t requested_bit_depth) {
  // Bounds of the bit-depths supported by wav files.
  constexpr uint32_t kLowestWavFileBitDepth = 16;
  constexpr uint32_t kHighestWavFileBitDepth = 32;

  if (requested_bit_depth == 0) {
    return std::nullopt;
  }

  uint32_t limited_bit_depth = requested_bit_depth;
  if (limited_bit_depth < kLowestWavFileBitDepth) {
    limited_bit_depth = kLowestWavFileBitDepth;
  } else if (limited_bit_depth > kHighestWavFileBitDepth) {
    limited_bit_depth = kHighestWavFileBitDepth;
  }
  return static_cast<uint8_t>(limited_bit_depth);
}
// Hands the complete set of OBUs to every OBU sequencer.
//
// The sequencers come from `CreateObuSequencers()`, which receives
// `user_metadata`, `output_iamf_directory`, and the
// `enable_temporal_delimiters` setting from the temporal delimiter metadata.
// `PickAndPlace()` is called on each sequencer in turn with all of the OBUs.
// Stops at, and returns, the first error from `PickAndPlace()`; otherwise
// returns `absl::OkStatus()`.
absl::Status WriteObus(
    const UserMetadata& user_metadata, const std::string& output_iamf_directory,
    const IASequenceHeaderObu& ia_sequence_header_obu,
    const absl::flat_hash_map<uint32_t, CodecConfigObu>& codec_config_obus,
    const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements,
    const std::list<MixPresentationObu>& mix_presentation_obus,
    const std::list<AudioFrameWithData>& audio_frames,
    const std::list<ParameterBlockWithData>& parameter_blocks,
    const std::list<ArbitraryObu>& arbitrary_obus) {
  // TODO(b/349271859): Move the OBU sequencer inside `IamfEncoder`.
  auto sequencers = CreateObuSequencers(
      user_metadata, output_iamf_directory,
      user_metadata.temporal_delimiter_metadata().enable_temporal_delimiters());

  for (auto& sequencer : sequencers) {
    RETURN_IF_NOT_OK(sequencer->PickAndPlace(
        ia_sequence_header_obu, codec_config_obus, audio_elements,
        mix_presentation_obus, audio_frames, parameter_blocks, arbitrary_obus));
  }
  return absl::OkStatus();
}
} // namespace
// Encodes an IA sequence described by `input_user_metadata` and writes it out.
//
// Steps, in order:
//   1. Creates `output_iamf_directory` if needed.
//   2. Optionally partitions the parameter block metadata (on a private copy
//      of the user metadata), when the test vector metadata requests it.
//   3. Creates an `IamfEncoder`, together with a wav writer factory that names
//      rendered files after the configured file name prefix.
//   4. Generates all audio frames and parameter blocks from the wav files in
//      `input_wav_directory`.
//   5. Retrieves the finalized mix presentation OBUs and writes all OBUs via
//      the OBU sequencers.
//
// Args:
//   input_user_metadata: Metadata describing what to encode; not modified.
//   input_wav_directory: Directory holding the input wav files.
//   output_iamf_directory: Directory for the IAMF output and rendered wavs.
//
// Returns the first error encountered, otherwise `absl::OkStatus()`.
absl::Status TestMain(const UserMetadata& input_user_metadata,
                      const std::string& input_wav_directory,
                      const std::string& output_iamf_directory) {
  // Make a copy before modifying.
  UserMetadata user_metadata(input_user_metadata);

  std::optional<IASequenceHeaderObu> ia_sequence_header_obu;
  absl::flat_hash_map<uint32_t, CodecConfigObu> codec_config_obus;
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  std::list<MixPresentationObu> preliminary_mix_presentation_obus;
  std::list<AudioFrameWithData> audio_frames;
  std::list<ParameterBlockWithData> parameter_blocks;
  std::list<ArbitraryObu> arbitrary_obus;

  // Create output directories.
  RETURN_IF_NOT_OK(CreateOutputDirectory(output_iamf_directory));

  // Partition parameter block metadata if necessary. This will overwrite
  // `user_metadata.mutable_parameter_block_metadata()`.
  if (user_metadata.test_vector_metadata()
          .partition_mix_gain_parameter_blocks()) {
    RETURN_IF_NOT_OK(PartitionParameterMetadata(user_metadata));
  }

  // Write the output audio streams which were used to measure loudness to the
  // same directory as the IAMF file.
  const std::string output_wav_file_prefix =
      (std::filesystem::path(output_iamf_directory) /
       user_metadata.test_vector_metadata().file_name_prefix())
          .string();
  const std::optional<uint8_t> override_bit_depth =
      GetOverrideBitDepth(user_metadata.test_vector_metadata()
                              .output_wav_file_bit_depth_override());
  LOG(INFO) << "output_wav_file_prefix = " << output_wav_file_prefix;

  // Factory producing one wav writer per (mix presentation, sub mix, layout).
  // Captures are by value so the lambda stays valid wherever it is stored.
  const auto ProduceAllWavWriters =
      [output_wav_file_prefix, override_bit_depth](
          DecodedUleb128 mix_presentation_id, int sub_mix_index,
          int layout_index, const Layout&, int num_channels, int sample_rate,
          int bit_depth,
          size_t max_input_samples_per_frame) -> std::unique_ptr<WavWriter> {
    const auto wav_path = absl::StrCat(
        output_wav_file_prefix, "_rendered_id_", mix_presentation_id,
        "_sub_mix_", sub_mix_index, "_layout_", layout_index, ".wav");

    // Obey the override bit depth. But if it is not set, we can infer a good
    // bit-depth from the input audio.
    const uint8_t wav_file_bit_depth = override_bit_depth.value_or(bit_depth);
    return WavWriter::Create(wav_path, num_channels, sample_rate,
                             wav_file_bit_depth, max_input_samples_per_frame);
  };

  // Hold the factories in named locals so the raw pointers handed to the
  // encoder stay valid for the rest of this function.
  // NOTE(review): whether `IamfEncoder::Create()` retains these pointers is not
  // visible here; keeping them alive is safe either way.
  auto renderer_factory = CreateRendererFactory();
  auto loudness_calculator_factory = CreateLoudnessCalculatorFactory();

  // We want to hold the `IamfEncoder` until all OBUs have been written.
  auto iamf_encoder = IamfEncoder::Create(
      user_metadata, renderer_factory.get(), loudness_calculator_factory.get(),
      ProduceAllWavWriters, ia_sequence_header_obu, codec_config_obus,
      audio_elements, preliminary_mix_presentation_obus, arbitrary_obus);
  if (!iamf_encoder.ok()) {
    return iamf_encoder.status();
  }
  // Guard the later dereference: report a status rather than calling
  // `value()` on an empty optional.
  if (!ia_sequence_header_obu.has_value()) {
    return absl::InternalError(
        "IA Sequence Header OBU was not produced by the encoder.");
  }

  // Discard the "preliminary" mix presentation OBUs. We only care about the
  // finalized ones, which are not possible to know until audio encoding is
  // complete.
  preliminary_mix_presentation_obus.clear();

  RETURN_IF_NOT_OK(GenerateTemporalUnitObus(user_metadata, input_wav_directory,
                                            *iamf_encoder, audio_elements,
                                            audio_frames, parameter_blocks));

  // Audio encoding is complete. Retrieve the OBUs which have the finalized
  // loudness information.
  const auto finalized_mix_presentation_obus =
      iamf_encoder->GetFinalizedMixPresentationObus();
  if (!finalized_mix_presentation_obus.ok()) {
    return finalized_mix_presentation_obus.status();
  }

  RETURN_IF_NOT_OK(WriteObus(user_metadata, output_iamf_directory,
                             *ia_sequence_header_obu, codec_config_obus,
                             audio_elements, *finalized_mix_presentation_obus,
                             audio_frames, parameter_blocks, arbitrary_obus));

  return absl::OkStatus();
}
} // namespace iamf_tools