| /* |
| * Copyright (c) 2024, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 3-Clause Clear License |
| * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear |
| * License was not distributed with this source code in the LICENSE file, you |
| * can obtain it at www.aomedia.org/license/software-license/bsd-3-c-c. If the |
| * Alliance for Open Media Patent License 1.0 was not distributed with this |
| * source code in the PATENTS file, you can obtain it at |
| * www.aomedia.org/license/patent. |
| */ |
| |
| #include "iamf/cli/obu_processor.h" |
| |
| #include <cstddef> |
| #include <cstdint> |
| #include <list> |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "absl/container/flat_hash_map.h" |
| #include "absl/container/flat_hash_set.h" |
| #include "absl/log/check.h" |
| #include "absl/log/log.h" |
| #include "absl/memory/memory.h" |
| #include "absl/status/status.h" |
| #include "absl/status/statusor.h" |
| #include "absl/strings/str_cat.h" |
| #include "absl/strings/string_view.h" |
| #include "absl/types/span.h" |
| #include "iamf/cli/audio_element_with_data.h" |
| #include "iamf/cli/audio_frame_with_data.h" |
| #include "iamf/cli/cli_util.h" |
| #include "iamf/cli/demixing_module.h" |
| #include "iamf/cli/global_timing_module.h" |
| #include "iamf/cli/obu_with_data_generator.h" |
| #include "iamf/cli/parameter_block_with_data.h" |
| #include "iamf/cli/parameters_manager.h" |
| #include "iamf/cli/profile_filter.h" |
| #include "iamf/cli/renderer_factory.h" |
| #include "iamf/cli/rendering_mix_presentation_finalizer.h" |
| #include "iamf/cli/sample_processor_base.h" |
| #include "iamf/common/read_bit_buffer.h" |
| #include "iamf/common/utils/macros.h" |
| #include "iamf/common/utils/validation_utils.h" |
| #include "iamf/obu/audio_element.h" |
| #include "iamf/obu/audio_frame.h" |
| #include "iamf/obu/codec_config.h" |
| #include "iamf/obu/ia_sequence_header.h" |
| #include "iamf/obu/mix_presentation.h" |
| #include "iamf/obu/obu_header.h" |
| #include "iamf/obu/param_definition_variant.h" |
| #include "iamf/obu/parameter_block.h" |
| #include "iamf/obu/temporal_delimiter.h" |
| #include "iamf/obu/types.h" |
| |
| namespace iamf_tools { |
| |
| namespace { |
| |
| // Gets a CodecConfigObu from `read_bit_buffer` and stores it into |
| // `codec_config_obu_map`, using the `codec_config_id` as the unique key. |
| absl::Status GetAndStoreCodecConfigObu( |
| const ObuHeader& header, int64_t payload_size, |
| absl::flat_hash_map<DecodedUleb128, CodecConfigObu>& codec_config_obu_map, |
| ReadBitBuffer& read_bit_buffer) { |
| absl::StatusOr<CodecConfigObu> codec_config_obu = |
| CodecConfigObu::CreateFromBuffer(header, payload_size, read_bit_buffer); |
| if (!codec_config_obu.ok()) { |
| return codec_config_obu.status(); |
| } |
| codec_config_obu->PrintObu(); |
| codec_config_obu_map.insert( |
| {codec_config_obu->GetCodecConfigId(), *std::move(codec_config_obu)}); |
| return absl::OkStatus(); |
| } |
| |
| absl::Status GetAndStoreAudioElementObu( |
| const ObuHeader& header, int64_t payload_size, |
| absl::flat_hash_map<DecodedUleb128, AudioElementObu>& audio_element_obu_map, |
| ReadBitBuffer& read_bit_buffer) { |
| absl::StatusOr<AudioElementObu> audio_element_obu = |
| AudioElementObu::CreateFromBuffer(header, payload_size, read_bit_buffer); |
| if (!audio_element_obu.ok()) { |
| return audio_element_obu.status(); |
| } |
| audio_element_obu->PrintObu(); |
| audio_element_obu_map.insert( |
| {audio_element_obu->GetAudioElementId(), *std::move(audio_element_obu)}); |
| return absl::OkStatus(); |
| } |
| |
| absl::Status GetAndStoreMixPresentationObu( |
| const ObuHeader& header, int64_t payload_size, |
| std::list<MixPresentationObu>& mix_presentation_obus, |
| ReadBitBuffer& read_bit_buffer) { |
| absl::StatusOr<MixPresentationObu> mix_presentation_obu = |
| MixPresentationObu::CreateFromBuffer(header, payload_size, |
| read_bit_buffer); |
| if (!mix_presentation_obu.ok()) { |
| return mix_presentation_obu.status(); |
| } |
| LOG(INFO) << "Mix Presentation OBU successfully parsed."; |
| mix_presentation_obu->PrintObu(); |
| mix_presentation_obus.push_back(*std::move(mix_presentation_obu)); |
| return absl::OkStatus(); |
| } |
| |
| absl::Status UpdateParameterStatesIfNeeded( |
| const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& |
| audio_elements_with_data, |
| const GlobalTimingModule& global_timing_module, |
| ParametersManager& parameters_manager) { |
| std::optional<InternalTimestamp> global_timestamp; |
| RETURN_IF_NOT_OK( |
| global_timing_module.GetGlobalAudioFrameTimestamp(global_timestamp)); |
| // Not ready to update the states yet. |
| if (!global_timestamp.has_value()) { |
| return absl::OkStatus(); |
| } |
| |
| // The audio frames for all audio elements are finished; update the |
| // parameters manager. |
| for (const auto& [audio_element_id, unused_element] : |
| audio_elements_with_data) { |
| RETURN_IF_NOT_OK(parameters_manager.UpdateDemixingState(audio_element_id, |
| *global_timestamp)); |
| RETURN_IF_NOT_OK(parameters_manager.UpdateReconGainState( |
| audio_element_id, *global_timestamp)); |
| } |
| return absl::OkStatus(); |
| } |
| |
| absl::Status GetAndStoreAudioFrameWithData( |
| const ObuHeader& header, const int64_t payload_size, |
| const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& |
| audio_elements_with_data, |
| const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>& |
| substream_id_to_audio_element, |
| ReadBitBuffer& read_bit_buffer, GlobalTimingModule& global_timing_module, |
| ParametersManager& parameters_manager, |
| std::optional<AudioFrameWithData>& output_audio_frame_with_data) { |
| output_audio_frame_with_data.reset(); |
| auto audio_frame_obu = |
| AudioFrameObu::CreateFromBuffer(header, payload_size, read_bit_buffer); |
| if (!audio_frame_obu.ok()) { |
| return audio_frame_obu.status(); |
| } |
| const auto substream_id = audio_frame_obu->GetSubstreamId(); |
| const auto audio_element_iter = |
| substream_id_to_audio_element.find(substream_id); |
| if (audio_element_iter == substream_id_to_audio_element.end()) { |
| return absl::InvalidArgumentError(absl::StrCat( |
| "No audio element found having substream ID: ", substream_id)); |
| } |
| const auto& audio_element_with_data = *audio_element_iter->second; |
| auto audio_frame_with_data = ObuWithDataGenerator::GenerateAudioFrameWithData( |
| audio_element_with_data, *audio_frame_obu, global_timing_module, |
| parameters_manager); |
| if (!audio_frame_with_data.ok()) { |
| return audio_frame_with_data.status(); |
| } |
| output_audio_frame_with_data = *audio_frame_with_data; |
| |
| RETURN_IF_NOT_OK(UpdateParameterStatesIfNeeded( |
| audio_elements_with_data, global_timing_module, parameters_manager)); |
| |
| return absl::OkStatus(); |
| } |
| |
| absl::Status GetAndStoreParameterBlockWithData( |
| const ObuHeader& header, const int64_t payload_size, |
| const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>& |
| param_definition_variants, |
| ReadBitBuffer& read_bit_buffer, GlobalTimingModule& global_timing_module, |
| std::optional<ParameterBlockWithData>& output_parameter_block_with_data) { |
| auto parameter_block_obu = ParameterBlockObu::CreateFromBuffer( |
| header, payload_size, param_definition_variants, read_bit_buffer); |
| if (!parameter_block_obu.ok()) { |
| return parameter_block_obu.status(); |
| } |
| |
| std::optional<InternalTimestamp> global_timestamp; |
| RETURN_IF_NOT_OK( |
| global_timing_module.GetGlobalAudioFrameTimestamp(global_timestamp)); |
| if (!global_timestamp.has_value()) { |
| return absl::InvalidArgumentError( |
| "Global timestamp has no value while generating a parameter " |
| "block"); |
| } |
| |
| // Process the newly parsed parameter block OBU. |
| auto parameter_block_with_data = |
| ObuWithDataGenerator::GenerateParameterBlockWithData( |
| *global_timestamp, global_timing_module, |
| std::move(*parameter_block_obu)); |
| if (!parameter_block_with_data.ok()) { |
| return parameter_block_with_data.status(); |
| } |
| output_parameter_block_with_data = std::move(*parameter_block_with_data); |
| |
| return absl::OkStatus(); |
| } |
| |
| // Returns a list of pointers to the supported mix presentations. Empty if none |
| // are supported. |
| std::list<MixPresentationObu*> GetSupportedMixPresentations( |
| const absl::flat_hash_map<uint32_t, AudioElementWithData>& audio_elements, |
| std::list<MixPresentationObu>& mix_presentation_obus) { |
| // TODO(b/377554944): Support `ProfileVersion::kIamfBaseEnhancedProfile`. |
| // Only permit certain profiles to be used. |
| const absl::flat_hash_set<ProfileVersion> kSupportedProfiles = { |
| ProfileVersion::kIamfSimpleProfile, ProfileVersion::kIamfBaseProfile}; |
| |
| std::list<MixPresentationObu*> supported_mix_presentations; |
| std::string cumulative_error_message; |
| for (auto iter = mix_presentation_obus.begin(); |
| iter != mix_presentation_obus.end(); ++iter) { |
| auto profiles = kSupportedProfiles; |
| const auto status = ProfileFilter::FilterProfilesForMixPresentation( |
| audio_elements, *iter, profiles); |
| if (status.ok()) { |
| supported_mix_presentations.push_back(&*iter); |
| } |
| absl::StrAppend(&cumulative_error_message, status.message(), "\n"); |
| } |
| LOG(INFO) << "Filtered mix presentations: " << cumulative_error_message; |
| return supported_mix_presentations; |
| } |
| |
| // Searches for the desired layout in the supported mix presentations. If found, |
| // the output_playback_layout is the same as the desired_layout. Otherwise, we |
| // default to the first layout in the first unsupported mix presentation. |
| absl::StatusOr<MixPresentationObu*> GetPlaybackLayoutAndMixPresentation( |
| const std::list<MixPresentationObu*>& supported_mix_presentations, |
| const Layout& desired_layout, Layout& output_playback_layout) { |
| for (const auto& mix_presentation : supported_mix_presentations) { |
| for (const auto& sub_mix : mix_presentation->sub_mixes_) { |
| for (const auto& layout : sub_mix.layouts) { |
| if (layout.loudness_layout == desired_layout) { |
| output_playback_layout = layout.loudness_layout; |
| return mix_presentation; |
| } |
| } |
| } |
| } |
| // If we get here, we didn't find the desired layout in any of the supported |
| // mix presentations. We default to the first layout in the first mix |
| // presentation. |
| MixPresentationObu* output_mix_presentation = |
| supported_mix_presentations.front(); |
| if (output_mix_presentation->sub_mixes_.empty()) { |
| return absl::InvalidArgumentError( |
| "No submixes found in the first mix presentation."); |
| } |
| if (output_mix_presentation->sub_mixes_.front().layouts.empty()) { |
| return absl::InvalidArgumentError( |
| "No layouts found in the first submix of the first mix presentation."); |
| } |
| output_playback_layout = output_mix_presentation->sub_mixes_.front() |
| .layouts.front() |
| .loudness_layout; |
| return output_mix_presentation; |
| } |
| |
| // Resets the buffer to `start_position` and sets the `insufficient_data` |
| // flag to `true`. Clears the output maps. |
| absl::Status InsufficientDataReset( |
| ReadBitBuffer& read_bit_buffer, const int64_t start_position, |
| bool& insufficient_data, |
| absl::flat_hash_map<DecodedUleb128, CodecConfigObu>& |
| output_codec_config_obus, |
| absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& |
| output_audio_elements_with_data, |
| std::list<MixPresentationObu>& output_mix_presentation_obus) { |
| LOG(INFO) << "Insufficient data to process all descriptor OBUs."; |
| insufficient_data = true; |
| output_codec_config_obus.clear(); |
| output_audio_elements_with_data.clear(); |
| output_mix_presentation_obus.clear(); |
| RETURN_IF_NOT_OK(read_bit_buffer.Seek(start_position)); |
| LOG(INFO) << "Reset the buffer to the beginning."; |
| return absl::ResourceExhaustedError( |
| "Insufficient data to process all descriptor OBUs. Please provide " |
| "more data and try again."); |
| } |
| |
| void GetSampleRateAndFrameSize( |
| const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>& |
| output_codec_config_obus, |
| std::optional<uint32_t>& output_sample_rate, |
| std::optional<uint32_t>& output_frame_size) { |
| if (output_codec_config_obus.size() != 1) { |
| LOG(WARNING) << "Expected exactly one codec config OBUs, but found " |
| << output_codec_config_obus.size(); |
| return; |
| } |
| const auto& first_codec_config_obu = output_codec_config_obus.begin()->second; |
| output_sample_rate = first_codec_config_obu.GetOutputSampleRate(); |
| output_frame_size = first_codec_config_obu.GetNumSamplesPerFrame(); |
| } |
| |
| } // namespace |
| |
// Parses the descriptor OBUs and builds all derived state needed before any
// temporal unit can be processed: the parameter definitions, the cached
// sample rate/frame size, the substream-ID -> audio-element lookup, the
// global timing module, and the parameters manager. The initialization order
// matters: each step consumes the state produced by the previous ones.
absl::Status ObuProcessor::InitializeInternal(bool is_exhaustive_and_exact,
                                              bool& output_insufficient_data) {
  // Process the descriptor OBUs.
  LOG(INFO) << "Starting Descriptor OBU processing";
  RETURN_IF_NOT_OK(ObuProcessor::ProcessDescriptorObus(
      is_exhaustive_and_exact, *read_bit_buffer_, ia_sequence_header_,
      codec_config_obus_, audio_elements_, mix_presentations_,
      output_insufficient_data));
  LOG(INFO) << "Processed Descriptor OBUs";
  // Gather every parameter definition referenced by the parsed OBUs; these
  // are needed later to parse parameter block OBUs.
  RETURN_IF_NOT_OK(CollectAndValidateParamDefinitions(
      audio_elements_, mix_presentations_, param_definition_variants_));
  // Cache the sample rate and frame size. Only set when there is exactly one
  // codec config OBU; otherwise the corresponding getters will return errors.
  GetSampleRateAndFrameSize(codec_config_obus_, output_sample_rate_,
                            output_frame_size_);
  // Mapping from substream IDs to pointers to audio element with data.
  for (const auto& [audio_element_id, audio_element_with_data] :
       audio_elements_) {
    for (const auto& [substream_id, unused_labels] :
         audio_element_with_data.substream_id_to_labels) {
      auto [unused_iter, inserted] = substream_id_to_audio_element_.insert(
          {substream_id, &audio_element_with_data});
      if (!inserted) {
        // Substream IDs must be unique across all audio elements.
        return absl::InvalidArgumentError(absl::StrCat(
            "Duplicated substream ID: ", substream_id,
            " associated with audio element ID: ", audio_element_id));
      }
    }
  }
  global_timing_module_ =
      GlobalTimingModule::Create(audio_elements_, param_definition_variants_);
  if (global_timing_module_ == nullptr) {
    return absl::InvalidArgumentError(
        "Failed to initialize the global timing module");
  }
  parameters_manager_.emplace(audio_elements_);
  RETURN_IF_NOT_OK(parameters_manager_->Initialize());
  return absl::OkStatus();
}
| |
// Reads descriptor OBUs (IA Sequence Header, Codec Config, Audio Element,
// Mix Presentation, and reserved types) from `read_bit_buffer` until the
// first temporal unit OBU is peeked or, when `is_exhaustive_and_exact` is
// true, until the buffer is exhausted. Parsed OBUs are stored into the
// `output_*` arguments.
//
// When there is not enough buffered data to finish, all outputs are cleared,
// the buffer is rewound to its starting position, `output_insufficient_data`
// is set to true, and `kResourceExhausted` is returned so the caller can
// retry with more data.
absl::Status ObuProcessor::ProcessDescriptorObus(
    bool is_exhaustive_and_exact, ReadBitBuffer& read_bit_buffer,
    IASequenceHeaderObu& output_sequence_header,
    absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
        output_codec_config_obus,
    absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        output_audio_elements_with_data,
    std::list<MixPresentationObu>& output_mix_presentation_obus,
    bool& output_insufficient_data) {
  // `output_insufficient_data` indicates a specific error condition and so is
  // true iff we've received valid data but need more of it.
  output_insufficient_data = false;
  // Audio element OBUs are collected here first; after the loop they are
  // joined with their codec configs into `AudioElementWithData`.
  auto audio_element_obu_map =
      absl::flat_hash_map<DecodedUleb128, AudioElementObu>();
  const int64_t global_position_before_all_obus = read_bit_buffer.Tell();
  bool processed_ia_header = false;
  bool continue_processing = true;
  while (continue_processing) {
    // Peek (without consuming) the type and total size of the next OBU.
    auto header_metadata =
        ObuHeader::PeekObuTypeAndTotalObuSize(read_bit_buffer);
    if (!header_metadata.ok()) {
      if (header_metadata.status().code() ==
          absl::StatusCode::kResourceExhausted) {
        // Can't read header because there is not enough data.
        return InsufficientDataReset(
            read_bit_buffer, global_position_before_all_obus,
            output_insufficient_data, output_codec_config_obus,
            output_audio_elements_with_data, output_mix_presentation_obus);
      } else {
        // Some other error occurred, propagate it.
        return header_metadata.status();
      }
    }

    // Now, we know we were at least able to read obu_type and the total size of
    // the obu.
    if (ObuHeader::IsTemporalUnitObuType(header_metadata->obu_type)) {
      if (is_exhaustive_and_exact) {
        auto error_status = absl::InvalidArgumentError(
            "Descriptor OBUs must not contain a temporal unit OBU when "
            "is_exhaustive_and_exact is true.");
        LOG(ERROR) << error_status;
        RETURN_IF_NOT_OK(read_bit_buffer.Seek(global_position_before_all_obus));
        return error_status;
      }
      // Since it's a temporal unit, we know we are done reading descriptor
      // OBUs. Since we've only peeked on this iteration of the loop, no need to
      // rewind the buffer.
      // Check that we've processed an IA header to ensure it's a valid IA
      // Sequence.
      if (!processed_ia_header) {
        return absl::InvalidArgumentError(
            "An IA Sequence and/or descriptor OBUs must always start with an "
            "IA Header.");
      }
      // Break out of the while loop since we've reached the end of the
      // descriptor OBUs; should not seek back to the beginning of the buffer
      // since this is a successful termination.
      break;
    }

    // Now, we know that this is not a temporal unit OBU.
    if (!read_bit_buffer.CanReadBytes(header_metadata->total_obu_size)) {
      // This is a descriptor OBU for which we don't have enough data.
      return InsufficientDataReset(
          read_bit_buffer, global_position_before_all_obus,
          output_insufficient_data, output_codec_config_obus,
          output_audio_elements_with_data, output_mix_presentation_obus);
    }
    // Now we know we can read the entire obu.
    const int64_t position_before_header = read_bit_buffer.Tell();
    ObuHeader header;
    // Note that `payload_size` is different from the total obu size calculated
    // by `PeekObuTypeAndTotalObuSize`.
    int64_t payload_size;
    RETURN_IF_NOT_OK(header.ReadAndValidate(read_bit_buffer, payload_size));
    switch (header.obu_type) {
      case kObuIaSequenceHeader: {
        if (processed_ia_header && !header.obu_redundant_copy) {
          // A second, non-redundant IA header marks the start of the next IA
          // Sequence; stop and rewind so the caller can see it.
          LOG(WARNING) << "Detected an IA Sequence without temporal units.";
          continue_processing = false;
          break;
        }
        auto ia_sequence_header_obu = IASequenceHeaderObu::CreateFromBuffer(
            header, payload_size, read_bit_buffer);
        if (!ia_sequence_header_obu.ok()) {
          return ia_sequence_header_obu.status();
        }
        output_sequence_header = *std::move(ia_sequence_header_obu);
        output_sequence_header.PrintObu();
        processed_ia_header = true;
        break;
      }
      case kObuIaCodecConfig: {
        RETURN_IF_NOT_OK(GetAndStoreCodecConfigObu(
            header, payload_size, output_codec_config_obus, read_bit_buffer));
        break;
      }
      case kObuIaAudioElement: {
        RETURN_IF_NOT_OK(GetAndStoreAudioElementObu(
            header, payload_size, audio_element_obu_map, read_bit_buffer));
        break;
      }
      case kObuIaMixPresentation: {
        RETURN_IF_NOT_OK(GetAndStoreMixPresentationObu(
            header, payload_size, output_mix_presentation_obus,
            read_bit_buffer));
        break;
      }
      case kObuIaReserved24:
      case kObuIaReserved25:
      case kObuIaReserved26:
      case kObuIaReserved27:
      case kObuIaReserved28:
      case kObuIaReserved29:
      case kObuIaReserved30: {
        // Reserved OBUs may occur in the sequence of Descriptor OBUs. For
        // now, ignore any reserved OBUs by skipping over their bits in the
        // buffer.
        continue_processing = true;
        LOG(INFO) << "Detected a reserved OBU while parsing Descriptor OBUs. "
                  << "Safely ignoring it.";
        std::vector<uint8_t> buffer_to_discard(payload_size);
        RETURN_IF_NOT_OK(
            read_bit_buffer.ReadUint8Span(absl::MakeSpan(buffer_to_discard)));
        break;
      }
      default:
        /// TODO(b/387550488): Handle reserved OBUs.
        continue_processing = false;
        break;
    }
    if (!continue_processing) {
      // Rewind the position to before the last header was read.
      LOG(INFO) << "position_before_header: " << position_before_header;
      RETURN_IF_NOT_OK(read_bit_buffer.Seek(position_before_header));
    }
    if (!processed_ia_header) {
      return absl::InvalidArgumentError(
          "An IA Sequence and/or descriptor OBUs must always start with an IA "
          "Header.");
    }
    if (is_exhaustive_and_exact && !read_bit_buffer.IsDataAvailable()) {
      // We've reached the end of the bitstream and we've processed all
      // descriptor OBUs.
      break;
    }
  }
  if (!audio_element_obu_map.empty()) {
    // Join the collected audio element OBUs with their codec configs.
    auto audio_elements_with_data =
        ObuWithDataGenerator::GenerateAudioElementsWithData(
            output_codec_config_obus, audio_element_obu_map);
    if (!audio_elements_with_data.ok()) {
      return audio_elements_with_data.status();
    }
    output_audio_elements_with_data = std::move(*audio_elements_with_data);
  }
  return absl::OkStatus();
}
| |
// Attempts to process a single OBU belonging to a temporal unit. At most one
// of the `output_*` optionals is populated per call; all three are reset at
// entry. `continue_processing` is set to false when there is not enough
// buffered data for a whole OBU (not an error; the buffer is untouched) or
// when a non-redundant IA Sequence Header signals the start of a new IA
// Sequence (the buffer is rewound to before that OBU's header).
absl::Status ObuProcessor::ProcessTemporalUnitObu(
    const absl::flat_hash_map<DecodedUleb128, AudioElementWithData>&
        audio_elements_with_data,
    const absl::flat_hash_map<DecodedUleb128, CodecConfigObu>&
        codec_config_obus,
    const absl::flat_hash_map<DecodedUleb128, const AudioElementWithData*>&
        substream_id_to_audio_element,
    const absl::flat_hash_map<DecodedUleb128, ParamDefinitionVariant>&
        param_definition_variants,
    ParametersManager& parameters_manager, ReadBitBuffer& read_bit_buffer,
    GlobalTimingModule& global_timing_module,
    std::optional<AudioFrameWithData>& output_audio_frame_with_data,
    std::optional<ParameterBlockWithData>& output_parameter_block_with_data,
    std::optional<TemporalDelimiterObu>& output_temporal_delimiter,
    bool& continue_processing) {
  continue_processing = true;
  output_audio_frame_with_data.reset();
  output_parameter_block_with_data.reset();
  output_temporal_delimiter.reset();

  // Peek (without consuming) the type and total size of the next OBU.
  auto header_metadata = ObuHeader::PeekObuTypeAndTotalObuSize(read_bit_buffer);
  if (!header_metadata.ok()) {
    if (header_metadata.status().code() ==
        absl::StatusCode::kResourceExhausted) {
      // Can't read header because there is not enough data. This is not an
      // error, but we're done processing for now.
      continue_processing = false;
      return absl::OkStatus();
    } else {
      // Some other error occurred, propagate it.
      return header_metadata.status();
    }
  }

  if (!read_bit_buffer.CanReadBytes(header_metadata->total_obu_size)) {
    // This is a temporal unit OBU for which we don't have enough data. This is
    // not an error, but we're done processing for now.
    continue_processing = false;
    return absl::OkStatus();
  }

  const int64_t position_before_header = read_bit_buffer.Tell();

  // Reads in the header and determines the size of the payload in bytes.
  ObuHeader header;
  int64_t payload_size;
  RETURN_IF_NOT_OK(header.ReadAndValidate(read_bit_buffer, payload_size));

  // Typically we should expect {`kObuIaAudioFrameX`,`kObuIaParameterBlock`,
  // `kObuIaTemporalDelimiter`}. We also want to detect an `kIaSequenceHeader`
  // which would signal the start of a new IA Sequence, and to gracefully
  // handle "reserved" OBUs.
  switch (header.obu_type) {
    case kObuIaAudioFrame:
    case kObuIaAudioFrameId0:
    case kObuIaAudioFrameId1:
    case kObuIaAudioFrameId2:
    case kObuIaAudioFrameId3:
    case kObuIaAudioFrameId4:
    case kObuIaAudioFrameId5:
    case kObuIaAudioFrameId6:
    case kObuIaAudioFrameId7:
    case kObuIaAudioFrameId8:
    case kObuIaAudioFrameId9:
    case kObuIaAudioFrameId10:
    case kObuIaAudioFrameId11:
    case kObuIaAudioFrameId12:
    case kObuIaAudioFrameId13:
    case kObuIaAudioFrameId14:
    case kObuIaAudioFrameId15:
    case kObuIaAudioFrameId16:
    case kObuIaAudioFrameId17: {
      RETURN_IF_NOT_OK(GetAndStoreAudioFrameWithData(
          header, payload_size, audio_elements_with_data,
          substream_id_to_audio_element, read_bit_buffer, global_timing_module,
          parameters_manager, output_audio_frame_with_data));
      break;
    }
    case kObuIaParameterBlock: {
      RETURN_IF_NOT_OK(GetAndStoreParameterBlockWithData(
          header, payload_size, param_definition_variants, read_bit_buffer,
          global_timing_module, output_parameter_block_with_data));
      break;
    }
    case kObuIaTemporalDelimiter: {
      // This implementation does not process by temporal unit. Safely ignore
      // it.
      const auto& temporal_delimiter = TemporalDelimiterObu::CreateFromBuffer(
          header, payload_size, read_bit_buffer);
      if (!temporal_delimiter.ok()) {
        return temporal_delimiter.status();
      }
      output_temporal_delimiter = *temporal_delimiter;
      break;
    }
    case kObuIaSequenceHeader:
      if (!header.obu_redundant_copy) {
        // OK. The user of this function will need to reconfigure its state to
        // process the next IA sequence.
        LOG(INFO) << "Detected the start of the next IA Sequence.";
        continue_processing = false;
        break;
      }
      // Ok for any IAMF v1.1.0 descriptor OBUs we can skip over redundant
      // copies.
      [[fallthrough]];
    case kObuIaCodecConfig:
    case kObuIaAudioElement:
    case kObuIaMixPresentation:
      if (!header.obu_redundant_copy) {
        // A fresh (non-redundant) descriptor OBU inside a temporal unit is
        // invalid.
        return absl::InvalidArgumentError(absl::StrCat(
            "Unexpected non-reserved OBU obu_type= ", header.obu_type));
      }
      // Consume and discard the OBU. IAMF allows us to ignore it (even if the
      // redundant flag is misleading).
      [[fallthrough]];
    default:
      // TODO(b/329705373): Read in the data as an `ArbitraryOBU` and output
      // it from this function.
      LOG(INFO) << "Detected a reserved or redundant OBU. Safely ignoring it.";
      std::vector<uint8_t> buffer_to_discard(payload_size);
      RETURN_IF_NOT_OK(
          read_bit_buffer.ReadUint8Span(absl::MakeSpan(buffer_to_discard)));
      break;
  }

  if (!continue_processing) {
    // Rewind the position to before the last header was read.
    LOG(INFO) << "position_before_header: " << position_before_header;
    RETURN_IF_NOT_OK(read_bit_buffer.Seek(position_before_header));
  }

  return absl::OkStatus();
}
| |
| std::unique_ptr<ObuProcessor> ObuProcessor::Create( |
| bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer, |
| bool& output_insufficient_data) { |
| // `output_insufficient_data` indicates a specific error condition and so is |
| // true iff we've received valid data but need more of it. |
| output_insufficient_data = false; |
| if (read_bit_buffer == nullptr) { |
| return nullptr; |
| } |
| std::unique_ptr<ObuProcessor> obu_processor = |
| absl::WrapUnique(new ObuProcessor(read_bit_buffer)); |
| if (const auto status = obu_processor->InitializeInternal( |
| is_exhaustive_and_exact, output_insufficient_data); |
| !status.ok()) { |
| LOG(ERROR) << status; |
| return nullptr; |
| } |
| return obu_processor; |
| } |
| |
| std::unique_ptr<ObuProcessor> ObuProcessor::CreateForRendering( |
| const Layout& desired_layout, |
| const RenderingMixPresentationFinalizer::SampleProcessorFactory& |
| sample_processor_factory, |
| bool is_exhaustive_and_exact, ReadBitBuffer* read_bit_buffer, |
| Layout& output_layout, bool& output_insufficient_data) { |
| // `output_insufficient_data` indicates a specific error condition and so is |
| // true iff we've received valid data but need more of it. |
| output_insufficient_data = false; |
| if (read_bit_buffer == nullptr) { |
| return nullptr; |
| } |
| std::unique_ptr<ObuProcessor> obu_processor = |
| absl::WrapUnique(new ObuProcessor(read_bit_buffer)); |
| if (const auto status = obu_processor->InitializeInternal( |
| is_exhaustive_and_exact, output_insufficient_data); |
| !status.ok()) { |
| LOG(ERROR) << status; |
| return nullptr; |
| } |
| |
| if (const auto status = obu_processor->InitializeForRendering( |
| desired_layout, sample_processor_factory, output_layout); |
| !status.ok()) { |
| LOG(ERROR) << status; |
| return nullptr; |
| } |
| return obu_processor; |
| } |
| |
| absl::StatusOr<uint32_t> ObuProcessor::GetOutputSampleRate() const { |
| RETURN_IF_NOT_OK( |
| ValidateHasValue(output_sample_rate_, |
| "Output sample rate, was this a trivial IA Sequence?")); |
| return *output_sample_rate_; |
| } |
| |
| absl::StatusOr<uint32_t> ObuProcessor::GetOutputFrameSize() const { |
| RETURN_IF_NOT_OK( |
| ValidateHasValue(output_frame_size_, |
| "Output frame size, was this a trivial IA Sequence?")); |
| return *output_frame_size_; |
| } |
| |
| absl::Status ObuProcessor::InitializeForRendering( |
| const Layout& desired_layout, |
| const RenderingMixPresentationFinalizer::SampleProcessorFactory& |
| sample_processor_factory, |
| Layout& output_layout) { |
| if (mix_presentations_.empty()) { |
| return absl::InvalidArgumentError("No mix presentation OBUs found."); |
| } |
| if (audio_elements_.empty()) { |
| return absl::InvalidArgumentError("No audio element OBUs found."); |
| } |
| |
| // TODO(b/377747704): Decode only the frames selected for the playback |
| // layout. |
| audio_frame_decoder_.emplace(); |
| for (const auto& [unused_id, audio_element_with_data] : audio_elements_) { |
| RETURN_IF_NOT_OK(audio_frame_decoder_->InitDecodersForSubstreams( |
| audio_element_with_data.substream_id_to_labels, |
| *audio_element_with_data.codec_config)); |
| } |
| { |
| auto temp_demixing_module = |
| DemixingModule::CreateForReconstruction(audio_elements_); |
| if (!temp_demixing_module.ok()) { |
| return temp_demixing_module.status(); |
| } |
| demixing_module_.emplace(*std::move(temp_demixing_module)); |
| } |
| |
| // TODO(b/340289717): Add a way to select the mix presentation if multiple |
| // are supported. |
| const std::list<MixPresentationObu*> supported_mix_presentations = |
| GetSupportedMixPresentations(audio_elements_, mix_presentations_); |
| if (supported_mix_presentations.empty()) { |
| return absl::NotFoundError("No supported mix presentation OBUs found."); |
| } |
| Layout playback_layout; |
| auto mix_presentation_to_render = GetPlaybackLayoutAndMixPresentation( |
| supported_mix_presentations, desired_layout, output_layout); |
| if (!mix_presentation_to_render.ok()) { |
| return mix_presentation_to_render.status(); |
| } |
| int playback_sub_mix_index; |
| int playback_layout_index; |
| RETURN_IF_NOT_OK(GetIndicesForLayout( |
| (*mix_presentation_to_render)->sub_mixes_, output_layout, |
| playback_sub_mix_index, playback_layout_index)); |
| decoding_layout_info_ = { |
| .mix_presentation_id = |
| (*mix_presentation_to_render)->GetMixPresentationId(), |
| .sub_mix_index = playback_sub_mix_index, |
| .layout_index = playback_layout_index, |
| }; |
| auto forward_on_desired_layout = |
| [&sample_processor_factory, mix_presentation_to_render, |
| playback_sub_mix_index, playback_layout_index]( |
| DecodedUleb128 mix_presentation_id, int sub_mix_index, |
| int layout_index, const Layout& layout, int num_channels, |
| int sample_rate, int bit_depth, size_t max_input_samples_per_frame) |
| -> std::unique_ptr<SampleProcessorBase> { |
| if (mix_presentation_id == |
| (*mix_presentation_to_render)->GetMixPresentationId() && |
| playback_sub_mix_index == sub_mix_index && |
| playback_layout_index == layout_index) { |
| return sample_processor_factory( |
| mix_presentation_id, sub_mix_index, layout_index, layout, |
| num_channels, sample_rate, bit_depth, max_input_samples_per_frame); |
| } |
| return nullptr; |
| }; |
| |
| // Create the mix presentation finalizer which is used to render the output |
| // files. We neither trust the user-provided loudness, nor care about the |
| // calculated loudness. |
| const RendererFactory renderer_factory; |
| absl::StatusOr<RenderingMixPresentationFinalizer> mix_presentation_finalizer = |
| RenderingMixPresentationFinalizer::Create( |
| /*renderer_factory=*/&renderer_factory, |
| /*loudness_calculator_factory=*/nullptr, audio_elements_, |
| forward_on_desired_layout, mix_presentations_); |
| if (!mix_presentation_finalizer.ok()) { |
| return mix_presentation_finalizer.status(); |
| } |
| mix_presentation_finalizer_.emplace(*std::move(mix_presentation_finalizer)); |
| |
| return absl::OkStatus(); |
| } |
| |
| absl::Status ObuProcessor::ProcessTemporalUnitObu( |
| std::optional<AudioFrameWithData>& output_audio_frame_with_data, |
| std::optional<ParameterBlockWithData>& output_parameter_block_with_data, |
| std::optional<TemporalDelimiterObu>& output_temporal_delimiter, |
| bool& continue_processing) { |
| if (!parameters_manager_.has_value()) { |
| return absl::InvalidArgumentError( |
| "Parameters manager is not constructed; " |
| "remember to call `Initialize()` first."); |
| } |
| if (global_timing_module_ == nullptr) { |
| return absl::InvalidArgumentError( |
| "Global timing module is not constructed; " |
| "remember to call `Initialize()` first."); |
| } |
| if (read_bit_buffer_ == nullptr) { |
| return absl::InvalidArgumentError( |
| "Read bit buffer is not constructed; " |
| "remember to call `Initialize()` first."); |
| } |
| |
| return ObuProcessor::ProcessTemporalUnitObu( |
| audio_elements_, codec_config_obus_, substream_id_to_audio_element_, |
| param_definition_variants_, *parameters_manager_, *read_bit_buffer_, |
| *global_timing_module_, output_audio_frame_with_data, |
| output_parameter_block_with_data, output_temporal_delimiter, |
| continue_processing); |
| } |
| |
| absl::Status ObuProcessor::ProcessTemporalUnit( |
| bool eos_is_end_of_sequence, |
| std::optional<OutputTemporalUnit>& output_temporal_unit, |
| bool& continue_processing) { |
| continue_processing = true; |
| while (continue_processing) { |
| std::optional<AudioFrameWithData> audio_frame_with_data; |
| std::optional<ParameterBlockWithData> parameter_block_with_data; |
| std::optional<TemporalDelimiterObu> temporal_delimiter; |
| RETURN_IF_NOT_OK( |
| ProcessTemporalUnitObu(audio_frame_with_data, parameter_block_with_data, |
| temporal_delimiter, continue_processing)); |
| |
| // Collect OBUs into a temporal unit. |
| if (audio_frame_with_data.has_value()) { |
| TemporalUnitData::AddDataToCorrectTemporalUnit( |
| current_temporal_unit_, next_temporal_unit_, |
| *std::move(audio_frame_with_data)); |
| } else if (parameter_block_with_data.has_value()) { |
| TemporalUnitData::AddDataToCorrectTemporalUnit( |
| current_temporal_unit_, next_temporal_unit_, |
| *std::move(parameter_block_with_data)); |
| } else if (temporal_delimiter.has_value()) { |
| current_temporal_unit_.temporal_delimiter = *temporal_delimiter; |
| } |
| |
| // The current temporal unit is considered finished if any of the |
| // following conditions is met: |
| // - The end of sequence is reached. |
| // - The timestamp has advanced (i.e. when the next temporal unit gets its |
| // timestamp). |
| // - A temporal delimiter is encountered. |
| if ((!continue_processing && eos_is_end_of_sequence) || |
| next_temporal_unit_.timestamp.has_value() || |
| current_temporal_unit_.temporal_delimiter.has_value()) { |
| output_temporal_unit = OutputTemporalUnit(); |
| output_temporal_unit->output_audio_frames = |
| std::move(current_temporal_unit_.audio_frames); |
| output_temporal_unit->output_parameter_blocks = |
| std::move(current_temporal_unit_.parameter_blocks); |
| if (current_temporal_unit_.timestamp.has_value()) { |
| output_temporal_unit->output_timestamp = |
| current_temporal_unit_.timestamp.value(); |
| } |
| current_temporal_unit_ = std::move(next_temporal_unit_); |
| next_temporal_unit_ = TemporalUnitData(); |
| break; |
| } |
| } |
| |
| return absl::OkStatus(); |
| } |
| |
| absl::Status ObuProcessor::RenderTemporalUnitAndMeasureLoudness( |
| InternalTimestamp start_timestamp, |
| const std::list<AudioFrameWithData>& audio_frames, |
| const std::list<ParameterBlockWithData>& parameter_blocks, |
| absl::Span<const std::vector<int32_t>>& output_rendered_pcm_samples) { |
| if (audio_frames.empty()) { |
| // Nothing to decode, render, or measure loudness of. |
| return absl::OkStatus(); |
| } |
| |
| if (!audio_frame_decoder_.has_value()) { |
| return absl::InvalidArgumentError( |
| "Audio frame decoder is not constructed; " |
| "remember to call `InitializeForRendering()` first."); |
| } |
| if (!demixing_module_.has_value()) { |
| return absl::InvalidArgumentError( |
| "Demxing module is not constructed; " |
| "remember to call `InitializeForRendering()` first."); |
| } |
| if (!mix_presentation_finalizer_.has_value()) { |
| return absl::InvalidArgumentError( |
| "Mix presentation finalizer is not constructed; " |
| "remember to call `InitializeForRendering()` first."); |
| } |
| |
| // Decode the temporal unit. |
| std::optional<InternalTimestamp> end_timestamp; |
| |
| // This resizing should happen only once per IA sequence, since all the |
| // temporal units should contain the same number of audio frames. |
| decoded_frames_for_temporal_unit_.resize(audio_frames.size()); |
| auto decoded_frames_iter = decoded_frames_for_temporal_unit_.begin(); |
| for (const auto& audio_frame : audio_frames) { |
| if (!end_timestamp.has_value()) { |
| end_timestamp = audio_frame.end_timestamp; |
| } |
| RETURN_IF_NOT_OK( |
| CompareTimestamps(start_timestamp, audio_frame.start_timestamp, |
| "Audio frame has a different start timestamp than " |
| "the temporal unit: ")); |
| RETURN_IF_NOT_OK(CompareTimestamps(*end_timestamp, |
| audio_frame.end_timestamp, |
| "Audio frame has a different end " |
| "timestamp than the temporal unit: ")); |
| auto decoded_frame = audio_frame_decoder_->Decode(audio_frame); |
| if (!decoded_frame.ok()) { |
| return decoded_frame.status(); |
| } |
| *decoded_frames_iter = std::move(*decoded_frame); |
| decoded_frames_iter++; |
| } |
| |
| // Reconstruct the temporal unit and store the result in the output map. |
| const auto decoded_labeled_frames_for_temporal_unit = |
| demixing_module_->DemixDecodedAudioSamples( |
| decoded_frames_for_temporal_unit_); |
| if (!decoded_labeled_frames_for_temporal_unit.ok()) { |
| return decoded_labeled_frames_for_temporal_unit.status(); |
| } |
| |
| RETURN_IF_NOT_OK(mix_presentation_finalizer_->PushTemporalUnit( |
| *decoded_labeled_frames_for_temporal_unit, start_timestamp, |
| *end_timestamp, parameter_blocks)); |
| |
| auto rendered_samples = |
| mix_presentation_finalizer_->GetPostProcessedSamplesAsSpan( |
| decoding_layout_info_.mix_presentation_id, |
| decoding_layout_info_.sub_mix_index, |
| decoding_layout_info_.layout_index); |
| if (!rendered_samples.ok()) { |
| return rendered_samples.status(); |
| } |
| output_rendered_pcm_samples = *rendered_samples; |
| |
| // TODO(b/379122580): Add a call to `FinalizePushingTemporalUnits`, then a |
| // final call to `GetPostProcessedSamplesAsSpan` when there |
| // are no more temporal units to push. Those calls may |
| // belong elsewhere in the class depending on the |
| // interface. |
| |
| return absl::OkStatus(); |
| } |
| |
| } // namespace iamf_tools |