| #include "iamf/cli/iamf_encoder.h" |
| |
| #include <cstddef> |
| #include <cstdint> |
| #include <filesystem> |
| #include <list> |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "absl/container/flat_hash_map.h" |
| #include "absl/log/log.h" |
| #include "absl/status/status_matchers.h" |
| #include "absl/strings/string_view.h" |
| #include "gmock/gmock.h" |
| #include "gtest/gtest.h" |
| #include "iamf/cli/audio_element_with_data.h" |
| #include "iamf/cli/audio_frame_with_data.h" |
| #include "iamf/cli/channel_label.h" |
| #include "iamf/cli/demixing_module.h" |
| #include "iamf/cli/iamf_components.h" |
| #include "iamf/cli/iamf_encoder.h" |
| #include "iamf/cli/loudness_calculator_factory_base.h" |
| #include "iamf/cli/parameter_block_with_data.h" |
| #include "iamf/cli/proto/arbitrary_obu.pb.h" |
| #include "iamf/cli/proto/audio_element.pb.h" |
| #include "iamf/cli/proto/codec_config.pb.h" |
| #include "iamf/cli/proto/ia_sequence_header.pb.h" |
| #include "iamf/cli/proto/mix_presentation.pb.h" |
| #include "iamf/cli/proto/test_vector_metadata.pb.h" |
| #include "iamf/cli/proto/user_metadata.pb.h" |
| #include "iamf/cli/renderer_factory.h" |
| #include "iamf/cli/rendering_mix_presentation_finalizer.h" |
| #include "iamf/cli/tests/cli_test_utils.h" |
| #include "iamf/cli/user_metadata_builder/audio_element_metadata_builder.h" |
| #include "iamf/cli/user_metadata_builder/iamf_input_layout.h" |
| #include "iamf/cli/wav_writer.h" |
| #include "iamf/obu/arbitrary_obu.h" |
| #include "iamf/obu/codec_config.h" |
| #include "iamf/obu/ia_sequence_header.h" |
| #include "iamf/obu/mix_presentation.h" |
| #include "iamf/obu/types.h" |
| #include "src/google/protobuf/text_format.h" |
| |
| namespace iamf_tools { |
| namespace { |
| |
| using ::absl_testing::IsOk; |
| using ::absl_testing::IsOkAndHolds; |
| using ::iamf_tools_cli_proto::UserMetadata; |
| using ::testing::_; |
| using ::testing::Contains; |
| using ::testing::IsEmpty; |
| using ::testing::Not; |
| using ::testing::Return; |
| |
| constexpr DecodedUleb128 kCodecConfigId = 200; |
| constexpr DecodedUleb128 kAudioElementId = 300; |
| constexpr uint32_t kNumSamplesPerFrame = 8; |
| constexpr int kExpectedPcmBitDepth = 16; |
| |
| const auto kOmitOutputWavFiles = |
| RenderingMixPresentationFinalizer::ProduceNoSampleProcessors; |
| |
| void AddIaSequenceHeader(UserMetadata& user_metadata) { |
| ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString( |
| R"pb( |
| primary_profile: PROFILE_VERSION_SIMPLE |
| additional_profile: PROFILE_VERSION_BASE |
| )pb", |
| user_metadata.add_ia_sequence_header_metadata())); |
| } |
| |
| void AddCodecConfig(UserMetadata& user_metadata) { |
| auto* new_codec_config = user_metadata.add_codec_config_metadata(); |
| ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString( |
| R"pb( |
| codec_config_id: 200 |
| codec_config { |
| codec_id: CODEC_ID_LPCM |
| num_samples_per_frame: 8 |
| audio_roll_distance: 0 |
| decoder_config_lpcm { |
| sample_format_flags: LPCM_LITTLE_ENDIAN |
| sample_rate: 48000 |
| } |
| } |
| )pb", |
| new_codec_config)); |
| new_codec_config->mutable_codec_config() |
| ->mutable_decoder_config_lpcm() |
| ->set_sample_size(kExpectedPcmBitDepth); |
| } |
| |
| void AddAudioElement(UserMetadata& user_metadata) { |
| AudioElementMetadataBuilder builder; |
| ASSERT_THAT(builder.PopulateAudioElementMetadata( |
| kAudioElementId, kCodecConfigId, IamfInputLayout::kStereo, |
| *user_metadata.add_audio_element_metadata()), |
| IsOk()); |
| } |
| |
| void AddMixPresentation(UserMetadata& user_metadata) { |
| ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString( |
| R"pb( |
| mix_presentation_id: 42 |
| count_label: 0 |
| sub_mixes { |
| audio_elements { |
| audio_element_id: 300 |
| rendering_config { |
| headphones_rendering_mode: HEADPHONES_RENDERING_MODE_STEREO |
| } |
| element_mix_gain { |
| param_definition { |
| parameter_id: 100 |
| parameter_rate: 16000 |
| param_definition_mode: 1 |
| reserved: 0 |
| } |
| default_mix_gain: 0 |
| } |
| } |
| output_mix_gain { |
| param_definition { |
| parameter_id: 100 |
| parameter_rate: 16000 |
| param_definition_mode: 1 |
| reserved: 0 |
| } |
| default_mix_gain: 0 |
| } |
| layouts { |
| loudness_layout { |
| layout_type: LAYOUT_TYPE_LOUDSPEAKERS_SS_CONVENTION |
| ss_layout { sound_system: SOUND_SYSTEM_A_0_2_0 reserved: 0 } |
| } |
| loudness { |
| info_type_bit_masks: [] |
| integrated_loudness: 0 |
| digital_peak: 0 |
| } |
| } |
| } |
| )pb", |
| user_metadata.add_mix_presentation_metadata())); |
| } |
| |
| void AddArbitraryObu(UserMetadata& user_metadata) { |
| ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString( |
| R"pb( |
| insertion_hook: INSERTION_HOOK_AFTER_AUDIO_ELEMENTS |
| obu_type: OBU_IA_RESERVED_26 |
| payload: "Imaginary descriptor OBU between the audio element and mix presentation." |
| )pb", |
| user_metadata.add_arbitrary_obu_metadata())); |
| } |
| |
| void AddAudioFrame(UserMetadata& user_metadata) { |
| ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString( |
| R"pb( |
| samples_to_trim_at_end: 0 |
| samples_to_trim_at_start: 0 |
| audio_element_id: 300 |
| channel_ids: [ 0, 1 ] |
| channel_labels: [ "L2", "R2" ] |
| )pb", |
| user_metadata.add_audio_frame_metadata())); |
| } |
| |
| void AddParameterBlockAtTimestamp(InternalTimestamp start_timestamp, |
| UserMetadata& user_metadata) { |
| auto* metadata = user_metadata.add_parameter_block_metadata(); |
| ASSERT_TRUE(google::protobuf::TextFormat::ParseFromString( |
| R"pb( |
| parameter_id: 100 |
| duration: 8 |
| num_subblocks: 1 |
| constant_subblock_duration: 8 |
| subblocks: |
| [ { |
| mix_gain_parameter_data { |
| animation_type: ANIMATE_STEP |
| param_data { step { start_point_value: 0 } } |
| } |
| }] |
| )pb", |
| metadata)); |
| |
| // Overwrite `start_timestamp`. |
| metadata->set_start_timestamp(start_timestamp); |
| } |
| |
| std::string GetFirstSubmixFirstLayoutExpectedPath( |
| absl::string_view output_directory) { |
| return (std::filesystem::path(output_directory) / |
| std::filesystem::path("first_file.wav")) |
| .string(); |
| } |
| |
| auto GetWavWriterFactoryThatProducesFirstSubMixFirstLayout( |
| absl::string_view output_directory) { |
| const std::string output_wav_path = |
| GetFirstSubmixFirstLayoutExpectedPath(output_directory); |
| return [output_wav_path]( |
| DecodedUleb128 mix_presentation_id, int sub_mix_index, |
| int layout_index, const Layout&, int num_channels, int sample_rate, |
| int bit_depth, |
| size_t num_samples_per_frame) -> std::unique_ptr<WavWriter> { |
| if (sub_mix_index != 0 || layout_index != 0) { |
| return nullptr; |
| } |
| |
| return WavWriter::Create(output_wav_path, num_channels, sample_rate, |
| bit_depth, num_samples_per_frame); |
| }; |
| } |
| |
| class IamfEncoderTest : public ::testing::Test { |
| protected: |
| void SetupDescriptorObus() { |
| AddIaSequenceHeader(user_metadata_); |
| AddCodecConfig(user_metadata_); |
| AddAudioElement(user_metadata_); |
| AddMixPresentation(user_metadata_); |
| } |
| |
| IamfEncoder CreateExpectOk() { |
| auto iamf_encoder = IamfEncoder::Create( |
| user_metadata_, renderer_factory_.get(), |
| loudness_calculator_factory_.get(), sample_processor_factory_, |
| ia_sequence_header_obu_, codec_config_obus_, audio_elements_, |
| mix_presentation_obus_, arbitrary_obus_); |
| EXPECT_THAT(iamf_encoder, IsOk()); |
| return std::move(*iamf_encoder); |
| } |
| |
| UserMetadata user_metadata_; |
| std::optional<IASequenceHeaderObu> ia_sequence_header_obu_; |
| absl::flat_hash_map<uint32_t, CodecConfigObu> codec_config_obus_; |
| absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements_; |
| std::list<MixPresentationObu> mix_presentation_obus_; |
| std::list<ArbitraryObu> arbitrary_obus_; |
| // Default some dependencies to be based on the real `IamfComponents` |
| // implementations. And generally disable wav writing since it is not needed |
| // for most tests. |
| std::unique_ptr<RendererFactoryBase> renderer_factory_ = |
| CreateRendererFactory(); |
| std::unique_ptr<LoudnessCalculatorFactoryBase> loudness_calculator_factory_ = |
| CreateLoudnessCalculatorFactory(); |
| RenderingMixPresentationFinalizer::SampleProcessorFactory |
| sample_processor_factory_ = kOmitOutputWavFiles; |
| }; |
| |
| TEST_F(IamfEncoderTest, CreateFailsOnEmptyUserMetadata) { |
| user_metadata_.Clear(); |
| |
| EXPECT_FALSE(IamfEncoder::Create(user_metadata_, renderer_factory_.get(), |
| loudness_calculator_factory_.get(), |
| sample_processor_factory_, |
| ia_sequence_header_obu_, codec_config_obus_, |
| audio_elements_, mix_presentation_obus_, |
| arbitrary_obus_) |
| .ok()); |
| } |
| |
| TEST_F(IamfEncoderTest, CreateGeneratesDescriptorObus) { |
| SetupDescriptorObus(); |
| auto iamf_encoder = CreateExpectOk(); |
| |
| EXPECT_TRUE(ia_sequence_header_obu_.has_value()); |
| EXPECT_EQ(codec_config_obus_.size(), 1); |
| EXPECT_EQ(audio_elements_.size(), 1); |
| EXPECT_EQ(mix_presentation_obus_.size(), 1); |
| EXPECT_TRUE(arbitrary_obus_.empty()); |
| } |
| |
| TEST_F(IamfEncoderTest, CreateGeneratesArbitraryObus) { |
| SetupDescriptorObus(); |
| AddArbitraryObu(user_metadata_); |
| |
| auto iamf_encoder = CreateExpectOk(); |
| |
| EXPECT_EQ(arbitrary_obus_.size(), 1); |
| } |
| |
| TEST_F(IamfEncoderTest, BuildInformationTagIsPresentByDefault) { |
| SetupDescriptorObus(); |
| |
| auto iamf_encoder = CreateExpectOk(); |
| ASSERT_FALSE(mix_presentation_obus_.empty()); |
| |
| // We don't care which slot the build information tag is in. But we want it to |
| // be present by default, to help with debugging. |
| const auto& first_obu_tags = |
| mix_presentation_obus_.front().mix_presentation_tags_; |
| ASSERT_TRUE(first_obu_tags.has_value()); |
| EXPECT_THAT(first_obu_tags->tags, Contains(TagMatchesBuildInformation())); |
| } |
| |
| TEST_F(IamfEncoderTest, GenerateDataObusTwoIterationsSucceeds) { |
| SetupDescriptorObus(); |
| AddAudioFrame(user_metadata_); |
| AddParameterBlockAtTimestamp(0, user_metadata_); |
| AddParameterBlockAtTimestamp(8, user_metadata_); |
| auto iamf_encoder = CreateExpectOk(); |
| |
| // Temporary variables for one iteration. |
| const std::vector<InternalSampleType> zero_samples(kNumSamplesPerFrame, 0.0); |
| std::list<AudioFrameWithData> temp_audio_frames; |
| std::list<ParameterBlockWithData> temp_parameter_blocks; |
| IdLabeledFrameMap id_to_labeled_frame; |
| int iteration = 0; |
| while (iamf_encoder.GeneratingDataObus()) { |
| iamf_encoder.BeginTemporalUnit(); |
| iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kL2, zero_samples); |
| iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kR2, zero_samples); |
| |
| // Signal stopping adding samples at the second iteration. |
| if (iteration == 1) { |
| iamf_encoder.FinalizeAddSamples(); |
| } |
| |
| EXPECT_THAT(iamf_encoder.AddParameterBlockMetadata( |
| user_metadata_.parameter_block_metadata(iteration)), |
| IsOk()); |
| |
| // Output. |
| EXPECT_THAT(iamf_encoder.OutputTemporalUnit(temp_audio_frames, |
| temp_parameter_blocks), |
| IsOk()); |
| EXPECT_EQ(temp_audio_frames.size(), 1); |
| EXPECT_EQ(temp_parameter_blocks.size(), 1); |
| EXPECT_EQ(temp_audio_frames.front().start_timestamp, |
| iteration * kNumSamplesPerFrame); |
| |
| iteration++; |
| } |
| |
| EXPECT_EQ(iteration, 2); |
| } |
| |
| TEST_F(IamfEncoderTest, SafeToUseAfterMove) { |
| SetupDescriptorObus(); |
| AddAudioFrame(user_metadata_); |
| AddParameterBlockAtTimestamp(0, user_metadata_); |
| AddParameterBlockAtTimestamp(8, user_metadata_); |
| auto iamf_encoder_to_move_from = CreateExpectOk(); |
| |
| // Move the encoder, and use it. |
| IamfEncoder iamf_encoder = std::move(iamf_encoder_to_move_from); |
| |
| // Use many parts of the API, to make sure the move did not break anything. |
| EXPECT_TRUE(iamf_encoder.GeneratingDataObus()); |
| iamf_encoder.BeginTemporalUnit(); |
| const std::vector<InternalSampleType> kZeroSamples(kNumSamplesPerFrame, 0.0); |
| iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kL2, kZeroSamples); |
| iamf_encoder.AddSamples(kAudioElementId, ChannelLabel::kR2, kZeroSamples); |
| EXPECT_THAT(iamf_encoder.AddParameterBlockMetadata( |
| user_metadata_.parameter_block_metadata(0)), |
| IsOk()); |
| iamf_encoder.FinalizeAddSamples(); |
| std::list<AudioFrameWithData> temp_audio_frames; |
| std::list<ParameterBlockWithData> temp_parameter_blocks; |
| IdLabeledFrameMap id_to_labeled_frame; |
| EXPECT_THAT( |
| iamf_encoder.OutputTemporalUnit(temp_audio_frames, temp_parameter_blocks), |
| IsOk()); |
| EXPECT_EQ(temp_audio_frames.size(), 1); |
| EXPECT_EQ(temp_parameter_blocks.size(), 1); |
| } |
| |
| TEST_F(IamfEncoderTest, FinalizeMixPresentationObusSucceeds) { |
| SetupDescriptorObus(); |
| auto iamf_encoder = CreateExpectOk(); |
| |
| iamf_encoder.FinalizeAddSamples(); |
| |
| EXPECT_THAT(iamf_encoder.GetFinalizedMixPresentationObus(), IsOk()); |
| } |
| |
| TEST_F(IamfEncoderTest, CallingFinalizeMixPresentationObusTwiceFails) { |
| SetupDescriptorObus(); |
| auto iamf_encoder = CreateExpectOk(); |
| iamf_encoder.FinalizeAddSamples(); |
| |
| // The first call is OK. |
| EXPECT_THAT(iamf_encoder.GetFinalizedMixPresentationObus(), IsOk()); |
| |
| EXPECT_FALSE(iamf_encoder.GetFinalizedMixPresentationObus().ok()); |
| } |
| |
| TEST_F(IamfEncoderTest, |
| FinalizeMixPresentationObusDefaultsToPreservingUserLoudness) { |
| SetupDescriptorObus(); |
| // Configuring the encoder with null factories is permitted, which disables |
| // rendering and loudness measurements. |
| renderer_factory_ = nullptr; |
| loudness_calculator_factory_ = nullptr; |
| auto iamf_encoder = CreateExpectOk(); |
| const auto original_loudness = mix_presentation_obus_.front() |
| .sub_mixes_.front() |
| .layouts.front() |
| .loudness; |
| iamf_encoder.FinalizeAddSamples(); |
| |
| const auto finalized_mix_presentation_obus = |
| iamf_encoder.GetFinalizedMixPresentationObus(); |
| ASSERT_THAT(finalized_mix_presentation_obus, IsOk()); |
| |
| EXPECT_EQ(finalized_mix_presentation_obus->front() |
| .sub_mixes_.front() |
| .layouts.front() |
| .loudness, |
| original_loudness); |
| } |
| |
| TEST_F(IamfEncoderTest, |
| FinalizeMixPresentationObusFailsBeforeGeneratingDataObusIsFinished) { |
| SetupDescriptorObus(); |
| AddAudioFrame(user_metadata_); |
| auto iamf_encoder = CreateExpectOk(); |
| |
| // The encoder is still generating data OBUs, so it's not possible to know the |
| // final loudness values. |
| ASSERT_TRUE(iamf_encoder.GeneratingDataObus()); |
| |
| EXPECT_FALSE(iamf_encoder.GetFinalizedMixPresentationObus().ok()); |
| } |
| |
| TEST_F(IamfEncoderTest, FinalizeMixPresentationObuFillsInLoudness) { |
| SetupDescriptorObus(); |
| // Loudness measurement is done only when the signal can be rendered, and |
| // based on the resultant loudness calculators. |
| renderer_factory_ = std::make_unique<RendererFactory>(); |
| auto mock_loudness_calculator_factory = |
| std::make_unique<MockLoudnessCalculatorFactory>(); |
| auto mock_loudness_calculator = std::make_unique<MockLoudnessCalculator>(); |
| const LoudnessInfo kArbitraryLoudnessInfo = { |
| .info_type = LoudnessInfo::kTruePeak, |
| .integrated_loudness = 123, |
| .digital_peak = 456, |
| .true_peak = 789, |
| }; |
| ON_CALL(*mock_loudness_calculator, QueryLoudness()) |
| .WillByDefault(Return(kArbitraryLoudnessInfo)); |
| EXPECT_CALL(*mock_loudness_calculator_factory, |
| CreateLoudnessCalculator(_, _, _, _)) |
| .WillOnce(Return(std::move(mock_loudness_calculator))); |
| loudness_calculator_factory_ = std::move(mock_loudness_calculator_factory); |
| auto iamf_encoder = CreateExpectOk(); |
| iamf_encoder.FinalizeAddSamples(); |
| |
| const auto finalized_mix_presentation_obus = |
| iamf_encoder.GetFinalizedMixPresentationObus(); |
| ASSERT_THAT(finalized_mix_presentation_obus, IsOkAndHolds(Not(IsEmpty()))); |
| |
| EXPECT_EQ(finalized_mix_presentation_obus->front() |
| .sub_mixes_.front() |
| .layouts.front() |
| .loudness, |
| kArbitraryLoudnessInfo); |
| }; |
| |
| TEST_F(IamfEncoderTest, OutputSampleProcessorFactoryIgnoresBitDepthOverride) { |
| // The override bit-depth should be used at the `SampleProcessorFactory` |
| // level. |
| SetupDescriptorObus(); |
| constexpr uint32_t kExpectedSampleProcessorFactoryCalledBitDepth = |
| kExpectedPcmBitDepth; |
| constexpr uint32_t kIgnoredBitDepthOverride = 255; |
| user_metadata_.mutable_test_vector_metadata() |
| ->set_output_wav_file_bit_depth_override(kIgnoredBitDepthOverride); |
| // Wav file writing is done only when the signal can be rendered, based on the |
| // resultant wav writers. |
| renderer_factory_ = std::make_unique<RendererFactory>(); |
| MockSampleProcessorFactory mock_sample_processor_factory; |
| EXPECT_CALL( |
| mock_sample_processor_factory, |
| Call(_, _, _, _, _, _, kExpectedSampleProcessorFactoryCalledBitDepth, _)); |
| sample_processor_factory_ = mock_sample_processor_factory.AsStdFunction(); |
| |
| CreateExpectOk(); |
| }; |
| |
| // TODO(b/349321277): Add more tests. |
| |
| } // namespace |
| } // namespace iamf_tools |