blob: 503d258d3b52dbc74b87797d2368f4f58bcde9b1 [file] [log] [blame]
/*
* Copyright (c) 2023, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 3-Clause Clear
* License and the Alliance for Open Media Patent License 1.0. If the BSD
* 3-Clause Clear License was not distributed with this source code in the
* LICENSE file, you can obtain it at
* www.aomedia.org/license/software-license/bsd-3-c-c. If the Alliance for
* Open Media Patent License 1.0 was not distributed with this source code
* in the PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "iamf/cli/demixing_module.h"
#include <algorithm>
#include <array>
#include <cstdint>
#include <iterator>
#include <list>
#include <optional>
#include <utility>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/status_matchers.h"
#include "absl/status/statusor.h"
#include "absl/types/span.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "iamf/cli/audio_element_with_data.h"
#include "iamf/cli/audio_frame_decoder.h"
#include "iamf/cli/audio_frame_with_data.h"
#include "iamf/cli/channel_label.h"
#include "iamf/cli/proto/user_metadata.pb.h"
#include "iamf/cli/proto_conversion/channel_label_utils.h"
#include "iamf/cli/proto_conversion/downmixing_reconstruction_util.h"
#include "iamf/cli/tests/cli_test_utils.h"
#include "iamf/common/utils/numeric_utils.h"
#include "iamf/obu/audio_element.h"
#include "iamf/obu/audio_frame.h"
#include "iamf/obu/codec_config.h"
#include "iamf/obu/demixing_info_parameter_data.h"
#include "iamf/obu/obu_header.h"
#include "iamf/obu/recon_gain_info_parameter_data.h"
#include "iamf/obu/types.h"
namespace iamf_tools {
namespace {
// Matchers and channel-label enumerators used unqualified by the tests below.
using ::absl_testing::IsOk;
using ::absl_testing::IsOkAndHolds;
using enum ChannelLabel::Label;
using ::testing::DoubleEq;
using ::testing::DoubleNear;
using ::testing::IsEmpty;
using ::testing::Not;
using ::testing::Pointwise;

// ID of the audio element that the helpers and fixtures in this file create.
constexpr DecodedUleb128 kAudioElementId = 137;

// Recon gain payload attached to decoded frames; tests check it is echoed.
constexpr std::array<uint8_t, 12> kReconGainValues = {
    255, 0, 125, 200, 150, 255, 255, 255, 255, 255, 255, 255};

// Default trimming and timing values for frames whose timing is irrelevant to
// the test at hand.
constexpr uint32_t kZeroSamplesToTrimAtEnd = 0;
constexpr uint32_t kZeroSamplesToTrimAtStart = 0;
constexpr int kStartTimestamp = 0;
constexpr int kEndTimestamp = 4;

// Substream IDs used for the two-layer (mono + stereo) configurations.
constexpr DecodedUleb128 kMonoSubstreamId = 0;
constexpr DecodedUleb128 kL2SubstreamId = 1;
// TODO(b/305927287): Test computation of linear output gains. Test some cases
// of erroneous input.
// A label that is stored directly in the map is found and returned.
TEST(FindSamplesOrDemixedSamples, FindsMatchingSamples) {
  const std::vector<InternalSampleType> kSamplesToFind = {1, 2, 3};
  const LabelSamplesMap kLabelToSamples = {{kL2, kSamplesToFind}};

  // Initialize the out-pointer and stop on failure so that a failed lookup can
  // never lead to dereferencing an indeterminate or null pointer below.
  const std::vector<InternalSampleType>* found_samples = nullptr;
  ASSERT_THAT(DemixingModule::FindSamplesOrDemixedSamples(kL2, kLabelToSamples,
                                                          &found_samples),
              IsOk());
  ASSERT_NE(found_samples, nullptr);

  EXPECT_THAT(*found_samples, Pointwise(DoubleEq(), kSamplesToFind));
}
// When only the demixed variant (`kDemixedR2`) is stored, asking for `kR2`
// returns the demixed samples.
TEST(FindSamplesOrDemixedSamples, FindsMatchingDemixedSamples) {
  const std::vector<InternalSampleType> kSamplesToFind = {1, 2, 3};
  const LabelSamplesMap kLabelToSamples = {{kDemixedR2, kSamplesToFind}};

  // Initialize the out-pointer and stop on failure so that a failed lookup can
  // never lead to dereferencing an indeterminate or null pointer below.
  const std::vector<InternalSampleType>* found_samples = nullptr;
  ASSERT_THAT(DemixingModule::FindSamplesOrDemixedSamples(kR2, kLabelToSamples,
                                                          &found_samples),
              IsOk());
  ASSERT_NE(found_samples, nullptr);

  EXPECT_THAT(*found_samples, Pointwise(DoubleEq(), kSamplesToFind));
}
// The map only holds `kDemixedR2`; looking up `kL2` must report an error.
TEST(FindSamplesOrDemixedSamples, InvalidWhenThereIsNoDemixingLabel) {
  const std::vector<InternalSampleType> kSamplesToFind = {1, 2, 3};
  const LabelSamplesMap kLabelToSamples = {{kDemixedR2, kSamplesToFind}};

  // The output is never inspected here, but give it a determinate value.
  const std::vector<InternalSampleType>* found_samples = nullptr;
  EXPECT_THAT(DemixingModule::FindSamplesOrDemixedSamples(kL2, kLabelToSamples,
                                                          &found_samples),
              Not(IsOk()));
}
// When both `kR2` and `kDemixedR2` are stored, the lookup for `kR2` returns
// the samples stored under `kR2`, not the demixed ones.
TEST(FindSamplesOrDemixedSamples, RegularSamplesTakePrecedence) {
  const std::vector<InternalSampleType> kSamplesToFind = {1, 2, 3};
  const std::vector<InternalSampleType> kDemixedSamplesToIgnore = {4, 5, 6};
  const LabelSamplesMap kLabelToSamples = {
      {kR2, kSamplesToFind}, {kDemixedR2, kDemixedSamplesToIgnore}};

  // Initialize the out-pointer and stop on failure so that a failed lookup can
  // never lead to dereferencing an indeterminate or null pointer below.
  const std::vector<InternalSampleType>* found_samples = nullptr;
  ASSERT_THAT(DemixingModule::FindSamplesOrDemixedSamples(kR2, kLabelToSamples,
                                                          &found_samples),
              IsOk());
  ASSERT_NE(found_samples, nullptr);

  EXPECT_THAT(*found_samples, Pointwise(DoubleEq(), kSamplesToFind));
}
// The map only holds `kL2`; looking up the unrelated `kL3` must report an
// error.
TEST(FindSamplesOrDemixedSamples, ErrorNoMatchingSamples) {
  const std::vector<InternalSampleType> kSamplesToFind = {1, 2, 3};
  const LabelSamplesMap kLabelToSamples = {{kL2, kSamplesToFind}};

  // The output is never inspected here, but give it a determinate value.
  const std::vector<InternalSampleType>* found_samples = nullptr;
  EXPECT_THAT(DemixingModule::FindSamplesOrDemixedSamples(kL3, kLabelToSamples,
                                                          &found_samples),
              Not(IsOk()));
}
// Adds a channel-based audio element keyed by `kAudioElementId` to
// `audio_elements` and configures one scalable layer per requested layout.
//
// Args:
//   substream_id_to_labels: Copied into the new element's
//       `substream_id_to_labels` field.
//   loudspeaker_layouts: One entry per layer; entry `i` becomes the
//       `loudspeaker_layout` of layer `i`.
//   audio_elements: Map that receives the element. If the key is already
//       present `emplace` leaves the existing entry in place, and that entry
//       is the one configured below.
//
// Uses a fatal gtest assertion, which only returns from this helper; callers
// that need to stop on failure must check for a fatal failure themselves.
void InitAudioElementWithLabelsAndLayers(
    const SubstreamIdLabelsMap& substream_id_to_labels,
    const std::vector<ChannelAudioLayerConfig::LoudspeakerLayout>&
        loudspeaker_layouts,
    absl::flat_hash_map<DecodedUleb128, AudioElementWithData>& audio_elements) {
  auto [iter, unused_inserted] = audio_elements.emplace(
      kAudioElementId,
      AudioElementWithData{
          .obu = AudioElementObu(ObuHeader(), kAudioElementId,
                                 AudioElementObu::kAudioElementChannelBased,
                                 /*reserved=*/0,
                                 /*codec_config_id=*/0),
          .substream_id_to_labels = substream_id_to_labels,
      });
  auto& obu = iter->second.obu;
  ASSERT_THAT(
      obu.InitializeScalableChannelLayout(loudspeaker_layouts.size(), 0),
      IsOk());

  auto& config = std::get<ScalableChannelLayoutConfig>(obu.config_);
  // Index with an unsigned type to match `size()` and avoid a signed/unsigned
  // comparison.
  for (size_t i = 0; i < loudspeaker_layouts.size(); ++i) {
    config.channel_audio_layer_configs[i].loudspeaker_layout =
        loudspeaker_layouts[i];
  }
}
// Creating the module from a configuration map with no entries succeeds.
TEST(CreateForDownMixingAndReconstruction, EmptyConfigMapIsOk) {
  using ConfigMap =
      absl::flat_hash_map<DecodedUleb128,
                          DemixingModule::DownmixingAndReconstructionConfig>;
  ConfigMap empty_config_map;

  const auto module_or =
      DemixingModule::CreateForDownMixingAndReconstruction(
          std::move(empty_config_map));

  EXPECT_THAT(module_or, IsOk());
}
// A mono base layer plus an L2 enhancement layer, with user labels L2/R2, is
// accepted.
TEST(CreateForDownMixingAndReconstruction, ValidWithTwoLayerStereo) {
  using ConfigMap =
      absl::flat_hash_map<DecodedUleb128,
                          DemixingModule::DownmixingAndReconstructionConfig>;
  const DecodedUleb128 element_id = 137;
  const DemixingModule::DownmixingAndReconstructionConfig two_layer_stereo = {
      .user_labels = {kL2, kR2},
      .substream_id_to_labels = {{0, {kMono}}, {1, {kL2}}},
      .label_to_output_gain = {}};
  ConfigMap config_map = {{element_id, two_layer_stereo}};

  const auto module_or =
      DemixingModule::CreateForDownMixingAndReconstruction(
          std::move(config_map));

  EXPECT_THAT(module_or, IsOk());
}
// A module created for reconstruction exposes an empty down-mixer list, even
// for a two-layer (mono + stereo) element.
TEST(InitializeForReconstruction, NeverCreatesDownMixers) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers({{0, {kMono}}, {1, {kL2}}},
                                      {ChannelAudioLayerConfig::kLayoutMono,
                                       ChannelAudioLayerConfig::kLayoutStereo},
                                      audio_elements);
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  // Fatal assertions: the pointer is dereferenced below, so the test must not
  // continue if the getter fails or leaves it null.
  const std::list<Demixer>* down_mixers = nullptr;
  ASSERT_THAT(demixing_module->GetDownMixers(kAudioElementId, down_mixers),
              IsOk());
  ASSERT_NE(down_mixers, nullptr);

  EXPECT_TRUE(down_mixers->empty());
}
// A two-layer (mono + stereo) element yields exactly one demixer.
TEST(CreateForReconstruction, CreatesOneDemixerForTwoLayerStereo) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers({{0, {kMono}}, {1, {kL2}}},
                                      {ChannelAudioLayerConfig::kLayoutMono,
                                       ChannelAudioLayerConfig::kLayoutStereo},
                                      audio_elements);
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  // Fatal assertions: the pointer is dereferenced below, so the test must not
  // continue if the getter fails or leaves it null.
  const std::list<Demixer>* demixer = nullptr;
  ASSERT_THAT(demixing_module->GetDemixers(kAudioElementId, demixer), IsOk());
  ASSERT_NE(demixer, nullptr);

  EXPECT_EQ(demixer->size(), 1);
}
// An element whose only layer uses the reserved layout 14 is rejected.
TEST(CreateForReconstruction, FailsForReservedLayout14) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> elements;
  InitAudioElementWithLabelsAndLayers(
      {{0, {kOmitted}}}, {ChannelAudioLayerConfig::kLayoutReserved14},
      elements);

  const auto module_or = DemixingModule::CreateForReconstruction(elements);
  const bool creation_succeeded = module_or.ok();

  EXPECT_FALSE(creation_succeeded);
}
// A single expanded-layout layer set to the LFE expanded layout is accepted.
TEST(CreateForReconstruction, ValidForExpandedLayoutLFE) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> elements;
  InitAudioElementWithLabelsAndLayers(
      {{0, {kLFE}}}, {ChannelAudioLayerConfig::kLayoutExpanded}, elements);

  // The helper only sets `loudspeaker_layout`; fill in the expanded layout on
  // the first (and only) layer by hand.
  auto& layout_config = std::get<ScalableChannelLayoutConfig>(
      elements.at(kAudioElementId).obu.config_);
  auto& first_layer = layout_config.channel_audio_layer_configs[0];
  first_layer.expanded_loudspeaker_layout =
      ChannelAudioLayerConfig::kExpandedLayoutLFE;

  const auto module_or = DemixingModule::CreateForReconstruction(elements);
  EXPECT_THAT(module_or, IsOk());
}
// A single-layer stereo element yields an empty demixer list.
TEST(CreateForReconstruction, CreatesNoDemixersForSingleLayerChannelBased) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers({{0, {kL2, kR2}}},
                                      {ChannelAudioLayerConfig::kLayoutStereo},
                                      audio_elements);
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  // Fatal assertions: the pointer is dereferenced below, so the test must not
  // continue if the getter fails or leaves it null.
  const std::list<Demixer>* demixer = nullptr;
  ASSERT_THAT(demixing_module->GetDemixers(kAudioElementId, demixer), IsOk());
  ASSERT_NE(demixer, nullptr);

  EXPECT_TRUE(demixer->empty());
}
// An ambisonics-mono element with four substreams yields an empty demixer
// list.
TEST(CreateForReconstruction, CreatesNoDemixersForAmbisonics) {
  const DecodedUleb128 kCodecConfigId = 0;
  constexpr std::array<DecodedUleb128, 4> kAmbisonicsSubstreamIds{0, 1, 2, 3};
  absl::flat_hash_map<DecodedUleb128, CodecConfigObu> codec_configs;
  AddLpcmCodecConfigWithIdAndSampleRate(kCodecConfigId, 48000, codec_configs);
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  AddAmbisonicsMonoAudioElementWithSubstreamIds(kAudioElementId, kCodecConfigId,
                                                kAmbisonicsSubstreamIds,
                                                codec_configs, audio_elements);
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  // Fatal assertions: the pointer is dereferenced below, so the test must not
  // continue if the getter fails or leaves it null.
  const std::list<Demixer>* demixer = nullptr;
  ASSERT_THAT(demixing_module->GetDemixers(kAudioElementId, demixer), IsOk());
  ASSERT_NE(demixer, nullptr);

  EXPECT_TRUE(demixer->empty());
}
// A module built with `CreateForReconstruction` reports an error when asked to
// demix original audio samples.
TEST(DemixOriginalAudioSamples, ReturnsErrorAfterCreateForReconstruction) {
  const SubstreamIdLabelsMap kSubstreamIdToLabels = {
      {kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}};
  const std::vector<ChannelAudioLayerConfig::LoudspeakerLayout> kLayouts = {
      ChannelAudioLayerConfig::kLayoutMono,
      ChannelAudioLayerConfig::kLayoutStereo};
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> elements;
  InitAudioElementWithLabelsAndLayers(kSubstreamIdToLabels, kLayouts,
                                      elements);

  auto module_or = DemixingModule::CreateForReconstruction(elements);
  ASSERT_THAT(module_or, IsOk());

  EXPECT_THAT(module_or->DemixOriginalAudioSamples({}), Not(IsOk()));
}
// Demixing a mono + L2 pair produces a labeled frame holding both input labels
// and the demixed R2 label.
TEST(DemixDecodedAudioSamples, OutputContainsOriginalAndDemixedSamples) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      elements);

  // The two frames are identical apart from their substream ID.
  std::list<DecodedAudioFrame> frames;
  for (const DecodedUleb128 substream_id : {kMonoSubstreamId, kL2SubstreamId}) {
    frames.push_back(
        DecodedAudioFrame{.substream_id = substream_id,
                          .start_timestamp = kStartTimestamp,
                          .end_timestamp = kEndTimestamp,
                          .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                          .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                          .decoded_samples = {{0}},
                          .down_mixing_params = DownMixingParams()});
  }

  auto module_or = DemixingModule::CreateForReconstruction(elements);
  ASSERT_THAT(module_or, IsOk());
  const auto frames_by_id = module_or->DemixDecodedAudioSamples(frames);
  ASSERT_THAT(frames_by_id, IsOk());
  ASSERT_TRUE(frames_by_id->contains(kAudioElementId));

  const auto& label_to_samples =
      frames_by_id->at(kAudioElementId).label_to_samples;
  EXPECT_TRUE(label_to_samples.contains(kL2));
  EXPECT_TRUE(label_to_samples.contains(kMono));
  EXPECT_TRUE(label_to_samples.contains(kDemixedR2));
}
// The end timestamp and trimming counts given on the decoded frames are copied
// to the labeled output frame.
TEST(DemixDecodedAudioSamples, OutputEchoesTimingInformation) {
  // These values are not very sensible, but as long as they are consistent
  // between related frames it is OK.
  const DecodedUleb128 kExpectedStartTimestamp = 99;
  const DecodedUleb128 kExpectedEndTimestamp = 123;
  const DecodedUleb128 kExpectedNumSamplesToTrimAtEnd = 999;
  const DecodedUleb128 kExpectedNumSamplesToTrimAtStart = 9999;
  // `kL2SubstreamId` comes from file scope; it is deliberately not redeclared
  // here to avoid shadowing the identical constant.
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      audio_elements);
  std::list<DecodedAudioFrame> decoded_audio_frames;
  decoded_audio_frames.push_back(DecodedAudioFrame{
      .substream_id = kMonoSubstreamId,
      .start_timestamp = kExpectedStartTimestamp,
      .end_timestamp = kExpectedEndTimestamp,
      .samples_to_trim_at_end = kExpectedNumSamplesToTrimAtEnd,
      .samples_to_trim_at_start = kExpectedNumSamplesToTrimAtStart,
      .decoded_samples = {{0}},
      .down_mixing_params = DownMixingParams()});
  decoded_audio_frames.push_back(DecodedAudioFrame{
      .substream_id = kL2SubstreamId,
      .start_timestamp = kExpectedStartTimestamp,
      .end_timestamp = kExpectedEndTimestamp,
      .samples_to_trim_at_end = kExpectedNumSamplesToTrimAtEnd,
      .samples_to_trim_at_start = kExpectedNumSamplesToTrimAtStart,
      .decoded_samples = {{0}},
      .down_mixing_params = DownMixingParams()});
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  const auto id_to_labeled_decoded_frame =
      demixing_module->DemixDecodedAudioSamples(decoded_audio_frames);
  ASSERT_THAT(id_to_labeled_decoded_frame, IsOk());
  ASSERT_TRUE(id_to_labeled_decoded_frame->contains(kAudioElementId));

  const auto& labeled_frame = id_to_labeled_decoded_frame->at(kAudioElementId);
  // NOTE(review): the start timestamp is fed in but not asserted on; confirm
  // whether the labeled frame exposes it before adding a check.
  EXPECT_EQ(labeled_frame.end_timestamp, kExpectedEndTimestamp);
  EXPECT_EQ(labeled_frame.samples_to_trim_at_end,
            kExpectedNumSamplesToTrimAtEnd);
  EXPECT_EQ(labeled_frame.samples_to_trim_at_start,
            kExpectedNumSamplesToTrimAtStart);
}
// The samples supplied for the mono and L2 substreams appear unchanged under
// their labels in the output frame.
TEST(DemixDecodedAudioSamples, OutputEchoesOriginalLabels) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      audio_elements);
  std::list<DecodedAudioFrame> decoded_audio_frames;
  decoded_audio_frames.push_back(
      DecodedAudioFrame{.substream_id = kMonoSubstreamId,
                        .start_timestamp = kStartTimestamp,
                        .end_timestamp = kEndTimestamp,
                        .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                        .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                        .decoded_samples = {{1}, {2}, {3}},
                        .down_mixing_params = DownMixingParams()});
  decoded_audio_frames.push_back(
      DecodedAudioFrame{.substream_id = kL2SubstreamId,
                        .start_timestamp = kStartTimestamp,
                        .end_timestamp = kEndTimestamp,
                        .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                        .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                        .decoded_samples = {{9}, {10}, {11}},
                        .down_mixing_params = DownMixingParams()});
  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());

  const auto id_to_labeled_decoded_frame =
      demixing_module->DemixDecodedAudioSamples(decoded_audio_frames);
  ASSERT_THAT(id_to_labeled_decoded_frame, IsOk());
  ASSERT_TRUE(id_to_labeled_decoded_frame->contains(kAudioElementId));

  // Examine the demixed frame.
  const auto& labeled_frame = id_to_labeled_decoded_frame->at(kAudioElementId);
  constexpr std::array<int32_t, 3> kExpectedMonoSamples = {1, 2, 3};
  constexpr std::array<int32_t, 3> kExpectedL2Samples = {9, 10, 11};
  EXPECT_THAT(
      labeled_frame.label_to_samples.at(kMono),
      Pointwise(InternalSampleMatchesIntegralSample(), kExpectedMonoSamples));
  EXPECT_THAT(
      labeled_frame.label_to_samples.at(kL2),
      Pointwise(InternalSampleMatchesIntegralSample(), kExpectedL2Samples));
}
// With mono = 750 and L2 = 1000, the demixed R2 sample is expected to be 500.
TEST(DemixDecodedAudioSamples, OutputHasReconstructedLayers) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      elements);

  DecodedAudioFrame mono_frame{
      .substream_id = kMonoSubstreamId,
      .start_timestamp = kStartTimestamp,
      .end_timestamp = kEndTimestamp,
      .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
      .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
      .decoded_samples = {{750}},
      .down_mixing_params = DownMixingParams()};
  DecodedAudioFrame l2_frame{
      .substream_id = kL2SubstreamId,
      .start_timestamp = kStartTimestamp,
      .end_timestamp = kEndTimestamp,
      .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
      .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
      .decoded_samples = {{1000}},
      .down_mixing_params = DownMixingParams()};
  std::list<DecodedAudioFrame> frames;
  frames.push_back(std::move(mono_frame));
  frames.push_back(std::move(l2_frame));

  const auto module_or = DemixingModule::CreateForReconstruction(elements);
  ASSERT_THAT(module_or, IsOk());
  const auto frames_by_id = module_or->DemixDecodedAudioSamples(frames);
  ASSERT_THAT(frames_by_id, IsOk());
  ASSERT_TRUE(frames_by_id->contains(kAudioElementId));

  // Examine the demixed frame.
  // D_R2 = M - (L2 - 6 dB) + 6 dB.
  const auto& demixed_frame = frames_by_id->at(kAudioElementId);
  EXPECT_THAT(demixed_frame.label_to_samples.at(kDemixedR2),
              Pointwise(InternalSampleMatchesIntegralSample(), {500}));
}
// Recon gain data attached to the decoded frames, and the per-layer
// loudspeaker layouts of the audio element, are carried to the labeled output
// frame.
TEST(DemixDecodedAudioSamples, OutputContainsReconGainAndLayerInfo) {
  absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements;
  InitAudioElementWithLabelsAndLayers(
      {{kMonoSubstreamId, {kMono}}, {kL2SubstreamId, {kL2}}},
      {ChannelAudioLayerConfig::kLayoutMono,
       ChannelAudioLayerConfig::kLayoutStereo},
      audio_elements);
  ReconGainInfoParameterData recon_gain_info_parameter_data;
  recon_gain_info_parameter_data.recon_gain_elements.push_back(ReconGainElement{
      .recon_gain_flag = DecodedUleb128(1), .recon_gain = kReconGainValues});

  // Both frames carry the same recon gain data and point at the same audio
  // element; only the substream ID differs.
  std::list<DecodedAudioFrame> decoded_audio_frames;
  for (const DecodedUleb128 substream_id : {kMonoSubstreamId, kL2SubstreamId}) {
    decoded_audio_frames.push_back(DecodedAudioFrame{
        .substream_id = substream_id,
        .start_timestamp = kStartTimestamp,
        .end_timestamp = kEndTimestamp,
        .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
        .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
        .decoded_samples = {{0}},
        .down_mixing_params = DownMixingParams(),
        .recon_gain_info_parameter_data = recon_gain_info_parameter_data,
        .audio_element_with_data = &audio_elements.at(kAudioElementId)});
  }

  const auto demixing_module =
      DemixingModule::CreateForReconstruction(audio_elements);
  ASSERT_THAT(demixing_module, IsOk());
  const auto id_to_labeled_decoded_frame =
      demixing_module->DemixDecodedAudioSamples(decoded_audio_frames);
  ASSERT_THAT(id_to_labeled_decoded_frame, IsOk());
  ASSERT_TRUE(id_to_labeled_decoded_frame->contains(kAudioElementId));

  const auto& labeled_frame = id_to_labeled_decoded_frame->at(kAudioElementId);
  EXPECT_TRUE(labeled_frame.label_to_samples.contains(kL2));
  EXPECT_TRUE(labeled_frame.label_to_samples.contains(kMono));
  EXPECT_TRUE(labeled_frame.label_to_samples.contains(kDemixedR2));

  // Fatal: element 0 is accessed right after, so an empty container must stop
  // the test instead of reaching an out-of-range `at(0)`.
  ASSERT_EQ(
      labeled_frame.recon_gain_info_parameter_data.recon_gain_elements.size(),
      1);
  const auto& recon_gain_element =
      labeled_frame.recon_gain_info_parameter_data.recon_gain_elements.at(0);
  ASSERT_TRUE(recon_gain_element.has_value());
  EXPECT_EQ(recon_gain_element->recon_gain_flag, DecodedUleb128(1));
  EXPECT_THAT(recon_gain_element->recon_gain,
              testing::ElementsAreArray(kReconGainValues));

  EXPECT_EQ(labeled_frame.loudspeaker_layout_per_layer.size(), 2);
  EXPECT_THAT(labeled_frame.loudspeaker_layout_per_layer,
              testing::ElementsAre(ChannelAudioLayerConfig::kLayoutMono,
                                   ChannelAudioLayerConfig::kLayoutStereo));
}
class DemixingModuleTestBase {
public:
DemixingModuleTestBase() {
audio_frame_metadata_.set_audio_element_id(kAudioElementId);
}
void CreateDemixingModuleExpectOk() {
iamf_tools_cli_proto::UserMetadata user_metadata;
*user_metadata.add_audio_frame_metadata() = audio_frame_metadata_;
audio_elements_.emplace(
kAudioElementId,
AudioElementWithData{
.obu = AudioElementObu(ObuHeader(), kAudioElementId,
AudioElementObu::kAudioElementChannelBased,
/*reserved=*/0,
/*codec_config_id=*/0),
.substream_id_to_labels = substream_id_to_labels_,
});
const absl::StatusOr<absl::flat_hash_map<
DecodedUleb128, DemixingModule::DownmixingAndReconstructionConfig>>
audio_element_id_to_demixing_metadata =
CreateAudioElementIdToDemixingMetadata(user_metadata,
audio_elements_);
ASSERT_THAT(audio_element_id_to_demixing_metadata.status(), IsOk());
auto demixing_module = DemixingModule::CreateForDownMixingAndReconstruction(
std::move(audio_element_id_to_demixing_metadata.value()));
ASSERT_THAT(demixing_module, IsOk());
demixing_module_.emplace(*std::move(demixing_module));
}
void TestCreateDemixingModule(int expected_number_of_down_mixers) {
CreateDemixingModuleExpectOk();
const std::list<Demixer>* down_mixers = nullptr;
const std::list<Demixer>* demixers = nullptr;
ASSERT_THAT(demixing_module_->GetDownMixers(kAudioElementId, down_mixers),
IsOk());
ASSERT_THAT(demixing_module_->GetDemixers(kAudioElementId, demixers),
IsOk());
EXPECT_EQ(down_mixers->size(), expected_number_of_down_mixers);
EXPECT_EQ(demixers->size(), expected_number_of_down_mixers);
}
protected:
void ConfigureAudioFrameMetadata(
absl::Span<const ChannelLabel::Label> labels) {
for (const auto& label : labels) {
auto proto_label = ChannelLabelUtils::LabelToProto(label);
ASSERT_TRUE(proto_label.ok());
audio_frame_metadata_.add_channel_metadatas()->set_channel_label(
*proto_label);
}
}
iamf_tools_cli_proto::AudioFrameObuMetadata audio_frame_metadata_;
absl::flat_hash_map<DecodedUleb128, AudioElementWithData> audio_elements_;
SubstreamIdLabelsMap substream_id_to_labels_;
// Held in `std::optional` for delayed construction.
std::optional<DemixingModule> demixing_module_;
};
// Fixture for down-mixing tests. Tests declare the input channels and the
// expected per-substream output, then call `TestDownMixing`.
class DownMixingModuleTest : public DemixingModuleTestBase,
                             public ::testing::Test {
 protected:
  // Creates the module, down-mixes `input_label_to_samples_` into the
  // configured substreams and compares each substream's output with the
  // expectation registered through `ConfigureOutputChannel`.
  void TestDownMixing(const DownMixingParams& down_mixing_params,
                      int expected_number_of_down_mixers) {
    // Stop here if module creation failed; `demixing_module_` would otherwise
    // be dereferenced while empty.
    ASSERT_NO_FATAL_FAILURE(
        TestCreateDemixingModule(expected_number_of_down_mixers));
    EXPECT_THAT(demixing_module_->DownMixSamplesToSubstreams(
                    kAudioElementId, down_mixing_params,
                    input_label_to_samples_, substream_id_to_substream_data_),
                IsOk());
    for (const auto& [substream_id, substream_data] :
         substream_id_to_substream_data_) {
      // Copy the output queue to a vector for comparison.
      std::vector<std::vector<int32_t>> output_samples;
      std::copy(substream_data.samples_obu.begin(),
                substream_data.samples_obu.end(),
                std::back_inserter(output_samples));
      EXPECT_EQ(output_samples,
                substream_id_to_expected_samples_[substream_id]);
    }
  }

  // Registers `label` as an input channel holding `input_samples`. Must be
  // called at most once per label.
  void ConfigureInputChannel(ChannelLabel::Label label,
                             absl::Span<const int32_t> input_samples) {
    ConfigureAudioFrameMetadata({label});
    auto [iter, inserted] = input_label_to_samples_.emplace(
        label, std::vector<InternalSampleType>(input_samples.size(), 0));
    // This function should not be called with the same label twice. Check
    // before converting, so an existing entry (possibly of a different size)
    // is never written to.
    ASSERT_TRUE(inserted);
    Int32ToInternalSampleType(input_samples, absl::MakeSpan(iter->second));
  }

  // Registers a new output substream carrying `requested_output_labels` and
  // the samples it is expected to hold after down-mixing.
  void ConfigureOutputChannel(
      const std::list<ChannelLabel::Label>& requested_output_labels,
      const std::vector<std::vector<int32_t>>& expected_output_samples) {
    // The substream ID itself does not matter. Generate a unique one.
    const uint32_t substream_id = substream_id_to_labels_.size();
    substream_id_to_labels_[substream_id] = requested_output_labels;
    substream_id_to_substream_data_[substream_id] = {.substream_id =
                                                         substream_id};
    substream_id_to_expected_samples_[substream_id] = expected_output_samples;
  }

  LabelSamplesMap input_label_to_samples_;
  absl::flat_hash_map<uint32_t, SubstreamData> substream_id_to_substream_data_;
  absl::flat_hash_map<uint32_t, std::vector<std::vector<int32_t>>>
      substream_id_to_expected_samples_;
};
// A single stereo substream (L2 + R2) is expected to create zero down-mixers
// and, via `TestCreateDemixingModule`, zero demixers. Sample data is empty
// because only module creation is exercised.
TEST_F(DownMixingModuleTest, OneLayerStereoHasNoDownMixers) {
ConfigureInputChannel(kL2, {});
ConfigureInputChannel(kR2, {});
ConfigureOutputChannel({kL2, kR2}, {{}});
TestCreateDemixingModule(0);
}
// All twelve 7.1.4 channels are delivered in one layer, so zero down-mixers
// and demixers are expected. Only module creation is exercised; the expected
// output samples registered here are never compared.
TEST_F(DownMixingModuleTest, OneLayer7_1_4HasNoDownMixers) {
// Initialize arguments for single layer 7.1.4.
ConfigureInputChannel(kL7, {});
ConfigureInputChannel(kR7, {});
ConfigureInputChannel(kCentre, {});
ConfigureInputChannel(kLFE, {});
ConfigureInputChannel(kLss7, {});
ConfigureInputChannel(kRss7, {});
ConfigureInputChannel(kLrs7, {});
ConfigureInputChannel(kRrs7, {});
ConfigureInputChannel(kLtf4, {});
ConfigureInputChannel(kRtf4, {});
ConfigureInputChannel(kLtb4, {});
ConfigureInputChannel(kRtb4, {});
// Each call below registers one substream; IDs are assigned in call order.
ConfigureOutputChannel({kCentre}, {{}});
ConfigureOutputChannel({kL7, kR7}, {});
ConfigureOutputChannel({kLss7, kRss7}, {});
ConfigureOutputChannel({kLrs7, kRrs7}, {});
ConfigureOutputChannel({kLtf4, kRtf4}, {});
ConfigureOutputChannel({kLtb4, kRtb4}, {});
ConfigureOutputChannel({kLFE}, {});
TestCreateDemixingModule(0);
}
// Four ambisonics channels (A0-A3), each in its own substream, are expected to
// create zero down-mixers and demixers. Only module creation is exercised.
TEST_F(DownMixingModuleTest, AmbisonicsHasNoDownMixers) {
ConfigureInputChannel(kA0, {});
ConfigureInputChannel(kA1, {});
ConfigureInputChannel(kA2, {});
ConfigureInputChannel(kA3, {});
ConfigureOutputChannel({kA0}, {{}});
ConfigureOutputChannel({kA1}, {{}});
ConfigureOutputChannel({kA2}, {{}});
ConfigureOutputChannel({kA3}, {{}});
TestCreateDemixingModule(0);
}
// With a single stereo layer the substream output is the input unchanged:
// each expected entry pairs the L2 and R2 samples of one tick.
TEST_F(DownMixingModuleTest, OneLayerStereo) {
ConfigureInputChannel(kL2, {0, 1, 2, 3});
ConfigureInputChannel(kR2, {100, 101, 102, 103});
// Down-mix to stereo as the highest layer. The highest layer always matches
// the original input.
ConfigureOutputChannel({kL2, kR2}, {{0, 100}, {1, 101}, {2, 102}, {3, 103}});
// Default down-mixing parameters; no down-mixers expected.
TestDownMixing({}, 0);
}
// Two-layer stereo: the L2 substream passes through and the mono substream is
// derived from L2 and R2 by one down-mixer.
TEST_F(DownMixingModuleTest, S2ToS1DownMixer) {
ConfigureInputChannel(kL2, {0, 100, 500, 1000});
ConfigureInputChannel(kR2, {100, 0, 500, 500});
// Down-mix to stereo as the highest layer. The highest layer always matches
// the original input.
ConfigureOutputChannel({kL2}, {{0}, {100}, {500}, {1000}});
// Down-mix to mono as the lowest layer.
// M = (L2 - 6 dB) + (R2 - 6 dB).
// The expected values equal (L2 + R2) / 2 for each tick, e.g.
// (1000 + 500) / 2 = 750.
ConfigureOutputChannel({kMono}, {{50}, {50}, {500}, {750}});
TestDownMixing({}, 1);
}
// 3.1.2 input down-mixed to a stereo base layer with one down-mixer. The
// centre and top-front channels pass through unchanged.
TEST_F(DownMixingModuleTest, S3ToS2DownMixer) {
ConfigureInputChannel(kL3, {0, 100});
ConfigureInputChannel(kR3, {0, 100});
ConfigureInputChannel(kCentre, {100, 100});
ConfigureInputChannel(kLtf3, {99999, 99999});
ConfigureInputChannel(kRtf3, {99998, 99998});
// Down-mix to 3.1.2 as the highest layer. The highest layer always matches
// the original input.
ConfigureOutputChannel({kCentre}, {{100}, {100}});
ConfigureOutputChannel({kLtf3, kRtf3}, {{99999, 99998}, {99999, 99998}});
// Down-mix to stereo as the lowest layer.
// L2 = L3 + (C - 3 dB).
// R2 = R3 + (C - 3 dB).
// The expected values correspond to adding about 70 (100 attenuated by 3 dB)
// to each L3/R3 sample.
ConfigureOutputChannel({kL2, kR2}, {{70, 70}, {170, 170}});
TestDownMixing({}, 1);
}
// 5.1 input down-mixed to a stereo base layer, which takes two chained
// down-mixers (5.1 -> 3.1 -> stereo).
TEST_F(DownMixingModuleTest, S5ToS3ToS2DownMixer) {
ConfigureInputChannel(kL5, {100});
ConfigureInputChannel(kR5, {200});
ConfigureInputChannel(kCentre, {1000});
ConfigureInputChannel(kLs5, {2000});
ConfigureInputChannel(kRs5, {3000});
ConfigureInputChannel(kLFE, {6});
// Down-mix to 5.1 as the highest layer. The highest layer always matches the
// original input.
ConfigureOutputChannel({kCentre}, {{1000}});
ConfigureOutputChannel({kLs5, kRs5}, {{2000, 3000}});
ConfigureOutputChannel({kLFE}, {{6}});
// Down-mix to stereo as the lowest layer.
// L3 = L5 + Ls5 * delta.
// L2 = L3 + (C - 3 dB).
// With delta = .707 and -3 dB taken as a factor of about .707:
// L2 = 100 + 2000 * .707 + 1000 * .707 = 2221.
// R2 = 200 + 3000 * .707 + 1000 * .707 = 3028.
ConfigureOutputChannel({kL2, kR2}, {{2221, 3028}});
// Internally there is a down-mixer to L3/R3 then another for L2/R2.
TestDownMixing({.delta = .707}, 2);
}
// 5.1.2 input down-mixed to a 3.1.2 base layer. Both the surround and the
// height channels are down-mixed, so two down-mixers are expected.
TEST_F(DownMixingModuleTest, S5ToS3ToDownMixer) {
ConfigureInputChannel(kL5, {1000});
ConfigureInputChannel(kR5, {2000});
ConfigureInputChannel(kCentre, {3});
ConfigureInputChannel(kLs5, {4000});
ConfigureInputChannel(kRs5, {8000});
ConfigureInputChannel(kLtf2, {1000});
ConfigureInputChannel(kRtf2, {2000});
ConfigureInputChannel(kLFE, {8});
// Down-mix to 5.1.2 as the highest layer. The highest layer always matches
// the original input.
ConfigureOutputChannel({kLs5, kRs5}, {{4000, 8000}});
// Down-mix to 3.1.2 as the lowest layer.
// L3 = L5 + Ls5 * delta.
// L3 = 1000 + 4000 * .707 = 3828; R3 = 2000 + 8000 * .707 = 7656.
ConfigureOutputChannel({kL3, kR3}, {{3828, 7656}});
ConfigureOutputChannel({kCentre}, {{3}});
// Ltf3 = Ltf2 + Ls5 * w * delta.
// Ltf3 = 1000 + 4000 * .25 * .707 = 1707;
// Rtf3 = 2000 + 8000 * .25 * .707 = 3414.
ConfigureOutputChannel({kLtf3, kRtf3}, {{1707, 3414}});
ConfigureOutputChannel({kLFE}, {{8}});
// Internally there is a down-mixer for the height and another for the
// surround.
TestDownMixing({.delta = .707, .w = 0.25}, 2);
}
// 5.1.4 input down-mixed to a 5.1.2 base layer. Only the four height channels
// are folded into two, so one down-mixer is expected; the surround channels
// pass through unchanged.
TEST_F(DownMixingModuleTest, T4ToT2DownMixer) {
ConfigureInputChannel(kL5, {1});
ConfigureInputChannel(kR5, {2});
ConfigureInputChannel(kCentre, {3});
ConfigureInputChannel(kLs5, {4});
ConfigureInputChannel(kRs5, {5});
ConfigureInputChannel(kLtf4, {1000});
ConfigureInputChannel(kRtf4, {2000});
ConfigureInputChannel(kLtb4, {1000});
ConfigureInputChannel(kRtb4, {2000});
ConfigureInputChannel(kLFE, {10});
// Down-mix to 5.1.4 as the highest layer. The highest layer always matches
// the original input.
ConfigureOutputChannel({kLtb4, kRtb4}, {{1000, 2000}});
// Down-mix to 5.1.2 as the lowest layer.
ConfigureOutputChannel({kL5, kR5}, {{1, 2}});
ConfigureOutputChannel({kCentre}, {{3}});
ConfigureOutputChannel({kLs5, kRs5}, {{4, 5}});
// Ltf2 = Ltf4 + Ltb4 * gamma.
// Ltf2 = 1000 + 1000 * .707 = 1707; Rtf2 = 2000 + 2000 * .707 = 3414.
ConfigureOutputChannel({kLtf2, kRtf2}, {{1707, 3414}});
ConfigureOutputChannel({kLFE}, {{10}});
TestDownMixing({.gamma = .707}, 1);
}
// 7.1.0 input (no height channels) down-mixed to a 5.1.0 base layer with one
// down-mixer.
TEST_F(DownMixingModuleTest, S7ToS5DownMixerWithoutT0) {
ConfigureInputChannel(kL7, {1});
ConfigureInputChannel(kR7, {2});
ConfigureInputChannel(kCentre, {3});
ConfigureInputChannel(kLss7, {1000});
ConfigureInputChannel(kRss7, {2000});
ConfigureInputChannel(kLrs7, {3000});
ConfigureInputChannel(kRrs7, {4000});
ConfigureInputChannel(kLFE, {8});
// Down-mix to 7.1.0 as the highest layer. The highest layer always matches
// the original input.
ConfigureOutputChannel({kLrs7, kRrs7}, {{3000, 4000}});
// Down-mix to 5.1.0 as the lowest layer.
// The expected L5/R5 values equal the L7/R7 inputs.
ConfigureOutputChannel({kL5, kR5}, {{1, 2}});
ConfigureOutputChannel({kCentre}, {{3}});
// Ls5 = Lss7 * alpha + Lrs7 * beta.
// Ls5 = 1000 * 1 + 3000 * .866 = 3598; Rs5 = 2000 * 1 + 4000 * .866 = 5464.
ConfigureOutputChannel({kLs5, kRs5}, {{3598, 5464}});
ConfigureOutputChannel({kLFE}, {{8}});
TestDownMixing({.alpha = 1, .beta = .866}, 1);
}
// 7.1.2 input down-mixed to a 5.1.2 base layer with one down-mixer. The two
// height channels pass through unchanged.
TEST_F(DownMixingModuleTest, S7ToS5DownMixerWithT2) {
ConfigureInputChannel(kL7, {1});
ConfigureInputChannel(kR7, {2});
ConfigureInputChannel(kCentre, {3});
ConfigureInputChannel(kLss7, {1000});
ConfigureInputChannel(kRss7, {2000});
ConfigureInputChannel(kLrs7, {3000});
ConfigureInputChannel(kRrs7, {4000});
ConfigureInputChannel(kLtf2, {8});
ConfigureInputChannel(kRtf2, {9});
ConfigureInputChannel(kLFE, {10});
// Down-mix to 7.1.2 as the highest layer. The highest layer always matches
// the original input.
ConfigureOutputChannel({kLrs7, kRrs7}, {{3000, 4000}});
// Down-mix to 5.1.2 as the lowest layer.
// The expected L5/R5 values equal the L7/R7 inputs.
ConfigureOutputChannel({kL5, kR5}, {{1, 2}});
ConfigureOutputChannel({kCentre}, {{3}});
// Ls5 = Lss7 * alpha + Lrs7 * beta.
// Ls5 = 1000 * 1 + 3000 * .866 = 3598; Rs5 = 2000 * 1 + 4000 * .866 = 5464.
ConfigureOutputChannel({kLs5, kRs5}, {{3598, 5464}});
ConfigureOutputChannel({kLtf2, kRtf2}, {{8, 9}});
ConfigureOutputChannel({kLFE}, {{10}});
TestDownMixing({.alpha = 1, .beta = .866}, 1);
}
// 7.1.4 input down-mixed to a 5.1.4 base layer with one down-mixer. The four
// height channels pass through unchanged.
TEST_F(DownMixingModuleTest, S7ToS5DownMixerWithT4) {
ConfigureInputChannel(kL7, {1});
ConfigureInputChannel(kR7, {2});
ConfigureInputChannel(kCentre, {3});
ConfigureInputChannel(kLss7, {1000});
ConfigureInputChannel(kRss7, {2000});
ConfigureInputChannel(kLrs7, {3000});
ConfigureInputChannel(kRrs7, {4000});
ConfigureInputChannel(kLtf4, {8});
ConfigureInputChannel(kRtf4, {9});
ConfigureInputChannel(kLtb4, {10});
ConfigureInputChannel(kRtb4, {11});
ConfigureInputChannel(kLFE, {12});
// Down-mix to 7.1.4 as the highest layer. The highest layer always matches
// the original input.
ConfigureOutputChannel({kLrs7, kRrs7}, {{3000, 4000}});
// Down-mix to 5.1.4 as the lowest layer.
// The expected L5/R5 values equal the L7/R7 inputs.
ConfigureOutputChannel({kL5, kR5}, {{1, 2}});
ConfigureOutputChannel({kCentre}, {{3}});
// Ls5 = Lss7 * alpha + Lrs7 * beta.
// Ls5 = 1000 * 1 + 3000 * .866 = 3598; Rs5 = 2000 * 1 + 4000 * .866 = 5464.
ConfigureOutputChannel({kLs5, kRs5}, {{3598, 5464}});
ConfigureOutputChannel({kLtf4, kRtf4}, {{8, 9}});
ConfigureOutputChannel({kLtb4, kRtb4}, {{10, 11}});
ConfigureOutputChannel({kLFE}, {{12}});
TestDownMixing({.alpha = 1, .beta = .866}, 1);
}
// Verifies a six-layer configuration whose highest layer is 7.1.4 and whose
// lowest layer is mono. Each `ConfigureOutputChannel` call below lists the
// samples expected for one output substream, and the comments above it give
// the formulas used to derive those samples from the inputs.
TEST_F(DownMixingModuleTest, SixLayer7_1_4) {
// Input: one tick for every channel of the 7.1.4 layout.
ConfigureInputChannel(kL7, {1000});
ConfigureInputChannel(kR7, {2000});
ConfigureInputChannel(kCentre, {1000});
ConfigureInputChannel(kLss7, {1000});
ConfigureInputChannel(kRss7, {2000});
ConfigureInputChannel(kLrs7, {3000});
ConfigureInputChannel(kRrs7, {4000});
ConfigureInputChannel(kLtf4, {1000});
ConfigureInputChannel(kRtf4, {2000});
ConfigureInputChannel(kLtb4, {1000});
ConfigureInputChannel(kRtb4, {2000});
ConfigureInputChannel(kLFE, {12});
// There are different paths to have six-layers, choose 7.1.2, 5.1.2, 3.1.2,
// stereo, mono to avoid dropping the height channels for as many steps as
// possible.
// Down-mix to 7.1.4 as the sixth layer.
ConfigureOutputChannel({kLtb4, kRtb4}, {{1000, 2000}});
// Down-mix to 7.1.2 as the fifth layer.
ConfigureOutputChannel({kLrs7, kRrs7}, {{3000, 4000}});
// Down-mix to 5.1.2 as the fourth layer.
// Ls5 = Lss7 * alpha + Lrs7 * beta.
//   Ls5 = 1000 + 3000 * .866 = 3598.
//   Rs5 = 2000 + 4000 * .866 = 5464.
ConfigureOutputChannel({kLs5, kRs5}, {{3598, 5464}});
// Down-mix to 3.1.2 as the third layer.
ConfigureOutputChannel({kCentre}, {{1000}});
// Ltf2 = Ltf4 + Ltb4 * gamma.
// Ltf3 = Ltf2 + Ls5 * w * delta.
//   Ltf2 = 1000 + 1000 * .866 = 1866; Ltf3 ~= 1866 + 3598 * .2165 ~= 2644.97.
//   Rtf2 = 2000 + 2000 * .866 = 3732; Rtf3 ~= 3732 + 5464 * .2165 ~= 4914.96.
// The expected samples below are the integer parts of those values.
ConfigureOutputChannel({kLtf3, kRtf3}, {{2644, 4914}});
ConfigureOutputChannel({kLFE}, {{12}});
// Down-mix to stereo as the second layer.
// L5 = L7.
// L3 = L5 + Ls5 * delta.
// L2 = L3 + (C - 3 dB).
ConfigureOutputChannel({kL2}, {{4822}});
// Down-mix to mono as the first layer.
// R5 = R7.
// R3 = R5 + Rs5 * delta.
// R2 = R3 + (C - 3 dB).
// M = (L2 - 6 dB) + (R2 - 6 dB).
ConfigureOutputChannel({kMono}, {{6130}});
// NOTE(review): the trailing `6` is presumably the expected number of
// down-mixers; confirm against `TestDownMixing`.
TestDownMixing(
{.alpha = 1, .beta = .866, .gamma = .866, .delta = .866, .w = 0.25}, 6);
}
// Fixture for tests which feed lossless substreams to the demixing module and
// compare the demixed output against hand-computed expectations.
//
// Typical usage: call `ConfigureLosslessAudioFrameAndDecodedAudioFrame` once
// per substream, `ConfiguredExpectedDemixingChannelFrame` once per channel the
// demixer is expected to produce, then `TestLosslessDemixing`.
class DemixingModuleTest : public DemixingModuleTestBase,
                           public ::testing::Test {
 public:
  // Registers one substream carrying `labels` and appends a matching pair of
  // (original, decoded) audio frames holding `pcm_samples`.
  //
  // `pcm_samples` is arranged in (time, channel) axes; each tick is indexed by
  // the position of the channel within `labels`. The samples of every input
  // label are also recorded as expected output, because the demixing process
  // never changes data for the input labels.
  void ConfigureLosslessAudioFrameAndDecodedAudioFrame(
      const std::list<ChannelLabel::Label>& labels,
      const std::vector<std::vector<int32_t>>& pcm_samples,
      DownMixingParams down_mixing_params = {
          .alpha = 1, .beta = .866, .gamma = .866, .delta = .866, .w = 0.25}) {
    // The substream ID itself does not matter. Generate a unique one.
    const DecodedUleb128 substream_id =
        static_cast<DecodedUleb128>(substream_id_to_labels_.size());
    substream_id_to_labels_[substream_id] = labels;

    // Configure a pair of audio frames and decoded audio frames. They share a
    // lot of the same information for a lossless codec.
    audio_frames_.push_back(AudioFrameWithData{
        .obu = AudioFrameObu(ObuHeader(), substream_id, {}),
        .start_timestamp = kStartTimestamp,
        .end_timestamp = kEndTimestamp,
        .pcm_samples = pcm_samples,
        .down_mixing_params = down_mixing_params,
    });
    decoded_audio_frames_.push_back(
        DecodedAudioFrame{.substream_id = substream_id,
                          .start_timestamp = kStartTimestamp,
                          .end_timestamp = kEndTimestamp,
                          .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                          .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
                          .decoded_samples = pcm_samples,
                          .down_mixing_params = down_mixing_params});

    auto& expected_label_to_samples =
        expected_id_to_labeled_decoded_frame_[kAudioElementId].label_to_samples;
    // `pcm_samples` is arranged in (time, channel) axes. Arrange the samples
    // associated with each channel by time. The demixing process never changes
    // data for the input labels.
    int channel = 0;
    for (const ChannelLabel::Label label : labels) {
      auto& samples_for_channel = expected_label_to_samples[label];
      samples_for_channel.reserve(pcm_samples.size());
      // Bind each tick by reference; copying the inner vector per tick is
      // unnecessary.
      for (const auto& tick : pcm_samples) {
        samples_for_channel.push_back(
            Int32ToNormalizedFloatingPoint<InternalSampleType>(tick[channel]));
      }
      ++channel;
    }
  }

  // Records `expected_demixed_samples`, converted to the internal sample type,
  // as the expected output for `label`. Any expectation previously stored for
  // `label` is overwritten.
  void ConfiguredExpectedDemixingChannelFrame(
      ChannelLabel::Label label,
      const std::vector<int32_t>& expected_demixed_samples) {
    std::vector<InternalSampleType> expected_demixed_samples_as_internal_type;
    expected_demixed_samples_as_internal_type.reserve(
        expected_demixed_samples.size());
    for (int32_t sample : expected_demixed_samples) {
      expected_demixed_samples_as_internal_type.push_back(
          Int32ToNormalizedFloatingPoint<InternalSampleType>(sample));
    }

    // Configure the expected demixed channels. Typically the input `label`
    // should have a "D_" prefix.
    expected_id_to_labeled_decoded_frame_[kAudioElementId]
        .label_to_samples[label] = expected_demixed_samples_as_internal_type;
  }

  // Creates the demixing module (checking `expected_number_of_down_mixers`),
  // demixes the configured decoded frames, and compares every produced label
  // against the configured expectations. Finally checks that demixing the
  // original frames yields the same labelled samples.
  void TestLosslessDemixing(int expected_number_of_down_mixers) {
    TestCreateDemixingModule(expected_number_of_down_mixers);

    const auto id_to_labeled_decoded_frame =
        demixing_module_->DemixDecodedAudioSamples(decoded_audio_frames_);
    ASSERT_THAT(id_to_labeled_decoded_frame, IsOk());
    ASSERT_TRUE(id_to_labeled_decoded_frame->contains(kAudioElementId));

    // Check that the demixed samples have the correct values.
    const auto& actual_label_to_samples =
        id_to_labeled_decoded_frame->at(kAudioElementId).label_to_samples;
    const auto& expected_label_to_samples =
        expected_id_to_labeled_decoded_frame_[kAudioElementId].label_to_samples;
    EXPECT_EQ(actual_label_to_samples.size(), expected_label_to_samples.size());

    // Use `DoubleNear` with a tolerance because floating-point arithmetic
    // introduces errors larger than allowed by `DoubleEq`.
    constexpr double kErrorTolerance = 1e-14;
    for (const auto& [label, samples] : actual_label_to_samples) {
      // Fail the test cleanly, instead of looking up a missing key, when the
      // demixer produced a label that no expectation was configured for.
      const auto expected_iter = expected_label_to_samples.find(label);
      ASSERT_TRUE(expected_iter != expected_label_to_samples.end())
          << "Demixing produced a label without a configured expectation.";
      EXPECT_THAT(samples, Pointwise(DoubleNear(kErrorTolerance),
                                     expected_iter->second));
    }

    // Also, since this is lossless, we expect demixing the original samples
    // should give the same result.
    const auto id_to_labeled_frame =
        demixing_module_->DemixOriginalAudioSamples(audio_frames_);
    ASSERT_THAT(id_to_labeled_frame, IsOk());
    ASSERT_TRUE(id_to_labeled_frame->contains(kAudioElementId));
    EXPECT_EQ(id_to_labeled_frame->at(kAudioElementId).label_to_samples,
              actual_label_to_samples);
  }

 protected:
  // Original (pre-encoding) frames; one per configured substream.
  std::list<AudioFrameWithData> audio_frames_;
  // Decoded frames mirroring `audio_frames_`; one per configured substream.
  std::list<DecodedAudioFrame> decoded_audio_frames_;
  // Expected labelled samples, keyed by audio element ID.
  IdLabeledFrameMap expected_id_to_labeled_decoded_frame_;
};
// A module created without any audio elements should demix an empty list of
// original audio frames into an empty result.
TEST(DemixingModule, DemixingOriginalAudioSamplesSucceedsWithEmptyInputs) {
  const auto module_or =
      DemixingModule::CreateForDownMixingAndReconstruction({});
  ASSERT_THAT(module_or, IsOk());

  const auto demixed_frames = module_or->DemixOriginalAudioSamples({});

  EXPECT_THAT(demixed_frames, IsOkAndHolds(IsEmpty()));
}
// A module created without any audio elements should demix an empty list of
// decoded audio frames into an empty result.
TEST(DemixingModule, DemixingDecodedAudioSamplesSucceedsWithEmptyInputs) {
  const auto module_or =
      DemixingModule::CreateForDownMixingAndReconstruction({});
  ASSERT_THAT(module_or, IsOk());

  const auto demixed_frames = module_or->DemixDecodedAudioSamples({});

  EXPECT_THAT(demixed_frames, IsOkAndHolds(IsEmpty()));
}
// Four single-channel ambisonics substreams are passed through as-is; the
// module is expected to be created with zero down-mixers.
TEST_F(DemixingModuleTest, AmbisonicsHasNoDemixers) {
  ConfigureAudioFrameMetadata({kA0, kA1, kA2, kA3});

  // One substream per ambisonics channel, each holding a single tick. The
  // substreams are registered in the order A0, A1, A2, A3.
  for (const ChannelLabel::Label ambisonics_label : {kA0, kA1, kA2, kA3}) {
    ConfigureLosslessAudioFrameAndDecodedAudioFrame({ambisonics_label}, {{1}});
  }

  TestLosslessDemixing(0);
}
// Verifies that a mono base layer plus the L2 substream of the stereo layer is
// demixed into the expected `kDemixedR2` samples over two ticks.
TEST_F(DemixingModuleTest, S1ToS2Demixer) {
// The highest layer is stereo.
ConfigureAudioFrameMetadata({kL2, kR2});
// Mono is the lowest layer.
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kMono}, {{750}, {1500}});
// Stereo is the next layer.
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL2}, {{1000}, {2000}});
// Demixing recovers kDemixedR2
// D_R2 = M - (L2 - 6 dB) + 6 dB.
// Taking 6 dB as a factor of two:
//   Tick 0: (750 - 1000 / 2) * 2 = 500.
//   Tick 1: (1500 - 2000 / 2) * 2 = 1000.
ConfiguredExpectedDemixingChannelFrame(kDemixedR2, {500, 1000});
// The expected number of down-mixers is 1.
TestLosslessDemixing(1);
}
// Demixing the original audio frames must report an error when one of the
// frames carries no PCM samples.
TEST_F(DemixingModuleTest,
       DemixOriginalAudioSamplesReturnsErrorIfAudioFrameIsMissingPcmSamples) {
  // Same mono + stereo configuration as a valid two-layer stream.
  ConfigureAudioFrameMetadata({kL2, kR2});
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kMono}, {{750}, {1500}});
  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL2}, {{1000}, {2000}});
  TestCreateDemixingModule(1);

  // Destroy the raw samples of the most recently configured frame.
  audio_frames_.back().pcm_samples = std::nullopt;

  EXPECT_THAT(demixing_module_->DemixOriginalAudioSamples(audio_frames_),
              Not(IsOk()));
}
// Verifies that a stereo base layer plus the centre channel of the 3.1.2 layer
// is demixed into the expected `kDemixedL3` / `kDemixedR3` samples over two
// ticks.
TEST_F(DemixingModuleTest, S2ToS3Demixer) {
// The highest layer is 3.1.2.
ConfigureAudioFrameMetadata({kL3, kR3, kCentre, kLtf3, kRtf3});
// Stereo is the lowest layer.
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL2, kR2},
{{70, 70}, {1700, 1700}});
// 3.1.2 as the next layer.
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kCentre}, {{2000}, {1000}});
// The top-front samples do not appear in the L3/R3 formulas below.
ConfigureLosslessAudioFrameAndDecodedAudioFrame(
{kLtf3, kRtf3}, {{99999, 99998}, {99999, 99998}});
// L3/R3 get demixed from the lower layers.
// L3 = L2 - (C - 3 dB).
// R3 = R2 - (C - 3 dB).
// Taking -3 dB as a factor of ~0.707:
//   Tick 0: 70 - 2000 * 0.707 = -1344.
//   Tick 1: 1700 - 1000 * 0.707 = 993.
ConfiguredExpectedDemixingChannelFrame(kDemixedL3, {-1344, 993});
ConfiguredExpectedDemixingChannelFrame(kDemixedR3, {-1344, 993});
// The expected number of down-mixers is 1.
TestLosslessDemixing(1);
}
// Verifies that a 3.1.2 base layer plus the L5/R5 substream of the 5.1.2 layer
// is demixed into the expected surround (`kDemixedLs5` / `kDemixedRs5`) and
// top-front (`kDemixedLtf2` / `kDemixedRtf2`) samples for a single tick.
TEST_F(DemixingModuleTest, S3ToS5AndTf2ToT2Demixers) {
// Adding a (valid) layer on top of 3.1.2 will always result in both S3ToS5
// and Tf2ToT2 demixers.
// The highest layer is 5.1.2.
ConfigureAudioFrameMetadata({kL5, kR5, kCentre, kLtf2, kRtf2});
// Only `delta` and `w` are set explicitly; they are the only parameters used
// by the formulas below.
const DownMixingParams kDownMixingParams = {.delta = .866, .w = 0.25};
// 3.1.2 is the lowest layer.
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL3, kR3}, {{18660, 28660}},
kDownMixingParams);
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kCentre}, {{100}},
kDownMixingParams);
ConfigureLosslessAudioFrameAndDecodedAudioFrame(
{kLtf3, kRtf3}, {{1000, 2000}}, kDownMixingParams);
// 5.1.2 as the next layer.
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL5, kR5}, {{10000, 20000}},
kDownMixingParams);
// S3ToS5: Ls5/Rs5 get demixed from the lower layers.
// Ls5 = (1 / delta) * (L3 - L5).
// Rs5 = (1 / delta) * (R3 - R5).
//   Ls5 = (18660 - 10000) / .866 = 10000.
//   Rs5 = (28660 - 20000) / .866 = 10000.
ConfiguredExpectedDemixingChannelFrame(kDemixedLs5, {10000});
ConfiguredExpectedDemixingChannelFrame(kDemixedRs5, {10000});
// Tf2ToT2: Ltf2/Rtf2 get demixed from the lower layers.
// Ltf2 = Ltf3 - w * (L3 - L5).
// Rtf2 = Rtf3 - w * (R3 - R5).
//   Ltf2 = 1000 - 0.25 * 8660 = -1165.
//   Rtf2 = 2000 - 0.25 * 8660 = -165.
ConfiguredExpectedDemixingChannelFrame(kDemixedLtf2, {-1165});
ConfiguredExpectedDemixingChannelFrame(kDemixedRtf2, {-165});
// The expected number of down-mixers is 2 (S3ToS5 and Tf2ToT2).
TestLosslessDemixing(2);
}
// Verifies that a 5.1.0 base layer plus the Lss7/Rss7 substream of the 7.1.0
// layer is demixed into the expected front (`kDemixedL7` / `kDemixedR7`) and
// rear-surround (`kDemixedLrs7` / `kDemixedRrs7`) samples for a single tick.
TEST_F(DemixingModuleTest, S5ToS7Demixer) {
// The highest layer is 7.1.0.
ConfigureAudioFrameMetadata({kL7, kR7, kCentre, kLss7, kRss7, kLrs7, kRrs7});
// Only `alpha` and `beta` are set explicitly; they are the only parameters
// used by the formulas below.
const DownMixingParams kDownMixingParams = {.alpha = 0.866, .beta = .866};
// 5.1.0 is the lowest layer.
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL5, kR5}, {{100, 100}},
kDownMixingParams);
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kLs5, kRs5}, {{7794, 7794}},
kDownMixingParams);
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kCentre}, {{100}},
kDownMixingParams);
// 7.1.0 as the next layer.
ConfigureLosslessAudioFrameAndDecodedAudioFrame(
{kLss7, kRss7}, {{1000, 2000}}, kDownMixingParams);
// L7/R7 get demixed from the lower layers.
// L7 = L5.
// R7 = R5.
ConfiguredExpectedDemixingChannelFrame(kDemixedL7, {100});
ConfiguredExpectedDemixingChannelFrame(kDemixedR7, {100});
// Lrs7/Rrs7 get demixed from the lower layers.
// Lrs7 = (1 / beta) * (Ls5 - alpha * Lss7).
// Rrs7 = (1 / beta) * (Rs5 - alpha * Rss7).
//   Lrs7 = (7794 - .866 * 1000) / .866 = 8000.
//   Rrs7 = (7794 - .866 * 2000) / .866 = 7000.
ConfiguredExpectedDemixingChannelFrame(kDemixedLrs7, {8000});
ConfiguredExpectedDemixingChannelFrame(kDemixedRrs7, {7000});
// The expected number of down-mixers is 1.
TestLosslessDemixing(1);
}
// Verifies that a 5.1.2 base layer plus the Ltf4/Rtf4 substream of the 5.1.4
// layer is demixed into the expected top-back (`kDemixedLtb4` /
// `kDemixedRtb4`) samples for a single tick.
TEST_F(DemixingModuleTest, T2ToT4Demixer) {
// The highest layer is 5.1.4.
ConfigureAudioFrameMetadata({kL5, kR5, kCentre, kLtf4, kRtf4});
// Only `gamma` is set explicitly; it is the only parameter used by the
// formulas below.
const DownMixingParams kDownMixingParams = {.gamma = .866};
// 5.1.2 is the lowest layer.
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL5, kR5}, {{100, 100}},
kDownMixingParams);
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kLs5, kRs5}, {{100, 100}},
kDownMixingParams);
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kCentre}, {{100}},
kDownMixingParams);
ConfigureLosslessAudioFrameAndDecodedAudioFrame(
{kLtf2, kRtf2}, {{8660, 17320}}, kDownMixingParams);
// 5.1.4 as the next layer.
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kLtf4, kRtf4}, {{866, 1732}},
kDownMixingParams);
// Ltb4/Rtb4 get demixed from the lower layers.
// Ltb4 = (1 / gamma) * (Ltf2 - Ltf4).
// Rtb4 = (1 / gamma) * (Rtf2 - Rtf4).
//   Ltb4 = (8660 - 866) / .866 = 9000.
//   Rtb4 = (17320 - 1732) / .866 = 18000.
ConfiguredExpectedDemixingChannelFrame(kDemixedLtb4, {9000});
ConfiguredExpectedDemixingChannelFrame(kDemixedRtb4, {18000});
// The expected number of down-mixers is 1.
TestLosslessDemixing(1);
}
} // namespace
} // namespace iamf_tools