blob: dc7d9e45c18df0b5cff1b5b706beef5b4fa57b68 [file] [log] [blame]
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sampler.h"
#include <cmath>
#include <cstdint>
#include <type_traits>
#include <vector>
#include "compactor_stack.h"
#include "gmock/gmock.h"
namespace dist_proc {
namespace aggregation {
namespace internal {
namespace {
using ::testing::_;
using ::testing::AnyOf;
using ::testing::Contains;
using ::testing::Eq;
using ::testing::Optional;
using ::testing::Pair;
class KllQuantileSamplerTest : public ::testing::Test {
protected:
KllQuantileSamplerTest() : random_() {
}
MTRandomGenerator random_;
};
TEST_F(KllQuantileSamplerTest, Add100Items) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
sampler.Add(4);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(Eq(4), Eq(1))));
sampler.Add(10);
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
for (int i = 0; i < 100; i++) {
sampler.Reset();
sampler.DoubleCapacity();
sampler.Add(4);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(Eq(4), Eq(1))));
sampler.Add(10);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(AnyOf(Eq(4), Eq(10)), Eq(2))));
sampler.Add(14);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(_, Eq(3))));
sampler.Add(24);
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
}
}
TEST_F(KllQuantileSamplerTest, ZeroItems) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
}
TEST_F(KllQuantileSamplerTest, OneItem) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.Add(4);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(Eq(4), Eq(1))));
}
TEST_F(KllQuantileSamplerTest, TwoInts) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
EXPECT_EQ(sampler.capacity(), 2);
sampler.Add(1);
sampler.Add(2);
const std::vector<int64_t>& compactor =
compactor_stack.compactors()[sampler.num_replaced_levels()];
EXPECT_THAT(compactor, AnyOf(Contains(Eq(1)), Contains(Eq(2))));
EXPECT_EQ(compactor_stack.num_stored_items(), 1);
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
}
TEST_F(KllQuantileSamplerTest, TwoIntsCapacityFour) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.DoubleCapacity();
EXPECT_EQ(sampler.capacity(), 4);
EXPECT_EQ(sampler.num_replaced_levels(), 2);
sampler.Add(1);
sampler.Add(2);
EXPECT_EQ(compactor_stack.num_stored_items(), 0);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(AnyOf(Eq(1), Eq(2)), Eq(2))));
}
TEST_F(KllQuantileSamplerTest, FourInts) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
EXPECT_EQ(sampler.capacity(), 2);
sampler.Add(1);
sampler.Add(2);
sampler.Add(3);
sampler.Add(4);
const std::vector<int64_t>& compactor =
compactor_stack.compactors()[sampler.num_replaced_levels()];
EXPECT_THAT(compactor, AnyOf(Contains(Eq(1)), Contains(Eq(2))));
EXPECT_THAT(compactor, AnyOf(Contains(Eq(3)), Contains(Eq(4))));
EXPECT_EQ(compactor_stack.num_stored_items(), 2);
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
}
TEST_F(KllQuantileSamplerTest, ThreeInts) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
EXPECT_EQ(sampler.capacity(), 2);
sampler.Add(1);
sampler.Add(2);
sampler.Add(3);
const std::vector<int64_t>& compactor =
compactor_stack.compactors()[sampler.num_replaced_levels()];
EXPECT_THAT(compactor, AnyOf(Contains(Eq(1)), Contains(Eq(2))));
EXPECT_EQ(compactor_stack.num_stored_items(), 1);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(Eq(3), Eq(1))));
}
TEST_F(KllQuantileSamplerTest, AddWithWeightZero) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.AddWithWeight(5, 0);
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
}
TEST_F(KllQuantileSamplerTest, AddWithWeightOne) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.AddWithWeight(5, 1);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(Eq(5), Eq(1))));
}
TEST_F(KllQuantileSamplerTest, AddWithWeightTwo) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.AddWithWeight(1, 2);
const std::vector<int64_t>& compactor =
compactor_stack.compactors()[sampler.num_replaced_levels()];
EXPECT_THAT(compactor, Contains(Eq(1)));
EXPECT_EQ(compactor_stack.num_stored_items(), 1);
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
}
TEST_F(KllQuantileSamplerTest, AddWithWeightThree) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.AddWithWeight(3, 3);
const std::vector<int64_t>& compactor =
compactor_stack.compactors()[sampler.num_replaced_levels()];
EXPECT_THAT(compactor, Contains(Eq(3)));
EXPECT_EQ(compactor_stack.num_stored_items(), 1);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(Eq(3), Eq(1))));
}
TEST_F(KllQuantileSamplerTest, WeightThreeNonEmptySampler) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.Add(1);
sampler.DoubleCapacity();
sampler.DoubleCapacity();
sampler.AddWithWeight(2, 3);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(AnyOf(Eq(1), Eq(2)), Eq(4))));
}
TEST_F(KllQuantileSamplerTest, WeightFiveNonEmptySampler) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.Add(1);
sampler.DoubleCapacity();
sampler.Add(2);
sampler.AddWithWeight(3, 5);
const std::vector<int64_t>& compactor =
compactor_stack.compactors()[sampler.num_replaced_levels()];
EXPECT_THAT(compactor, AnyOf(Contains(Eq(1)), Contains(Eq(2)), Contains(Eq(3))));
EXPECT_EQ(compactor_stack.num_stored_items(), 1);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(Eq(3), Eq(3))));
}
TEST_F(KllQuantileSamplerTest, DoubleCapacityBetweenAdds) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.Add(1);
sampler.DoubleCapacity();
EXPECT_EQ(sampler.capacity(), 4);
EXPECT_EQ(sampler.num_replaced_levels(), 2);
sampler.Add(2);
sampler.Add(3);
EXPECT_THAT(sampler.sampled_item_and_weight(),
Optional(Pair(AnyOf(Eq((1)), Eq((2)), Eq((3))), Eq(3))));
sampler.DoubleCapacity();
EXPECT_EQ(sampler.capacity(), 8);
EXPECT_EQ(sampler.num_replaced_levels(), 3);
sampler.Add(4);
sampler.Add(5);
EXPECT_THAT(sampler.sampled_item_and_weight(),
Optional(Pair(AnyOf(Eq((1)), Eq((2)), Eq((3)), Eq((4)), Eq((5))), Eq(5))));
}
TEST_F(KllQuantileSamplerTest, ResetZeroItems) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.Reset();
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
}
TEST_F(KllQuantileSamplerTest, ResetBetweenAddingOneItem) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.Add(1);
sampler.Reset();
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
sampler.Add(2);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(Eq(2), Eq(1))));
}
TEST_F(KllQuantileSamplerTest, ResetBetweenAddingTenItems) {
CompactorStack compactor_stack(1000, 100000, &random_);
KllSampler sampler(&compactor_stack);
sampler.DoubleCapacity();
EXPECT_EQ(sampler.capacity(), 4);
EXPECT_EQ(sampler.num_replaced_levels(), 2);
for (int i = 0; i < 10; i++) {
sampler.Add(i);
}
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(_, Eq(2))));
sampler.Reset();
EXPECT_THAT(sampler.sampled_item_and_weight(), Eq(std::nullopt));
EXPECT_EQ(sampler.capacity(), 2);
EXPECT_EQ(sampler.num_replaced_levels(), 1);
sampler.DoubleCapacity();
EXPECT_EQ(sampler.capacity(), 4);
EXPECT_EQ(sampler.num_replaced_levels(), 2);
for (int i = 0; i < 10; i++) {
sampler.Add(i);
}
EXPECT_EQ(compactor_stack.num_stored_items(), 4);
EXPECT_THAT(sampler.sampled_item_and_weight(), Optional(Pair(AnyOf(Eq(8), Eq(9)), Eq(2))));
}
class SamplerItemSeenTest : public ::testing::TestWithParam<int> {};
// Whenever making a larger change to the sampler remove the fixed seed for
// this test. You should see around 2% failure rate (among all num_item sizes)
// in this test when running it with a sufficiently large --num_test_runs.
//
// The test checks whether we generate all items from a set of [0, n)
// items in 2 * n * log(n) runs. The expected value for number of repetitions
// required is n * H_n (see coupon collector problem
// https://en.wikipedia.org/wiki/Coupon_collector%27s_problem).
//
// For a given n the probability that we see all items is:
// Stirling_2nd(runs, n) * n! / n^runs (n^runs is the total number
// of possible outcomes, Stirling 2nd is the number of possibilities to
// partition run elements into n non-empty buckets regardless of order and n!
// is needed to introduce the ordering).)
TEST_P(SamplerItemSeenTest, AllItemsSeen) {
const int num_items = GetParam();
auto random = MTRandomGenerator(10);
bool seen_items[1000] = {};
int num_repetitions = 2 * num_items * std::ceil(std::log(1 + num_items));
for (int i = 0; i < num_repetitions; i++) {
CompactorStack compactor_stack(1000, 100000, &random);
KllSampler sampler(&compactor_stack);
while (sampler.capacity() <= num_items) {
sampler.DoubleCapacity();
}
for (int j = 0; j < num_items; j++) {
sampler.Add(j);
}
seen_items[sampler.sampled_item_and_weight().value().first] = true;
EXPECT_EQ(sampler.sampled_item_and_weight().value().second, num_items);
}
for (int i = 0; i < num_items; i++) {
EXPECT_TRUE(seen_items[i]);
}
}
INSTANTIATE_TEST_SUITE_P(SamplerEveryItemSeenTestCases, SamplerItemSeenTest,
testing::Range(1, 1000, 10));
} // namespace
} // namespace internal
} // namespace aggregation
} // namespace dist_proc