import operator
import sys
from abc import abstractmethod
from copy import copy
from enum import Enum
from functools import total_ordering
from itertools import chain
from typing import (
Any,
Callable,
Dict,
FrozenSet,
Iterable,
Iterator,
List,
Optional,
Tuple,
)
from tools.testing.test_run import TestRun, TestRuns
# Note: Keep the implementation of Relevance private to this file so
# that it's easy to change in the future as we discover what's needed
@total_ordering
class Relevance(Enum):
    HIGH = 4
    PROBABLE = 3
    UNRANKED = 2
    UNLIKELY = 1
    NONE = 0
def __eq__(self, other: object) -> bool:
if not isinstance(other, Relevance):
return False
return self.value == other.value
def __lt__(self, other: object) -> bool:
if not isinstance(other, Relevance):
raise NotImplementedError(f"Can't compare {self} to {other}")
return self.value < other.value
@staticmethod
def priority_traversal() -> Iterator["Relevance"]:
        yield Relevance.HIGH
        yield Relevance.PROBABLE
        yield Relevance.UNRANKED
        yield Relevance.UNLIKELY
        yield Relevance.NONE
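
# Illustrative sketch of the ordering above (not executed here):
#   Relevance.HIGH > Relevance.PROBABLE          -> True
#   next(Relevance.priority_traversal())         -> Relevance.HIGH
# priority_traversal() walks the relevance groups from most to least relevant, which is
# also the order in which TestPrioritizations ultimately emits tests for sharding.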
METRIC_RELEVANCE_GROUP = "relevance_group"
METRIC_ORDER_WITHIN_RELEVANCE_GROUP = "order_within_relevance_group"
METRIC_NUM_TESTS_IN_RELEVANCE_GROUP = "num_tests_in_relevance_group"
METRIC_ORDER_OVERALL = "order_overall"
METRIC_HEURISTIC_NAME = "heuristic_name"
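
# Keys used in the per-test metrics dictionaries built by
# TestPrioritizations.get_priority_info_for_test() and AggregatedHeuristics.get_test_stats()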
class TestPrioritizations:
"""
    Describes whether a heuristic considers tests to be relevant or not.

    The relevance groups are disjoint: a test can only belong to one group, and group
    membership is declared at initialization time.
    A group's list can be empty if the heuristic doesn't consider any tests to belong to it.

    Important: Lists of tests must always be returned in a deterministic order,
    otherwise the test sharding logic breaks.
"""
_test_priorities: List[List[TestRun]]
_original_tests: FrozenSet[str]
def __init__(
self,
tests_being_ranked: Iterable[str], # The tests that are being prioritized.
high_relevance: Optional[List[str]] = None,
probable_relevance: Optional[List[str]] = None,
unranked_relevance: Optional[List[str]] = None,
unlikely_relevance: Optional[List[str]] = None,
no_relevance: Optional[List[str]] = None,
) -> None:
self._original_tests = frozenset(tests_being_ranked)
        self._test_priorities = [[] for _ in Relevance]  # One bucket per relevance level, indexed by Relevance.value
# Setup the initial priorities
self._test_priorities[Relevance.UNRANKED.value] = [
TestRun(test) for test in tests_being_ranked
]
for test in high_relevance or []:
self.set_test_relevance(TestRun(test), Relevance.HIGH)
for test in probable_relevance or []:
self.set_test_relevance(TestRun(test), Relevance.PROBABLE)
for test in unranked_relevance or []:
self.set_test_relevance(TestRun(test), Relevance.UNRANKED)
for test in unlikely_relevance or []:
self.set_test_relevance(TestRun(test), Relevance.UNLIKELY)
for test in no_relevance or []:
self.set_test_relevance(TestRun(test), Relevance.NONE)
self.validate_test_priorities()
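
    # Construction sketch (illustrative; the test file names are made up):
    #   >>> tp = TestPrioritizations(
    #   ...     tests_being_ranked=["test_a", "test_b", "test_c"],
    #   ...     high_relevance=["test_b"],
    #   ... )
    #   >>> [t.test_file for t in tp.get_high_relevance_tests()]
    #   ['test_b']
    # Tests that aren't explicitly ranked ("test_a" and "test_c") stay in the UNRANKED group.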
def _traverse_priorities(self) -> Iterator[Tuple[Relevance, List[TestRun]]]:
for relevance in Relevance.priority_traversal():
yield (relevance, self._test_priorities[relevance.value])
def get_pointer_to_test(self, test_run: TestRun) -> Iterator[Tuple[Relevance, int]]:
"""
        Yields a (relevance, index) pointer to every stored test run that overlaps the given test_run.

        self._test_priorities must NOT have items added or removed from it while iterating over the
        results of this function.
        """
        # Find every existing test run that overlaps the given TestRun, along with its relevance.
found_match = False
for relevance, tests in self._traverse_priorities():
for idx, existing_test_run in enumerate(tests):
# Does the existing test run contain any of the test we're looking for?
shared_test = existing_test_run & test_run
if not shared_test.is_empty():
found_match = True
yield (Relevance(relevance), idx)
if not found_match:
raise ValueError(f"Test {test_run} not found in any relevance group")
def _update_test_relevance(
self,
test_run: TestRun,
new_relevance: Relevance,
acceptable_relevance_fn: Callable[[Relevance, Relevance], bool],
) -> None:
"""
        Updates the test run's relevance to the new relevance.

        If the tests in the test run were previously split across multiple test runs, every chunk whose
        current relevance is not acceptable (per acceptable_relevance_fn) is merged into a single new test
        run at the new relevance, appended to the end of that relevance group. Chunks that are already at
        an acceptable relevance are left alone and keep their original place in their relevance group.
"""
if test_run.test_file not in self._original_tests:
return # We don't need this test
# The tests covered by test_run could potentially have been split up into
# multiple test runs, each at a different relevance. Let's make sure to bring
# all of them up to the minimum relevance
upgraded_tests = TestRun.empty()
tests_to_remove = []
for curr_relevance, test_run_idx in self.get_pointer_to_test(test_run):
if acceptable_relevance_fn(curr_relevance, new_relevance):
# This test is already at the desired relevance
continue # no changes needed
test_run_to_rerank = self._test_priorities[curr_relevance.value][
test_run_idx
]
# Remove the requested tests from their current relevance group, to be added to the new one
remaining_tests = test_run_to_rerank - test_run
upgraded_tests |= test_run_to_rerank & test_run
# Remove the tests that are being upgraded
if remaining_tests:
self._test_priorities[curr_relevance.value][
test_run_idx
] = remaining_tests
else:
# List traversal prevents us from deleting these immediately, so note them for later
tests_to_remove.append((curr_relevance, test_run_idx))
for relevance, test_idx in tests_to_remove:
del self._test_priorities[relevance.value][test_idx]
# And add them to the desired relevance group
if upgraded_tests:
self._test_priorities[new_relevance.value].append(upgraded_tests)
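
    # Merge-behavior sketch (illustrative): if a file's tests were previously split into two
    # UNRANKED chunks and the whole file is then raised to HIGH, both chunks are pulled out of
    # the UNRANKED group, merged back into a single TestRun, and appended to the end of HIGH.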
def set_test_relevance(self, test_run: TestRun, new_relevance: Relevance) -> None:
return self._update_test_relevance(test_run, new_relevance, operator.eq)
def raise_test_relevance(self, test_run: TestRun, new_relevance: Relevance) -> None:
return self._update_test_relevance(test_run, new_relevance, operator.ge)
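
    # The difference between the two: set_test_relevance moves matching chunks unless they are
    # already at exactly new_relevance, while raise_test_relevance leaves chunks alone if they
    # are already at new_relevance or higher.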
def validate_test_priorities(self) -> None:
        # Merge all TestRuns that refer to the same test file so we can verify that
        # each file's include/exclude chunks add back up to the full file
        all_tests = self.get_all_tests()
        files: Dict[str, TestRun] = {}
for test in all_tests:
if test.test_file not in files:
files[test.test_file] = copy(test)
else:
files[test.test_file] |= test
for test in files.values():
assert (
test.is_full_file()
), f"All includes should have been excluded elsewhere, and vice versa. Test run `{test}` violates that"
# Ensure that the set of tests in the TestPrioritizations is identical to the set of tests passed in
assert self._original_tests == set(
files.keys()
), "The set of tests in the TestPrioritizations must be identical to the set of tests passed in"
def integrate_priorities(self, other: "TestPrioritizations") -> None:
"""
Integrates priorities from another TestPrioritizations object.
        The final result keeps all the tests from `self` and rearranges them based on the priorities from `other`.
        Currently it will only ever raise the priority of a test, never lower it.
"""
assert (
self._original_tests == other._original_tests
), "Both tests should stem from the same original test list"
for relevance, _ in other._traverse_priorities():
if relevance > Relevance.UNRANKED:
for test in other._test_priorities[relevance.value]:
self.raise_test_relevance(test, relevance)
self.validate_test_priorities()
return
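
    # Example (illustrative): if `other` ranks "test_b" as HIGH while `self` still has it UNRANKED,
    # integrate_priorities moves "test_b" into self's HIGH group. Groups at or below UNRANKED in
    # `other` are ignored, so a test is never demoted.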
def get_all_tests(self) -> TestRuns:
"""Returns all tests in the TestPrioritizations"""
return tuple(chain(*self._test_priorities))
def get_prioritized_tests(self) -> TestRuns:
return self.get_high_relevance_tests() + self.get_probable_relevance_tests()
def get_high_relevance_tests(self) -> TestRuns:
return tuple(test for test in self._test_priorities[Relevance.HIGH.value])
def get_probable_relevance_tests(self) -> TestRuns:
return tuple(test for test in self._test_priorities[Relevance.PROBABLE.value])
def get_unranked_relevance_tests(self) -> TestRuns:
return tuple(test for test in self._test_priorities[Relevance.UNRANKED.value])
def get_unlikely_relevance_tests(self) -> TestRuns:
return tuple(test for test in self._test_priorities[Relevance.UNLIKELY.value])
def get_none_relevance_tests(self) -> TestRuns:
return tuple(test for test in self._test_priorities[Relevance.NONE.value])
def print_info(self) -> None:
def _print_tests(label: str, tests: List[TestRun]) -> None:
if not tests:
return
print(f"{label} tests ({len(tests)}):")
            for test in tests:
                print(f"  {test}")
for relevance_group, tests in self._traverse_priorities():
_print_tests(f"{Relevance(relevance_group).name.title()} Relevance", tests)
def _get_test_relevance_group(self, test_run: TestRun) -> Relevance:
"""
Returns the relevance of the given test run.
If the heuristic split this test run among multiple runs, then return the
highest relevance of any of the test runs.
"""
for relevance_group, tests in self._traverse_priorities():
# Different heuristics may result in a given test file being split
# into different test runs, so look for the overlapping tests to
# find the match
if any(t & test_run for t in tests):
return Relevance(relevance_group)
raise ValueError(f"Test {test_run} not found in any relevance group")
def _get_test_order(self, test_run: TestRun) -> int:
"""
        Returns the overall rank this heuristic assigned to the test run.

        If the heuristic split this test run across multiple test runs, the rank of the
        highest-ranked chunk is returned.
"""
base_rank = 0
for _, relevance_group_tests in self._traverse_priorities():
for idx, test in enumerate(relevance_group_tests):
# Different heuristics may result in a given test file being split
# into different test runs, so look for the overlapping tests to
# find the match
if test & test_run:
return base_rank + idx
base_rank += len(relevance_group_tests)
raise ValueError(f"Test {test_run} not found in any relevance group")
def _get_test_order_within_relevance_group(self, test_run: TestRun) -> int:
"""
        Returns the test run's position within its relevance group.

        If the heuristic split this test run across multiple test runs, the position of the
        highest-ranked chunk is returned.
"""
for _, relevance_group_tests in self._traverse_priorities():
for idx, test in enumerate(relevance_group_tests):
# Different heuristics may result in a given test file being split
# into different test runs, so look for the overlapping tests to
# find the match
if test & test_run:
return idx
raise ValueError(f"Test {test_run} not found in any relevance group")
def get_priority_info_for_test(self, test_run: TestRun) -> Dict[str, Any]:
"""Given a failing test, returns information about it's prioritization that we want to emit in our metrics."""
relevance = self._get_test_relevance_group(test_run)
return {
METRIC_RELEVANCE_GROUP: relevance.name,
METRIC_ORDER_WITHIN_RELEVANCE_GROUP: self._get_test_order_within_relevance_group(
test_run
),
METRIC_NUM_TESTS_IN_RELEVANCE_GROUP: len(
self._test_priorities[relevance.value]
),
METRIC_ORDER_OVERALL: self._get_test_order(test_run),
}
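
    # Example return value of get_priority_info_for_test (illustrative numbers):
    #   {
    #       "relevance_group": "HIGH",
    #       "order_within_relevance_group": 0,
    #       "num_tests_in_relevance_group": 3,
    #       "order_overall": 0,
    #   }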
class AggregatedHeuristics:
"""
Aggregates the results across all heuristics.
It saves the individual results from each heuristic and exposes an aggregated view.
"""
    _heuristic_results: Dict[
        "HeuristicInterface", TestPrioritizations
    ]  # Keyed by the heuristic instance. Dicts preserve insertion order, which matters for sharding
unranked_tests: Tuple[str, ...]
def __init__(self, unranked_tests: List[str]) -> None:
self.unranked_tests = tuple(unranked_tests)
self._heuristic_results = {}
def add_heuristic_results(
self, heuristic: "HeuristicInterface", heuristic_results: TestPrioritizations
) -> None:
if heuristic in self._heuristic_results:
raise ValueError(f"We already have heuristics for {heuristic.name}")
self._heuristic_results[heuristic] = heuristic_results
def get_aggregated_priorities(
self, include_trial: bool = False
) -> TestPrioritizations:
"""
Returns the aggregated priorities across all heuristics.
"""
aggregated_priorities = TestPrioritizations(
tests_being_ranked=self.unranked_tests
)
for heuristic, heuristic_results in self._heuristic_results.items():
if heuristic.trial_mode and not include_trial:
continue
aggregated_priorities.integrate_priorities(heuristic_results)
return aggregated_priorities
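
    # Usage sketch (illustrative; MyHeuristic stands in for any concrete HeuristicInterface subclass):
    #   >>> agg = AggregatedHeuristics(unranked_tests=["test_a", "test_b"])
    #   >>> heuristic = MyHeuristic()
    #   >>> agg.add_heuristic_results(heuristic, heuristic.get_test_priorities(["test_a", "test_b"]))
    #   >>> ordered_tests = agg.get_aggregated_priorities().get_all_tests()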
def get_test_stats(self, test: TestRun) -> Dict[str, Any]:
"""
Returns the aggregated statistics for a given test.
"""
stats: Dict[str, Any] = {
"test_name": test.test_file,
"test_filters": test.get_pytest_filter(),
}
# Get baseline metrics assuming we didn't have any TD heuristics
baseline_priorities = TestPrioritizations(
tests_being_ranked=self.unranked_tests
)
baseline_stats = baseline_priorities.get_priority_info_for_test(test)
baseline_stats["heuristic_name"] = "baseline"
stats["without_heuristics"] = baseline_stats
# Get metrics about the heuristics used
heuristics = []
# Figure out which heuristic gave this test the highest priority (if any)
highest_ranking_heuristic = None
highest_ranking_heuristic_order: int = sys.maxsize
# And figure out how many heuristics suggested prioritizing this test
num_heuristics_prioritized_by = 0
for heuristic, heuristic_results in self._heuristic_results.items():
metrics = heuristic_results.get_priority_info_for_test(test)
metrics["heuristic_name"] = heuristic.name
metrics["trial_mode"] = heuristic.trial_mode
heuristics.append(metrics)
if not heuristic.trial_mode and heuristic_results._get_test_relevance_group(
test
) in [
Relevance.HIGH,
Relevance.PROBABLE,
]:
                num_heuristics_prioritized_by += 1

                # "highest_ranking_heuristic" should only consider heuristics that actually prioritize the test.
                # Sometimes an UNRANKED heuristic can end up with a better overall order than a heuristic that
                # prioritized the test, simply because it happened to be sorted higher initially, while the
                # heuristic that actually prioritized the test used other data to determine it was slightly
                # less relevant than other tests.
                if metrics[METRIC_ORDER_OVERALL] < highest_ranking_heuristic_order:
                    highest_ranking_heuristic = heuristic.name
                    highest_ranking_heuristic_order = metrics[METRIC_ORDER_OVERALL]
stats["heuristics"] = heuristics
# Easier to compute here than in rockset
stats["num_heuristics_prioritized_by"] = num_heuristics_prioritized_by
stats[
"aggregated"
] = self.get_aggregated_priorities().get_priority_info_for_test(test)
stats["aggregated_trial"] = self.get_aggregated_priorities(
include_trial=True
).get_priority_info_for_test(test)
if highest_ranking_heuristic:
stats["highest_ranking_heuristic"] = highest_ranking_heuristic
return stats
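
    # Shape of the stats dict returned above (illustrative): "test_name", "test_filters",
    # "without_heuristics", "heuristics" (one entry per registered heuristic),
    # "num_heuristics_prioritized_by", "aggregated", "aggregated_trial", and, when at least
    # one non-trial heuristic prioritized the test, "highest_ranking_heuristic".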
class HeuristicInterface:
"""
Interface for all heuristics.
"""
description: str
    # When trial mode is set to True, this heuristic's predictions will not be used
    # to reorder tests. Its results will, however, still be emitted in the metrics.
trial_mode: bool
@abstractmethod
def __init__(self, **kwargs: Any) -> None:
self.trial_mode = kwargs.get("trial_mode", False) # type: ignore[assignment]
@abstractmethod
def get_test_priorities(self, tests: List[str]) -> TestPrioritizations:
"""
Returns the prioritizations for the given tests.
        The set of tests in the returned TestPrioritizations _must_ be identical to the set of tests passed in.
"""
pass
@property
def name(self) -> str:
return self.__class__.__name__
def __str__(self) -> str:
return self.name
@abstractmethod
def get_prediction_confidence(self, tests: List[str]) -> Dict[str, float]:
"""
        Like get_test_priorities, but instead returns a mapping from each test to a
        float ranging from -1 to 1, where negative means skip the test, positive means
        run it, 0 means no opinion, and the magnitude reflects how confident the
        heuristic is. Used by AggregatedHeuristicsRankings.
"""
pass
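
# Sketch of a minimal concrete heuristic (illustrative; not part of this module):
#   class NoOpHeuristic(HeuristicInterface):
#       def get_test_priorities(self, tests: List[str]) -> TestPrioritizations:
#           # Leave every test in the UNRANKED group
#           return TestPrioritizations(tests_being_ranked=tests)
#
#       def get_prediction_confidence(self, tests: List[str]) -> Dict[str, float]:
#           # 0.0 means "no opinion" for every test
#           return {test: 0.0 for test in tests}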