| // Copyright 2019 Google LLC |
| // |
| // This source code is licensed under the BSD-style license found in the |
| // LICENSE file in the root directory of this source tree. |
| |
| #pragma once |
| |
| #include <cstddef> |
| #include <cstdint> |
| |
| #include <xnnpack.h> |
| #include <xnnpack/allocator.h> |
| |
| #include <benchmark/benchmark.h> |
| |
| namespace benchmark { |
| namespace utils { |
| |
// NOTE(review): presumably flushes benchmark data out of the processor caches
// so that the next measurement starts cold. The meaning of the returned value
// is not visible from this header -- confirm against the implementation.
uint32_t WipeCache();
// NOTE(review): presumably touches the `size` bytes starting at `ptr` so that
// they become resident in the L1 data cache. The meaning of the returned value
// is not visible from this header -- confirm against the implementation.
uint32_t PrefetchToL1(const void* ptr, size_t size);

// Disable support for denormalized numbers in floating-point units.
void DisableDenormals();

// Return clock rate, in Hz, for the currently used logical processor.
uint64_t GetCurrentCpuFrequency();

// Return maximum (across all cores/clusters/sockets) last level cache size.
// Can overestimate, but not underestimate LLC size.
size_t GetMaxCacheSize();
| |
| // Set number of elements for a unary elementwise microkernel such that: |
| // - It is divisible by 2, 3, 4, 5, 6. |
| // - It is divisible by AVX512 width. |
| // - Total memory footprint does not exceed the characteristic cache size for |
| // the architecture. |
| template<class InType, class OutType> |
| void UnaryElementwiseParameters(benchmark::internal::Benchmark* benchmark) { |
| benchmark->ArgName("N"); |
| |
| size_t characteristic_l1 = 32 * 1024; |
| size_t characteristic_l2 = 256 * 1024; |
| #if XNN_ARCH_ARM |
| characteristic_l1 = 16 * 1024; |
| characteristic_l2 = 128 * 1024; |
| #endif // XNN_ARCH_ARM |
| |
| const size_t elementwise_size = sizeof(InType) + sizeof(OutType); |
| benchmark->Arg(characteristic_l1 / elementwise_size / 960 * 960); |
| benchmark->Arg(characteristic_l2 / elementwise_size / 960 * 960); |
| } |
| |
| // Set number of elements for a binary elementwise microkernel such that: |
| // - It is divisible by 2, 3, 4, 5, 6. |
| // - It is divisible by AVX512 width. |
| // - Total memory footprint does not exceed the characteristic cache size for |
| // the architecture. |
| template<class InType, class OutType> |
| void BinaryElementwiseParameters(benchmark::internal::Benchmark* benchmark) { |
| benchmark->ArgName("N"); |
| |
| size_t characteristic_l1 = 32 * 1024; |
| size_t characteristic_l2 = 256 * 1024; |
| #if XNN_ARCH_ARM |
| characteristic_l1 = 16 * 1024; |
| characteristic_l2 = 128 * 1024; |
| #endif // XNN_ARCH_ARM |
| |
| const size_t elementwise_size = 2 * sizeof(InType) + sizeof(OutType); |
| benchmark->Arg(characteristic_l1 / elementwise_size / 960 * 960); |
| benchmark->Arg(characteristic_l2 / elementwise_size / 960 * 960); |
| } |
| |
| // Set multi-threading parameters appropriate for the processor. |
| void MultiThreadingParameters(benchmark::internal::Benchmark* benchmark); |
| |
| typedef bool (*IsaCheckFunction)(benchmark::State& state); |
| |
// ARM instruction set checks. Every declaration below has the signature of
// IsaCheckFunction: bool(benchmark::State&).
// NOTE(review): each check presumably returns true when the extension is
// available -- confirm against the implementation.

// Check if either ARM VFPv2 or VFPv3 extension is supported.
// If VFP is unsupported, report error in benchmark state, and return false.
bool CheckVFP(benchmark::State& state);

// Check if ARMv6 extensions are supported.
// If ARMv6 extensions are unsupported, report error in benchmark state, and return false.
bool CheckARMV6(benchmark::State& state);

// Check if ARM NEON extension is supported.
// If NEON is unsupported, report error in benchmark state, and return false.
bool CheckNEON(benchmark::State& state);

// Check if ARM NEON-FP16 extension is supported.
// If NEON-FP16 is unsupported, report error in benchmark state, and return false.
bool CheckNEONFP16(benchmark::State& state);

// Check if ARM NEON-FMA extension is supported.
// If NEON-FMA is unsupported, report error in benchmark state, and return false.
bool CheckNEONFMA(benchmark::State& state);

// Check if ARMv8 NEON instructions are supported.
// If ARMv8 NEON is unsupported, report error in benchmark state, and return false.
bool CheckNEONV8(benchmark::State& state);

// Check if ARM NEON-FP16-ARITH extension is supported.
// If NEON-FP16-ARITH is unsupported, report error in benchmark state, and return false.
bool CheckNEONFP16ARITH(benchmark::State& state);

// Check if ARM NEON-BF16 extension is supported.
// If NEON-BF16 is unsupported, report error in benchmark state, and return false.
bool CheckNEONBF16(benchmark::State& state);

// Check if ARM DOT extension is supported.
// If DOT is unsupported, report error in benchmark state, and return false.
bool CheckNEONDOT(benchmark::State& state);
| |
// x86 instruction set checks. Every declaration below has the signature of
// IsaCheckFunction: bool(benchmark::State&).
// NOTE(review): each check presumably returns true when the extension is
// available -- confirm against the implementation.

// Check if x86 SSSE3 extension is supported.
// If SSSE3 is unsupported, report error in benchmark state, and return false.
bool CheckSSSE3(benchmark::State& state);

// Check if x86 SSE4.1 extension is supported.
// If SSE4.1 is unsupported, report error in benchmark state, and return false.
bool CheckSSE41(benchmark::State& state);

// Check if x86 AVX extension is supported.
// If AVX is unsupported, report error in benchmark state, and return false.
bool CheckAVX(benchmark::State& state);

// Check if x86 F16C extension is supported.
// If F16C is unsupported, report error in benchmark state, and return false.
bool CheckF16C(benchmark::State& state);

// Check if x86 XOP extension is supported.
// If XOP is unsupported, report error in benchmark state, and return false.
bool CheckXOP(benchmark::State& state);

// Check if x86 FMA3 extension is supported.
// If FMA3 is unsupported, report error in benchmark state, and return false.
bool CheckFMA3(benchmark::State& state);

// Check if x86 AVX2 extension is supported.
// If AVX2 is unsupported, report error in benchmark state, and return false.
bool CheckAVX2(benchmark::State& state);

// Check if x86 AVX512F extension is supported.
// If AVX512F is unsupported, report error in benchmark state, and return false.
bool CheckAVX512F(benchmark::State& state);

// Check if x86 SKX-level AVX512 extensions (AVX512F, AVX512CD, AVX512BW, AVX512DQ, and AVX512VL) are supported.
// If SKX-level AVX512 extensions are unsupported, report error in benchmark state, and return false.
bool CheckAVX512SKX(benchmark::State& state);
| |
// Divide x by q and add one to the truncated quotient whenever the division
// leaves a non-zero remainder. q must be non-zero.
template <class T>
inline T DivideRoundUp(T x, T q) {
  const T quotient = x / q;
  const bool has_remainder = (x % q) != 0;
  return has_remainder ? quotient + T(1) : quotient;
}
| |
// Multiply q by the rounded-up quotient DivideRoundUp(x, q); for non-negative
// x and positive q this is the smallest multiple of q that is not less than x.
template <class T>
inline T RoundUp(T x, T q) {
  const T multiples = DivideRoundUp(x, q);
  return q * multiples;
}
| |
// Difference-or-zero: a - b when a >= b, and zero in every other case.
template <class T>
inline T Doz(T a, T b) {
  if (a >= b) {
    return a - b;
  }
  return T(0);
}
| |
| // A struct that uses RAII pattern to allocate and release code memory. |
| struct CodeMemoryHelper { |
| CodeMemoryHelper(); |
| ~CodeMemoryHelper(); |
| |
| xnn_code_buffer buffer; |
| xnn_status status; |
| }; |
| |
| } // namespace utils |
| } // namespace benchmark |