| /* |
| * Copyright 2024 Valve Corporation |
| * Copyright 2023 Alyssa Rosenzweig |
| * SPDX-License-Identifier: MIT |
| */ |
| |
| #pragma once |
| |
| /* |
| * This header adds definitions that are common between the CPU and the GPU for |
| * shared headers. It also fills in basic standard library holes for internal |
| * OpenCL. |
| */ |
| |
| #ifndef __OPENCL_VERSION__ |
| |
| /* The OpenCL version of this header defines many OpenCL versions of stdint.h |
| * and util/macros.h functions. #include both here for consistency in shared |
| * headers. |
| */ |
| #include <stdint.h> |
| #include "util/macros.h" |
| |
/* Host-side view of device pointers embedded in structures shared between
 * host and device: a field declared with DEVICE(T) is an opaque 64-bit address
 * here, whereas the OpenCL side resolves it to a real pointer.
 */
#define DEVICE(pointee_) uint64_t

/* Address-space keywords for inline functions in shared headers. Such
 * functions take host pointers on the host and device pointers on the device.
 * OpenCL generic pointers would make that automatic, but they can cause
 * headaches and lose constantness, so shared code spells the address space
 * with GLOBAL/CONST instead. The lowercase global/constant spellings cannot be
 * used because they conflict with C++ standard library headers.
 *
 * On the host, GLOBAL expands to nothing and CONST to plain const.
 */
#define CONST const
#define GLOBAL
| |
| #else |
| |
| /* GenXML likes to use fp16. Since fp16 is supported by all grown up drivers, we |
| * just enable the extension everywhere. |
| */ |
| #pragma OPENCL EXTENSION cl_khr_fp16 : enable |
| |
/* Device-side counterpart of the host DEVICE macro: here the field is a real
 * pointer into global memory rather than an opaque 64-bit address.
 */
#define DEVICE(pointee_) global pointee_ *

/* On the device, GLOBAL/CONST pass straight through to the OpenCL address
 * space qualifiers.
 */
#define CONST  constant
#define GLOBAL global
| |
| /* OpenCL lacks explicitly sized integer types, but we know the sizes of |
| * particular integer types. These typedefs allow defining common headers with |
| * explicit integer types (and therefore compatible data layouts). |
| */ |
| typedef ulong uint64_t; |
| typedef uint uint32_t; |
| typedef ushort uint16_t; |
| typedef uchar uint8_t; |
| |
| typedef long int64_t; |
| typedef int int32_t; |
| typedef short int16_t; |
| typedef char int8_t; |
| |
/* OpenCL C lacks static_assert, a part of C11. Emulate it so that it is
 * available on both host and device: a false condition declares an array type
 * of size -1, which fails to compile. The typedef name embeds __LINE__.
 *
 * The macro is variadic so that the message argument is optional (no-message
 * static_asserts were standardized in C23); the message itself is discarded.
 *
 * _S stringifies its argument; it is not referenced by static_assert below.
 * _PASTE expands its arguments before pasting, which is what turns __LINE__
 * into a number.
 */
#define _S(x)         #x
#define _PASTE_(x, y) x##y
#define _PASTE(x, y)  _PASTE_(x, y)
#define static_assert(cond_, ...)                                              \
   typedef char _PASTE(static_assertion, __LINE__)[(cond_) ? 1 : -1]
| |
/* NIR's precompilation infrastructure requires the workgroup size to be
 * specified with the kernel, via reqd_work_group_size. That attribute has
 * terrible ergonomics, so kernels are declared with these aliases instead:
 * KERNEL3D takes all three dimensions, while KERNEL2D and KERNEL fill the
 * missing trailing dimensions with 1.
 */
#define KERNEL3D(nx, ny, nz)                                                   \
   __attribute__((reqd_work_group_size(nx, ny, nz))) kernel void

#define KERNEL2D(nx, ny) KERNEL3D(nx, ny, 1)
#define KERNEL(nx)       KERNEL3D(nx, 1, 1)
| |
/* offsetof normally comes from stddef.h, which does not exist on the OpenCL
 * side. Forward to the compiler builtin instead.
 */
#define offsetof(type_, member_) __builtin_offsetof(type_, member_)
| |
/* Structure packing attribute for shared headers. This is not an exact match
 * for the util/macros.h version: it additionally forces 4-byte alignment.
 * Without the aligned(4) we get garbage code gen, and in practice this is what
 * you want.
 */
#define PACKED __attribute__((packed, aligned(4)))

/* OpenCL C doesn't seem to have an equivalent for packed enums, but it doesn't
 * matter, so this expands to nothing. Compare util/macros.h.
 */
#define ENUM_PACKED
| |
/* FILE * pointers can be useful in function signatures shared across
 * host/device, but are meaningless in OpenCL. Defining FILE as void turns
 * every "FILE *" into "void *", which allows a consistent prototype across
 * host/device even though there won't be an actual file pointer on the device
 * side.
 */
#define FILE void
| |
/* OpenCL C lacks a standard memcpy, but clang has a builtin that will be
 * plumbed into a NIR memcpy intrinsic. This is not a competent implementation
 * of memcpy for large amounts of data, since it's necessarily single threaded,
 * but memcpy is so useful for shared CPU/GPU code that it's worth making the
 * standard library function work.
 */
#define memcpy __builtin_memcpy
| |
/* OpenCL C has no standard abort. Provide one that forwards to
 * nir_printf_abort, which plumbs through the NIR intrinsic.
 */
void nir_printf_abort(void);

static inline void
abort(void)
{
   nir_printf_abort();
}
| |
/* OpenCL C lacks a standard assert, so one is implemented on top of the abort
 * intrinsic. The whole message goes through a single printf so the lines don't
 * get split up if multiple threads assert in parallel.
 *
 * The expansion is wrapped in do { } while (0) so that "assert(x);" is exactly
 * one statement. A bare "if (...) { ... }" would break, or silently capture
 * the else, when the assert is the unbraced body of an if/else.
 *
 * With NDEBUG defined, assert expands to ((void)0), as the C standard assert
 * does, and the condition is not evaluated.
 */
#ifndef NDEBUG
#define _ASSERT_STRING(x)       _ASSERT_STRING_INNER(x)
#define _ASSERT_STRING_INNER(x) #x
#define assert(x)                                                              \
   do {                                                                        \
      if (!(x)) {                                                              \
         printf("Shader assertion fail at " __FILE__ ":"                       \
                _ASSERT_STRING(__LINE__) "\nExpected " #x "\n\n");             \
         nir_printf_abort();                                                   \
      }                                                                        \
   } while (0)
#else
#define assert(x) ((void)0)
#endif
| |
/* Core OpenCL C lacks likely/unlikely, so both simply evaluate to their
 * argument and carry no branch hint. We might be able to map them to a clang
 * builtin though...
 */
#define likely(cond_)   (cond_)
#define unlikely(cond_) (cond_)
| |
/* Limits duplicated from the C standard library; the u_intN_min/max
 * implementations require them. Spelled in hexadecimal: all 64 bits set for
 * the unsigned limit, all but the sign bit for the signed one.
 */
#define UINT64_MAX 0xfffffffffffffffful
#define INT64_MAX  0x7fffffffffffffffl
| |
/* These duplicate util/macros.h. This could maybe be cleaned up.
 *
 * BITFIELD_BIT(b) is a 32-bit value with only bit b set. The argument is
 * parenthesized so that expressions built from operators binding looser than
 * << (for example "i & 3" or "c ? 1 : 2") select the intended bit.
 *
 * BITFIELD_MASK(m) is a 32-bit value with the low m bits set; m == 32 is
 * special-cased because shifting a 32-bit value by 32 is undefined.
 *
 * ASSERTED, ALWAYS_INLINE and UNUSED expand to nothing here.
 */
#define BITFIELD_BIT(b)  (1u << (b))
#define BITFIELD_MASK(m) (((m) == 32) ? 0xffffffff : ((1u << (m)) - 1))
#define ASSERTED
#define ALWAYS_INLINE
#define UNUSED
| |
/* Largest value representable by a signed two's complement integer that is
 * bit_size bits wide. bit_size must be in [1, 64].
 */
static inline int64_t
u_intN_max(unsigned bit_size)
{
   assert(bit_size <= 64 && bit_size > 0);

   /* Drop the high bits that a bit_size-wide integer does not have. */
   const unsigned unused_bits = 64 - bit_size;
   return INT64_MAX >> unused_bits;
}
| |
/* Smallest value representable by a signed two's complement integer that is
 * bit_size bits wide: one below the negation of u_intN_max(bit_size).
 */
static inline int64_t
u_intN_min(unsigned bit_size)
{
   const int64_t max = u_intN_max(bit_size);
   return (-max) - 1;
}
| |
/* Largest value representable by an unsigned integer that is bit_size bits
 * wide, i.e. the low bit_size bits all set. bit_size must be in [1, 64].
 */
static inline uint64_t
u_uintN_max(unsigned bit_size)
{
   assert(bit_size <= 64 && bit_size > 0);

   /* Drop the high bits that a bit_size-wide integer does not have. */
   const unsigned unused_bits = 64 - bit_size;
   return UINT64_MAX >> unused_bits;
}
| |
| static inline uint |
| align(uint x, uint y) |
| { |
| return (x + y - 1) & ~(y - 1); |
| } |
| |
/* Floor of the base-2 logarithm of n, computed from the leading zero count.
 * Bit 0 is forced on first, so n == 0 gives 0, the same as n == 1.
 */
static inline uint32_t
util_logbase2(uint32_t n)
{
   const uint32_t nonzero = n | 1;
   return 31 - clz(nonzero);
}
| |
/* Ceiling of the base-2 logarithm of n. Both n == 0 and n == 1 give 0. */
static inline uint32_t
util_logbase2_ceil(uint32_t n)
{
   if (n <= 1)
      return 0;

   return 32 - clz(n - 1);
}
| |
/* 64-bit value with the low x bits set. x == 64 is special-cased because
 * shifting a 64-bit value by 64 is undefined. The argument is parenthesized so
 * that expressions built from operators binding looser than == and << (for
 * example "a | b") are evaluated as a whole.
 */
#define BITFIELD64_MASK(x) (((x) == 64) ? ~0ul : ((1ul << (x)) - 1))

/* True if v has at most one bit set. Note that this includes zero. */
#define IS_POT(v) (((v) & ((v) - 1)) == 0)

/* True if v has exactly one bit set. */
#define IS_POT_NONZERO(v) ((v) != 0 && IS_POT(v))

/* A / B rounded up. Arguments are evaluated more than once. */
#define DIV_ROUND_UP(A, B) (((A) + (B) - 1) / (B))

/* X limited to the range [MIN, MAX]. Arguments are evaluated more than once. */
#define CLAMP(X, MIN, MAX) ((X) > (MIN) ? ((X) > (MAX) ? (MAX) : (X)) : (MIN))

/* x rounded up to a multiple of pot_align, which must be a power of two. */
#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1))
| |
/* Reinterpret the bits of a float as a 32-bit unsigned integer, using the
 * OpenCL as_uint builtin.
 */
static inline uint32_t
fui(float f)
{
   const uint32_t bits = as_uint(f);
   return bits;
}
| |
/* Reinterpret a 32-bit unsigned integer as the float with the same bits,
 * using the OpenCL as_float builtin. Inverse of fui.
 */
static inline float
uif(uint32_t ui)
{
   const float value = as_float(ui);
   return value;
}
| |
/* Single-precision machine epsilon (2^-23). */
#define CL_FLT_EPSILON 1.1920928955078125e-7f

/* OpenCL C lacks roundf, so it is emulated: add a bias carrying the sign of x,
 * then truncate toward zero. The bias is 0.5 minus a quarter of an epsilon,
 * i.e. the largest float below 0.5.
 */
static inline float
roundf(float x)
{
   const float bias = 0.5f - 0.25f * CL_FLT_EPSILON;
   return trunc(x + copysign(bias, x));
}
| |
/* OpenCL C lacks llroundf as well: round with the emulated roundf and convert
 * the result to an integer.
 */
static inline long long
llroundf(float x)
{
   const float rounded = roundf(x);
   return (long long)rounded;
}
| |
| static inline uint16_t |
| _mesa_float_to_half(float f) |
| { |
| return as_ushort(convert_half(f)); |
| } |
| |
| static inline float |
| _mesa_half_to_float(uint16_t w) |
| { |
| return convert_float(as_half(w)); |
| } |
| |
/* Sign-extend the low `width` bits of val to 64 bits: bit `width - 1` is
 * replicated into every bit above it, and any bits of val at or above `width`
 * are discarded.
 *
 * width must be in [1, 64]. Any other value would shift by 64 or more, which
 * is undefined, so it is rejected by assertion like the bit_size argument of
 * u_intN_max/u_uintN_max.
 *
 * Duplicates u_math.h. We should make that header CL safe at some point...
 */
static inline int64_t
util_sign_extend(uint64_t val, unsigned width)
{
   assert(width > 0 && width <= 64);

   /* Move the field's sign bit to bit 63, then shift back arithmetically. */
   const unsigned shift = 64 - width;
   return (int64_t)(val << shift) >> shift;
}
| |
| #endif |