src/compiler/libcl/libcl.h - platform/external/mesa3d - Git at Google

 /*
  * Copyright 2024 Valve Corporation
  * Copyright 2023 Alyssa Rosenzweig
  * SPDX-License-Identifier: MIT
  */

 #pragma once

 /*
  * This header adds definitions that are common between the CPU and the GPU for
  * shared headers. It also fills in basic standard library holes for internal
  * OpenCL.
  */

 #ifndef __OPENCL_VERSION__

 /* The OpenCL version of this header defines many OpenCL versions of stdint.h
  * and util/macros.h functions. #include both here for consistency in shared
  * headers.
  */
 #include <stdint.h>
 #include "util/macros.h"

 /* Structures defined in common host/device headers that include device pointers
  * need to resolve to a real pointer in OpenCL but an opaque 64-bit address on
  * the host. The DEVICE macro facilitates that: on the host it discards type_
  * and always expands to uint64_t.
  */
 #define DEVICE(type_) uint64_t

 /* However, inline functions defined in common host/device headers that take
  * pointers need to resolve to pointers on either host or device. (Host pointers
  * on the host, device pointers on the device.) This would be automatic with
  * OpenCL generic pointers, but those can cause headaches and lose constness,
  * so these defines allow GLOBAL/CONST keywords to be used even in CPU code.
  * Annoyingly, we can't use global/constant here because it conflicts with C++
  * standard library headers.
  *
  * On the host, GLOBAL expands to nothing and CONST expands to plain const.
  */
 #define GLOBAL
 #define CONST const

 #else

 /* GenXML likes to use fp16. Since fp16 is supported by all grown up drivers, we
  * just enable the extension everywhere.
  */
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable

 /* The OpenCL side of DEVICE must resolve to real pointer types, unlike
  * the host version: DEVICE(T) expands to a pointer to T qualified with the
  * global address space.
  */
 #define DEVICE(type_)   global type_ *

 /* Passthrough: GLOBAL and CONST expand to the OpenCL address space qualifiers
  * global and constant respectively.
  */
 #define GLOBAL global
 #define CONST constant

 /* OpenCL C does not provide the <stdint.h> names, but the sizes of its
  * built-in integer types are known. Aliasing the exact-width names onto them
  * lets shared headers declare explicitly sized fields (and therefore
  * compatible data layouts) on both host and device.
  */
 typedef char   int8_t;
 typedef uchar  uint8_t;

 typedef short  int16_t;
 typedef ushort uint16_t;

 typedef int    int32_t;
 typedef uint   uint32_t;

 typedef long   int64_t;
 typedef ulong  uint64_t;

 /* OpenCL C lacks static_assert, a part of C11. This makes static_assert
  * available on both host and device. It is defined as variadic to handle also
  * no-message static_asserts (standardized in C23).
  *
  * The emulation declares a char array typedef whose size is 1 when _COND holds
  * and -1 otherwise, so a false condition produces an invalid (negative-size)
  * array type. Any message argument is accepted but discarded. The typedef name
  * is built from __LINE__, so all assertions on one source line share a name.
  *
  * _S stringifies its argument; it is not referenced elsewhere in the visible
  * part of this header. _PASTE goes through _PASTE_ so that __LINE__ is
  * macro-expanded before token pasting.
  */
 #define _S(x) #x
 #define _PASTE_(x, y) x##y
 #define _PASTE(x, y) _PASTE_(x, y)
 #define static_assert(_COND, ...)                                              \
    typedef char _PASTE(static_assertion, __LINE__)[(_COND) ? 1 : -1]

 /* NIR's precompilation infrastructure requires specifying a workgroup size with
  * the kernel, via reqd_work_group_size. Unfortunately, reqd_work_group_size has
  * terrible ergonomics, so we provide these aliases instead.
  *
  * Each macro expands to the attribute followed by "kernel void", so it is
  * written in place of the return type, directly before the kernel name.
  */
 #define KERNEL3D(x, y, z)                                                      \
    __attribute__((reqd_work_group_size(x, y, z))) kernel void

 /* Lower-dimensional shorthands: the omitted dimensions are fixed to 1. */
 #define KERNEL2D(x, y)   KERNEL3D(x, y, 1)
 #define KERNEL(x)        KERNEL2D(x, 1)

 /* stddef.h usually defines this. We don't have that on the OpenCL side but we
  * can use the compiler builtin, to which both arguments are forwarded as-is.
  */
 #define offsetof(x, y) __builtin_offsetof(x, y)

 /* This is not an exact match for the util/macros.h version but without the
  * aligned(4) we get garbage code gen and in practice this is what you want.
  * Note that a type marked PACKED here is therefore packed AND 4-byte aligned.
  */
 #define PACKED __attribute__((packed, aligned(4)))

 /* OpenCL C doesn't seem to have an equivalent for this but it doesn't matter.
  * Compare util/macros.h. On the device it expands to nothing.
  */
 #define ENUM_PACKED

 /* FILE * pointers can be useful in function signatures shared across
  * host/device, but are meaningless in OpenCL. FILE is defined as void so that
  * "FILE *" becomes "void *", allowing a consistent prototype across
  * host/device even though there won't be an actual file pointer on the device
  * side.
  */
 #define FILE void

 /* OpenCL C lacks a standard memcpy, but clang has one that will be plumbed into
  * a NIR memcpy intrinsic. This is not a competent implementation of memcpy for
  * large amounts of data, since it's necessarily single threaded, but memcpy is
  * so useful for shared CPU/GPU code that it's worth making the standard
  * library function work.
  */
 #define memcpy __builtin_memcpy

 /* OpenCL C has no standard abort(), so one is supplied here. It simply
  * forwards to nir_printf_abort(), which is only declared in this header.
  */
 void nir_printf_abort(void);

 static inline void
 abort(void)
 {
    nir_printf_abort();
 }

 /* OpenCL C lacks a standard assert. We implement one on top of the abort
  * intrinsic. We are careful to use a single printf so the lines don't get
  * split up if multiple threads assert in parallel.
  *
  * The expansion is wrapped in do { } while (0) so that "assert(x);" is exactly
  * one statement. A bare "if (...) { ... }" expansion breaks when the assert is
  * the body of an unbraced if that has an else branch.
  *
  * With NDEBUG defined, assert(x) does nothing and x is not evaluated.
  */
 #ifndef NDEBUG
 #define _ASSERT_STRING(x) _ASSERT_STRING_INNER(x)
 #define _ASSERT_STRING_INNER(x) #x
 #define assert(x) \
    do { \
       if (!(x)) { \
          printf("Shader assertion fail at " __FILE__ ":" \
                 _ASSERT_STRING(__LINE__) "\nExpected " #x "\n\n"); \
          nir_printf_abort(); \
       } \
    } while (0)
 #else
 #define assert(x) do { } while (0)
 #endif

 /* Core OpenCL C lacks likely/unlikely. We might be able to map to a clang
  * builtin though... For now both are plain passthroughs that yield x
  * unchanged and carry no branch hint.
  */
 #define likely(x) (x)
 #define unlikely(x) (x)

 /* These duplicate the C standard library and are required for the
  * u_intN_min/max implementations. The l/ul suffixes match the long/ulong
  * types that int64_t/uint64_t are typedef'd to in this header.
  */
 #define UINT64_MAX 18446744073709551615ul
 #define INT64_MAX 9223372036854775807l

 /* These duplicate util/macros.h. This could maybe be cleaned up.
  *
  * BITFIELD_BIT(b):  32-bit value with only bit b set. The argument is
  *                   parenthesized so that expressions using operators of
  *                   lower precedence than << (e.g. "a | b") shift correctly.
  * BITFIELD_MASK(m): 32-bit value with the low m bits set; m == 32 is special
  *                   cased because shifting a 32-bit value by 32 is not valid.
  * ASSERTED, ALWAYS_INLINE, UNUSED: expand to nothing on the device.
  */
 #define BITFIELD_BIT(b)  (1u << (b))
 #define BITFIELD_MASK(m) (((m) == 32) ? 0xffffffff : ((1u << (m)) - 1))
 #define ASSERTED
 #define ALWAYS_INLINE
 #define UNUSED

 /* Returns the largest value of a signed integer that is bit_size bits wide,
  * computed by shifting INT64_MAX right by the number of unused bits.
  * bit_size must be in the range [1, 64]; this is asserted.
  */
 static inline int64_t
 u_intN_max(unsigned bit_size)
 {
    assert(bit_size <= 64 && bit_size > 0);

    const unsigned unused_bits = 64 - bit_size;
    return INT64_MAX >> unused_bits;
 }

 /* Returns the smallest value of a signed integer that is bit_size bits wide,
  * derived from u_intN_max() as -max - 1. The bit_size range check is the one
  * performed inside u_intN_max().
  */
 static inline int64_t
 u_intN_min(unsigned bit_size)
 {
    const int64_t max_value = u_intN_max(bit_size);
    return -max_value - 1;
 }

 /* Returns the largest value of an unsigned integer that is bit_size bits
  * wide, computed by shifting UINT64_MAX right by the number of unused bits.
  * bit_size must be in the range [1, 64]; this is asserted.
  */
 static inline uint64_t
 u_uintN_max(unsigned bit_size)
 {
    assert(bit_size <= 64 && bit_size > 0);

    const unsigned unused_bits = 64 - bit_size;
    return UINT64_MAX >> unused_bits;
 }

 /* Rounds x up to a multiple of y using mask arithmetic.
  * NOTE(review): the masking only yields a multiple of y when y is a power of
  * two; nothing here checks that - confirm against callers.
  */
 static inline uint
 align(uint x, uint y)
 {
    const uint low_bits = y - 1;
    return (x + low_bits) & ~low_bits;
 }

 /* Returns floor(log2(n)) as 31 minus the leading-zero count. Bit 0 is forced
  * on before counting, so n == 0 yields the same result as n == 1 (zero).
  */
 static inline uint32_t
 util_logbase2(uint32_t n)
 {
    const uint32_t nonzero = n | 1;
    return 31 - clz(nonzero);
 }

 /* Returns ceil(log2(n)). Inputs 0 and 1 both yield 0; otherwise the result is
  * 32 minus the leading-zero count of n - 1.
  */
 static inline uint32_t
 util_logbase2_ceil(uint32_t n)
 {
    if (n <= 1)
       return 0;

    return 32 - clz(n - 1);
 }

 /* Small arithmetic helpers, written as macros so they work for any integer
  * type. Arguments may be evaluated more than once, so avoid side effects.
  *
  * BITFIELD64_MASK(x): 64-bit value with the low x bits set; x == 64 is special
  *                     cased because shifting a 64-bit value by 64 is not
  *                     valid. x is parenthesized so that compound expressions
  *                     are compared and shifted as a whole.
  * IS_POT(v):          true when v has at most one bit set (so also for 0).
  * IS_POT_NONZERO(v):  like IS_POT but false for 0.
  * DIV_ROUND_UP(A, B): integer division of A by B, rounding up.
  * CLAMP(X, MIN, MAX): X limited to the range [MIN, MAX].
  * ALIGN_POT(x, a):    x rounded up using mask arithmetic; as the name says,
  *                     a is expected to be a power of two.
  */
 #define BITFIELD64_MASK(x) (((x) == 64) ? ~0ul : ((1ul << (x)) - 1))
 #define IS_POT(v)          (((v) & ((v) - 1)) == 0)
 #define IS_POT_NONZERO(v)  ((v) != 0 && IS_POT(v))
 #define DIV_ROUND_UP(A, B)      (((A) + (B) - 1) / (B))
 #define CLAMP(X, MIN, MAX)      ((X) > (MIN) ? ((X) > (MAX) ? (MAX) : (X)) : (MIN))
 #define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1))

 /* Reinterprets the bits of a float as a 32-bit unsigned integer (as_uint);
  * no numeric conversion takes place.
  */
 static inline uint32_t
 fui(float f)
 {
    const uint32_t bits = as_uint(f);
    return bits;
 }

 /* Reinterprets a 32-bit unsigned integer as a float (as_float); the inverse
  * of fui(). No numeric conversion takes place.
  */
 static inline float
 uif(uint32_t ui)
 {
    const float value = as_float(ui);
    return value;
 }

 /* Single-precision machine epsilon (2^-23), used by roundf() below. */
 #define CL_FLT_EPSILON 1.1920928955078125e-7f

 /* OpenCL C lacks roundf and llroundf, we can emulate it.
  *
  * roundf() adds a bias carrying the sign of x and then truncates toward zero.
  * The bias is 0.5 - 0.25 * epsilon, i.e. slightly less than one half
  * (presumably so that inputs just below a .5 boundary are not pushed over it
  * by the addition).
  */
 static inline float roundf(float x)
 {
    return trunc(x + copysign(0.5f - 0.25f * CL_FLT_EPSILON, x));
 }

 /* Rounds x with roundf() and converts the rounded value to long long. */
 static inline long long
 llroundf(float x)
 {
    const float rounded = roundf(x);
    return (long long)rounded;
 }

 static inline uint16_t
 _mesa_float_to_half(float f)
 {
    return as_ushort(convert_half(f));
 }

 static inline float
 _mesa_half_to_float(uint16_t w)
 {
    return convert_float(as_half(w));
 }

 /* Duplicates u_math.h. We should make that header CL safe at some point...
  *
  * Sign-extends the low `width` bits of val to a full 64-bit signed value: the
  * field is shifted up to the top of the word and then shifted back down with
  * an arithmetic (sign-propagating) right shift.
  *
  * width must be in the range [1, 64]. Anything else would produce a shift
  * count outside [0, 63], so it is asserted like the bit_size argument of
  * u_intN_max()/u_uintN_max().
  */
 static inline int64_t
 util_sign_extend(uint64_t val, unsigned width)
 {
    assert(width > 0 && width <= 64);
    unsigned shift = 64 - width;
    return (int64_t)(val << shift) >> shift;
 }

 #endif
	/*
	* Copyright 2024 Valve Corporation
	* Copyright 2023 Alyssa Rosenzweig
	* SPDX-License-Identifier: MIT
	*/

	#pragma once

	/*
	* This header adds definitions that are common between the CPU and the GPU for
	* shared headers. It also fills in basic standard library holes for internal
	* OpenCL.
	*/

	#ifndef __OPENCL_VERSION__

	/* The OpenCL version of this header defines many OpenCL versions of stdint.h
	* and util/macros.h functions. #include both here for consistency in shared
	* headers.
	*/
	#include <stdint.h>
	#include "util/macros.h"

	/* Structures defined in common host/device headers that include device pointers
	* need to resolve to a real pointer in OpenCL but an opaque 64-bit address on
	* the host. The DEVICE macro facilitates that: on the host it discards type_
	* and always expands to uint64_t.
	*/
	#define DEVICE(type_) uint64_t

	/* However, inline functions defined in common host/device headers that take
	* pointers need to resolve to pointers on either host or device. (Host pointers
	* on the host, device pointers on the device.) This would be automatic with
	* OpenCL generic pointers, but those can cause headaches and lose constness,
	* so these defines allow GLOBAL/CONST keywords to be used even in CPU code.
	* Annoyingly, we can't use global/constant here because it conflicts with C++
	* standard library headers.
	*
	* On the host, GLOBAL expands to nothing and CONST expands to plain const.
	*/
	#define GLOBAL
	#define CONST const

	#else

	/* GenXML likes to use fp16. Since fp16 is supported by all grown up drivers, we
	* just enable the extension everywhere.
	*/
	#pragma OPENCL EXTENSION cl_khr_fp16 : enable

	/* The OpenCL side of DEVICE must resolve to real pointer types, unlike
	* the host version: DEVICE(T) expands to a pointer to T qualified with the
	* global address space.
	*/
	#define DEVICE(type_) global type_ *

	/* Passthrough: GLOBAL and CONST expand to the OpenCL address space qualifiers
	* global and constant respectively.
	*/
	#define GLOBAL global
	#define CONST constant

	/* OpenCL C does not provide the <stdint.h> names, but the sizes of its
	* built-in integer types are known. Aliasing the exact-width names onto them
	* lets shared headers declare explicitly sized fields (and therefore
	* compatible data layouts) on both host and device.
	*/
	typedef char   int8_t;
	typedef uchar  uint8_t;

	typedef short  int16_t;
	typedef ushort uint16_t;

	typedef int    int32_t;
	typedef uint   uint32_t;

	typedef long   int64_t;
	typedef ulong  uint64_t;

	/* OpenCL C lacks static_assert, a part of C11. This makes static_assert
	* available on both host and device. It is defined as variadic to handle also
	* no-message static_asserts (standardized in C23).
	*
	* The emulation declares a char array typedef whose size is 1 when _COND holds
	* and -1 otherwise, so a false condition produces an invalid (negative-size)
	* array type. Any message argument is accepted but discarded. The typedef name
	* is built from __LINE__, so all assertions on one source line share a name.
	*
	* _S stringifies its argument; it is not referenced elsewhere in the visible
	* part of this header. _PASTE goes through _PASTE_ so that __LINE__ is
	* macro-expanded before token pasting.
	*/
	#define _S(x) #x
	#define _PASTE_(x, y) x##y
	#define _PASTE(x, y) _PASTE_(x, y)
	#define static_assert(_COND, ...) \
	typedef char _PASTE(static_assertion, __LINE__)[(_COND) ? 1 : -1]

	/* NIR's precompilation infrastructure requires specifying a workgroup size with
	* the kernel, via reqd_work_group_size. Unfortunately, reqd_work_group_size has
	* terrible ergonomics, so we provide these aliases instead.
	*
	* Each macro expands to the attribute followed by "kernel void", so it is
	* written in place of the return type, directly before the kernel name.
	*/
	#define KERNEL3D(x, y, z) \
	__attribute__((reqd_work_group_size(x, y, z))) kernel void

	/* Lower-dimensional shorthands: the omitted dimensions are fixed to 1. */
	#define KERNEL2D(x, y) KERNEL3D(x, y, 1)
	#define KERNEL(x) KERNEL2D(x, 1)

	/* stddef.h usually defines this. We don't have that on the OpenCL side but we
	* can use the compiler builtin, to which both arguments are forwarded as-is.
	*/
	#define offsetof(x, y) __builtin_offsetof(x, y)

	/* This is not an exact match for the util/macros.h version but without the
	* aligned(4) we get garbage code gen and in practice this is what you want.
	* Note that a type marked PACKED here is therefore packed AND 4-byte aligned.
	*/
	#define PACKED __attribute__((packed, aligned(4)))

	/* OpenCL C doesn't seem to have an equivalent for this but it doesn't matter.
	* Compare util/macros.h. On the device it expands to nothing.
	*/
	#define ENUM_PACKED

	/* FILE * pointers can be useful in function signatures shared across
	* host/device, but are meaningless in OpenCL. FILE is defined as void so that
	* "FILE *" becomes "void *", allowing a consistent prototype across
	* host/device even though there won't be an actual file pointer on the device
	* side.
	*/
	#define FILE void

	/* OpenCL C lacks a standard memcpy, but clang has one that will be plumbed into
	* a NIR memcpy intrinsic. This is not a competent implementation of memcpy for
	* large amounts of data, since it's necessarily single threaded, but memcpy is
	* so useful for shared CPU/GPU code that it's worth making the standard
	* library function work.
	*/
	#define memcpy __builtin_memcpy

	/* OpenCL C has no standard abort(), so one is supplied here. It simply
	* forwards to nir_printf_abort(), which is only declared in this header.
	*/
	void nir_printf_abort(void);

	static inline void
	abort(void)
	{
	   nir_printf_abort();
	}

	/* OpenCL C lacks a standard assert. We implement one on top of the abort
	* intrinsic. We are careful to use a single printf so the lines don't get
	* split up if multiple threads assert in parallel.
	*
	* The expansion is wrapped in do { } while (0) so that "assert(x);" is exactly
	* one statement. A bare "if (...) { ... }" expansion breaks when the assert is
	* the body of an unbraced if that has an else branch.
	*
	* With NDEBUG defined, assert(x) does nothing and x is not evaluated.
	*/
	#ifndef NDEBUG
	#define _ASSERT_STRING(x) _ASSERT_STRING_INNER(x)
	#define _ASSERT_STRING_INNER(x) #x
	#define assert(x) \
	   do { \
	      if (!(x)) { \
	         printf("Shader assertion fail at " __FILE__ ":" \
	                _ASSERT_STRING(__LINE__) "\nExpected " #x "\n\n"); \
	         nir_printf_abort(); \
	      } \
	   } while (0)
	#else
	#define assert(x) do { } while (0)
	#endif

	/* Core OpenCL C lacks likely/unlikely. We might be able to map to a clang
	* builtin though... For now both are plain passthroughs that yield x
	* unchanged and carry no branch hint.
	*/
	#define likely(x) (x)
	#define unlikely(x) (x)

	/* These duplicate the C standard library and are required for the
	* u_intN_min/max implementations. The l/ul suffixes match the long/ulong
	* types that int64_t/uint64_t are typedef'd to in this header.
	*/
	#define UINT64_MAX 18446744073709551615ul
	#define INT64_MAX 9223372036854775807l

	/* These duplicate util/macros.h. This could maybe be cleaned up.
	*
	* BITFIELD_BIT(b):  32-bit value with only bit b set. The argument is
	*                   parenthesized so that expressions using operators of
	*                   lower precedence than << (e.g. "a | b") shift correctly.
	* BITFIELD_MASK(m): 32-bit value with the low m bits set; m == 32 is special
	*                   cased because shifting a 32-bit value by 32 is not valid.
	* ASSERTED, ALWAYS_INLINE, UNUSED: expand to nothing on the device.
	*/
	#define BITFIELD_BIT(b) (1u << (b))
	#define BITFIELD_MASK(m) (((m) == 32) ? 0xffffffff : ((1u << (m)) - 1))
	#define ASSERTED
	#define ALWAYS_INLINE
	#define UNUSED

	/* Returns the largest value of a signed integer that is bit_size bits wide,
	* computed by shifting INT64_MAX right by the number of unused bits.
	* bit_size must be in the range [1, 64]; this is asserted.
	*/
	static inline int64_t
	u_intN_max(unsigned bit_size)
	{
	   assert(bit_size <= 64 && bit_size > 0);

	   const unsigned unused_bits = 64 - bit_size;
	   return INT64_MAX >> unused_bits;
	}

	/* Returns the smallest value of a signed integer that is bit_size bits wide,
	* derived from u_intN_max() as -max - 1. The bit_size range check is the one
	* performed inside u_intN_max().
	*/
	static inline int64_t
	u_intN_min(unsigned bit_size)
	{
	   const int64_t max_value = u_intN_max(bit_size);
	   return -max_value - 1;
	}

	/* Returns the largest value of an unsigned integer that is bit_size bits
	* wide, computed by shifting UINT64_MAX right by the number of unused bits.
	* bit_size must be in the range [1, 64]; this is asserted.
	*/
	static inline uint64_t
	u_uintN_max(unsigned bit_size)
	{
	   assert(bit_size <= 64 && bit_size > 0);

	   const unsigned unused_bits = 64 - bit_size;
	   return UINT64_MAX >> unused_bits;
	}

	/* Rounds x up to a multiple of y using mask arithmetic.
	* NOTE(review): the masking only yields a multiple of y when y is a power of
	* two; nothing here checks that - confirm against callers.
	*/
	static inline uint
	align(uint x, uint y)
	{
	   const uint low_bits = y - 1;
	   return (x + low_bits) & ~low_bits;
	}

	/* Returns floor(log2(n)) as 31 minus the leading-zero count. Bit 0 is forced
	* on before counting, so n == 0 yields the same result as n == 1 (zero).
	*
	* The bitwise OR is written as a plain '|'; a backslash before it is not
	* valid C.
	*/
	static inline uint32_t
	util_logbase2(uint32_t n)
	{
	   return (31 - clz(n | 1));
	}

	/* Returns ceil(log2(n)). Inputs 0 and 1 both yield 0; otherwise the result is
	* 32 minus the leading-zero count of n - 1.
	*/
	static inline uint32_t
	util_logbase2_ceil(uint32_t n)
	{
	   if (n <= 1)
	      return 0;

	   return 32 - clz(n - 1);
	}

	/* Small arithmetic helpers, written as macros so they work for any integer
	* type. Arguments may be evaluated more than once, so avoid side effects.
	*
	* BITFIELD64_MASK(x): 64-bit value with the low x bits set; x == 64 is special
	*                     cased because shifting a 64-bit value by 64 is not
	*                     valid. x is parenthesized so that compound expressions
	*                     are compared and shifted as a whole.
	* IS_POT(v):          true when v has at most one bit set (so also for 0).
	* IS_POT_NONZERO(v):  like IS_POT but false for 0.
	* DIV_ROUND_UP(A, B): integer division of A by B, rounding up.
	* CLAMP(X, MIN, MAX): X limited to the range [MIN, MAX].
	* ALIGN_POT(x, a):    x rounded up using mask arithmetic; as the name says,
	*                     a is expected to be a power of two.
	*/
	#define BITFIELD64_MASK(x) (((x) == 64) ? ~0ul : ((1ul << (x)) - 1))
	#define IS_POT(v) (((v) & ((v) - 1)) == 0)
	#define IS_POT_NONZERO(v) ((v) != 0 && IS_POT(v))
	#define DIV_ROUND_UP(A, B) (((A) + (B) - 1) / (B))
	#define CLAMP(X, MIN, MAX) ((X) > (MIN) ? ((X) > (MAX) ? (MAX) : (X)) : (MIN))
	#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1))

	/* Reinterprets the bits of a float as a 32-bit unsigned integer (as_uint);
	* no numeric conversion takes place.
	*/
	static inline uint32_t
	fui(float f)
	{
	   const uint32_t bits = as_uint(f);
	   return bits;
	}

	/* Reinterprets a 32-bit unsigned integer as a float (as_float); the inverse
	* of fui(). No numeric conversion takes place.
	*/
	static inline float
	uif(uint32_t ui)
	{
	   const float value = as_float(ui);
	   return value;
	}

	/* Single-precision machine epsilon (2^-23), used by roundf() below. */
	#define CL_FLT_EPSILON 1.1920928955078125e-7f

	/* OpenCL C lacks roundf and llroundf, we can emulate it.
	*
	* roundf() adds a bias carrying the sign of x and then truncates toward zero.
	* The bias is 0.5 - 0.25 * epsilon, i.e. slightly less than one half
	* (presumably so that inputs just below a .5 boundary are not pushed over it
	* by the addition).
	*/
	static inline float roundf(float x)
	{
	return trunc(x + copysign(0.5f - 0.25f * CL_FLT_EPSILON, x));
	}

	/* Rounds x with roundf() and converts the rounded value to long long. */
	static inline long long
	llroundf(float x)
	{
	   const float rounded = roundf(x);
	   return (long long)rounded;
	}

	static inline uint16_t
	_mesa_float_to_half(float f)
	{
	return as_ushort(convert_half(f));
	}

	static inline float
	_mesa_half_to_float(uint16_t w)
	{
	return convert_float(as_half(w));
	}

	/* Duplicates u_math.h. We should make that header CL safe at some point...
	*
	* Sign-extends the low `width` bits of val to a full 64-bit signed value: the
	* field is shifted up to the top of the word and then shifted back down with
	* an arithmetic (sign-propagating) right shift.
	*
	* width must be in the range [1, 64]. Anything else would produce a shift
	* count outside [0, 63], so it is asserted like the bit_size argument of
	* u_intN_max()/u_uintN_max().
	*/
	static inline int64_t
	util_sign_extend(uint64_t val, unsigned width)
	{
	   assert(width > 0 && width <= 64);
	   unsigned shift = 64 - width;
	   return (int64_t)(val << shift) >> shift;
	}

	#endif