/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _LINUX_RANDOMIZE_KSTACK_H
#define _LINUX_RANDOMIZE_KSTACK_H

#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <linux/percpu-defs.h>

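/*
 * randomize_kstack_offset is a static key (default state set by
 * CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, runtime-selectable via the
 * "randomize_kstack_offset=" boot parameter) that gates the feature.
 * kstack_offset is the per-cpu entropy word: refreshed at syscall exit
 * by choose_random_kstack_offset() and consumed at syscall entry by
 * add_random_kstack_offset().
 */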
DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
			 randomize_kstack_offset);
DECLARE_PER_CPU(u32, kstack_offset);

/*
 * Do not use this anywhere else in the kernel. This is used here because
 * it provides an arch-agnostic way to grow the stack with correct
 * alignment. Also, since this use is being explicitly masked to a max of
 * 10 bits, stack-clash style attacks are unlikely. For more details see
 * "VLAs" in Documentation/process/deprecated.rst
 *
 * The normal __builtin_alloca() is initialized with INIT_STACK_ALL (currently
 * only with Clang and not GCC). Initializing the unused area on each syscall
 * entry is expensive, and generating an implicit call to memset() may also be
 * problematic (such as in noinstr functions). Therefore, if the compiler
 * supports it (which it should if it initializes allocas), always use the
 * "uninitialized" variant of the builtin.
 */
#if __has_builtin(__builtin_alloca_uninitialized)
#define __kstack_alloca __builtin_alloca_uninitialized
#else
#define __kstack_alloca __builtin_alloca
#endif

/*
 * Use, at most, 10 bits of entropy. We explicitly cap this to keep the
 * "VLA" from being unbounded (see above). 10 bits leaves enough room for
 * per-arch offset masks to reduce entropy (by removing higher bits, since
 * high entropy may overly constrain usable stack space), and for
 * compiler/arch-specific stack alignment to remove the lower bits.
 */
#define KSTACK_OFFSET_MAX(x)	((x) & 0x3FF)

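/*
 * Illustrative arithmetic (an assumption for the example, not a per-arch
 * guarantee): the mask bounds the alloca below to at most 0x3FF == 1023
 * bytes of stack growth. Because the compiler rounds alloca sizes up to
 * the ABI stack alignment, the low bits carry no entropy; e.g. with
 * 16-byte alignment:
 *
 *	1024 / 16 == 64 distinct offsets, i.e. ~6 bits of effective entropy.
 */
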
/**
 * add_random_kstack_offset - Increase stack utilization by previously
 *			      chosen random offset
 *
 * This should be used in the syscall entry path when interrupts and
 * preempt are disabled, and after user registers have been stored to
 * the stack. For testing the resulting entropy, please see:
 * tools/testing/selftests/lkdtm/stack-entropy.sh
 */
#define add_random_kstack_offset() do {					\
	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
				&randomize_kstack_offset)) {		\
		u32 offset = raw_cpu_read(kstack_offset);		\
		u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset));	\
		/* Keep allocation even after "ptr" loses scope. */	\
		asm volatile("" :: "r"(ptr) : "memory");		\
	}								\
} while (0)

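/*
 * Sketch of a call site (illustrative only; arch_syscall_enter() is a
 * made-up name, not any architecture's real entry function). The offset
 * must be applied after pt_regs has been saved, so the saved registers
 * stay at a fixed location, and before dispatch, so the handler's frame
 * lands at the randomized depth:
 *
 *	__visible noinstr void arch_syscall_enter(struct pt_regs *regs)
 *	{
 *		add_random_kstack_offset();
 *		... look up and call the syscall handler ...
 *	}
 */
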
/**
 * choose_random_kstack_offset - Choose the random offset for the next
 *				 add_random_kstack_offset()
 *
 * This should only be used during syscall exit when interrupts and
 * preempt are disabled. Choosing the offset at this position in the
 * syscall flow frustrates attacks from userspace attempting to learn
 * the next offset:
 * - Maximize the timing uncertainty visible from userspace: if the
 *   offset is chosen at syscall entry, userspace has much more control
 *   over the timing between choosing offsets. "How long will we be in
 *   kernel mode?" tends to be more difficult to predict than "how long
 *   will we be in user mode?"
 * - Reduce the lifetime of the new offset sitting in memory during
 *   kernel mode execution. Exposure of "thread-local" memory content
 *   (e.g. current, percpu, etc) tends to be easier than arbitrary
 *   location memory exposure.
 */
#define choose_random_kstack_offset(rand) do {				\
	if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
				&randomize_kstack_offset)) {		\
		u32 offset = raw_cpu_read(kstack_offset);		\
		offset = ror32(offset, 5) ^ (rand);			\
		raw_cpu_write(kstack_offset, offset);			\
	}								\
} while (0)
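
/*
 * Sketch of a call site (illustrative; arch_cycles() is a stand-in for
 * whatever cheap entropy source an architecture has on hand, such as a
 * cycle counter):
 *
 *	choose_random_kstack_offset(arch_cycles());
 *
 * The ror32()/XOR above folds the new value into the previous per-cpu
 * state, so even a weak per-call source accumulates across syscalls
 * instead of replacing the old offset outright.
 */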
#else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
#define add_random_kstack_offset() do { } while (0)
#define choose_random_kstack_offset(rand) do { } while (0)
#endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */

#endif /* _LINUX_RANDOMIZE_KSTACK_H */