| // SPDX-License-Identifier: GPL-2.0-only |
| /* 64-bit system call dispatch */ |
| |
| #include <linux/linkage.h> |
| #include <linux/sys.h> |
| #include <linux/cache.h> |
| #include <linux/syscalls.h> |
| #include <linux/entry-common.h> |
| #include <linux/nospec.h> |
| #include <asm/syscall.h> |
| |
/*
 * First pass over the syscall list headers: expand every entry into an
 * extern prototype for its __x64_-prefixed entry point. Entries listed
 * through __SYSCALL_NORETURN additionally carry the __noreturn annotation.
 * The x32 list is only pulled in when CONFIG_X86_X32_ABI is set.
 */
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
#ifdef CONFIG_X86_X32_ABI
#include <asm/syscalls_x32.h>
#endif
#undef __SYSCALL

/*
 * The prototypes are emitted. For every later expansion of the lists in
 * this file, a noreturn entry is handled exactly like an ordinary one.
 */
#undef __SYSCALL_NORETURN
#define __SYSCALL_NORETURN __SYSCALL
| |
/*
 * The sys_call_table[] is no longer used for system calls, but
 * kernel/trace/trace_syscalls.c still wants to know the system
 * call address.
 *
 * Each list entry expands to the address of its __x64_ entry point
 * followed by a comma, so the array is filled positionally, in the
 * order the list header provides the entries. __SYSCALL is undefined
 * again right after the table so it can be redefined below.
 */
#define __SYSCALL(nr, sym) __x64_##sym,
const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL
| |
| #define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); |
| long x64_sys_call(const struct pt_regs *regs, unsigned int nr) |
| { |
| switch (nr) { |
| #include <asm/syscalls_64.h> |
| default: return __x64_sys_ni_syscall(regs); |
| } |
| } |
| |
#ifdef CONFIG_X86_X32_ABI
/*
 * Dispatch an x32 system call by number.
 *
 * @regs: register frame handed unchanged to the selected entry point
 * @nr:   system call number, matched against the case labels produced
 *        from <asm/syscalls_x32.h> (expanded with the __SYSCALL case
 *        macro that is still in effect here)
 *
 * Returns whatever the matching entry point returns. A number that has
 * no case label gets the result of __x64_sys_ni_syscall() instead.
 */
long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
{
	switch (nr) {
#include <asm/syscalls_x32.h>
	default:
		break;
	}

	/* Nothing in the list matched this number. */
	return __x64_sys_ni_syscall(regs);
}
#endif /* CONFIG_X86_X32_ABI */
| |
| static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) |
| { |
| /* |
| * Convert negative numbers to very high and thus out of range |
| * numbers for comparisons. |
| */ |
| unsigned int unr = nr; |
| |
| if (likely(unr < NR_syscalls)) { |
| unr = array_index_nospec(unr, NR_syscalls); |
| regs->ax = x64_sys_call(regs, unr); |
| return true; |
| } |
| return false; |
| } |
| |
| static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) |
| { |
| /* |
| * Adjust the starting offset of the table, and convert numbers |
| * < __X32_SYSCALL_BIT to very high and thus out of range |
| * numbers for comparisons. |
| */ |
| unsigned int xnr = nr - __X32_SYSCALL_BIT; |
| |
| if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { |
| xnr = array_index_nospec(xnr, X32_NR_syscalls); |
| regs->ax = x32_sys_call(regs, xnr); |
| return true; |
| } |
| return false; |
| } |
| |
| /* Returns true to return using SYSRET, or false to use IRET */ |
| __visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr) |
| { |
| add_random_kstack_offset(); |
| nr = syscall_enter_from_user_mode(regs, nr); |
| |
| instrumentation_begin(); |
| |
| if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) { |
| /* Invalid system call, but still a system call. */ |
| regs->ax = __x64_sys_ni_syscall(regs); |
| } |
| |
| instrumentation_end(); |
| syscall_exit_to_user_mode(regs); |
| |
| /* |
| * Check that the register state is valid for using SYSRET to exit |
| * to userspace. Otherwise use the slower but fully capable IRET |
| * exit path. |
| */ |
| |
| /* XEN PV guests always use the IRET path */ |
| if (cpu_feature_enabled(X86_FEATURE_XENPV)) |
| return false; |
| |
| /* SYSRET requires RCX == RIP and R11 == EFLAGS */ |
| if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags)) |
| return false; |
| |
| /* CS and SS must match the values set in MSR_STAR */ |
| if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS)) |
| return false; |
| |
| /* |
| * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP |
| * in kernel space. This essentially lets the user take over |
| * the kernel, since userspace controls RSP. |
| * |
| * TASK_SIZE_MAX covers all user-accessible addresses other than |
| * the deprecated vsyscall page. |
| */ |
| if (unlikely(regs->ip >= TASK_SIZE_MAX)) |
| return false; |
| |
| /* |
| * SYSRET cannot restore RF. It can restore TF, but unlike IRET, |
| * restoring TF results in a trap from userspace immediately after |
| * SYSRET. |
| */ |
| if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF))) |
| return false; |
| |
| /* Use SYSRET to exit to userspace */ |
| return true; |
| } |