| // Program explore is evolved from the code discussed in more depth |
| // here: |
| // |
| // https://github.com/golang/go/issues/3405 |
| // |
| // The code here demonstrates that while PR_SET_NO_NEW_PRIVS only |
| // applies to the calling thread, since |
| // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=103502a35cfce0710909da874f092cb44823ca03 |
| // the seccomp filter application forces the setting to be mirrored on |
| // all the threads of a process. |
| // |
| // Based on the command line options, we can manipulate the program to |
| // behave in various ways. Example command lines: |
| // |
| // sudo ./explore |
| // sudo ./explore --kill=false |
| // sudo ./explore --kill=false --errno=0 |
| // |
| // Supported Go toolchains are after go1.10. Those prior to go1.15 |
| // require this environment variable to be set to build successfully: |
| // |
| // export CGO_LDFLAGS_ALLOW="-Wl,-?-wrap[=,][^-.@][^,]*" |
| // |
| // Go toolchains go1.16+ can be compiled CGO_ENABLED=0 too, |
| // demonstrating native nocgo support for seccomp features. |
| package main |
| |
| import ( |
| "flag" |
| "fmt" |
| "log" |
| "runtime" |
| "syscall" |
| "time" |
| "unsafe" |
| |
| "kernel.org/pub/linux/libs/security/libcap/psx" |
| ) |
| |
| var ( |
| withPSX = flag.Bool("psx", false, "use the psx mechanism to invoke prctl syscall") |
| delays = flag.Bool("delays", false, "use this to pause the program at various places") |
| kill = flag.Bool("kill", true, "kill the process if setuid attempted") |
| errno = flag.Int("errno", int(syscall.ENOTSUP), "if kill is false, block syscall and return this errno") |
| ) |
| |
| const ( |
| prSetNoNewPrivs = 38 |
| |
| sysSeccomp = 317 // x86_64 syscall number |
| seccompSetModeFilter = 1 // uses user-supplied filter. |
| seccompFilterFlagTsync = (1 << 0) // mirror filtering on all threads. |
| seccompRetErrno = 0x00050000 // returns an errno |
| seccompRetData = 0x0000ffff // mask for RET data payload (ex. errno) |
| seccompRetKillProcess = 0x80000000 // kill the whole process immediately |
| seccompRetTrap = 0x00030000 // disallow and force a SIGSYS |
| seccompRetAllow = 0x7fff0000 |
| |
| bpfLd = 0x00 |
| bpfJmp = 0x05 |
| bpfRet = 0x06 |
| |
| bpfW = 0x00 |
| |
| bpfAbs = 0x20 |
| bpfJeq = 0x10 |
| |
| bpfK = 0x00 |
| |
| auditArchX86_64 = 3221225534 // HACK: I don't understand this value |
| archNr = auditArchX86_64 |
| |
| syscallNr = 0 |
| ) |
| |
| // SockFilter is a single filter block. |
| type SockFilter struct { |
| // Code is the filter code instruction. |
| Code uint16 |
| // Jt is the target for a true result from the code execution. |
| Jt uint8 |
| // Jf is the target for a false result from the code execution. |
| Jf uint8 |
| // K is a generic multiuse field |
| K uint32 |
| } |
| |
| // SockFProg is a |
| type SockFProg struct { |
| // Len is the number of contiguous SockFilter blocks that can |
| // be found at *Filter. |
| Len uint16 |
| // Filter is the address of the first SockFilter block of a |
| // program sequence. |
| Filter *SockFilter |
| } |
| |
| // SockFilterSlice is a subprogram filter. |
| type SockFilterSlice []SockFilter |
| |
| func bpfStmt(code uint16, k uint32) SockFilter { |
| return SockFilter{code, 0, 0, k} |
| } |
| |
| func bpfJump(code uint16, k uint32, jt uint8, jf uint8) SockFilter { |
| return SockFilter{code, jt, jf, k} |
| } |
| |
| func validateArchitecture() []SockFilter { |
| return []SockFilter{ |
| bpfStmt(bpfLd+bpfW+bpfAbs, 4), // HACK: I don't understand this 4. |
| bpfJump(bpfJmp+bpfJeq+bpfK, archNr, 1, 0), |
| bpfStmt(bpfRet+bpfK, seccompRetKillProcess), |
| } |
| } |
| |
| func ExamineSyscall() []SockFilter { |
| return []SockFilter{ |
| bpfStmt(bpfLd+bpfW+bpfAbs, syscallNr), |
| } |
| } |
| |
| func AllowSyscall(syscallNum uint32) []SockFilter { |
| return []SockFilter{ |
| bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1), |
| bpfStmt(bpfRet+bpfK, seccompRetAllow), |
| } |
| } |
| |
| func DisallowSyscall(syscallNum, errno uint32) []SockFilter { |
| return []SockFilter{ |
| bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1), |
| bpfStmt(bpfRet+bpfK, seccompRetErrno|(errno&seccompRetData)), |
| } |
| } |
| |
| func KillProcess() []SockFilter { |
| return []SockFilter{ |
| bpfStmt(bpfRet+bpfK, seccompRetKillProcess), |
| } |
| } |
| |
| func NotifyProcessAndDie() []SockFilter { |
| return []SockFilter{ |
| bpfStmt(bpfRet+bpfK, seccompRetTrap), |
| } |
| } |
| |
| func TrapOnSyscall(syscallNum uint32) []SockFilter { |
| return []SockFilter{ |
| bpfJump(bpfJmp+bpfJeq+bpfK, syscallNum, 0, 1), |
| bpfStmt(bpfRet+bpfK, seccompRetTrap), |
| } |
| } |
| |
| func AllGood() []SockFilter { |
| return []SockFilter{ |
| bpfStmt(bpfRet+bpfK, seccompRetAllow), |
| } |
| } |
| |
| // prctl executes the prctl - unless the --psx commandline argument is |
| // used, this is on a single thread. |
| //go:uintptrescapes |
| func prctl(option, arg1, arg2, arg3, arg4, arg5 uintptr) error { |
| var e syscall.Errno |
| if *withPSX { |
| _, _, e = psx.Syscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5) |
| } else { |
| _, _, e = syscall.RawSyscall6(syscall.SYS_PRCTL, option, arg1, arg2, arg3, arg4, arg5) |
| } |
| if e != 0 { |
| return e |
| } |
| if *delays { |
| fmt.Println("prctl'd - check now") |
| time.Sleep(1 * time.Minute) |
| } |
| return nil |
| } |
| |
| // SeccompSetModeFilter is our wrapper for performing our seccomp system call. |
| //go:uintptrescapes |
| func SeccompSetModeFilter(prog *SockFProg) error { |
| if _, _, e := syscall.RawSyscall(sysSeccomp, seccompSetModeFilter, seccompFilterFlagTsync, uintptr(unsafe.Pointer(prog))); e != 0 { |
| return e |
| } |
| return nil |
| } |
| |
| var empty func() |
| |
| func lockProcessThread(pick bool) { |
| // Make sure we are |
| pid := uintptr(syscall.Getpid()) |
| runtime.LockOSThread() |
| for { |
| tid, _, _ := syscall.RawSyscall(syscall.SYS_GETTID, 0, 0, 0) |
| if (tid == pid) == pick { |
| fmt.Println("validated TID:", tid, "== PID:", pid, "is", pick) |
| break |
| } |
| runtime.UnlockOSThread() |
| go func() { |
| time.Sleep(1 * time.Microsecond) |
| }() |
| runtime.Gosched() |
| runtime.LockOSThread() |
| } |
| } |
| |
| // applyPolicy uploads the program sequence. |
| func applyPolicy(prog *SockFProg) { |
| // Without PSX we can't guarantee the thread we execute the |
| // seccomp call on will be the same one that we disabled new |
| // privs on. With PSX, the disabling of new privs is mirrored |
| // on all threads. |
| if !*withPSX { |
| lockProcessThread(false) |
| defer runtime.UnlockOSThread() |
| } |
| |
| // This is required to load a filter without privilege. |
| if err := prctl(prSetNoNewPrivs, 1, 0, 0, 0, 0); err != nil { |
| log.Fatalf("Prctl(PR_SET_NO_NEW_PRIVS): %v", err) |
| } |
| |
| fmt.Println("Applying syscall policy...") |
| if err := SeccompSetModeFilter(prog); err != nil { |
| log.Fatalf("seccomp_set_mode_filter: %v", err) |
| } |
| fmt.Println("...Policy applied") |
| } |
| |
| func main() { |
| flag.Parse() |
| |
| if *delays { |
| fmt.Println("check first", syscall.Getpid()) |
| time.Sleep(60 * time.Second) |
| } |
| |
| var filter []SockFilter |
| filter = append(filter, validateArchitecture()...) |
| |
| // Grab the system call number. |
| filter = append(filter, ExamineSyscall()...) |
| |
| // List disallowed syscalls. |
| for _, x := range []uint32{ |
| syscall.SYS_SETUID, |
| } { |
| if *kill { |
| filter = append(filter, TrapOnSyscall(x)...) |
| } else { |
| filter = append(filter, DisallowSyscall(x, uint32(*errno))...) |
| } |
| } |
| |
| filter = append(filter, AllGood()...) |
| |
| prog := &SockFProg{ |
| Len: uint16(len(filter)), |
| Filter: &filter[0], |
| } |
| |
| applyPolicy(prog) |
| |
| // Ensure we are running on the TID=PID. |
| lockProcessThread(true) |
| |
| log.Print("Now it is time to try to run something privileged...") |
| if _, _, e := syscall.RawSyscall(syscall.SYS_SETUID, 1, 0, 0); e != 0 { |
| log.Fatalf("setuid failed with an error: %v", e) |
| } |
| log.Print("Looked like that worked, but it really didn't: uid == ", syscall.Getuid(), " != 1") |
| } |