Implement a meaningful Go port of libcap
This version of the Go package libcap/cap works well
enough to be used by others. Unfortunately, to use it
we need to apply something like the included patch
(contrib/go.patch) to the build sources for the Go
runtime and syscall packages.
I'll be trying to get these accepted by the Go team
in parallel.
Signed-off-by: Andrew G. Morgan <[email protected]>
diff --git a/Make.Rules b/Make.Rules
index b9a59b8..b685ab2 100644
--- a/Make.Rules
+++ b/Make.Rules
@@ -74,7 +74,7 @@
GOLANG := $(shell if [ -n "$(shell go version 2>/dev/null)" ]; then echo yes ; else echo no ; fi)
ifeq ($(GOLANG),yes)
GOROOT := $(shell go env GOROOT)
-GOCGO := $(shell if [ "$(shell go env CGO_ENABLED)" == 1 ]; then echo yes ; else echo no ; fi)
+GOCGO := $(shell if [ "$(shell go env CGO_ENABLED)" = 1 ]; then echo yes ; else echo no ; fi)
GOOSARCH := $(shell go env GOHOSTOS)_$(shell go env GOHOSTARCH)
endif
diff --git a/cap/.gitignore b/cap/.gitignore
index e7946b9..1c780ed 100644
--- a/cap/.gitignore
+++ b/cap/.gitignore
@@ -1 +1,2 @@
names.go
+syscalls.go
diff --git a/cap/cap.go b/cap/cap.go
index b5a72fb..0aa939c 100644
--- a/cap/cap.go
+++ b/cap/cap.go
@@ -52,12 +52,6 @@
)
var (
- // callKernel variables overridable for testing purposes.
- callKernel = syscall.Syscall
- callKernel6 = syscall.Syscall6
-
- // OS environment provides these.
-
// starUp protects setting of the following values: magic,
// words, maxValues.
startUp sync.Once
@@ -80,25 +74,51 @@
pid int32
}
-// capcall provides a pointer etc wrapper for the system calls
-// associated with getcap and setcap.
-func capcall(call uintptr, h *header, d []data) error {
+// caprcall wraps the system calls associated with getcap, passing
+// the header and data (if any) to the kernel as raw pointers.
+func caprcall(call uintptr, h *header, d []data) error {
x := uintptr(0)
if d != nil {
x = uintptr(unsafe.Pointer(&d[0]))
}
- _, _, err := callKernel(call, uintptr(unsafe.Pointer(h)), x, 0)
+ _, _, err := callRKernel(call, uintptr(unsafe.Pointer(h)), x, 0)
if err != 0 {
return err
}
return nil
}
-// prctlcall provides a wrapper for the prctl systemcalls. There is a
-// limited number of arguments needed and the caller should use 0 for
-// those not needed.
-func prctlcall(prVal, v1, v2 uintptr) (int, error) {
- r, _, err := callKernel6(syscall.SYS_PRCTL, prVal, v1, v2, 0, 0, 0)
+// capwcall wraps the system calls associated with setcap, passing
+// the header and data (if any) to the kernel as raw pointers.
+func capwcall(call uintptr, h *header, d []data) error {
+ x := uintptr(0)
+ if d != nil {
+ x = uintptr(unsafe.Pointer(&d[0]))
+ }
+ _, _, err := callWKernel(call, uintptr(unsafe.Pointer(h)), x, 0)
+ if err != 0 {
+ return err
+ }
+ return nil
+}
+
+// prctlrcall provides a wrapper for the prctl system calls that only
+// read kernel state. There is a limited number of arguments needed
+// and the caller should use 0 for those not needed.
+func prctlrcall(prVal, v1, v2 uintptr) (int, error) {
+ r, _, err := callRKernel6(syscall.SYS_PRCTL, prVal, v1, v2, 0, 0, 0)
+ if err != 0 {
+ return int(r), err
+ }
+ return int(r), nil
+}
+
+// prctlwcall provides a wrapper for the prctl system calls that
+// write/modify kernel state (where available, these will use the
+// POSIX semantics fixup system calls). There is a limited number of
+// arguments needed and the caller should use 0 for those not needed.
+func prctlwcall(prVal, v1, v2 uintptr) (int, error) {
+ r, _, err := callWKernel6(syscall.SYS_PRCTL, prVal, v1, v2, 0, 0, 0)
if err != 0 {
return int(r), err
}
@@ -111,7 +131,7 @@
h := &header{
magic: kv3,
}
- capcall(syscall.SYS_CAPGET, h, nil)
+ caprcall(syscall.SYS_CAPGET, h, nil)
magic = h.magic
switch magic {
case kv1:
@@ -274,7 +294,7 @@
// id; pid=0 is an alias for current.
func GetPID(pid int) (*Set, error) {
v := NewSet()
- if err := capcall(syscall.SYS_CAPGET, &header{magic: magic, pid: int32(pid)}, v.flat); err != nil {
+ if err := caprcall(syscall.SYS_CAPGET, &header{magic: magic, pid: int32(pid)}, v.flat); err != nil {
return nil, err
}
return v, nil
@@ -298,7 +318,7 @@
if c == nil {
return ErrBadSet
}
- return capcall(syscall.SYS_CAPSET, &header{magic: magic}, c.flat)
+ return capwcall(syscall.SYS_CAPSET, &header{magic: magic}, c.flat)
}
// defines from uapi/linux/prctl.h
@@ -311,7 +331,7 @@
// the local bounding set. On systems where the bounding set Value is
// not present, this function returns an error.
func GetBound(val Value) (bool, error) {
- v, err := prctlcall(PR_CAPBSET_READ, uintptr(val), 0)
+ v, err := prctlrcall(PR_CAPBSET_READ, uintptr(val), 0)
if err != nil {
return false, err
}
@@ -328,7 +348,7 @@
// ill-defined state.
func DropBound(val ...Value) error {
for _, v := range val {
- if _, err := prctlcall(PR_CAPBSET_DROP, uintptr(v), 0); err != nil {
+ if _, err := prctlwcall(PR_CAPBSET_DROP, uintptr(v), 0); err != nil {
return err
}
}
@@ -349,7 +369,7 @@
// the local ambient set. On systems where the ambient set Value is
// not present, this function returns an error.
func GetAmbient(val Value) (bool, error) {
- r, err := prctlcall(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, uintptr(val))
+ r, err := prctlrcall(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, uintptr(val))
return r > 0, err
}
@@ -364,7 +384,7 @@
dir = PR_CAP_AMBIENT_RAISE
}
for _, v := range val {
- _, err := prctlcall(PR_CAP_AMBIENT, dir, uintptr(v))
+ _, err := prctlwcall(PR_CAP_AMBIENT, dir, uintptr(v))
if err != nil {
return err
}
@@ -374,6 +394,6 @@
// ResetAmbient attempts to fully clear the Ambient set.
func ResetAmbient() error {
- _, err := prctlcall(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0)
+ _, err := prctlwcall(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0)
return err
}
diff --git a/cap/cap_test.go b/cap/cap_test.go
index a0151cc..e836fde 100644
--- a/cap/cap_test.go
+++ b/cap/cap_test.go
@@ -20,7 +20,7 @@
{"", "", ErrBadText},
{"=", "=", nil},
{"= cap_chown+iep cap_chown-i", "= cap_chown+ep", nil},
- {"= cap_setfcap,cap_chown+iep cap_chown-i", "= cap_setfcap+epi cap_chown+ep", nil},
+ {"= cap_setfcap,cap_chown+iep cap_chown-i", "= cap_setfcap+eip cap_chown+ep", nil},
}
for i, v := range vs {
c, err := FromText(v.from)
diff --git a/cap/file.go b/cap/file.go
index 57144af..189ca61 100644
--- a/cap/file.go
+++ b/cap/file.go
@@ -122,7 +122,7 @@
func GetFd(file *os.File) (*Set, error) {
var raw3 vfs_caps_3
d := make([]byte, binary.Size(raw3))
- sz, _, oErr := callKernel6(syscall.SYS_FGETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
+ sz, _, oErr := callRKernel6(syscall.SYS_FGETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
var err error
if oErr != 0 {
err = oErr
@@ -138,7 +138,7 @@
}
var raw3 vfs_caps_3
d := make([]byte, binary.Size(raw3))
- sz, _, oErr := callKernel6(syscall.SYS_GETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
+ sz, _, oErr := callRKernel6(syscall.SYS_GETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
if oErr != 0 {
err = oErr
}
@@ -210,7 +210,7 @@
// capabilities, by calling with c = nil.
func (c *Set) SetFd(file *os.File) error {
if c == nil {
- if _, _, err := callKernel6(syscall.SYS_FREMOVEXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
+ if _, _, err := callRKernel6(syscall.SYS_FREMOVEXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
return err
}
return nil
@@ -221,7 +221,7 @@
if err != nil {
return err
}
- if _, _, err := callKernel6(syscall.SYS_FSETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
+ if _, _, err := callRKernel6(syscall.SYS_FSETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
return err
}
return nil
@@ -249,7 +249,7 @@
return err
}
if c == nil {
- if _, _, err := callKernel6(syscall.SYS_REMOVEXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
+ if _, _, err := callRKernel6(syscall.SYS_REMOVEXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
return err
}
return nil
@@ -260,7 +260,7 @@
if err != nil {
return err
}
- if _, _, err := callKernel6(syscall.SYS_SETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
+ if _, _, err := callRKernel6(syscall.SYS_SETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
return err
}
return nil
diff --git a/cap/text.go b/cap/text.go
index 0af6290..b6117e5 100644
--- a/cap/text.go
+++ b/cap/text.go
@@ -45,7 +45,7 @@
iBin = (1 << Inheritable)
)
-var combos = []string{"", "e", "p", "ep", "i", "ei", "pi", "epi"}
+var combos = []string{"", "e", "p", "ep", "i", "ei", "ip", "eip"}
// histo generates a histogram of flag state combinations.
func (c *Set) histo(m uint, bins []int, patterns []uint, from, limit Value) uint {
diff --git a/contrib/go.patch b/contrib/go.patch
new file mode 100644
index 0000000..bb8a1dd
--- /dev/null
+++ b/contrib/go.patch
@@ -0,0 +1,328 @@
+From dcf7596bf3ac7170352c162af4bd61292b720f24 Mon Sep 17 00:00:00 2001
+From: "Andrew G. Morgan" <[email protected]>
+Date: Sat, 16 Nov 2019 08:46:24 -0800
+Subject: [PATCH] POSIX semantics support for some Linux syscalls
+
+This change adds two new methods for invoking system calls
+under Linux: syscall.PosixSyscall() and syscall.PosixSyscall6().
+
+These system call wrappers ensure that all OSThreads mirror
+a common system call. The wrappers serialize execution of the
+runtime so that no Go code can observe a partially applied
+(non-atomic) OS state change. As such, the syscalls have
+higher runtime overhead than regular system calls, and only
+need to be used where such thread (or 'm' in the parlance
+of the runtime sources) consistency is required.
+
+The new support is used to enable syscall.Setuid() and
+syscall.Setgid() support under Linux.
+
+Extensive discussion of the background issue addressed in this
+patch can be found here:
+
+ https://github.com/golang/go/issues/1435
+
+Signed-off-by: Andrew G. Morgan <[email protected]>
+---
+ src/runtime/proc.go | 93 +++++++++++++++++++++++++++++++++++++-
+ src/runtime/runtime2.go | 3 ++
+ src/syscall/syscall_linux.go | 105 ++++++++++++++++++++++++++++++++++++++++---
+ 3 files changed, 193 insertions(+), 8 deletions(-)
+
+diff --git a/src/runtime/proc.go b/src/runtime/proc.go
+index 56e9530ab6..aa96e84385 100644
+--- a/src/runtime/proc.go
++++ b/src/runtime/proc.go
+@@ -877,6 +877,68 @@ func startTheWorld() {
+ getg().m.preemptoff = ""
+ }
+
++// doPosixSyscall serializes Go execution and executes a specified
++// syscall on all m's.
++//go:linkname doPosixSyscall syscall.doPosixSyscall
++func doPosixSyscall(fn func(bool) bool) {
++ if fn == nil {
++ return
++ }
++ stopTheWorld("doposixsyscall")
++ if x := fn(true); x {
++ var n int32
++ _g_ := getg()
++ tid := _g_.m.procid
++ for tm := allm; tm != nil; tm = tm.alllink {
++ if tm.procid == tid || tm.procid == 0 {
++ continue
++ }
++ n++
++ lock(&tm.mfixlock)
++ tm.mfixupfn = fn
++ if tm.park.key == 0 {
++ // Because tm.mfixupfn is set, this
++ // will cause the wakeup to be short
++ // lived (once the mutex is
++ // unlocked). The next real wakeup
++ // will occur after startTheWorld() is
++ // called.
++ notewakeup(&tm.park)
++ }
++ unlock(&tm.mfixlock)
++ }
++ for {
++ done := true
++ for tm := allm; tm != nil; tm = tm.alllink {
++ if tm.procid == tid {
++ continue
++ }
++ lock(&tm.mfixlock)
++ done = done && (tm.mfixupfn == nil)
++ unlock(&tm.mfixlock)
++ }
++ if done {
++ break
++ }
++ // if needed force sysmon and/or newmHandoff to wakeup.
++ lock(&sched.lock)
++ if atomic.Load(&sched.sysmonwait) != 0 {
++ atomic.Store(&sched.sysmonwait, 0)
++ notewakeup(&sched.sysmonnote)
++ }
++ unlock(&sched.lock)
++ lock(&newmHandoff.lock)
++ if newmHandoff.waiting {
++ newmHandoff.waiting = false
++ notewakeup(&newmHandoff.wake)
++ }
++ unlock(&newmHandoff.lock)
++ usleep(293)
++ }
++ }
++ startTheWorld()
++}
++
+ // stopTheWorldGC has the same effect as stopTheWorld, but blocks
+ // until the GC is not running. It also blocks a GC from starting
+ // until startTheWorldGC is called.
+@@ -1796,6 +1858,21 @@ func startTemplateThread() {
+ newm(templateThread, nil)
+ }
+
++// mDidFixup runs any outstanding fixup function for the running m.
++//go:nosplit
++//go:nowritebarrierrec
++func mDidFixup() bool {
++ _g_ := getg()
++ lock(&_g_.m.mfixlock)
++ fn := _g_.m.mfixupfn
++ if fn != nil {
++ atomic.Storeuintptr((*uintptr)(unsafe.Pointer(&_g_.m.mfixupfn)), 0)
++ fn(false)
++ }
++ unlock(&_g_.m.mfixlock)
++ return fn != nil
++}
++
+ // templateThread is a thread in a known-good state that exists solely
+ // to start new threads in known-good states when the calling thread
+ // may not be in a good state.
+@@ -1832,6 +1909,7 @@ func templateThread() {
+ noteclear(&newmHandoff.wake)
+ unlock(&newmHandoff.lock)
+ notesleep(&newmHandoff.wake)
++ mDidFixup()
+ }
+ }
+
+@@ -1853,8 +1931,14 @@ func stopm() {
+ lock(&sched.lock)
+ mput(_g_.m)
+ unlock(&sched.lock)
+- notesleep(&_g_.m.park)
+- noteclear(&_g_.m.park)
++ // Loop only if we are woken up to perform a mfixupfn call.
++ for {
++ notesleep(&_g_.m.park)
++ noteclear(&_g_.m.park)
++ if !mDidFixup() {
++ break
++ }
++ }
+ acquirep(_g_.m.nextp.ptr())
+ _g_.m.nextp = 0
+ }
+@@ -4464,6 +4548,7 @@ func sysmon() {
+ lasttrace := int64(0)
+ idle := 0 // how many cycles in succession we had not wokeup somebody
+ delay := uint32(0)
++
+ for {
+ if idle == 0 { // start with 20us sleep...
+ delay = 20
+@@ -4474,6 +4559,7 @@ func sysmon() {
+ delay = 10 * 1000
+ }
+ usleep(delay)
++ mDidFixup()
+ now := nanotime()
+ next := timeSleepUntil()
+ if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomic.Load(&sched.npidle) == uint32(gomaxprocs)) {
+@@ -4493,6 +4579,7 @@ func sysmon() {
+ osRelax(true)
+ }
+ notetsleep(&sched.sysmonnote, sleep)
++ mDidFixup()
+ if shouldRelax {
+ osRelax(false)
+ }
+@@ -4529,12 +4616,14 @@ func sysmon() {
+ incidlelocked(1)
+ }
+ }
++ mDidFixup()
+ if next < now {
+ // There are timers that should have already run,
+ // perhaps because there is an unpreemptible P.
+ // Try to start an M to run them.
+ startm(nil, false)
+ }
++
+ // retake P's blocked in syscalls
+ // and preempt long running G's
+ if retake(now) != 0 {
+diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
+index fe1147e247..b10d29ab39 100644
+--- a/src/runtime/runtime2.go
++++ b/src/runtime/runtime2.go
+@@ -525,6 +525,9 @@ type m struct {
+ thread uintptr // thread handle
+ freelink *m // on sched.freem
+
++ mfixlock mutex // lock to protect mfixupfn
++ mfixupfn func(bool) bool // used to synchronize OS related m state (credentials etc)
++
+ // these are here because they are too large to be on the stack
+ // of low-level NOSPLIT functions.
+ libcall libcall
+diff --git a/src/syscall/syscall_linux.go b/src/syscall/syscall_linux.go
+index 2eba033d7c..39a918f6ff 100644
+--- a/src/syscall/syscall_linux.go
++++ b/src/syscall/syscall_linux.go
+@@ -943,17 +943,110 @@ func Getpgrp() (pid int) {
+ //sysnb Setsid() (pid int, err error)
+ //sysnb Settimeofday(tv *Timeval) (err error)
+
+-// issue 1435.
+-// On linux Setuid and Setgid only affects the current thread, not the process.
+-// This does not match what most callers expect so we must return an error
+-// here rather than letting the caller think that the call succeeded.
++// posixCaller holds the input and output state for performing a
++// posixSyscall that needs to synchronize all OS thread state. Linux
+// does not always support this natively, so we have to
++// manipulate the runtime to fix things up.
++type posixCaller struct {
++ // arguments
++ trap, a1, a2, a3, a4, a5, a6 uintptr
++
++ // return values (only set by 0th invocation)
++ r1, r2 uintptr
++
++ // err is the error code
++ err Errno
++}
++
++// doSyscall is a callback for executing a syscall on the current m
++// (OS thread).
++//go:nosplit
++func (pc *posixCaller) doSyscall(initial bool) bool {
++ r1, r2, err := RawSyscall(pc.trap, pc.a1, pc.a2, pc.a3)
++ if initial {
++ pc.r1 = r1
++ pc.r2 = r2
++ pc.err = err
++ }
++ return err == 0
++}
++
++// doSyscall6 is a callback for executing a syscall6 on the current m
++// (OS thread).
++//go:nosplit
++func (pc *posixCaller) doSyscall6(initial bool) bool {
++ r1, r2, err := RawSyscall6(pc.trap, pc.a1, pc.a2, pc.a3, pc.a4, pc.a5, pc.a6)
++ if initial {
++ pc.r1 = r1
++ pc.r2 = r2
++ pc.err = err
++ }
++ return err == 0
++}
++
++// PosixSyscall performs a syscall with POSIX semantics - namely it
++// serializes the runtime and performs the syscall once for each OS
++// thread of the Go runtime. The return values and error status are
++// from the first invocation. If this first invocation fails, no more
++// attempts are made.
++func PosixSyscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno) {
++ pc := &posixCaller{
++ trap: trap,
++ a1: a1,
++ a2: a2,
++ a3: a3,
++ }
++ doPosixSyscall(pc.doSyscall)
++ r1 = pc.r1
++ r2 = pc.r2
++ err = pc.err
++ return
++}
++
++// PosixSyscall6 performs a syscall6 with POSIX semantics - namely it
++// serializes the runtime and performs the syscall once for each OS
++// thread of the Go runtime. The return values and error status are
++// from the first invocation. If this first invocation fails, no more
++// attempts are made.
++func PosixSyscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno) {
++ pc := &posixCaller{
++ trap: trap,
++ a1: a1,
++ a2: a2,
++ a3: a3,
++ a4: a4,
++ a5: a5,
++ a6: a6,
++ }
++ doPosixSyscall(pc.doSyscall6)
++ r1 = pc.r1
++ r2 = pc.r2
++ err = pc.err
++ return
++}
++
++// issue 1435. On linux the raw system calls Setuid, Setgid etc only
++// affect the current thread, not the process (and all its peer
++// threads). So, to match what most callers expect, we use the
++// runtime.doPosixSyscall wrapper for the following syscalls.
++//
++// Provided by runtime.doPosixSyscall which serializes the world and
++// invokes the fn on each OS thread (what the runtime refers to as
++// m's). Once this function returns, all threads are in sync.
++func doPosixSyscall(fn func(bool) bool)
+
+ func Setuid(uid int) (err error) {
+- return EOPNOTSUPP
++ if _, _, e1 := PosixSyscall(sys_SETUID, uintptr(uid), 0, 0); e1 != 0 {
++ err = errnoErr(e1)
++ }
++ return
+ }
+
+ func Setgid(gid int) (err error) {
+- return EOPNOTSUPP
++ if _, _, e1 := PosixSyscall(sys_SETGID, uintptr(gid), 0, 0); e1 != 0 {
++ err = errnoErr(e1)
++ }
++ return
+ }
+
+ //sys Setpriority(which int, who int, prio int) (err error)
+--
+2.11.0
+
diff --git a/go/Makefile b/go/Makefile
index 1381f3d..0791768 100644
--- a/go/Makefile
+++ b/go/Makefile
@@ -22,8 +22,11 @@
src/libcap/cap/names.go: ../libcap/cap_names.h src/libcap/cap mknames.go
go run mknames.go --header=$< | gofmt > $@ || rm -f $@
+src/libcap/cap/syscalls.go: src/libcap/cap ./syscalls.sh
+ ./syscalls.sh > $@
+
GOPACKAGE=pkg/$(GOOSARCH)/libcap/cap.a
-$(GOPACKAGE): src/libcap/cap/names.go src/libcap/cap/cap.go src/libcap/cap/text.go
+$(GOPACKAGE): src/libcap/cap/syscalls.go src/libcap/cap/names.go src/libcap/cap/cap.go src/libcap/cap/text.go
echo testing Go package
GOPATH=$(realpath .) go test libcap/cap
echo building $(GOPACKAGE)
@@ -37,6 +40,7 @@
GOPATH=$(realpath .) go build $<
clean:
- GOPATH=$(realpath .) go clean -x -i libcap/cap || exit 0
- rm -f *.o *.so mknames web compare-cap *~ ../cap/*~ ../cap/names.go
+ GOPATH=$(realpath .) go clean -x -i libcap/cap 2> /dev/null || exit 0
+ rm -f *.o *.so mknames web compare-cap *~
+ rm -f ../cap/*~ ../cap/names.go ../cap/syscalls.go
rm -fr pkg src
diff --git a/go/syscalls.sh b/go/syscalls.sh
new file mode 100755
index 0000000..4966742
--- /dev/null
+++ b/go/syscalls.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+cat <<EOF
+package cap
+
+import "syscall"
+
+// callKernel variables overridable for testing purposes.
+EOF
+
+if [ -n "$(go doc syscall 2>/dev/null|grep PosixSyscall)" ]; then
+ cat <<EOF
+// (Go build tree contains PosixSyscall support.)
+var callWKernel = syscall.PosixSyscall
+var callWKernel6 = syscall.PosixSyscall6
+var callRKernel = syscall.RawSyscall
+var callRKernel6 = syscall.RawSyscall6
+EOF
+else
+ cat <<EOF
+// (Go build tree does not contain PosixSyscall support.)
+var callWKernel = syscall.RawSyscall
+var callWKernel6 = syscall.RawSyscall6
+var callRKernel = syscall.RawSyscall
+var callRKernel6 = syscall.RawSyscall6
+EOF
+fi
diff --git a/go/web.go b/go/web.go
index 82b363c..a1ab937 100644
--- a/go/web.go
+++ b/go/web.go
@@ -1,11 +1,24 @@
// Progam web provides an example of a webserver using capabilities to
// bind to a privileged port.
//
-// While this program serves as a demonstration of how to use
-// libcap/cap to achieve this, it currently reveals how problematic
-// the Go runtime is for actually dropping all privilege. For now, the
-// runtime can only raise and lower effective capabilities in critical
-// sections with any reliability: it cannot drop privilege.
+// This program will not work reliably without the equivalent of
+// the Go runtime patch that adds POSIX semantics wrappers around
+// the system calls that change kernel state. A patch for the Go
+// compiler/runtime to add this support is available here [2019-11-16]:
+//
+// https://git.kernel.org/pub/scm/libs/libcap/libcap.git/tree/contrib/go.patch
+//
+// To set this up, compile and empower this binary as follows (package
+// libcap/cap should be installed):
+//
+// go build web.go
+// sudo setcap cap_net_bind_service=p web
+// ./web --port=80
+//
+// Make requests using wget and observe the log of web (try --debug as
+// a web command line flag too):
+//
+// wget -o/dev/null -O/dev/stdout localhost:80
package main
import (
@@ -21,23 +34,12 @@
var (
port = flag.Int("port", 0, "port to listen on")
- debug = flag.Bool("debug", false, "enable to observe the go runtime os thread state confusion")
skipPriv = flag.Bool("skip", false, "skip raising the effective capability - will fail for low ports")
)
// ensureNotEUID aborts the program if it is running setuid something,
-// since it can't be forced to get euid to match uid etc. Go's
-// runtime model is fragile with respect fully dropping capabilities,
-// or other forms of privilege, so we need to collapse the runtime to
-// a single os process. Until such time as Go supports some sort of
-// "serialize execution and run this on all hardware threads before
-// resuming" functionality, dropping capabilities and euid vs uid
-// kinds of discrepencies cannot be secured for all hardware threads
-// of the running program.
-//
-// Read more about this here:
-//
-// https://github.com/golang/go/issues/1435 .
+// or is being run by root. Either case means the preparer has not
+// set the program up correctly.
func ensureNotEUID() {
euid := syscall.Geteuid()
uid := syscall.Getuid()
@@ -46,81 +48,57 @@
if uid != euid || gid != egid {
log.Fatalf("go runtime unable to resolve differing uids:(%d vs %d), gids(%d vs %d)", uid, euid, gid, egid)
}
+ if uid == 0 {
+ log.Fatalf("go runtime is running as root - cheating")
+ }
}
// listen creates a listener by raising effective privilege only to
-// bind to address and then lowering that effective privilege. To set
-// this up, compile and empower this binary as follows (package
-// libcap/cap should be installed):
-//
-// go build web.go
-// sudo setcap cap_net_bind_service=p web
-// ./web --port=80
-//
-// Make requests using wget and observe the log of web (try --debug as
-// a web command line flag too):
-//
-// wget -o/dev/null -O/dev/stdout localhost:80
+// bind to address and then lowering that effective privilege.
func listen(network, address string) (net.Listener, error) {
- runtime.LockOSThread()
- defer runtime.UnlockOSThread()
-
- // The intention of the following code is as follows.
- // Collapse down the number of hardware threads to one so we
- // can drop privilege and only then up them again. (This does
- // not seem to do that by killing the surplas threads. You can
- // run --debug and try "pstree -p ; getpcap <list of pids>" to
- // get a sense of what is going on.)
- count := runtime.GOMAXPROCS(1)
- defer runtime.GOMAXPROCS(count)
- log.Printf("max proc count = %d", count)
-
- ensureNotEUID()
-
- c := cap.GetProc()
- orig, err := c.Dup()
- if err != nil {
- return nil, fmt.Errorf("failed to dup cap.Set: %v", err)
+ if *skipPriv {
+ return net.Listen(network, address)
}
- if *debug {
- defer func() {
- if err := cap.NewSet().SetProc(); err != nil {
- panic(fmt.Errorf("unable to drop all privilege: %v", err))
- }
- return
- }()
- } else {
- defer func() {
- if err := orig.SetProc(); err != nil {
- panic(fmt.Errorf("unable to lower privilege (%q): %v", orig, err))
- }
- }()
+
+ orig := cap.GetProc()
+ defer orig.SetProc() // restore original caps on exit.
+
+ c, err := orig.Dup()
+ if err != nil {
+ return nil, fmt.Errorf("failed to dup caps: %v", err)
}
if on, _ := c.GetFlag(cap.Permitted, cap.NET_BIND_SERVICE); !on {
return nil, fmt.Errorf("insufficient privilege to bind to low ports - want %q, have %q", cap.NET_BIND_SERVICE, c)
}
- if !*skipPriv {
- if err := c.SetFlag(cap.Effective, true, cap.NET_BIND_SERVICE); err != nil {
- return nil, fmt.Errorf("unable to set capability: %v", err)
- }
+
+ if err := c.SetFlag(cap.Effective, true, cap.NET_BIND_SERVICE); err != nil {
+ return nil, fmt.Errorf("unable to set capability: %v", err)
}
+
if err := c.SetProc(); err != nil {
return nil, fmt.Errorf("unable to raise capabilities %q: %v", c, err)
}
-
return net.Listen(network, address)
}
// Handler is used to abstract the ServeHTTP function.
type Handler struct{}
-// ServeHTTP says hello from a single Go hardware thread and reveals its capabilities.
+// ServeHTTP says hello from a single Go hardware thread and reveals
+// its capabilities.
func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ runtime.LockOSThread()
+ // Get some numbers consistent to the current execution, so
+ // the returned web page demonstrates that the code execution
+ // is bouncing around on different kernel thread ids.
p := syscall.Getpid()
+ t := syscall.Gettid()
c := cap.GetProc()
- log.Printf("Saying hello from proc: %d, caps=%q", p, c)
- fmt.Fprintf(w, "Hello from proc: %d, caps=%q\n", p, c)
+ runtime.UnlockOSThread()
+
+ log.Printf("Saying hello from proc: %d->%d, caps=%q", p, t, c)
+ fmt.Fprintf(w, "Hello from proc: %d->%d, caps=%q\n", p, t, c)
}
func main() {
@@ -130,12 +108,20 @@
log.Fatal("please supply --port value")
}
+ ensureNotEUID()
+
ls, err := listen("tcp", fmt.Sprintf(":%d", *port))
if err != nil {
log.Fatalf("aborting: %v", err)
}
defer ls.Close()
+ if !*skipPriv {
+ if err := cap.NewSet().SetProc(); err != nil {
+ panic(fmt.Errorf("unable to drop all privilege: %v", err))
+ }
+ }
+
if err := http.Serve(ls, &Handler{}); err != nil {
log.Fatalf("server failed: %v", err)
}