Implement cap.Launch()
From a Go runtime provide a convenient way to launch a different
process with modified capabilities etc. without disturbing the
security state of the parent.
Signed-off-by: Andrew G. Morgan <[email protected]>
diff --git a/cap/cap.go b/cap/cap.go
index 469b3cd..45b1bdc 100644
--- a/cap/cap.go
+++ b/cap/cap.go
@@ -84,14 +84,33 @@
pid int32
}
+// scwMu is used to fully serialize the write system calls. Note, this
+// is generally not necessary, but in the case of Launch we get into a
+// situation where the launching thread is temporarily allowed to
+// deviate from the kernel state of the rest of the runtime and
+// allowing other threads to perform w* syscalls will potentially
+// interfere with the launching process.
+var scwMu sync.Mutex
+
+// syscaller is a type for abstracting syscalls. The r* variants are
+// for reading state, and can be parallelized, the w* variants need to
+// be serialized so all OS threads can share state.
+type syscaller struct {
+ r3 func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno)
+ w3 func(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno)
+ r6 func(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
+ w6 func(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno)
+}
+
// caprcall provides a pointer etc wrapper for the system calls
// associated with getcap.
-func caprcall(call uintptr, h *header, d []data) error {
+//go:nosplit
+func (sc *syscaller) caprcall(call uintptr, h *header, d []data) error {
x := uintptr(0)
if d != nil {
x = uintptr(unsafe.Pointer(&d[0]))
}
- _, _, err := callRKernel(call, uintptr(unsafe.Pointer(h)), x, 0)
+ _, _, err := sc.r3(call, uintptr(unsafe.Pointer(h)), x, 0)
if err != 0 {
return err
}
@@ -100,12 +119,13 @@
// capwcall provides a pointer etc wrapper for the system calls
// associated with setcap.
-func capwcall(call uintptr, h *header, d []data) error {
+//go:nosplit
+func (sc *syscaller) capwcall(call uintptr, h *header, d []data) error {
x := uintptr(0)
if d != nil {
x = uintptr(unsafe.Pointer(&d[0]))
}
- _, _, err := callWKernel(call, uintptr(unsafe.Pointer(h)), x, 0)
+ _, _, err := sc.w3(call, uintptr(unsafe.Pointer(h)), x, 0)
if err != 0 {
return err
}
@@ -115,8 +135,9 @@
// prctlrcall provides a wrapper for the prctl systemcalls that only
// read kernel state. There is a limited number of arguments needed
// and the caller should use 0 for those not needed.
-func prctlrcall(prVal, v1, v2 uintptr) (int, error) {
- r, _, err := callRKernel(syscall.SYS_PRCTL, prVal, v1, v2)
+//go:nosplit
+func (sc *syscaller) prctlrcall(prVal, v1, v2 uintptr) (int, error) {
+ r, _, err := sc.r3(syscall.SYS_PRCTL, prVal, v1, v2)
if err != 0 {
return int(r), err
}
@@ -127,8 +148,9 @@
// read kernel state and require 6 arguments - ambient cap API, I'm
// looking at you. There is a limited number of arguments needed and
// the caller should use 0 for those not needed.
-func prctlrcall6(prVal, v1, v2, v3, v4, v5 uintptr) (int, error) {
- r, _, err := callRKernel6(syscall.SYS_PRCTL, prVal, v1, v2, v3, v4, v5)
+//go:nosplit
+func (sc *syscaller) prctlrcall6(prVal, v1, v2, v3, v4, v5 uintptr) (int, error) {
+ r, _, err := sc.r6(syscall.SYS_PRCTL, prVal, v1, v2, v3, v4, v5)
if err != 0 {
return int(r), err
}
@@ -139,8 +161,9 @@
// write/modify kernel state. Where available, these will use the
// POSIX semantics fixup system calls. There is a limited number of
// arguments needed and the caller should use 0 for those not needed.
-func prctlwcall(prVal, v1, v2 uintptr) (int, error) {
- r, _, err := callWKernel(syscall.SYS_PRCTL, prVal, v1, v2)
+//go:nosplit
+func (sc *syscaller) prctlwcall(prVal, v1, v2 uintptr) (int, error) {
+ r, _, err := sc.w3(syscall.SYS_PRCTL, prVal, v1, v2)
if err != 0 {
return int(r), err
}
@@ -152,8 +175,9 @@
// API, I'm looking at you. (Where available, these will use the POSIX
// semantics fixup system calls). There is a limited number of
// arguments needed and the caller should use 0 for those not needed.
-func prctlwcall6(prVal, v1, v2, v3, v4, v5 uintptr) (int, error) {
- r, _, err := callWKernel6(syscall.SYS_PRCTL, prVal, v1, v2, v3, v4, v5)
+//go:nosplit
+func (sc *syscaller) prctlwcall6(prVal, v1, v2, v3, v4, v5 uintptr) (int, error) {
+ r, _, err := sc.w6(syscall.SYS_PRCTL, prVal, v1, v2, v3, v4, v5)
if err != 0 {
return int(r), err
}
@@ -162,11 +186,12 @@
// cInit perfoms the lazy identification of the capability vintage of
// the running system.
-func cInit() {
+//go:nosplit
+func (sc *syscaller) cInit() {
h := &header{
magic: kv3,
}
- caprcall(syscall.SYS_CAPGET, h, nil)
+ sc.caprcall(syscall.SYS_CAPGET, h, nil)
magic = h.magic
switch magic {
case kv1:
@@ -192,13 +217,13 @@
// MaxBits returns the number of kernel-named capabilities discovered
// at runtime in the current system.
func MaxBits() Value {
- startUp.Do(cInit)
+ startUp.Do(multisc.cInit)
return Value(maxValues)
}
// NewSet returns an empty capability set.
func NewSet() *Set {
- startUp.Do(cInit)
+ startUp.Do(multisc.cInit)
return &Set{
flat: make([]data, words),
}
@@ -225,7 +250,7 @@
// id; pid=0 is an alias for current.
func GetPID(pid int) (*Set, error) {
v := NewSet()
- if err := caprcall(syscall.SYS_CAPGET, &header{magic: magic, pid: int32(pid)}, v.flat); err != nil {
+ if err := multisc.caprcall(syscall.SYS_CAPGET, &header{magic: magic, pid: int32(pid)}, v.flat); err != nil {
return nil, err
}
return v, nil
@@ -242,33 +267,50 @@
return c
}
+//go:nosplit
+func (sc *syscaller) setProc(c *Set) error {
+ if c == nil || len(c.flat) == 0 {
+ return ErrBadSet
+ }
+ return sc.capwcall(syscall.SYS_CAPSET, &header{magic: magic}, c.flat)
+}
+
// SetProc attempts to write the capability Set to the current
// process. The kernel will perform permission checks and an error
// will be returned if the attempt fails.
func (c *Set) SetProc() error {
- if c == nil || len(c.flat) == 0 {
- return ErrBadSet
- }
- return capwcall(syscall.SYS_CAPSET, &header{magic: magic}, c.flat)
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setProc(c)
}
// defines from uapi/linux/prctl.h
const (
- PR_CAPBSET_READ = 23
- PR_CAPBSET_DROP = 24
+ pr_CAPBSET_READ = 23
+ pr_CAPBSET_DROP = 24
)
// GetBound determines if a specific capability is currently part of
// the local bounding set. On systems where the bounding set Value is
// not present, this function returns an error.
func GetBound(val Value) (bool, error) {
- v, err := prctlrcall(PR_CAPBSET_READ, uintptr(val), 0)
+ v, err := multisc.prctlrcall(pr_CAPBSET_READ, uintptr(val), 0)
if err != nil {
return false, err
}
return v > 0, nil
}
+//go:nosplit
+func (sc *syscaller) dropBound(val ...Value) error {
+ for _, v := range val {
+ if _, err := sc.prctlwcall(pr_CAPBSET_DROP, uintptr(v), 0); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
// DropBound attempts to suppress bounding set Values. The kernel will
// never allow a bounding set Value bit to be raised once successfully
// dropped. However, dropping requires the current process is
@@ -276,46 +318,40 @@
// Effective flag vector). Note, the drops are performed in order and
// if one bounding value cannot be dropped, the function returns
// immediately with an error which may leave the system in an
-// ill-defined state.
+// ill-defined state. On error, the caller can determine where things
+// went wrong using GetBound().
func DropBound(val ...Value) error {
- for _, v := range val {
- if _, err := prctlwcall(PR_CAPBSET_DROP, uintptr(v), 0); err != nil {
- return err
- }
- }
- return nil
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.dropBound(val...)
}
// defines from uapi/linux/prctl.h
const (
- PR_CAP_AMBIENT = 47
+ pr_CAP_AMBIENT = 47
- PR_CAP_AMBIENT_IS_SET = 1
- PR_CAP_AMBIENT_RAISE = 2
- PR_CAP_AMBIENT_LOWER = 3
- PR_CAP_AMBIENT_CLEAR_ALL = 4
+ pr_CAP_AMBIENT_IS_SET = 1
+ pr_CAP_AMBIENT_RAISE = 2
+ pr_CAP_AMBIENT_LOWER = 3
+ pr_CAP_AMBIENT_CLEAR_ALL = 4
)
// GetAmbient determines if a specific capability is currently part of
// the local ambient set. On systems where the ambient set Value is
// not present, this function returns an error.
func GetAmbient(val Value) (bool, error) {
- r, err := prctlrcall6(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, uintptr(val), 0, 0, 0)
+ r, err := multisc.prctlrcall6(pr_CAP_AMBIENT, pr_CAP_AMBIENT_IS_SET, uintptr(val), 0, 0, 0)
return r > 0, err
}
-// SetAmbient attempts to set a specific Value bit to the enable
-// state. This function will return an error if insufficient
-// permission is available to perform this task. The settings are
-// performed in order and the function returns immediately an error is
-// detected.
-func SetAmbient(enable bool, val ...Value) error {
- dir := uintptr(PR_CAP_AMBIENT_LOWER)
+//go:nosplit
+func (sc *syscaller) setAmbient(enable bool, val ...Value) error {
+ dir := uintptr(pr_CAP_AMBIENT_LOWER)
if enable {
- dir = PR_CAP_AMBIENT_RAISE
+ dir = pr_CAP_AMBIENT_RAISE
}
for _, v := range val {
- _, err := prctlwcall6(PR_CAP_AMBIENT, dir, uintptr(v), 0, 0, 0)
+ _, err := sc.prctlwcall6(pr_CAP_AMBIENT, dir, uintptr(v), 0, 0, 0)
if err != nil {
return err
}
@@ -323,11 +359,19 @@
return nil
}
-// ResetAmbient attempts to ensure the Ambient set is fully
-// cleared. It works by first reading the set and if it finds any bits
-// raised it will attempt a reset. This is a workaround for situations
-// where the Ambient API is locked.
-func ResetAmbient() error {
+// SetAmbient attempts to set a specific Value bit to the enable
+// state. This function will return an error if insufficient
+// permission is available to perform this task. The settings are
+// performed in order and the function returns immediately an error is
+// detected. Use GetAmbient() to unravel where things went wrong.
+func SetAmbient(enable bool, val ...Value) error {
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setAmbient(enable, val...)
+}
+
+//go:nosplit
+func (sc *syscaller) resetAmbient() error {
var v bool
var err error
@@ -337,6 +381,16 @@
return nil
}
}
- _, err = prctlwcall6(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0)
+ _, err = sc.prctlwcall6(pr_CAP_AMBIENT, pr_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0)
return err
}
+
+// ResetAmbient attempts to ensure the Ambient set is fully
+// cleared. It works by first reading the set and if it finds any bits
+// raised it will attempt a reset. This is a workaround for situations
+// where the Ambient API is locked.
+func ResetAmbient() error {
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.resetAmbient()
+}
diff --git a/cap/convenience.go b/cap/convenience.go
index 9dd9d07..6fe9ddd 100644
--- a/cap/convenience.go
+++ b/cap/convenience.go
@@ -33,19 +33,25 @@
// GetSecbits returns the current setting of the process' Secbits.
func GetSecbits() Secbits {
- v, err := prctlrcall(PR_GET_SECUREBITS, 0, 0)
+ v, err := multisc.prctlrcall(PR_GET_SECUREBITS, 0, 0)
if err != nil {
panic(err)
}
return Secbits(v)
}
+func (sc *syscaller) setSecbits(s Secbits) error {
+ _, err := sc.prctlwcall(PR_SET_SECUREBITS, uintptr(s), 0)
+ return err
+}
+
// Set attempts to force the process Secbits to a value. This function
// will raise cap.SETPCAP in order to achieve this operation, and will
// completely lower the Effective vector of the process returning.
func (s Secbits) Set() error {
- _, err := prctlwcall(PR_SET_SECUREBITS, uintptr(s), 0)
- return err
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setSecbits(s)
}
// Mode summarizes a complicated secure-bits and capability mode in a
@@ -116,25 +122,17 @@
var ErrBadMode = errors.New("unsupported mode")
-// Set attempts to enter the specified mode. An attempt is made to
-// enter the mode, so if you prefer this operation to be a no-op if
-// entering the same mode, call only if CurrentMode() disagrees with
-// the desired mode.
-//
-// This function will raise cap.SETPCAP in order to achieve this
-// operation, and will completely lower the Effective vector of the
-// process before returning.
-func (m Mode) Set() error {
+func (sc *syscaller) setMode(m Mode) error {
w := GetProc()
defer func() {
w.ClearFlag(Effective)
- w.SetProc()
+ sc.setProc(w)
}()
if err := w.SetFlag(Effective, true, SETPCAP); err != nil {
return err
}
- if err := w.SetProc(); err != nil {
+ if err := sc.setProc(w); err != nil {
return err
}
@@ -147,11 +145,11 @@
sb := securedAmbientBits
if _, err := GetAmbient(0); err != nil {
sb = securedBasicBits
- } else if err := ResetAmbient(); err != nil {
+ } else if err := sc.resetAmbient(); err != nil {
return err
}
- if err := sb.Set(); err != nil {
+ if err := sc.setSecbits(sb); err != nil {
return err
}
@@ -159,13 +157,27 @@
return nil
}
- for c := Value(0); DropBound(c) == nil; c++ {
+ for c := Value(0); sc.dropBound(c) == nil; c++ {
}
w.ClearFlag(Permitted)
return nil
}
+// Set attempts to enter the specified mode. An attempt is made to
+// enter the mode, so if you prefer this operation to be a no-op if
+// entering the same mode, call only if CurrentMode() disagrees with
+// the desired mode.
+//
+// This function will raise cap.SETPCAP in order to achieve this
+// operation, and will completely lower the Effective vector of the
+// process before returning.
+func (m Mode) Set() error {
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setMode(m)
+}
+
// String returns the libcap conventional string for this mode.
func (m Mode) String() string {
switch m {
@@ -182,16 +194,11 @@
}
}
-// SetUID is a convenience function for robustly setting the UID and
-// all other variants of UID (EUID etc) to the specified value without
-// dropping the privilege of the current process. This function will
-// raise cap.SETUID in order to achieve this operation, and will
-// completely lower the Effective vector of the process before returning.
-func SetUID(uid int) error {
+func (sc *syscaller) setUID(uid int) error {
w := GetProc()
defer func() {
w.ClearFlag(Effective)
- w.SetProc()
+ sc.setProc(w)
}()
if err := w.SetFlag(Effective, true, SETUID); err != nil {
@@ -200,14 +207,60 @@
// these may or may not work depending on whether or not they
// are locked. We try them just in case.
- prctlwcall(PR_SET_KEEPCAPS, 1, 0)
- defer prctlwcall(PR_SET_KEEPCAPS, 0, 0)
+ sc.prctlwcall(PR_SET_KEEPCAPS, 1, 0)
+ defer sc.prctlwcall(PR_SET_KEEPCAPS, 0, 0)
- if err := w.SetProc(); err != nil {
+ if err := sc.setProc(w); err != nil {
return err
}
- if _, _, err := callWKernel(syscall.SYS_SETUID, uintptr(uid), 0, 0); err != 0 {
+ if _, _, err := sc.w3(syscall.SYS_SETUID, uintptr(uid), 0, 0); err != 0 {
+ return err
+ }
+ return nil
+}
+
+// SetUID is a convenience function for robustly setting the UID and
+// all other variants of UID (EUID etc) to the specified value without
+// dropping the privilege of the current process. This function will
+// raise cap.SETUID in order to achieve this operation, and will
+// completely lower the Effective vector of the process before returning.
+func SetUID(uid int) error {
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setUID(uid)
+}
+
+func (sc *syscaller) setGroups(gid int, suppl []int) error {
+ w := GetProc()
+ defer func() {
+ w.ClearFlag(Effective)
+ sc.setProc(w)
+ }()
+
+ if err := w.SetFlag(Effective, true, SETGID); err != nil {
+ return err
+ }
+ if err := sc.setProc(w); err != nil {
+ return err
+ }
+
+ if _, _, err := sc.w3(syscall.SYS_SETGID, uintptr(gid), 0, 0); err != 0 {
+ return err
+ }
+ if len(suppl) == 0 {
+ if _, _, err := sc.w3(sys_setgroups_variant, 0, 0, 0); err != 0 {
+ return err
+ }
+ return nil
+ }
+
+ // On linux gid values are 32-bits.
+ gs := make([]uint32, len(suppl))
+ for i, g := range suppl {
+ gs[i] = uint32(g)
+ }
+ if _, _, err := sc.w3(sys_setgroups_variant, uintptr(len(suppl)), uintptr(unsafe.Pointer(&gs[0])), 0); err != 0 {
return err
}
return nil
@@ -219,36 +272,7 @@
// raise cap.SETGID in order to achieve this operation, and will
// completely lower the Effective vector of the process before returning.
func SetGroups(gid int, suppl ...int) error {
- w := GetProc()
- defer func() {
- w.ClearFlag(Effective)
- w.SetProc()
- }()
-
- if err := w.SetFlag(Effective, true, SETGID); err != nil {
- return err
- }
- if err := w.SetProc(); err != nil {
- return err
- }
-
- if _, _, err := callWKernel(syscall.SYS_SETGID, uintptr(gid), 0, 0); err != 0 {
- return err
- }
- if len(suppl) == 0 {
- if _, _, err := callWKernel(sys_setgroups_variant, 0, 0, 0); err != 0 {
- return err
- }
- return nil
- }
-
- // On linux gid values are 32-bits.
- gs := make([]uint32, len(suppl))
- for i, g := range suppl {
- gs[i] = uint32(g)
- }
- if _, _, err := callWKernel(sys_setgroups_variant, uintptr(len(suppl)), uintptr(unsafe.Pointer(&gs[0])), 0); err != 0 {
- return err
- }
- return nil
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.setGroups(gid, suppl)
}
diff --git a/cap/file.go b/cap/file.go
index 189ca61..e57647e 100644
--- a/cap/file.go
+++ b/cap/file.go
@@ -122,7 +122,7 @@
func GetFd(file *os.File) (*Set, error) {
var raw3 vfs_caps_3
d := make([]byte, binary.Size(raw3))
- sz, _, oErr := callRKernel6(syscall.SYS_FGETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
+ sz, _, oErr := multisc.r6(syscall.SYS_FGETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
var err error
if oErr != 0 {
err = oErr
@@ -138,7 +138,7 @@
}
var raw3 vfs_caps_3
d := make([]byte, binary.Size(raw3))
- sz, _, oErr := callRKernel6(syscall.SYS_GETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
+ sz, _, oErr := multisc.r6(syscall.SYS_GETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0)
if oErr != 0 {
err = oErr
}
@@ -210,7 +210,7 @@
// capabilities, by calling with c = nil.
func (c *Set) SetFd(file *os.File) error {
if c == nil {
- if _, _, err := callRKernel6(syscall.SYS_FREMOVEXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
+ if _, _, err := multisc.r6(syscall.SYS_FREMOVEXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
return err
}
return nil
@@ -221,7 +221,7 @@
if err != nil {
return err
}
- if _, _, err := callRKernel6(syscall.SYS_FSETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
+ if _, _, err := multisc.r6(syscall.SYS_FSETXATTR, uintptr(file.Fd()), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
return err
}
return nil
@@ -249,7 +249,7 @@
return err
}
if c == nil {
- if _, _, err := callRKernel6(syscall.SYS_REMOVEXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
+ if _, _, err := multisc.r6(syscall.SYS_REMOVEXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), 0, 0, 0, 0); err != 0 {
return err
}
return nil
@@ -260,7 +260,7 @@
if err != nil {
return err
}
- if _, _, err := callRKernel6(syscall.SYS_SETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
+ if _, _, err := multisc.r6(syscall.SYS_SETXATTR, uintptr(unsafe.Pointer(p)), uintptr(unsafe.Pointer(xattrNameCaps)), uintptr(unsafe.Pointer(&d[0])), uintptr(len(d)), 0, 0); err != 0 {
return err
}
return nil
diff --git a/cap/iab.go b/cap/iab.go
index c39e260..e192115 100644
--- a/cap/iab.go
+++ b/cap/iab.go
@@ -33,7 +33,7 @@
// IABInit() returns an empty IAB.
func IABInit() *IAB {
- startUp.Do(cInit)
+ startUp.Do(multisc.cInit)
return &IAB{
i: make([]uint32, words),
a: make([]uint32, words),
@@ -126,9 +126,8 @@
return strings.Join(vs, ",")
}
-// SetProc attempts to change the Inheritable, Ambient and Bounding
-// capabilty vectors of the current process.
-func (iab *IAB) SetProc() (err error) {
+//go:nosplit
+func (sc *syscaller) iabSetProc(iab *IAB) (err error) {
temp := GetProc()
var raising uint32
for i := 0; i < words; i++ {
@@ -146,26 +145,26 @@
if err = working.SetFlag(Effective, true, SETPCAP); err != nil {
return
}
- if err = working.SetProc(); err != nil {
+ if err = sc.setProc(working); err != nil {
return
}
}
defer func() {
- if err2 := temp.SetProc(); err == nil {
+ if err2 := sc.setProc(temp); err == nil {
err = err2
}
}()
- if err = ResetAmbient(); err != nil {
+ if err = sc.resetAmbient(); err != nil {
return
}
for c := Value(maxValues); c > 0; {
c--
offset, mask := omask(c)
if iab.a[offset]&mask != 0 {
- err = SetAmbient(true, c)
+ err = sc.setAmbient(true, c)
}
if err == nil && iab.nb[offset]&mask != 0 {
- err = DropBound(c)
+ err = sc.dropBound(c)
}
if err != nil {
return
@@ -174,6 +173,14 @@
return
}
+// SetProc attempts to change the Inheritable, Ambient and Bounding
+// capability vectors of the current process.
+func (iab *IAB) SetProc() error {
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ return multisc.iabSetProc(iab)
+}
+
// GetVector returns the raised state of the specific capability bit
// of the indicated vector.
func (iab *IAB) GetVector(vec Vector, val Value) (bool, error) {
diff --git a/cap/launch.go b/cap/launch.go
new file mode 100644
index 0000000..3fd0a06
--- /dev/null
+++ b/cap/launch.go
@@ -0,0 +1,239 @@
+package cap
+
+import (
+ "errors"
+ "os"
+ "runtime"
+ "syscall"
+ "unsafe"
+)
+
+// Launcher holds a configuration for launching a child process with
+// capability state different from (generally more restricted than)
+// the parent.
+type Launcher struct {
+ path string
+ args []string
+ env []string
+
+ callbackFn func(pa *syscall.ProcAttr, data interface{}) error
+
+ changeUIDs bool
+ uid int
+
+ changeGIDs bool
+ gid int
+ groups []int
+
+ changeMode bool
+ mode Mode
+
+ iab *IAB
+
+ chroot string
+}
+
+// NewLauncher returns a new launcher for the specified program path
+// and args with the specified environment.
+func NewLauncher(path string, args []string, env []string) *Launcher {
+ return &Launcher{
+ path: path,
+ args: args,
+ env: env,
+ }
+}
+
+// Callback specifies a callback for Launch() to call before changing
+// privilege. The only thing that is assumed is that the OS thread in
+// use to call this callback function at launch time will be the one
+// that ultimately calls fork. Any returned error value of said
+// function will terminate the launch process. A nil callback (the
+// default) is ignored. The specified callback fn should not call any
+// "cap" package functions since this may deadlock or generate
+// undefined behavior for the parent process.
+func (attr *Launcher) Callback(fn func(*syscall.ProcAttr, interface{}) error) {
+ attr.callbackFn = fn
+}
+
+// SetUID specifies the UID to be used by the launched command.
+func (attr *Launcher) SetUID(uid int) {
+ attr.changeUIDs = true
+ attr.uid = uid
+}
+
+// SetGroups specifies the GID and supplementary groups for the
+// launched command.
+func (attr *Launcher) SetGroups(gid int, groups []int) {
+ attr.changeGIDs = true
+ attr.gid = gid
+ attr.groups = groups
+}
+
+// SetMode specifies the libcap Mode to be used by the launched command.
+func (attr *Launcher) SetMode(mode Mode) {
+ attr.changeMode = true
+ attr.mode = mode
+}
+
+// SetIAB specifies the IAB capability vectors to be inherited by the
+// launched command. A nil value means the prevailing vectors of the
+// parent will be inherited.
+func (attr *Launcher) SetIAB(iab *IAB) {
+ attr.iab = iab
+}
+
+// SetChroot specifies the chroot value to be used by the launched
+// command. An empty value means no-change from the prevailing value.
+func (attr *Launcher) SetChroot(root string) {
+ attr.chroot = root
+}
+
+// lResult is used to get the result from the doomed launcher thread.
+type lResult struct {
+ pid int
+ err error
+}
+
+// ErrLaunchFailed is returned if a launch was aborted with no more
+// specific error.
+var ErrLaunchFailed = errors.New("launch failed")
+
+// ErrNoLaunch indicates the go runtime available to this binary does
+// not reliably support launching. See cap.LaunchSupported.
+var ErrNoLaunch = errors.New("launch not supported")
+
+// ErrAmbiguousChroot indicates that the Launcher is being used in
+// addition to callback supplied Chroot. The former should be used
+// exclusively for this.
+var ErrAmbiguousChroot = errors.New("use Launcher for chroot")
+
+// ErrAmbiguousIDs indicates that the Launcher is being used in
+// addition to callback supplied Credentials. The former should be
+// used exclusively for this.
+var ErrAmbiguousIDs = errors.New("use Launcher for uids and gids")
+
+// ErrAmbiguousAmbient indicates that the Launcher is being used in
+// addition to callback supplied ambient set and the former should be
+// used exclusively in a Launch call.
+var ErrAmbiguousAmbient = errors.New("use Launcher for ambient caps")
+
+// lName is the name we temporarily give to the launcher thread. Note,
+// this will likely stick around in the process tree if the Go runtime
+// is not cleaning up locked launcher OS threads.
+var lName = []byte("cap-launcher\000")
+
+// <uapi/linux/prctl.h>
+const pr_SET_NAME = 15
+
+func launch(result chan<- lResult, attr *Launcher, data interface{}) {
+ defer close(result)
+
+ pid := syscall.Getpid()
+ // Wait until we are not scheduled on the parent thread. We
+ // will exit this thread once the child has launched, and
+ // don't want other goroutines to use this thread afterwards.
+ for {
+ runtime.LockOSThread()
+ tid := syscall.Gettid()
+ if tid != pid {
+ break
+ }
+ runtime.UnlockOSThread()
+ runtime.Gosched()
+ }
+
+ // By never releasing the LockOSThread here, we guarantee that
+ // the runtime will terminate this OS thread once this
+ // function returns.
+
+ // Name the launcher thread - transient, but helps if the
+ // callbackFn or something else hangs up.
+ singlesc.prctlrcall(pr_SET_NAME, uintptr(unsafe.Pointer(&lName[0])), 0)
+
+ pa := &syscall.ProcAttr{
+ Files: []uintptr{0, 1, 2},
+ }
+ var err error
+ var needChroot bool
+
+ if len(attr.env) != 0 {
+ pa.Env = attr.env
+ } else {
+ pa.Env = os.Environ()
+ }
+
+ if attr.callbackFn != nil {
+ if err = attr.callbackFn(pa, data); err != nil {
+ goto abort
+ }
+ }
+
+ if needChroot, err = validatePA(pa, attr.chroot); err != nil {
+ goto abort
+ }
+ if attr.changeUIDs {
+ if err = singlesc.setUID(attr.uid); err != nil {
+ goto abort
+ }
+ }
+ if attr.changeGIDs {
+ if err = singlesc.setGroups(attr.gid, attr.groups); err != nil {
+ goto abort
+ }
+ }
+ if attr.changeMode {
+ if err = singlesc.setMode(attr.mode); err != nil {
+ goto abort
+ }
+ }
+ if attr.iab != nil {
+ if err = singlesc.iabSetProc(attr.iab); err != nil {
+ goto abort
+ }
+ }
+
+ if needChroot {
+ c := GetProc()
+ if err = c.SetFlag(Effective, true, SYS_CHROOT); err != nil {
+ goto abort
+ }
+ if err = singlesc.setProc(c); err != nil {
+ goto abort
+ }
+ }
+ pid, err = syscall.ForkExec(attr.path, attr.args, pa)
+
+abort:
+ if err != nil {
+ pid = -1
+ }
+ result <- lResult{pid: pid, err: err}
+}
+
+// Launch performs a new program launch with security state specified
+// in the supplied launcher.
+func (attr *Launcher) Launch(data interface{}) (int, error) {
+ if attr.path == "" || len(attr.args) == 0 {
+ return -1, ErrLaunchFailed
+ }
+ if !LaunchSupported {
+ return -1, ErrNoLaunch
+ }
+
+ scwMu.Lock()
+ defer scwMu.Unlock()
+ result := make(chan lResult)
+
+ go launch(result, attr, data)
+ for {
+ select {
+ case v, ok := <-result:
+ if !ok {
+ return -1, ErrLaunchFailed
+ }
+ return v.pid, v.err
+ default:
+ runtime.Gosched()
+ }
+ }
+}
diff --git a/cap/oslockluster.go b/cap/oslockluster.go
new file mode 100644
index 0000000..7c753ec
--- /dev/null
+++ b/cap/oslockluster.go
@@ -0,0 +1,35 @@
+// +build !go1.10
+
+package cap
+
+import "syscall"
+
+// LaunchSupported indicates that it is safe to return from a locked OS
+// Thread and have that OS Thread be terminated by the runtime. The
+// Launch functionality really needs to rely on the fact that an
+// excess of runtime.LockOSThread() vs. runtime.UnlockOSThread() calls
+// in a returning go routine will cause the underlying locked OSThread
+// to terminate. That feature was added to the Go runtime in version
+// 1.10.
+//
+// See these bugs for the discussion and feature assumed by the code
+// in this Launch() functionality:
+//
+// https://github.com/golang/go/issues/20395
+// https://github.com/golang/go/issues/20458
+//
+// A value of false for this constant causes cap.(*Launcher).Launch()
+// to return ErrNoLaunch rather than attempt a launch, since the OS
+// Thread locked to the launching go routine could not be reliably
+// terminated and might pollute the rest of the runtime. If this is
+// a problem for your application you have two workarounds:
+//
+// 1) don't use cap.(*Launcher).Launch()
+// 2) upgrade your Go toolchain to 1.10+
+const LaunchSupported = false
+
+// validatePA confirms that the pa.Sys entry is not incompatible with
+// Launch.
+func validatePA(pa *syscall.ProcAttr, chroot string) (bool, error) {
+ return false, ErrNoLaunch
+}
diff --git a/cap/oslocks.go b/cap/oslocks.go
new file mode 100644
index 0000000..10db24c
--- /dev/null
+++ b/cap/oslocks.go
@@ -0,0 +1,53 @@
+// +build go1.10
+
+package cap
+
+import "syscall"
+
+// LaunchSupported indicates that it is safe to return from a locked
+// OS Thread and have that OS Thread be terminated by the runtime. The
+// Launch functionality really needs to rely on the fact that an
+// excess of runtime.LockOSThread() vs. runtime.UnlockOSThread() calls
+// in a returning go routine will cause the underlying locked OSThread
+// to terminate. That feature was added to the Go runtime in version
+// 1.10.
+//
+// See these bugs for the discussion and feature assumed by the code
+// in this Launch() functionality:
+//
+// https://github.com/golang/go/issues/20395
+// https://github.com/golang/go/issues/20458
+//
+// A value of false for this constant causes cap.(*Launcher).Launch()
+// to return ErrNoLaunch rather than attempt a launch, since the OS
+// Thread locked to the launching go routine could not be reliably
+// terminated and might pollute the rest of the runtime. If this is
+// a problem for your application you have two workarounds:
+//
+// 1) don't use cap.(*Launcher).Launch()
+// 2) upgrade your Go toolchain to 1.10+ (ie., do this one).
+const LaunchSupported = true
+
+// validatePA confirms that the pa.Sys entry is not incompatible with
+// Launch and loads up the chroot value.
+func validatePA(pa *syscall.ProcAttr, chroot string) (bool, error) {
+ s := pa.Sys
+ if s == nil {
+ if chroot == "" {
+ return false, nil
+ }
+ s = &syscall.SysProcAttr{
+ Chroot: chroot,
+ }
+ pa.Sys = s
+ } else if s.Chroot != "" {
+ return false, ErrAmbiguousChroot
+ }
+ if s.Credential != nil {
+ return false, ErrAmbiguousIDs
+ }
+ if len(s.AmbientCaps) != 0 {
+ return false, ErrAmbiguousAmbient
+ }
+ return s != nil && s.Chroot != "", nil
+}
diff --git a/cap/text.go b/cap/text.go
index 01b45b4..4f37c8f 100644
--- a/cap/text.go
+++ b/cap/text.go
@@ -21,7 +21,7 @@
// FromName converts a named capability Value to its binary
// representation.
func FromName(name string) (Value, error) {
- startUp.Do(cInit)
+ startUp.Do(multisc.cInit)
v, ok := bits[name]
if ok {
if v >= Value(words*32) {
diff --git a/go/.gitignore b/go/.gitignore
index bc4cd9b..660b12d 100644
--- a/go/.gitignore
+++ b/go/.gitignore
@@ -1,5 +1,8 @@
compare-cap
+try-launching
+try-launching-cgo
mknames
web
+ok
pkg
src
diff --git a/go/Makefile b/go/Makefile
index 704c28f..7269716 100644
--- a/go/Makefile
+++ b/go/Makefile
@@ -10,11 +10,14 @@
DEPS=../libcap/libcap.a ../libcap/libpsx.a
-all: $(PSXGOPACKAGE) $(CAPGOPACKAGE) web compare-cap
+all: $(PSXGOPACKAGE) $(CAPGOPACKAGE) web compare-cap try-launching
$(DEPS):
make -C ../libcap all
+../progs/capsh:
+ make -C ../progs capsh
+
src/libcap/psx:
mkdir -p src/libcap
ln -s $(topdir)/psx src/libcap/
@@ -52,12 +55,29 @@
@echo "NOTE: RAISED cap_net_bind_service ON web binary"
endif
-test: all
+ok: ok.go
+ CGO_ENABLED=0 GOPATH=$(GOPATH) go build $<
+
+try-launching: try-launching.go $(CAPGOPACKAGE) ok
+ CGO_ENABLED="$(CGO_REQUIRED)" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH=$(GOPATH) go build $<
+ifeq ($(CGO_REQUIRED),0)
+ CGO_ENABLED="1" CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH=$(GOPATH) go build -o $@-cgo $<
+endif
+
+test: all ../progs/capsh
CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH="$(GOPATH)" go test libcap/psx
CGO_LDFLAGS_ALLOW="$(CGO_LDFLAGS_ALLOW)" CGO_CFLAGS="$(CGO_CFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" GOPATH=$(GOPATH) go test libcap/cap
LD_LIBRARY_PATH=../libcap ./compare-cap
+ ./try-launching
+ifeq ($(CGO_REQUIRED),0)
+ ./try-launching-cgo
+endif
sudotest: test
+ sudo ./try-launching
+ifeq ($(CGO_REQUIRED),0)
+ sudo ./try-launching-cgo
+endif
install: all
mkdir -p $(FAKEROOT)$(GOPKGDIR)/libcap/psx
@@ -70,7 +90,8 @@
clean:
GOPATH=$(GOPATH) go clean -x -i libcap/cap 2> /dev/null || exit 0
GOPATH=$(GOPATH) go clean -x -i libcap/psx 2> /dev/null || exit 0
- rm -f *.o *.so mknames web compare-cap *~
+ rm -f *.o *.so *~ mknames web ok
+ rm -f compare-cap try-launching try-launching-cgo
rm -f $(topdir)/cap/*~ $(topdir)/cap/names.go $(topdir)/cap/syscalls*.go
rm -f $(topdir)/psx/*~
rm -fr pkg src
diff --git a/go/compare-cap.go b/go/compare-cap.go
index 19c618a..4e104a1 100644
--- a/go/compare-cap.go
+++ b/go/compare-cap.go
@@ -128,10 +128,10 @@
}
}
-// tryProcCaps performs a set of convenience functions and compares the
-// results with those seen by libcap. At the end of this function, the
-// running process has no privileges at all. So exiting is the only
-// option.
+// tryProcCaps performs a set of convenience functions and compares
+// the results with those seen by libcap. At the end of this function,
+// the running process has no privileges at all. So exiting the
+// program is the only option.
func tryProcCaps() {
c := cap.GetProc()
if v, err := c.GetFlag(cap.Permitted, cap.SETPCAP); err != nil {
@@ -173,7 +173,7 @@
for i, mode := range []cap.Mode{cap.ModePure1E, cap.ModePure1EInit, cap.ModeNoPriv} {
if err := mode.Set(); err != nil {
- log.Fatalf("[%d] failed to set mode to %d (%v): %v", i, mode, mode, err)
+ log.Fatalf("[%d] in mode=%v and failed to set mode to %d (%v): %v", i, cap.GetMode(), mode, mode, err)
}
if got := cap.GetMode(); got != mode {
log.Fatalf("[%d] unable to recognise mode %d (%v), got: %d (%v)", i, mode, mode, got, got)
@@ -325,8 +325,8 @@
// the current program is capable enough and do not involve
// any cgo calls to libcap.
tryFileCaps()
- tryProcCaps()
- // Since we have no privilege, there is nothing left to do but exit.
+ // Nothing left to do but exit after this one.
+ tryProcCaps()
log.Printf("compare-cap success!")
}
diff --git a/go/ok.go b/go/ok.go
new file mode 100644
index 0000000..509638e
--- /dev/null
+++ b/go/ok.go
@@ -0,0 +1,9 @@
+// Program ok exits with status zero. try-launching runs it both directly
+// and as /ok inside a chroot, so it needs to be linked statically.
+package main
+
+import "os"
+
+func main() {
+ os.Exit(0)
+}
diff --git a/go/syscalls.sh b/go/syscalls.sh
index b91424d..5affe11 100755
--- a/go/syscalls.sh
+++ b/go/syscalls.sh
@@ -20,12 +20,24 @@
"syscall"
)
-// callKernel variables overridable for testing purposes.
+// multisc provides syscalls overridable for testing purposes that
+// support a single kernel security state for all OS threads.
// (Go build tree has no syscall.PerOSThreadSyscall support.)
-var callWKernel = psx.Syscall3
-var callWKernel6 = psx.Syscall6
-var callRKernel = syscall.RawSyscall
-var callRKernel6 = syscall.RawSyscall6
+var multisc = &syscaller{
+ w3: psx.Syscall3,
+ w6: psx.Syscall6,
+ r3: syscall.RawSyscall,
+ r6: syscall.RawSyscall6,
+}
+
+// singlesc provides a single threaded implementation. Users should
+// take care to ensure the thread is OS locked.
+var singlesc = &syscaller{
+ w3: syscall.RawSyscall,
+ w6: syscall.RawSyscall6,
+ r3: syscall.RawSyscall,
+ r6: syscall.RawSyscall6,
+}
EOF
exit 0
@@ -39,12 +51,23 @@
import "syscall"
-// callKernel variables overridable for testing purposes.
-// (Go build tree contains syscall.PerOSThreadSyscall support.)
-var callWKernel = syscall.PerOSThreadSyscall
-var callWKernel6 = syscall.PerOSThreadSyscall6
-var callRKernel = syscall.RawSyscall
-var callRKernel6 = syscall.RawSyscall6
+// multisc provides syscalls overridable for testing purposes that
+// support a single kernel security state for all OS threads.
+var multisc = &syscaller{
+ w3: syscall.PerOSThreadSyscall,
+ w6: syscall.PerOSThreadSyscall6,
+ r3: syscall.RawSyscall,
+ r6: syscall.RawSyscall6,
+}
+
+// singlesc provides a single threaded implementation. Users should
+// take care to ensure the thread is locked and marked nogc.
+var singlesc = &syscaller{
+ w3: syscall.RawSyscall,
+ w6: syscall.RawSyscall6,
+ r3: syscall.RawSyscall,
+ r6: syscall.RawSyscall6,
+}
EOF
cat > "${dir}/syscalls_cgo.go" <<EOF
@@ -57,11 +80,23 @@
"syscall"
)
-// callKernel variables overridable for testing purposes.
+// multisc provides syscalls overridable for testing purposes that
+// support a single kernel security state for all OS threads.
// We use this version when we are cgo compiling because
// we need to manage the native C pthreads too.
-var callWKernel = psx.Syscall3
-var callWKernel6 = psx.Syscall6
-var callRKernel = syscall.RawSyscall
-var callRKernel6 = syscall.RawSyscall6
+var multisc = &syscaller{
+ w3: psx.Syscall3,
+ w6: psx.Syscall6,
+ r3: syscall.RawSyscall,
+ r6: syscall.RawSyscall6,
+}
+
+// singlesc provides a single threaded implementation. Users should
+// take care to ensure the thread is locked and marked nogc.
+var singlesc = &syscaller{
+ w3: syscall.RawSyscall,
+ w6: syscall.RawSyscall6,
+ r3: syscall.RawSyscall,
+ r6: syscall.RawSyscall6,
+}
EOF
diff --git a/go/try-launching.go b/go/try-launching.go
new file mode 100644
index 0000000..847925b
--- /dev/null
+++ b/go/try-launching.go
@@ -0,0 +1,104 @@
+// Program try-launching validates the cap.Launch feature.
+package main
+
+import (
+ "fmt"
+ "log"
+ "strings"
+ "syscall"
+
+ "libcap/cap"
+)
+
+// tryLaunching starts every test program before waiting on any of
+// them. Cases that need privilege (changing IDs, chroot) are allowed
+// to fail when not running as root; any other failure is fatal.
+func tryLaunching() {
+ cwd, err := syscall.Getwd()
+ if err != nil {
+ log.Fatalf("no working directory: %v", err)
+ }
+ root := cwd[:strings.LastIndex(cwd, "/")] // parent directory of cwd
+
+ vs := []struct {
+ args []string // args[0] is also passed to NewLauncher as its first argument
+ fail bool // a failed launch or non-zero wait status is tolerated
+ callbackFn func(*syscall.ProcAttr, interface{}) error // given to Callback; nil in every case below
+ chroot string // given to SetChroot when non-empty
+ iab string // parsed with cap.IABFromText when non-empty
+ uid int // given to SetUID when non-zero
+ gid int // given to SetGroups (with groups) when non-zero
+ groups []int
+ }{
+ {args: []string{root + "/go/ok"}},
+ {
+ args: []string{root + "/progs/capsh", "--dropped=cap_chown", "--is-uid=123", "--is-gid=456", "--has-a=cap_setuid"},
+ iab: "!cap_chown,^cap_setuid,cap_sys_admin",
+ uid: 123,
+ gid: 456,
+ groups: []int{1, 2, 3},
+ fail: syscall.Getuid() != 0, // tolerated unless running as root
+ },
+ {
+ args: []string{"/ok"},
+ chroot: root + "/go",
+ fail: syscall.Getuid() != 0, // tolerated unless running as root
+ },
+ }
+
+ ps := make([]int, len(vs)) // launched pids, indexed like vs
+ ws := make([]syscall.WaitStatus, len(vs)) // wait status of each launched pid
+
+ for i, v := range vs {
+ e := cap.NewLauncher(v.args[0], v.args, nil)
+ e.Callback(v.callbackFn)
+ if v.chroot != "" {
+ e.SetChroot(v.chroot)
+ }
+ if v.uid != 0 {
+ e.SetUID(v.uid)
+ }
+ if v.gid != 0 {
+ e.SetGroups(v.gid, v.groups)
+ }
+ if v.iab != "" {
+ if iab, err := cap.IABFromText(v.iab); err != nil {
+ log.Fatalf("failed to parse iab=%q: %v", v.iab, err)
+ } else {
+ e.SetIAB(iab)
+ }
+ }
+ if ps[i], err = e.Launch(nil); err != nil {
+ if v.fail {
+ continue // tolerated launch failure
+ }
+ log.Fatalf("[%d] launch %q failed: %v", i, v.args, err)
+ }
+ }
+
+ for i, p := range ps {
+ if p == -1 { // presumably the pid Launch returns on failure - confirm
+ continue
+ }
+ if pr, err := syscall.Wait4(p, &ws[i], 0, nil); err != nil {
+ log.Fatalf("wait4 <%d> failed: %v", p, err)
+ } else if p != pr {
+ log.Fatalf("wait4 <%d> returned <%d> instead", p, pr)
+ } else if ws[i] != 0 { // any non-zero wait status counts as a failure
+ if vs[i].fail {
+ continue // tolerated failure of the launched program
+ }
+ log.Fatalf("wait4 <%d> status was %d", p, ws[i])
+ }
+ }
+}
+
+func main() {
+ if cap.LaunchSupported {
+ // Only exercise Launch where it is supported: the Go
+ // runtime had some OS threading bugs that prevented it
+ // from working (the launch OS thread would get reused).
+ tryLaunching()
+ }
+ fmt.Println("PASSED") // printed even when the launch tests were skipped
+}