tools: add filtering by mount namespace
In previous patches, I added the option --cgroupmap to filter events
belonging to a set of cgroup-v2. Although this approach works fine with
systemd services and containers when cgroup-v2 is enabled, it does not
work with containers when only cgroup-v1 is enabled because
bpf_get_current_cgroup_id() only works with cgroup-v2. It also requires
Linux 4.18 to get this bpf helper function.
This patch adds an additional way to filter by containers, using mount
namespaces.
Note that this does not help with systemd services since they normally
don't create a new mount namespace (unless you set some options like
'ReadOnlyPaths=', see "man 5 systemd.exec").
My goal with this patch is to filter Kubernetes pods, even on
distributions with an older kernel (<4.18) or without cgroup-v2 enabled.
- This is only implemented for tools that already support filtering by
cgroup id (bindsnoop, capable, execsnoop, opensnoop, profile, tcpaccept,
tcpconnect, tcptop and tcptracer).
- I picked the mount namespace because the other namespaces could be
disabled in Kubernetes (e.g. HostNetwork, HostPID, HostIPC).
It can be tested by following the example in docs/special_filtering.md,
added in this commit. To avoid compiling locally, the following commands
can be used:
```
sudo bpftool map create /sys/fs/bpf/mnt_ns_set type hash key 8 value 4 \
entries 128 name mnt_ns_set flags 0
docker run -ti --rm --privileged \
-v /usr/src:/usr/src -v /lib/modules:/lib/modules \
-v /sys/fs/bpf:/sys/fs/bpf --pid=host kinvolk/bcc:alban-containers-filters \
/usr/share/bcc/tools/execsnoop --mntnsmap /sys/fs/bpf/mnt_ns_set
```
Co-authored-by: Alban Crequy <[email protected]>
Co-authored-by: Mauricio Vásquez <[email protected]>
diff --git a/tools/bindsnoop.py b/tools/bindsnoop.py
index 4d3133f..de569c2 100755
--- a/tools/bindsnoop.py
+++ b/tools/bindsnoop.py
@@ -6,7 +6,7 @@
# based on tcpconnect utility from Brendan Gregg's suite.
#
# USAGE: bindsnoop [-h] [-t] [-E] [-p PID] [-P PORT[,PORT ...]] [-w]
-# [--count] [--cgroupmap mappath]
+# [--count] [--cgroupmap mappath] [--mntnsmap mappath]
#
# bindsnoop reports socket options set before the bind call
# that would impact this system call behavior:
@@ -28,6 +28,7 @@
from __future__ import print_function, absolute_import, unicode_literals
from bcc import BPF, DEBUG_SOURCE
+from bcc.containers import filter_by_containers
from bcc.utils import printb
import argparse
import re
@@ -51,6 +52,7 @@
./bindsnoop -E # report bind errors
./bindsnoop --count # count bind per src ip
./bindsnoop --cgroupmap mappath # only trace cgroups in this BPF map
+ ./bindsnoop --mntnsmap mappath # only trace mount namespaces in the map
it is reporting socket options set before the bins call
impacting system call behavior:
@@ -84,6 +86,8 @@
help="count binds per src ip and port")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+ help="trace mount namespaces in this BPF map only")
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
parser.add_argument("--debug-source", action="store_true",
@@ -148,8 +152,6 @@
};
BPF_HASH(ipv6_count, struct ipv6_flow_key_t);
-CGROUP_MAP
-
// bind options for event reporting
union bind_options {
u8 data;
@@ -174,7 +176,9 @@
FILTER_UID
- FILTER_CGROUP
+ if (container_should_be_filtered()) {
+ return 0;
+ }
// stash the sock ptr for lookup on return
currsock.update(&tid, &socket);
@@ -323,11 +327,6 @@
bpf_get_current_comm(&data6.task, sizeof(data6.task));
ipv6_bind_events.perf_submit(ctx, &data6, sizeof(data6));"""
},
- 'filter_cgroup': """
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
- return 0;
- }""",
}
# code substitutions
@@ -351,22 +350,11 @@
'if (uid != %s) { return 0; }' % args.uid)
if args.errors:
bpf_text = bpf_text.replace('FILTER_ERRORS', 'ignore_errors = 0;')
-if args.cgroupmap:
- bpf_text = bpf_text.replace('FILTER_CGROUP', struct_init['filter_cgroup'])
- bpf_text = bpf_text.replace(
- 'CGROUP_MAP',
- (
- 'BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "%s");' %
- args.cgroupmap
- )
- )
-
+bpf_text = filter_by_containers(args) + bpf_text
bpf_text = bpf_text.replace('FILTER_PID', '')
bpf_text = bpf_text.replace('FILTER_PORT', '')
bpf_text = bpf_text.replace('FILTER_UID', '')
bpf_text = bpf_text.replace('FILTER_ERRORS', '')
-bpf_text = bpf_text.replace('FILTER_CGROUP', '')
-bpf_text = bpf_text.replace('CGROUP_MAP', '')
# selecting output format - 80 characters or wide, fitting IPv6 addresses
header_fmt = "%8s %-12.12s %-4s %-15s %-5s %5s %2s"
diff --git a/tools/bindsnoop_example.txt b/tools/bindsnoop_example.txt
index 77e040e..c7c5135 100644
--- a/tools/bindsnoop_example.txt
+++ b/tools/bindsnoop_example.txt
@@ -59,7 +59,7 @@
# ./bindsnoop.py --cgroupmap /sys/fs/bpf/test01
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
In order to track heavy bind usage one can use --count option
@@ -74,7 +74,7 @@
Usage message:
# ./bindsnoop.py -h
usage: bindsnoop.py [-h] [-t] [-w] [-p PID] [-P PORT] [-E] [-U] [-u UID]
- [--count] [--cgroupmap CGROUPMAP]
+ [--count] [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP]
Trace TCP binds
@@ -103,6 +103,7 @@
./bindsnoop -E # report bind errors
./bindsnoop --count # count bind per src ip
./bindsnoop --cgroupmap mappath # only trace cgroups in this BPF map
+ ./bindsnoop --mntnsmap mappath # only trace mount namespaces in the map
it is reporting socket options set before the bins call
impacting system call behavior:
diff --git a/tools/capable.py b/tools/capable.py
index 3852e22..94d1c32 100755
--- a/tools/capable.py
+++ b/tools/capable.py
@@ -15,6 +15,7 @@
from os import getpid
from functools import partial
from bcc import BPF
+from bcc.containers import filter_by_containers
import errno
import argparse
from time import strftime
@@ -28,7 +29,8 @@
./capable -U # add user-space stacks to trace
./capable -x # extra fields: show TID and INSETID columns
./capable --unique # don't repeat stacks for the same pid or cgroup
- ./capable --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./capable --cgroupmap mappath # only trace cgroups in this BPF map
+ ./capable --mntnsmap mappath # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
description="Trace security capability checks",
@@ -46,6 +48,8 @@
help="show extra fields in TID and INSETID columns")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+ help="trace mount namespaces in this BPF map only")
parser.add_argument("--unique", action="store_true",
help="don't repeat stacks for the same pid or cgroup")
args = parser.parse_args()
@@ -145,10 +149,6 @@
BPF_HASH(seen, struct repeat_t, u64);
#endif
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
#if defined(USER_STACKS) || defined(KERNEL_STACKS)
BPF_STACK_TRACE(stacks, 2048);
#endif
@@ -173,12 +173,10 @@
FILTER1
FILTER2
FILTER3
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
+
+ if (container_should_be_filtered()) {
return 0;
}
-#endif
u32 uid = bpf_get_current_uid_gid();
struct data_t data = {.tgid = tgid, .pid = pid, .uid = uid, .cap = cap, .audit = audit, .insetid = insetid};
@@ -192,7 +190,7 @@
#if UNIQUESET
struct repeat_t repeat = {0,};
repeat.cap = cap;
-#if CGROUPSET
+#if CGROUP_ID_SET
repeat.cgroupid = bpf_get_current_cgroup_id();
#else
repeat.tgid = tgid;
@@ -229,11 +227,7 @@
bpf_text = bpf_text.replace('FILTER2', '')
bpf_text = bpf_text.replace('FILTER3',
'if (pid == %s) { return 0; }' % getpid())
-if args.cgroupmap:
- bpf_text = bpf_text.replace('CGROUPSET', '1')
- bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
- bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
if args.unique:
bpf_text = bpf_text.replace('UNIQUESET', '1')
else:
diff --git a/tools/capable_example.txt b/tools/capable_example.txt
index bcd6d01..1701b6a 100644
--- a/tools/capable_example.txt
+++ b/tools/capable_example.txt
@@ -4,7 +4,7 @@
capable traces calls to the kernel cap_capable() function, which does security
capability checks, and prints details for each call. For example:
-# ./capable.py
+# ./capable.py
TIME UID PID COMM CAP NAME AUDIT
22:11:23 114 2676 snmpd 12 CAP_NET_ADMIN 1
22:11:23 0 6990 run 24 CAP_SYS_RESOURCE 1
@@ -100,14 +100,14 @@
# ./capable.py --cgroupmap /sys/fs/bpf/test01
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
USAGE:
# ./capable.py -h
usage: capable.py [-h] [-v] [-p PID] [-K] [-U] [-x] [--cgroupmap CGROUPMAP]
- [--unique]
+ [--mntnsmap MNTNSMAP] [--unique]
Trace security capability checks
@@ -120,6 +120,7 @@
-x, --extra show extra fields in TID and INSETID columns
--cgroupmap CGROUPMAP
trace cgroups in this BPF map only
+ --mntnsmap MNTNSMAP trace mount namespaces in this BPF map only
--unique don't repeat stacks for the same pid or cgroup
examples:
@@ -130,4 +131,5 @@
./capable -U # add user-space stacks to trace
./capable -x # extra fields: show TID and INSETID columns
./capable --unique # don't repeat stacks for the same pid or cgroup
- ./capable --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./capable --cgroupmap mappath # only trace cgroups in this BPF map
+ ./capable --mntnsmap mappath # only trace mount namespaces in the map
diff --git a/tools/execsnoop.py b/tools/execsnoop.py
index 9879d2c..53052d3 100755
--- a/tools/execsnoop.py
+++ b/tools/execsnoop.py
@@ -19,6 +19,7 @@
from __future__ import print_function
from bcc import BPF
+from bcc.containers import filter_by_containers
from bcc.utils import ArgString, printb
import bcc.utils as utils
import argparse
@@ -57,7 +58,8 @@
./execsnoop -q # add "quotemarks" around arguments
./execsnoop -n main # only print command lines containing "main"
./execsnoop -l tpkg # only print command where arguments contains "tpkg"
- ./execsnoop --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./execsnoop --cgroupmap mappath # only trace cgroups in this BPF map
+ ./execsnoop --mntnsmap mappath # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
description="Trace exec() syscalls",
@@ -71,6 +73,8 @@
help="include failed exec()s")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+ help="trace mount namespaces in this BPF map only")
parser.add_argument("-u", "--uid", type=parse_uid, metavar='USER',
help="trace this UID only")
parser.add_argument("-q", "--quote", action="store_true",
@@ -113,9 +117,6 @@
int retval;
};
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
BPF_PERF_OUTPUT(events);
static int __submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
@@ -145,12 +146,9 @@
UID_FILTER
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
- return 0;
+ if (container_should_be_filtered()) {
+ return 0;
}
-#endif
// create data here and pass to submit_arg to save stack space (#555)
struct data_t data = {};
@@ -185,12 +183,9 @@
int do_ret_sys_execve(struct pt_regs *ctx)
{
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
- return 0;
+ if (container_should_be_filtered()) {
+ return 0;
}
-#endif
struct data_t data = {};
struct task_struct *task;
@@ -223,11 +218,7 @@
'if (uid != %s) { return 0; }' % args.uid)
else:
bpf_text = bpf_text.replace('UID_FILTER', '')
-if args.cgroupmap:
- bpf_text = bpf_text.replace('CGROUPSET', '1')
- bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
- bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
if args.ebpf:
print(bpf_text)
exit()
diff --git a/tools/execsnoop_example.txt b/tools/execsnoop_example.txt
index a90d007..8cdfe0d 100644
--- a/tools/execsnoop_example.txt
+++ b/tools/execsnoop_example.txt
@@ -83,7 +83,7 @@
# ./execsnoop --cgroupmap /sys/fs/bpf/test01
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
The -U option include UID on output:
@@ -121,6 +121,7 @@
-x, --fails include failed exec()s
--cgroupmap CGROUPMAP
trace cgroups in this BPF map only
+ --mntnsmap MNTNSMAP trace mount namespaces in this BPF map only
-u USER, --uid USER trace this UID only
-q, --quote Add quotemarks (") around arguments.
-n NAME, --name NAME only print commands matching this name (regex), any
@@ -142,4 +143,5 @@
./execsnoop -q # add "quotemarks" around arguments
./execsnoop -n main # only print command lines containing "main"
./execsnoop -l tpkg # only print command where arguments contains "tpkg"
- ./execsnoop --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./execsnoop --cgroupmap mappath # only trace cgroups in this BPF map
+ ./execsnoop --mntnsmap mappath # only trace mount namespaces in the map
diff --git a/tools/opensnoop.py b/tools/opensnoop.py
index 28fe755..a68b13f 100755
--- a/tools/opensnoop.py
+++ b/tools/opensnoop.py
@@ -17,6 +17,7 @@
from __future__ import print_function
from bcc import ArgString, BPF
+from bcc.containers import filter_by_containers
from bcc.utils import printb
import argparse
from datetime import datetime, timedelta
@@ -35,7 +36,8 @@
./opensnoop -n main # only print process names containing "main"
./opensnoop -e # show extended fields
./opensnoop -f O_WRONLY -f O_RDWR # only print calls for writing
- ./opensnoop --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./opensnoop --cgroupmap mappath # only trace cgroups in this BPF map
+ ./opensnoop --mntnsmap mappath # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
description="Trace open() syscalls",
@@ -53,6 +55,8 @@
help="trace this TID only")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+ help="trace mount namespaces in this BPF map only")
parser.add_argument("-u", "--uid",
help="trace this UID only")
parser.add_argument("-d", "--duration",
@@ -102,9 +106,6 @@
int flags; // EXTENDED_STRUCT_MEMBER
};
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
BPF_PERF_OUTPUT(events);
"""
@@ -122,12 +123,11 @@
PID_TID_FILTER
UID_FILTER
FLAGS_FILTER
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
- return 0;
+
+ if (container_should_be_filtered()) {
+ return 0;
}
-#endif
+
if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
val.id = id;
val.fname = filename;
@@ -177,12 +177,9 @@
PID_TID_FILTER
UID_FILTER
FLAGS_FILTER
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
- return 0;
+ if (container_should_be_filtered()) {
+ return 0;
}
-#endif
struct data_t data = {};
bpf_get_current_comm(&data.comm, sizeof(data.comm));
@@ -221,11 +218,7 @@
'if (uid != %s) { return 0; }' % args.uid)
else:
bpf_text = bpf_text.replace('UID_FILTER', '')
-if args.cgroupmap:
- bpf_text = bpf_text.replace('CGROUPSET', '1')
- bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
- bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
if args.flag_filter:
bpf_text = bpf_text.replace('FLAGS_FILTER',
'if (!(flags & %d)) { return 0; }' % flag_filter_mask)
diff --git a/tools/opensnoop_example.txt b/tools/opensnoop_example.txt
index 44f0e33..f15e84f 100644
--- a/tools/opensnoop_example.txt
+++ b/tools/opensnoop_example.txt
@@ -187,14 +187,15 @@
# ./opensnoop --cgroupmap /sys/fs/bpf/test01
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
USAGE message:
# ./opensnoop -h
-usage: opensnoop [-h] [-T] [-x] [-p PID] [-t TID] [-d DURATION] [-n NAME]
- [-e] [-f FLAG_FILTER]
+usage: opensnoop.py [-h] [-T] [-U] [-x] [-p PID] [-t TID]
+ [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP] [-u UID]
+ [-d DURATION] [-n NAME] [-e] [-f FLAG_FILTER]
Trace open() syscalls
@@ -205,6 +206,9 @@
-x, --failed only show failed opens
-p PID, --pid PID trace this PID only
-t TID, --tid TID trace this TID only
+ --cgroupmap CGROUPMAP
+ trace cgroups in this BPF map only
+ --mntnsmap MNTNSMAP trace mount namespaces in this BPF map only
-u UID, --uid UID trace this UID only
-d DURATION, --duration DURATION
total duration of trace in seconds
@@ -226,3 +230,5 @@
./opensnoop -n main # only print process names containing "main"
./opensnoop -e # show extended fields
./opensnoop -f O_WRONLY -f O_RDWR # only print calls for writing
+ ./opensnoop --cgroupmap mappath # only trace cgroups in this BPF map
+ ./opensnoop --mntnsmap mappath # only trace mount namespaces in the map
diff --git a/tools/profile.py b/tools/profile.py
index 2067933..dd6f65f 100755
--- a/tools/profile.py
+++ b/tools/profile.py
@@ -24,10 +24,11 @@
#
# 15-Jul-2016 Brendan Gregg Created this.
# 20-Oct-2016 " " Switched to use the new 4.9 support.
-# 26-Jan-2019 " " Changed to exclude CPU idle by default.
+# 26-Jan-2019 " " Changed to exclude CPU idle by default.
from __future__ import print_function
from bcc import BPF, PerfType, PerfSWConfig
+from bcc.containers import filter_by_containers
from sys import stderr
from time import sleep
import argparse
@@ -72,7 +73,8 @@
./profile -L 185 # only profile thread with TID 185
./profile -U # only show user space stacks (no kernel)
./profile -K # only show kernel space stacks (no user)
- ./profile --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./profile --cgroupmap mappath # only trace cgroups in this BPF map
+ ./profile --mntnsmap mappath # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
description="Profile CPU stack traces at a timed interval",
@@ -115,6 +117,8 @@
help=argparse.SUPPRESS)
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+ help="trace mount namespaces in this BPF map only")
# option logic
args = parser.parse_args()
@@ -146,10 +150,6 @@
BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
// This code gets a bit complex. Probably not suitable for casual hacking.
int do_perf_event(struct bpf_perf_event_data *ctx) {
@@ -163,12 +163,9 @@
if (!(THREAD_FILTER))
return 0;
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
+ if (container_should_be_filtered()) {
return 0;
}
-#endif
// create map key
struct key_t key = {.pid = tgid};
@@ -246,11 +243,7 @@
stack_context = "user + kernel"
bpf_text = bpf_text.replace('USER_STACK_GET', user_stack_get)
bpf_text = bpf_text.replace('KERNEL_STACK_GET', kernel_stack_get)
-if args.cgroupmap:
- bpf_text = bpf_text.replace('CGROUPSET', '1')
- bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
- bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
sample_freq = 0
sample_period = 0
diff --git a/tools/profile_example.txt b/tools/profile_example.txt
index bb3c5ae..2c7e702 100644
--- a/tools/profile_example.txt
+++ b/tools/profile_example.txt
@@ -708,7 +708,7 @@
# ./profile --cgroupmap /sys/fs/bpf/test01
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
USAGE message:
@@ -717,7 +717,7 @@
usage: profile.py [-h] [-p PID | -L TID] [-U | -K] [-F FREQUENCY | -c COUNT]
[-d] [-a] [-I] [-f]
[--stack-storage-size STACK_STORAGE_SIZE] [-C CPU]
- [--cgroupmap CGROUPMAP]
+ [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP]
[duration]
Profile CPU stack traces at a timed interval
@@ -750,6 +750,7 @@
-C CPU, --cpu CPU cpu number to run profile on
--cgroupmap CGROUPMAP
trace cgroups in this BPF map only
+ --mntnsmap MNTNSMAP trace mount namespaces in this BPF map only
examples:
./profile # profile stack traces at 49 Hertz until Ctrl-C
@@ -761,4 +762,5 @@
./profile -L 185 # only profile thread with TID 185
./profile -U # only show user space stacks (no kernel)
./profile -K # only show kernel space stacks (no user)
- ./profile --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./profile --cgroupmap mappath # only trace cgroups in this BPF map
+ ./profile --mntnsmap mappath # only trace mount namespaces in the map
diff --git a/tools/tcpaccept.py b/tools/tcpaccept.py
index 03b05e0..4aa7fd7 100755
--- a/tools/tcpaccept.py
+++ b/tools/tcpaccept.py
@@ -16,6 +16,7 @@
# 14-Feb-2016 " " Switch to bpf_perf_output.
from __future__ import print_function
+from bcc.containers import filter_by_containers
from bcc import BPF
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
@@ -29,7 +30,8 @@
./tcpaccept -t # include timestamps
./tcpaccept -P 80,81 # only trace port 80 and 81
./tcpaccept -p 181 # only trace PID 181
- ./tcpaccept --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./tcpaccept --cgroupmap mappath # only trace cgroups in this BPF map
+ ./tcpaccept --mntnsmap mappath # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
description="Trace TCP accepts",
@@ -45,6 +47,8 @@
help="comma-separated list of local ports to trace")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+ help="trace mount namespaces in this BPF map only")
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
args = parser.parse_args()
@@ -80,11 +84,6 @@
char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);
-
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
"""
#
@@ -97,12 +96,9 @@
bpf_text_kprobe = """
int kretprobe__inet_csk_accept(struct pt_regs *ctx)
{
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
+ if (container_should_be_filtered()) {
return 0;
}
-#endif
struct sock *newsk = (struct sock *)PT_REGS_RC(ctx);
u32 pid = bpf_get_current_pid_tgid() >> 32;
@@ -115,21 +111,21 @@
// check this is TCP
u8 protocol = 0;
// workaround for reading the sk_protocol bitfield:
-
+
// Following comments add by Joe Yin:
// Unfortunately,it can not work since Linux 4.10,
// because the sk_wmem_queued is not following the bitfield of sk_protocol.
// And the following member is sk_gso_max_segs.
// So, we can use this:
// bpf_probe_read(&protocol, 1, (void *)((u64)&newsk->sk_gso_max_segs) - 3);
- // In order to diff the pre-4.10 and 4.10+ ,introduce the variables gso_max_segs_offset,sk_lingertime,
- // sk_lingertime is closed to the gso_max_segs_offset,and
- // the offset between the two members is 4
+ // In order to diff the pre-4.10 and 4.10+ ,introduce the variables gso_max_segs_offset,sk_lingertime,
+ // sk_lingertime is closed to the gso_max_segs_offset,and
+ // the offset between the two members is 4
int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);
- if (sk_lingertime_offset - gso_max_segs_offset == 4)
+ if (sk_lingertime_offset - gso_max_segs_offset == 4)
// 4.10+ with little endian
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
protocol = *(u8 *)((u64)&newsk->sk_gso_max_segs - 3);
@@ -199,11 +195,7 @@
lports_if = ' && '.join(['lport != %d' % lport for lport in lports])
bpf_text = bpf_text.replace('##FILTER_PORT##',
'if (%s) { return 0; }' % lports_if)
-if args.cgroupmap:
- bpf_text = bpf_text.replace('CGROUPSET', '1')
- bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
- bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
if debug or args.ebpf:
print(bpf_text)
if args.ebpf:
diff --git a/tools/tcpaccept_example.txt b/tools/tcpaccept_example.txt
index 5b6b1a6..9381565 100644
--- a/tools/tcpaccept_example.txt
+++ b/tools/tcpaccept_example.txt
@@ -38,7 +38,7 @@
# ./tcpaccept --cgroupmap /sys/fs/bpf/test01
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
USAGE message:
@@ -62,4 +62,5 @@
./tcpaccept -t # include timestamps
./tcpaccept -P 80,81 # only trace port 80 and 81
./tcpaccept -p 181 # only trace PID 181
- ./tcpaccept --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./tcpaccept --cgroupmap mappath # only trace cgroups in this BPF map
+ ./tcpaccept --mntnsmap mappath # only trace mount namespaces in the map
\ No newline at end of file
diff --git a/tools/tcpconnect.py b/tools/tcpconnect.py
index 67f2cef..40878ee 100755
--- a/tools/tcpconnect.py
+++ b/tools/tcpconnect.py
@@ -21,6 +21,7 @@
from __future__ import print_function
from bcc import BPF
+from bcc.containers import filter_by_containers
from bcc.utils import printb
import argparse
from socket import inet_ntop, ntohs, AF_INET, AF_INET6
@@ -37,7 +38,8 @@
./tcpconnect -U # include UID
./tcpconnect -u 1000 # only trace UID 1000
./tcpconnect -c # count connects per src ip and dest ip/port
- ./tcpconnect --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./tcpconnect --cgroupmap mappath # only trace cgroups in this BPF map
+ ./tcpconnect --mntnsmap mappath # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
description="Trace TCP connects",
@@ -57,6 +59,8 @@
help="count connects per src ip and dest ip/port")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+ help="trace mount namespaces in this BPF map only")
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
args = parser.parse_args()
@@ -70,10 +74,6 @@
BPF_HASH(currsock, u32, struct sock *);
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
u64 ts_us;
@@ -116,12 +116,9 @@
int trace_connect_entry(struct pt_regs *ctx, struct sock *sk)
{
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
- return 0;
+ if (container_should_be_filtered()) {
+ return 0;
}
-#endif
u64 pid_tgid = bpf_get_current_pid_tgid();
u32 pid = pid_tgid >> 32;
@@ -248,11 +245,7 @@
if args.uid:
bpf_text = bpf_text.replace('FILTER_UID',
'if (uid != %s) { return 0; }' % args.uid)
-if args.cgroupmap:
- bpf_text = bpf_text.replace('CGROUPSET', '1')
- bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
- bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
bpf_text = bpf_text.replace('FILTER_PID', '')
bpf_text = bpf_text.replace('FILTER_PORT', '')
diff --git a/tools/tcpconnect_example.txt b/tools/tcpconnect_example.txt
index cf97562..7efac4a 100644
--- a/tools/tcpconnect_example.txt
+++ b/tools/tcpconnect_example.txt
@@ -73,14 +73,14 @@
# ./tcpconnect --cgroupmap /sys/fs/bpf/test01
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
USAGE message:
# ./tcpconnect -h
usage: tcpconnect.py [-h] [-t] [-p PID] [-P PORT] [-U] [-u UID] [-c]
- [--cgroupmap CGROUPMAP]
+ [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP]
Trace TCP connects
@@ -104,4 +104,5 @@
./tcpconnect -U # include UID
./tcpconnect -u 1000 # only trace UID 1000
./tcpconnect -c # count connects per src ip and dest ip/port
- ./tcpconnect --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./tcpconnect --cgroupmap mappath # only trace cgroups in this BPF map
+ ./tcpconnect --mntnsmap mappath # only trace mount namespaces in the map
\ No newline at end of file
diff --git a/tools/tcptop.py b/tools/tcptop.py
index 9fb3ca2..510c4e8 100755
--- a/tools/tcptop.py
+++ b/tools/tcptop.py
@@ -26,6 +26,7 @@
from __future__ import print_function
from bcc import BPF
+from bcc.containers import filter_by_containers
import argparse
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
@@ -45,7 +46,8 @@
./tcptop # trace TCP send/recv by host
./tcptop -C # don't clear the screen
./tcptop -p 181 # only trace PID 181
- ./tcptop --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./tcptop --cgroupmap mappath # only trace cgroups in this BPF map
+ ./tcptop --mntnsmap mappath # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
description="Summarize TCP send/recv throughput by host",
@@ -63,6 +65,8 @@
help="number of outputs")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+ help="trace mount namespaces in this BPF map only")
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
args = parser.parse_args()
@@ -98,21 +102,16 @@
BPF_HASH(ipv6_send_bytes, struct ipv6_key_t);
BPF_HASH(ipv6_recv_bytes, struct ipv6_key_t);
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk,
struct msghdr *msg, size_t size)
{
- u32 pid = bpf_get_current_pid_tgid() >> 32;
- FILTER
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
+ if (container_should_be_filtered()) {
return 0;
}
-#endif
+
+ u32 pid = bpf_get_current_pid_tgid() >> 32;
+ FILTER_PID
+
u16 dport = 0, family = sk->__sk_common.skc_family;
if (family == AF_INET) {
@@ -148,14 +147,13 @@
*/
int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied)
{
- u32 pid = bpf_get_current_pid_tgid() >> 32;
- FILTER
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
+ if (container_should_be_filtered()) {
return 0;
}
-#endif
+
+ u32 pid = bpf_get_current_pid_tgid() >> 32;
+ FILTER_PID
+
u16 dport = 0, family = sk->__sk_common.skc_family;
u64 *val, zero = 0;
@@ -190,15 +188,11 @@
# code substitutions
if args.pid:
- bpf_text = bpf_text.replace('FILTER',
+ bpf_text = bpf_text.replace('FILTER_PID',
'if (pid != %s) { return 0; }' % args.pid)
else:
- bpf_text = bpf_text.replace('FILTER', '')
-if args.cgroupmap:
- bpf_text = bpf_text.replace('CGROUPSET', '1')
- bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
- bpf_text = bpf_text.replace('CGROUPSET', '0')
+ bpf_text = bpf_text.replace('FILTER_PID', '')
+bpf_text = filter_by_containers(args) + bpf_text
if debug or args.ebpf:
print(bpf_text)
if args.ebpf:
diff --git a/tools/tcptop_example.txt b/tools/tcptop_example.txt
index 379aff2..e29e2fa 100644
--- a/tools/tcptop_example.txt
+++ b/tools/tcptop_example.txt
@@ -97,13 +97,14 @@
# tcptop --cgroupmap /sys/fs/bpf/test01
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
USAGE:
# tcptop -h
usage: tcptop.py [-h] [-C] [-S] [-p PID] [--cgroupmap CGROUPMAP]
+ [--mntnsmap MNTNSMAP]
[interval] [count]
Summarize TCP send/recv throughput by host
@@ -125,3 +126,4 @@
./tcptop -C # don't clear the screen
./tcptop -p 181 # only trace PID 181
./tcptop --cgroupmap ./mappath # only trace cgroups in this BPF map
+ ./tcptop --mntnsmap mappath # only trace mount namespaces in the map
diff --git a/tools/tcptracer.py b/tools/tcptracer.py
index 8e6e1ec..7f67d33 100755
--- a/tools/tcptracer.py
+++ b/tools/tcptracer.py
@@ -16,6 +16,7 @@
# Licensed under the Apache License, Version 2.0 (the "License")
from __future__ import print_function
from bcc import BPF
+from bcc.containers import filter_by_containers
import argparse as ap
from socket import inet_ntop, AF_INET, AF_INET6
@@ -31,6 +32,8 @@
help="trace this Network Namespace only")
parser.add_argument("--cgroupmap",
help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+ help="trace mount namespaces in this BPF map only")
parser.add_argument("-v", "--verbose", action="store_true",
help="include Network Namespace in the output")
parser.add_argument("--ebpf", action="store_true",
@@ -79,10 +82,6 @@
};
BPF_PERF_OUTPUT(tcp_ipv6_event);
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
// tcp_set_state doesn't run in the context of the process that initiated the
// connection so we need to store a map TUPLE -> PID to send the right PID on
// the event
@@ -179,12 +178,9 @@
int trace_connect_v4_entry(struct pt_regs *ctx, struct sock *sk)
{
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
- return 0;
+ if (container_should_be_filtered()) {
+ return 0;
}
-#endif
u64 pid = bpf_get_current_pid_tgid();
@@ -233,12 +229,9 @@
int trace_connect_v6_entry(struct pt_regs *ctx, struct sock *sk)
{
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
- return 0;
+ if (container_should_be_filtered()) {
+ return 0;
}
-#endif
u64 pid = bpf_get_current_pid_tgid();
##FILTER_PID##
@@ -371,12 +364,9 @@
int trace_close_entry(struct pt_regs *ctx, struct sock *skp)
{
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
- return 0;
+ if (container_should_be_filtered()) {
+ return 0;
}
-#endif
u64 pid = bpf_get_current_pid_tgid();
@@ -439,12 +429,9 @@
int trace_accept_return(struct pt_regs *ctx)
{
-#if CGROUPSET
- u64 cgroupid = bpf_get_current_cgroup_id();
- if (cgroupset.lookup(&cgroupid) == NULL) {
- return 0;
+ if (container_should_be_filtered()) {
+ return 0;
}
-#endif
struct sock *newsk = (struct sock *)PT_REGS_RC(ctx);
u64 pid = bpf_get_current_pid_tgid();
@@ -614,11 +601,7 @@
bpf_text = bpf_text.replace('##FILTER_PID##', pid_filter)
bpf_text = bpf_text.replace('##FILTER_NETNS##', netns_filter)
-if args.cgroupmap:
- bpf_text = bpf_text.replace('CGROUPSET', '1')
- bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
- bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
if args.ebpf:
print(bpf_text)
diff --git a/tools/tcptracer_example.txt b/tools/tcptracer_example.txt
index b6e5258..0f61ecc 100644
--- a/tools/tcptracer_example.txt
+++ b/tools/tcptracer_example.txt
@@ -42,4 +42,4 @@
# ./tcptracer --cgroupmap /sys/fs/bpf/test01
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md