tools: add filtering by mount namespace

In previous patches, I added the option --cgroupmap to filter events
belonging to a set of cgroup-v2 cgroups. Although this approach works
fine with systemd services and containers when cgroup-v2 is enabled, it
does not work with containers when only cgroup-v1 is enabled because
bpf_get_current_cgroup_id() only works with cgroup-v2. It also requires
Linux 4.18 to get this bpf helper function.

This patch adds an additional way to filter by containers, using mount
namespaces.

Note that this does not help with systemd services since they normally
don't create a new mount namespace (unless you set some options like
'ReadOnlyPaths=', see "man 5 systemd.exec").

My goal with this patch is to filter Kubernetes pods, even on
distributions with an older kernel (<4.18) or without cgroup-v2 enabled.

- This is only implemented for tools that already support filtering by
  cgroup id (bindsnoop, capable, execsnoop, opensnoop, profile, tcpaccept,
  tcpconnect, tcptop and tcptracer).

- I picked the mount namespace because the other namespaces could be
  disabled in Kubernetes (e.g. HostNetwork, HostPID, HostIPC).

It can be tested by following the example in docs/special_filtering.md,
added in this commit. To avoid compiling locally, the following commands
can be used:

```
sudo bpftool map create /sys/fs/bpf/mnt_ns_set type hash key 8 value 4 \
  entries 128 name mnt_ns_set flags 0
docker run -ti --rm --privileged \
  -v /usr/src:/usr/src -v /lib/modules:/lib/modules \
  -v /sys/fs/bpf:/sys/fs/bpf --pid=host kinvolk/bcc:alban-containers-filters \
  /usr/share/bcc/tools/execsnoop --mntnsmap /sys/fs/bpf/mnt_ns_set

```

Co-authored-by: Alban Crequy <[email protected]>
Co-authored-by: Mauricio Vásquez <[email protected]>
diff --git a/tools/bindsnoop.py b/tools/bindsnoop.py
index 4d3133f..de569c2 100755
--- a/tools/bindsnoop.py
+++ b/tools/bindsnoop.py
@@ -6,7 +6,7 @@
 # based on tcpconnect utility from Brendan Gregg's suite.
 #
 # USAGE: bindsnoop [-h] [-t] [-E] [-p PID] [-P PORT[,PORT ...]] [-w]
-#             [--count] [--cgroupmap mappath]
+#             [--count] [--cgroupmap mappath] [--mntnsmap mappath]
 #
 # bindsnoop reports socket options set before the bind call
 # that would impact this system call behavior:
@@ -28,6 +28,7 @@
 
 from __future__ import print_function, absolute_import, unicode_literals
 from bcc import BPF, DEBUG_SOURCE
+from bcc.containers import filter_by_containers
 from bcc.utils import printb
 import argparse
 import re
@@ -51,6 +52,7 @@
     ./bindsnoop -E        # report bind errors
     ./bindsnoop --count   # count bind per src ip
     ./bindsnoop --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./bindsnoop --mntnsmap  mappath  # only trace mount namespaces in the map
 
 it is reporting socket options set before the bins call
 impacting system call behavior:
@@ -84,6 +86,8 @@
     help="count binds per src ip and port")
 parser.add_argument("--cgroupmap",
     help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+    help="trace mount namespaces in this BPF map only")
 parser.add_argument("--ebpf", action="store_true",
     help=argparse.SUPPRESS)
 parser.add_argument("--debug-source", action="store_true",
@@ -148,8 +152,6 @@
 };
 BPF_HASH(ipv6_count, struct ipv6_flow_key_t);
 
-CGROUP_MAP
-
 // bind options for event reporting
 union bind_options {
     u8 data;
@@ -174,7 +176,9 @@
 
     FILTER_UID
 
-    FILTER_CGROUP
+    if (container_should_be_filtered()) {
+        return 0;
+    }
 
     // stash the sock ptr for lookup on return
     currsock.update(&tid, &socket);
@@ -323,11 +327,6 @@
                bpf_get_current_comm(&data6.task, sizeof(data6.task));
                ipv6_bind_events.perf_submit(ctx, &data6, sizeof(data6));"""
     },
-    'filter_cgroup': """
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
-      return 0;
-    }""",
 }
 
 # code substitutions
@@ -351,22 +350,11 @@
         'if (uid != %s) { return 0; }' % args.uid)
 if args.errors:
     bpf_text = bpf_text.replace('FILTER_ERRORS', 'ignore_errors = 0;')
-if args.cgroupmap:
-    bpf_text = bpf_text.replace('FILTER_CGROUP', struct_init['filter_cgroup'])
-    bpf_text = bpf_text.replace(
-        'CGROUP_MAP',
-        (
-            'BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "%s");' %
-            args.cgroupmap
-        )
-    )
-
+bpf_text = filter_by_containers(args) + bpf_text
 bpf_text = bpf_text.replace('FILTER_PID', '')
 bpf_text = bpf_text.replace('FILTER_PORT', '')
 bpf_text = bpf_text.replace('FILTER_UID', '')
 bpf_text = bpf_text.replace('FILTER_ERRORS', '')
-bpf_text = bpf_text.replace('FILTER_CGROUP', '')
-bpf_text = bpf_text.replace('CGROUP_MAP', '')
 
 # selecting output format - 80 characters or wide, fitting IPv6 addresses
 header_fmt = "%8s %-12.12s %-4s %-15s %-5s %5s %2s"
diff --git a/tools/bindsnoop_example.txt b/tools/bindsnoop_example.txt
index 77e040e..c7c5135 100644
--- a/tools/bindsnoop_example.txt
+++ b/tools/bindsnoop_example.txt
@@ -59,7 +59,7 @@
 
 # ./bindsnoop.py --cgroupmap /sys/fs/bpf/test01
 
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
 
 
 In order to track heavy bind usage one can use --count option
@@ -74,7 +74,7 @@
 Usage message:
 # ./bindsnoop.py -h
 usage: bindsnoop.py [-h] [-t] [-w] [-p PID] [-P PORT] [-E] [-U] [-u UID]
-                  [--count] [--cgroupmap CGROUPMAP]
+                  [--count] [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP]
 
 Trace TCP binds
 
@@ -103,6 +103,7 @@
     ./bindsnoop -E        # report bind errors
     ./bindsnoop --count   # count bind per src ip
     ./bindsnoop --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./bindsnoop --mntnsmap  mappath  # only trace mount namespaces in the map
 
     it is reporting socket options set before the bins call
     impacting system call behavior:
diff --git a/tools/capable.py b/tools/capable.py
index 3852e22..94d1c32 100755
--- a/tools/capable.py
+++ b/tools/capable.py
@@ -15,6 +15,7 @@
 from os import getpid
 from functools import partial
 from bcc import BPF
+from bcc.containers import filter_by_containers
 import errno
 import argparse
 from time import strftime
@@ -28,7 +29,8 @@
     ./capable -U          # add user-space stacks to trace
     ./capable -x          # extra fields: show TID and INSETID columns
     ./capable --unique    # don't repeat stacks for the same pid or cgroup
-    ./capable --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./capable --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./capable --mntnsmap mappath   # only trace mount namespaces in the map
 """
 parser = argparse.ArgumentParser(
     description="Trace security capability checks",
@@ -46,6 +48,8 @@
     help="show extra fields in TID and INSETID columns")
 parser.add_argument("--cgroupmap",
     help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+    help="trace mount namespaces in this BPF map only")
 parser.add_argument("--unique", action="store_true",
     help="don't repeat stacks for the same pid or cgroup")
 args = parser.parse_args()
@@ -145,10 +149,6 @@
 BPF_HASH(seen, struct repeat_t, u64);
 #endif
 
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
 #if defined(USER_STACKS) || defined(KERNEL_STACKS)
 BPF_STACK_TRACE(stacks, 2048);
 #endif
@@ -173,12 +173,10 @@
     FILTER1
     FILTER2
     FILTER3
-#if CGROUPSET
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
+
+    if (container_should_be_filtered()) {
         return 0;
     }
-#endif
 
     u32 uid = bpf_get_current_uid_gid();
     struct data_t data = {.tgid = tgid, .pid = pid, .uid = uid, .cap = cap, .audit = audit, .insetid = insetid};
@@ -192,7 +190,7 @@
 #if UNIQUESET
     struct repeat_t repeat = {0,};
     repeat.cap = cap;
-#if CGROUPSET
+#if CGROUP_ID_SET
     repeat.cgroupid = bpf_get_current_cgroup_id();
 #else
     repeat.tgid = tgid;
@@ -229,11 +227,7 @@
 bpf_text = bpf_text.replace('FILTER2', '')
 bpf_text = bpf_text.replace('FILTER3',
     'if (pid == %s) { return 0; }' % getpid())
-if args.cgroupmap:
-    bpf_text = bpf_text.replace('CGROUPSET', '1')
-    bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
-    bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
 if args.unique:
     bpf_text = bpf_text.replace('UNIQUESET', '1')
 else:
diff --git a/tools/capable_example.txt b/tools/capable_example.txt
index bcd6d01..1701b6a 100644
--- a/tools/capable_example.txt
+++ b/tools/capable_example.txt
@@ -4,7 +4,7 @@
 capable traces calls to the kernel cap_capable() function, which does security
 capability checks, and prints details for each call. For example:
 
-# ./capable.py 
+# ./capable.py
 TIME      UID    PID    COMM             CAP  NAME                 AUDIT
 22:11:23  114    2676   snmpd            12   CAP_NET_ADMIN        1
 22:11:23  0      6990   run              24   CAP_SYS_RESOURCE     1
@@ -100,14 +100,14 @@
 
 # ./capable.py --cgroupmap /sys/fs/bpf/test01
 
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
 
 
 USAGE:
 
 # ./capable.py -h
 usage: capable.py [-h] [-v] [-p PID] [-K] [-U] [-x] [--cgroupmap CGROUPMAP]
-                  [--unique]
+                  [--mntnsmap MNTNSMAP] [--unique]
 
 Trace security capability checks
 
@@ -120,6 +120,7 @@
   -x, --extra           show extra fields in TID and INSETID columns
   --cgroupmap CGROUPMAP
                         trace cgroups in this BPF map only
+  --mntnsmap MNTNSMAP   trace mount namespaces in this BPF map only
   --unique              don't repeat stacks for the same pid or cgroup
 
 examples:
@@ -130,4 +131,5 @@
     ./capable -U          # add user-space stacks to trace
     ./capable -x          # extra fields: show TID and INSETID columns
     ./capable --unique    # don't repeat stacks for the same pid or cgroup
-    ./capable --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./capable --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./capable --mntnsmap mappath   # only trace mount namespaces in the map
diff --git a/tools/execsnoop.py b/tools/execsnoop.py
index 9879d2c..53052d3 100755
--- a/tools/execsnoop.py
+++ b/tools/execsnoop.py
@@ -19,6 +19,7 @@
 
 from __future__ import print_function
 from bcc import BPF
+from bcc.containers import filter_by_containers
 from bcc.utils import ArgString, printb
 import bcc.utils as utils
 import argparse
@@ -57,7 +58,8 @@
     ./execsnoop -q        # add "quotemarks" around arguments
     ./execsnoop -n main   # only print command lines containing "main"
     ./execsnoop -l tpkg   # only print command where arguments contains "tpkg"
-    ./execsnoop --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./execsnoop --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./execsnoop --mntnsmap mappath   # only trace mount namespaces in the map
 """
 parser = argparse.ArgumentParser(
     description="Trace exec() syscalls",
@@ -71,6 +73,8 @@
     help="include failed exec()s")
 parser.add_argument("--cgroupmap",
     help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+    help="trace mount namespaces in this BPF map only")
 parser.add_argument("-u", "--uid", type=parse_uid, metavar='USER',
     help="trace this UID only")
 parser.add_argument("-q", "--quote", action="store_true",
@@ -113,9 +117,6 @@
     int retval;
 };
 
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
 BPF_PERF_OUTPUT(events);
 
 static int __submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
@@ -145,12 +146,9 @@
 
     UID_FILTER
 
-#if CGROUPSET
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
-      return 0;
+    if (container_should_be_filtered()) {
+        return 0;
     }
-#endif
 
     // create data here and pass to submit_arg to save stack space (#555)
     struct data_t data = {};
@@ -185,12 +183,9 @@
 
 int do_ret_sys_execve(struct pt_regs *ctx)
 {
-#if CGROUPSET
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
-      return 0;
+    if (container_should_be_filtered()) {
+        return 0;
     }
-#endif
 
     struct data_t data = {};
     struct task_struct *task;
@@ -223,11 +218,7 @@
         'if (uid != %s) { return 0; }' % args.uid)
 else:
     bpf_text = bpf_text.replace('UID_FILTER', '')
-if args.cgroupmap:
-    bpf_text = bpf_text.replace('CGROUPSET', '1')
-    bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
-    bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
 if args.ebpf:
     print(bpf_text)
     exit()
diff --git a/tools/execsnoop_example.txt b/tools/execsnoop_example.txt
index a90d007..8cdfe0d 100644
--- a/tools/execsnoop_example.txt
+++ b/tools/execsnoop_example.txt
@@ -83,7 +83,7 @@
 
 # ./execsnoop --cgroupmap /sys/fs/bpf/test01
 
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
 
 The -U option include UID on output:
 
@@ -121,6 +121,7 @@
   -x, --fails           include failed exec()s
   --cgroupmap CGROUPMAP
                         trace cgroups in this BPF map only
+  --mntnsmap MNTNSMAP   trace mount namespaces in this BPF map only
   -u USER, --uid USER   trace this UID only
   -q, --quote           Add quotemarks (") around arguments.
   -n NAME, --name NAME  only print commands matching this name (regex), any
@@ -142,4 +143,5 @@
     ./execsnoop -q        # add "quotemarks" around arguments
     ./execsnoop -n main   # only print command lines containing "main"
     ./execsnoop -l tpkg   # only print command where arguments contains "tpkg"
-    ./execsnoop --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./execsnoop --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./execsnoop --mntnsmap mappath   # only trace mount namespaces in the map
diff --git a/tools/opensnoop.py b/tools/opensnoop.py
index 28fe755..a68b13f 100755
--- a/tools/opensnoop.py
+++ b/tools/opensnoop.py
@@ -17,6 +17,7 @@
 
 from __future__ import print_function
 from bcc import ArgString, BPF
+from bcc.containers import filter_by_containers
 from bcc.utils import printb
 import argparse
 from datetime import datetime, timedelta
@@ -35,7 +36,8 @@
     ./opensnoop -n main   # only print process names containing "main"
     ./opensnoop -e        # show extended fields
     ./opensnoop -f O_WRONLY -f O_RDWR  # only print calls for writing
-    ./opensnoop --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./opensnoop --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./opensnoop --mntnsmap mappath   # only trace mount namespaces in the map
 """
 parser = argparse.ArgumentParser(
     description="Trace open() syscalls",
@@ -53,6 +55,8 @@
     help="trace this TID only")
 parser.add_argument("--cgroupmap",
     help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+    help="trace mount namespaces in this BPF map only")
 parser.add_argument("-u", "--uid",
     help="trace this UID only")
 parser.add_argument("-d", "--duration",
@@ -102,9 +106,6 @@
     int flags; // EXTENDED_STRUCT_MEMBER
 };
 
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
 BPF_PERF_OUTPUT(events);
 """
 
@@ -122,12 +123,11 @@
     PID_TID_FILTER
     UID_FILTER
     FLAGS_FILTER
-#if CGROUPSET
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
-      return 0;
+
+    if (container_should_be_filtered()) {
+        return 0;
     }
-#endif
+
     if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
         val.id = id;
         val.fname = filename;
@@ -177,12 +177,9 @@
     PID_TID_FILTER
     UID_FILTER
     FLAGS_FILTER
-#if CGROUPSET
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
-      return 0;
+    if (container_should_be_filtered()) {
+        return 0;
     }
-#endif
 
     struct data_t data = {};
     bpf_get_current_comm(&data.comm, sizeof(data.comm));
@@ -221,11 +218,7 @@
         'if (uid != %s) { return 0; }' % args.uid)
 else:
     bpf_text = bpf_text.replace('UID_FILTER', '')
-if args.cgroupmap:
-    bpf_text = bpf_text.replace('CGROUPSET', '1')
-    bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
-    bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
 if args.flag_filter:
     bpf_text = bpf_text.replace('FLAGS_FILTER',
         'if (!(flags & %d)) { return 0; }' % flag_filter_mask)
diff --git a/tools/opensnoop_example.txt b/tools/opensnoop_example.txt
index 44f0e33..f15e84f 100644
--- a/tools/opensnoop_example.txt
+++ b/tools/opensnoop_example.txt
@@ -187,14 +187,15 @@
 
 # ./opensnoop --cgroupmap /sys/fs/bpf/test01
 
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
 
 
 USAGE message:
 
 # ./opensnoop -h
-usage: opensnoop [-h] [-T] [-x] [-p PID] [-t TID] [-d DURATION] [-n NAME]
-                 [-e] [-f FLAG_FILTER]
+usage: opensnoop.py [-h] [-T] [-U] [-x] [-p PID] [-t TID]
+                    [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP] [-u UID]
+                    [-d DURATION] [-n NAME] [-e] [-f FLAG_FILTER]
 
 Trace open() syscalls
 
@@ -205,6 +206,9 @@
   -x, --failed          only show failed opens
   -p PID, --pid PID     trace this PID only
   -t TID, --tid TID     trace this TID only
+  --cgroupmap CGROUPMAP
+                        trace cgroups in this BPF map only
+  --mntnsmap MNTNSMAP   trace mount namespaces in this BPF map only
   -u UID, --uid UID     trace this UID only
   -d DURATION, --duration DURATION
                         total duration of trace in seconds
@@ -226,3 +230,5 @@
     ./opensnoop -n main   # only print process names containing "main"
     ./opensnoop -e        # show extended fields
     ./opensnoop -f O_WRONLY -f O_RDWR  # only print calls for writing
+    ./opensnoop --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./opensnoop --mntnsmap mappath   # only trace mount namespaces in the map
diff --git a/tools/profile.py b/tools/profile.py
index 2067933..dd6f65f 100755
--- a/tools/profile.py
+++ b/tools/profile.py
@@ -24,10 +24,11 @@
 #
 # 15-Jul-2016   Brendan Gregg   Created this.
 # 20-Oct-2016      "      "     Switched to use the new 4.9 support.
-# 26-Jan-2019      "      "     Changed to exclude CPU idle by default. 
+# 26-Jan-2019      "      "     Changed to exclude CPU idle by default.
 
 from __future__ import print_function
 from bcc import BPF, PerfType, PerfSWConfig
+from bcc.containers import filter_by_containers
 from sys import stderr
 from time import sleep
 import argparse
@@ -72,7 +73,8 @@
     ./profile -L 185      # only profile thread with TID 185
     ./profile -U          # only show user space stacks (no kernel)
     ./profile -K          # only show kernel space stacks (no user)
-    ./profile --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./profile --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./profile --mntnsmap mappath   # only trace mount namespaces in the map
 """
 parser = argparse.ArgumentParser(
     description="Profile CPU stack traces at a timed interval",
@@ -115,6 +117,8 @@
     help=argparse.SUPPRESS)
 parser.add_argument("--cgroupmap",
     help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+    help="trace mount namespaces in this BPF map only")
 
 # option logic
 args = parser.parse_args()
@@ -146,10 +150,6 @@
 BPF_HASH(counts, struct key_t);
 BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
 
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
 // This code gets a bit complex. Probably not suitable for casual hacking.
 
 int do_perf_event(struct bpf_perf_event_data *ctx) {
@@ -163,12 +163,9 @@
     if (!(THREAD_FILTER))
         return 0;
 
-#if CGROUPSET
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
+    if (container_should_be_filtered()) {
         return 0;
     }
-#endif
 
     // create map key
     struct key_t key = {.pid = tgid};
@@ -246,11 +243,7 @@
     stack_context = "user + kernel"
 bpf_text = bpf_text.replace('USER_STACK_GET', user_stack_get)
 bpf_text = bpf_text.replace('KERNEL_STACK_GET', kernel_stack_get)
-if args.cgroupmap:
-    bpf_text = bpf_text.replace('CGROUPSET', '1')
-    bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
-    bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
 
 sample_freq = 0
 sample_period = 0
diff --git a/tools/profile_example.txt b/tools/profile_example.txt
index bb3c5ae..2c7e702 100644
--- a/tools/profile_example.txt
+++ b/tools/profile_example.txt
@@ -708,7 +708,7 @@
 
 # ./profile --cgroupmap /sys/fs/bpf/test01
 
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
 
 
 USAGE message:
@@ -717,7 +717,7 @@
 usage: profile.py [-h] [-p PID | -L TID] [-U | -K] [-F FREQUENCY | -c COUNT]
                   [-d] [-a] [-I] [-f]
                   [--stack-storage-size STACK_STORAGE_SIZE] [-C CPU]
-                  [--cgroupmap CGROUPMAP]
+                  [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP]
                   [duration]
 
 Profile CPU stack traces at a timed interval
@@ -750,6 +750,7 @@
   -C CPU, --cpu CPU     cpu number to run profile on
   --cgroupmap CGROUPMAP
                         trace cgroups in this BPF map only
+  --mntnsmap MNTNSMAP   trace mount namespaces in this BPF map only
 
 examples:
     ./profile             # profile stack traces at 49 Hertz until Ctrl-C
@@ -761,4 +762,5 @@
     ./profile -L 185      # only profile thread with TID 185
     ./profile -U          # only show user space stacks (no kernel)
     ./profile -K          # only show kernel space stacks (no user)
-    ./profile --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./profile --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./profile --mntnsmap mappath   # only trace mount namespaces in the map
diff --git a/tools/tcpaccept.py b/tools/tcpaccept.py
index 03b05e0..4aa7fd7 100755
--- a/tools/tcpaccept.py
+++ b/tools/tcpaccept.py
@@ -16,6 +16,7 @@
 # 14-Feb-2016      "      "     Switch to bpf_perf_output.
 
 from __future__ import print_function
+from bcc.containers import filter_by_containers
 from bcc import BPF
 from socket import inet_ntop, AF_INET, AF_INET6
 from struct import pack
@@ -29,7 +30,8 @@
     ./tcpaccept -t        # include timestamps
     ./tcpaccept -P 80,81  # only trace port 80 and 81
     ./tcpaccept -p 181    # only trace PID 181
-    ./tcpaccept --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./tcpaccept --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./tcpaccept --mntnsmap mappath   # only trace mount namespaces in the map
 """
 parser = argparse.ArgumentParser(
     description="Trace TCP accepts",
@@ -45,6 +47,8 @@
     help="comma-separated list of local ports to trace")
 parser.add_argument("--cgroupmap",
     help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+    help="trace mount namespaces in this BPF map only")
 parser.add_argument("--ebpf", action="store_true",
     help=argparse.SUPPRESS)
 args = parser.parse_args()
@@ -80,11 +84,6 @@
     char task[TASK_COMM_LEN];
 };
 BPF_PERF_OUTPUT(ipv6_events);
-
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
 """
 
 #
@@ -97,12 +96,9 @@
 bpf_text_kprobe = """
 int kretprobe__inet_csk_accept(struct pt_regs *ctx)
 {
-#if CGROUPSET
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
+    if (container_should_be_filtered()) {
         return 0;
     }
-#endif
 
     struct sock *newsk = (struct sock *)PT_REGS_RC(ctx);
     u32 pid = bpf_get_current_pid_tgid() >> 32;
@@ -115,21 +111,21 @@
     // check this is TCP
     u8 protocol = 0;
     // workaround for reading the sk_protocol bitfield:
-    
+
     // Following comments add by Joe Yin:
     // Unfortunately,it can not work since Linux 4.10,
     // because the sk_wmem_queued is not following the bitfield of sk_protocol.
     // And the following member is sk_gso_max_segs.
     // So, we can use this:
     // bpf_probe_read(&protocol, 1, (void *)((u64)&newsk->sk_gso_max_segs) - 3);
-    // In order to  diff the pre-4.10 and 4.10+ ,introduce the variables gso_max_segs_offset,sk_lingertime, 
-    // sk_lingertime is closed to the gso_max_segs_offset,and  
-    // the offset between the two members is 4 
+    // In order to  diff the pre-4.10 and 4.10+ ,introduce the variables gso_max_segs_offset,sk_lingertime,
+    // sk_lingertime is closed to the gso_max_segs_offset,and
+    // the offset between the two members is 4
 
     int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
     int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);
 
-    if (sk_lingertime_offset - gso_max_segs_offset == 4) 
+    if (sk_lingertime_offset - gso_max_segs_offset == 4)
         // 4.10+ with little endian
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
         protocol = *(u8 *)((u64)&newsk->sk_gso_max_segs - 3);
@@ -199,11 +195,7 @@
     lports_if = ' && '.join(['lport != %d' % lport for lport in lports])
     bpf_text = bpf_text.replace('##FILTER_PORT##',
         'if (%s) { return 0; }' % lports_if)
-if args.cgroupmap:
-    bpf_text = bpf_text.replace('CGROUPSET', '1')
-    bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
-    bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
 if debug or args.ebpf:
     print(bpf_text)
     if args.ebpf:
diff --git a/tools/tcpaccept_example.txt b/tools/tcpaccept_example.txt
index 5b6b1a6..9381565 100644
--- a/tools/tcpaccept_example.txt
+++ b/tools/tcpaccept_example.txt
@@ -38,7 +38,7 @@
 
 # ./tcpaccept --cgroupmap /sys/fs/bpf/test01
 
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
 
 
 USAGE message:
@@ -62,4 +62,5 @@
     ./tcpaccept -t        # include timestamps
     ./tcpaccept -P 80,81  # only trace port 80 and 81
     ./tcpaccept -p 181    # only trace PID 181
-    ./tcpaccept --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./tcpaccept --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./tcpaccept --mntnsmap mappath   # only trace mount namespaces in the map
\ No newline at end of file
diff --git a/tools/tcpconnect.py b/tools/tcpconnect.py
index 67f2cef..40878ee 100755
--- a/tools/tcpconnect.py
+++ b/tools/tcpconnect.py
@@ -21,6 +21,7 @@
 
 from __future__ import print_function
 from bcc import BPF
+from bcc.containers import filter_by_containers
 from bcc.utils import printb
 import argparse
 from socket import inet_ntop, ntohs, AF_INET, AF_INET6
@@ -37,7 +38,8 @@
     ./tcpconnect -U        # include UID
     ./tcpconnect -u 1000   # only trace UID 1000
     ./tcpconnect -c        # count connects per src ip and dest ip/port
-    ./tcpconnect --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./tcpconnect --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./tcpconnect --mntnsmap mappath   # only trace mount namespaces in the map
 """
 parser = argparse.ArgumentParser(
     description="Trace TCP connects",
@@ -57,6 +59,8 @@
     help="count connects per src ip and dest ip/port")
 parser.add_argument("--cgroupmap",
     help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+    help="trace mount namespaces in this BPF map only")
 parser.add_argument("--ebpf", action="store_true",
     help=argparse.SUPPRESS)
 args = parser.parse_args()
@@ -70,10 +74,6 @@
 
 BPF_HASH(currsock, u32, struct sock *);
 
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
 // separate data structs for ipv4 and ipv6
 struct ipv4_data_t {
     u64 ts_us;
@@ -116,12 +116,9 @@
 
 int trace_connect_entry(struct pt_regs *ctx, struct sock *sk)
 {
-#if CGROUPSET
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
-      return 0;
+    if (container_should_be_filtered()) {
+        return 0;
     }
-#endif
 
     u64 pid_tgid = bpf_get_current_pid_tgid();
     u32 pid = pid_tgid >> 32;
@@ -248,11 +245,7 @@
 if args.uid:
     bpf_text = bpf_text.replace('FILTER_UID',
         'if (uid != %s) { return 0; }' % args.uid)
-if args.cgroupmap:
-    bpf_text = bpf_text.replace('CGROUPSET', '1')
-    bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
-    bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
 
 bpf_text = bpf_text.replace('FILTER_PID', '')
 bpf_text = bpf_text.replace('FILTER_PORT', '')
diff --git a/tools/tcpconnect_example.txt b/tools/tcpconnect_example.txt
index cf97562..7efac4a 100644
--- a/tools/tcpconnect_example.txt
+++ b/tools/tcpconnect_example.txt
@@ -73,14 +73,14 @@
 
 # ./tcpconnect --cgroupmap /sys/fs/bpf/test01
 
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
 
 
 USAGE message:
 
 # ./tcpconnect -h
 usage: tcpconnect.py [-h] [-t] [-p PID] [-P PORT] [-U] [-u UID] [-c]
-                     [--cgroupmap CGROUPMAP]
+                     [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP]
 
 Trace TCP connects
 
@@ -104,4 +104,5 @@
     ./tcpconnect -U        # include UID
     ./tcpconnect -u 1000   # only trace UID 1000
     ./tcpconnect -c        # count connects per src ip and dest ip/port
-    ./tcpconnect --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./tcpconnect --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./tcpconnect --mntnsmap mappath   # only trace mount namespaces in the map
\ No newline at end of file
diff --git a/tools/tcptop.py b/tools/tcptop.py
index 9fb3ca2..510c4e8 100755
--- a/tools/tcptop.py
+++ b/tools/tcptop.py
@@ -26,6 +26,7 @@
 
 from __future__ import print_function
 from bcc import BPF
+from bcc.containers import filter_by_containers
 import argparse
 from socket import inet_ntop, AF_INET, AF_INET6
 from struct import pack
@@ -45,7 +46,8 @@
     ./tcptop           # trace TCP send/recv by host
     ./tcptop -C        # don't clear the screen
     ./tcptop -p 181    # only trace PID 181
-    ./tcptop --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./tcptop --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./tcptop --mntnsmap mappath   # only trace mount namespaces in the map
 """
 parser = argparse.ArgumentParser(
     description="Summarize TCP send/recv throughput by host",
@@ -63,6 +65,8 @@
     help="number of outputs")
 parser.add_argument("--cgroupmap",
     help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+    help="trace mount namespaces in this BPF map only")
 parser.add_argument("--ebpf", action="store_true",
     help=argparse.SUPPRESS)
 args = parser.parse_args()
@@ -98,21 +102,16 @@
 BPF_HASH(ipv6_send_bytes, struct ipv6_key_t);
 BPF_HASH(ipv6_recv_bytes, struct ipv6_key_t);
 
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
 int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk,
     struct msghdr *msg, size_t size)
 {
-    u32 pid = bpf_get_current_pid_tgid() >> 32;
-    FILTER
-#if CGROUPSET
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
+    if (container_should_be_filtered()) {
         return 0;
     }
-#endif
+
+    u32 pid = bpf_get_current_pid_tgid() >> 32;
+    FILTER_PID
+
     u16 dport = 0, family = sk->__sk_common.skc_family;
 
     if (family == AF_INET) {
@@ -148,14 +147,13 @@
  */
 int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied)
 {
-    u32 pid = bpf_get_current_pid_tgid() >> 32;
-    FILTER
-#if CGROUPSET
-    u64 cgroupid = bpf_get_current_cgroup_id();
-    if (cgroupset.lookup(&cgroupid) == NULL) {
+    if (container_should_be_filtered()) {
         return 0;
     }
-#endif
+
+    u32 pid = bpf_get_current_pid_tgid() >> 32;
+    FILTER_PID
+
     u16 dport = 0, family = sk->__sk_common.skc_family;
     u64 *val, zero = 0;
 
@@ -190,15 +188,11 @@
 
 # code substitutions
 if args.pid:
-    bpf_text = bpf_text.replace('FILTER',
+    bpf_text = bpf_text.replace('FILTER_PID',
         'if (pid != %s) { return 0; }' % args.pid)
 else:
-    bpf_text = bpf_text.replace('FILTER', '')
-if args.cgroupmap:
-    bpf_text = bpf_text.replace('CGROUPSET', '1')
-    bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
-    bpf_text = bpf_text.replace('CGROUPSET', '0')
+    bpf_text = bpf_text.replace('FILTER_PID', '')
+bpf_text = filter_by_containers(args) + bpf_text
 if debug or args.ebpf:
     print(bpf_text)
     if args.ebpf:
diff --git a/tools/tcptop_example.txt b/tools/tcptop_example.txt
index 379aff2..e29e2fa 100644
--- a/tools/tcptop_example.txt
+++ b/tools/tcptop_example.txt
@@ -97,13 +97,14 @@
 
 # tcptop --cgroupmap /sys/fs/bpf/test01
 
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md
 
 
 USAGE:
 
 # tcptop -h
 usage: tcptop.py [-h] [-C] [-S] [-p PID] [--cgroupmap CGROUPMAP]
+                 [--mntnsmap MNTNSMAP]
                  [interval] [count]
 
 Summarize TCP send/recv throughput by host
@@ -125,3 +126,4 @@
     ./tcptop -C        # don't clear the screen
     ./tcptop -p 181    # only trace PID 181
-    ./tcptop --cgroupmap ./mappath  # only trace cgroups in this BPF map
+    ./tcptop --cgroupmap mappath  # only trace cgroups in this BPF map
+    ./tcptop --mntnsmap mappath   # only trace mount namespaces in the map
diff --git a/tools/tcptracer.py b/tools/tcptracer.py
index 8e6e1ec..7f67d33 100755
--- a/tools/tcptracer.py
+++ b/tools/tcptracer.py
@@ -16,6 +16,7 @@
 # Licensed under the Apache License, Version 2.0 (the "License")
 from __future__ import print_function
 from bcc import BPF
+from bcc.containers import filter_by_containers
 
 import argparse as ap
 from socket import inet_ntop, AF_INET, AF_INET6
@@ -31,6 +32,8 @@
                     help="trace this Network Namespace only")
 parser.add_argument("--cgroupmap",
                     help="trace cgroups in this BPF map only")
+parser.add_argument("--mntnsmap",
+                    help="trace mount namespaces in this BPF map only")
 parser.add_argument("-v", "--verbose", action="store_true",
                     help="include Network Namespace in the output")
 parser.add_argument("--ebpf", action="store_true",
@@ -79,10 +82,6 @@
 };
 BPF_PERF_OUTPUT(tcp_ipv6_event);
 
-#if CGROUPSET
-BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH");
-#endif
-
 // tcp_set_state doesn't run in the context of the process that initiated the
 // connection so we need to store a map TUPLE -> PID to send the right PID on
 // the event
@@ -179,12 +178,9 @@
 
 int trace_connect_v4_entry(struct pt_regs *ctx, struct sock *sk)
 {
-#if CGROUPSET
-  u64 cgroupid = bpf_get_current_cgroup_id();
-  if (cgroupset.lookup(&cgroupid) == NULL) {
-      return 0;
+  if (container_should_be_filtered()) {
+    return 0;
   }
-#endif
 
   u64 pid = bpf_get_current_pid_tgid();
 
@@ -233,12 +229,9 @@
 
 int trace_connect_v6_entry(struct pt_regs *ctx, struct sock *sk)
 {
-#if CGROUPSET
-  u64 cgroupid = bpf_get_current_cgroup_id();
-  if (cgroupset.lookup(&cgroupid) == NULL) {
-      return 0;
+  if (container_should_be_filtered()) {
+    return 0;
   }
-#endif
   u64 pid = bpf_get_current_pid_tgid();
 
   ##FILTER_PID##
@@ -371,12 +364,9 @@
 
 int trace_close_entry(struct pt_regs *ctx, struct sock *skp)
 {
-#if CGROUPSET
-  u64 cgroupid = bpf_get_current_cgroup_id();
-  if (cgroupset.lookup(&cgroupid) == NULL) {
-      return 0;
+  if (container_should_be_filtered()) {
+    return 0;
   }
-#endif
 
   u64 pid = bpf_get_current_pid_tgid();
 
@@ -439,12 +429,9 @@
 
 int trace_accept_return(struct pt_regs *ctx)
 {
-#if CGROUPSET
-  u64 cgroupid = bpf_get_current_cgroup_id();
-  if (cgroupset.lookup(&cgroupid) == NULL) {
-      return 0;
+  if (container_should_be_filtered()) {
+    return 0;
   }
-#endif
 
   struct sock *newsk = (struct sock *)PT_REGS_RC(ctx);
   u64 pid = bpf_get_current_pid_tgid();
@@ -614,11 +601,7 @@
 
 bpf_text = bpf_text.replace('##FILTER_PID##', pid_filter)
 bpf_text = bpf_text.replace('##FILTER_NETNS##', netns_filter)
-if args.cgroupmap:
-    bpf_text = bpf_text.replace('CGROUPSET', '1')
-    bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap)
-else:
-    bpf_text = bpf_text.replace('CGROUPSET', '0')
+bpf_text = filter_by_containers(args) + bpf_text
 
 if args.ebpf:
     print(bpf_text)
diff --git a/tools/tcptracer_example.txt b/tools/tcptracer_example.txt
index b6e5258..0f61ecc 100644
--- a/tools/tcptracer_example.txt
+++ b/tools/tcptracer_example.txt
@@ -42,4 +42,4 @@
 
 # ./tcptracer --cgroupmap /sys/fs/bpf/test01
 
-For more details, see docs/filtering_by_cgroups.md
+For more details, see docs/special_filtering.md