tools/opensnoop: Snoop all open related syscall stubs

kernel v5.6 introduces fddb5d430ad9 ("open: introduce openat2(2) syscall").
Even though do_sys_open still exists, it might be optimized off final binary
depending on compilers. So we can't catch do_sys_open in some cases.

This patch uses ksymname to try to get entries of open, openat and openat2,
and changes the definitions of the trace functions to snoop them all. This
works for both kprobe and kfunc.

Credit to Yonghong Song for better code organization.

Signed-off-by: He Zhe <[email protected]>
diff --git a/tools/opensnoop.py b/tools/opensnoop.py
index 51d3dc0..3b50887 100755
--- a/tools/opensnoop.py
+++ b/tools/opensnoop.py
@@ -112,32 +112,6 @@
 bpf_text_kprobe = """
 BPF_HASH(infotmp, u64, struct val_t);
 
-int trace_entry(struct pt_regs *ctx, int dfd, const char __user *filename, int flags)
-{
-    struct val_t val = {};
-    u64 id = bpf_get_current_pid_tgid();
-    u32 pid = id >> 32; // PID is higher part
-    u32 tid = id;       // Cast and get the lower part
-    u32 uid = bpf_get_current_uid_gid();
-
-    PID_TID_FILTER
-    UID_FILTER
-    FLAGS_FILTER
-
-    if (container_should_be_filtered()) {
-        return 0;
-    }
-
-    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
-        val.id = id;
-        val.fname = filename;
-        val.flags = flags; // EXTENDED_STRUCT_MEMBER
-        infotmp.update(&id, &val);
-    }
-
-    return 0;
-};
-
 int trace_return(struct pt_regs *ctx)
 {
     u64 id = bpf_get_current_pid_tgid();
@@ -166,9 +140,91 @@
 }
 """
 
-bpf_text_kfunc= """
-KRETFUNC_PROBE(do_sys_open, int dfd, const char __user *filename, int flags, int mode, int ret)
+bpf_text_kprobe_header_open = """
+int syscall__trace_entry_open(struct pt_regs *ctx, const char __user *filename, int flags)
 {
+"""
+
+bpf_text_kprobe_header_openat = """
+int syscall__trace_entry_openat(struct pt_regs *ctx, int dfd, const char __user *filename, int flags)
+{
+"""
+
+bpf_text_kprobe_header_openat2 = """
+#include <uapi/linux/openat2.h>
+int syscall__trace_entry_openat2(struct pt_regs *ctx, int dfd, const char __user *filename, struct open_how *how)
+{
+    int flags = how->flags;
+"""
+
+bpf_text_kprobe_body = """
+    struct val_t val = {};
+    u64 id = bpf_get_current_pid_tgid();
+    u32 pid = id >> 32; // PID is higher part
+    u32 tid = id;       // Cast and get the lower part
+    u32 uid = bpf_get_current_uid_gid();
+
+    PID_TID_FILTER
+    UID_FILTER
+    FLAGS_FILTER
+
+    if (container_should_be_filtered()) {
+        return 0;
+    }
+
+    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
+        val.id = id;
+        val.fname = filename;
+        val.flags = flags; // EXTENDED_STRUCT_MEMBER
+        infotmp.update(&id, &val);
+    }
+
+    return 0;
+};
+"""
+
+bpf_text_kfunc_header_open = """
+#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
+KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
+{
+    const char __user *filename = (char *)PT_REGS_PARM1(regs);
+    int flags = PT_REGS_PARM2(regs);
+#else
+KRETFUNC_PROBE(FNNAME, const char __user *filename, int flags, int ret)
+{
+#endif
+"""
+
+bpf_text_kfunc_header_openat = """
+#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
+KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
+{
+    int dfd = PT_REGS_PARM1(regs);
+    const char __user *filename = (char *)PT_REGS_PARM2(regs);
+    int flags = PT_REGS_PARM3(regs);
+#else
+KRETFUNC_PROBE(FNNAME, int dfd, const char __user *filename, int flags, int ret)
+{
+#endif
+"""
+
+bpf_text_kfunc_header_openat2 = """
+#include <uapi/linux/openat2.h>
+#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
+KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
+{
+    int dfd = PT_REGS_PARM1(regs);
+    const char __user *filename = (char *)PT_REGS_PARM2(regs);
+    struct open_how __user *how = (struct open_how *)PT_REGS_PARM3(regs);
+    int flags = how->flags;
+#else
+KRETFUNC_PROBE(FNNAME, int dfd, const char __user *filename, struct open_how __user *how, int ret)
+{
+    int flags = how->flags;
+#endif
+"""
+
+bpf_text_kfunc_body = """
     u64 id = bpf_get_current_pid_tgid();
     u32 pid = id >> 32; // PID is higher part
     u32 tid = id;       // Cast and get the lower part
@@ -199,12 +255,38 @@
 }
 """
 
+b = BPF(text='')
+# open and openat are always in place since 2.6.16
+fnname_open = b.get_syscall_prefix().decode() + 'open'
+fnname_openat = b.get_syscall_prefix().decode() + 'openat'
+fnname_openat2 = b.get_syscall_prefix().decode() + 'openat2'
+if b.ksymname(fnname_openat2) == -1:
+    fnname_openat2 = None
+
 is_support_kfunc = BPF.support_kfunc()
 if is_support_kfunc:
-    bpf_text += bpf_text_kfunc
+    bpf_text += bpf_text_kfunc_header_open.replace('FNNAME', fnname_open)
+    bpf_text += bpf_text_kfunc_body
+
+    bpf_text += bpf_text_kfunc_header_openat.replace('FNNAME', fnname_openat)
+    bpf_text += bpf_text_kfunc_body
+
+    if fnname_openat2:
+        bpf_text += bpf_text_kfunc_header_openat2.replace('FNNAME', fnname_openat2)
+        bpf_text += bpf_text_kfunc_body
 else:
     bpf_text += bpf_text_kprobe
 
+    bpf_text += bpf_text_kprobe_header_open
+    bpf_text += bpf_text_kprobe_body
+
+    bpf_text += bpf_text_kprobe_header_openat
+    bpf_text += bpf_text_kprobe_body
+
+    if fnname_openat2:
+        bpf_text += bpf_text_kprobe_header_openat2
+        bpf_text += bpf_text_kprobe_body
+
 if args.tid:  # TID trumps PID
     bpf_text = bpf_text.replace('PID_TID_FILTER',
         'if (tid != %s) { return 0; }' % args.tid)
@@ -235,8 +317,15 @@
 # initialize BPF
 b = BPF(text=bpf_text)
 if not is_support_kfunc:
-    b.attach_kprobe(event="do_sys_open", fn_name="trace_entry")
-    b.attach_kretprobe(event="do_sys_open", fn_name="trace_return")
+    b.attach_kprobe(event=fnname_open, fn_name="syscall__trace_entry_open")
+    b.attach_kretprobe(event=fnname_open, fn_name="trace_return")
+
+    b.attach_kprobe(event=fnname_openat, fn_name="syscall__trace_entry_openat")
+    b.attach_kretprobe(event=fnname_openat, fn_name="trace_return")
+
+    if fnname_openat2:
+        b.attach_kprobe(event=fnname_openat2, fn_name="syscall__trace_entry_openat2")
+        b.attach_kretprobe(event=fnname_openat2, fn_name="trace_return")
 
 initial_ts = 0