Merge pull request #1593 from netgroup-polito/bpf_prog_table_delete_element
Allow to delete elements from prog tables
diff --git a/.travis.yml b/.travis.yml
index 736ddad..dd36669 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,4 +3,5 @@
- sudo apt-get install -y python-pip
- sudo pip install pep8
script:
+ - ./scripts/check-helpers.sh
- find tools/ -type f -name "*.py" | xargs pep8 -r --show-source --ignore=E123,E125,E126,E127,E128,E302
diff --git a/docs/kernel-versions.md b/docs/kernel-versions.md
index 833ba91..3e7cbe5 100644
--- a/docs/kernel-versions.md
+++ b/docs/kernel-versions.md
@@ -164,3 +164,4 @@
`BPF_FUNC_xdp_adjust_head()` | 4.10 | [`17bedab27231`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=17bedab2723145d17b14084430743549e6943d03)
`BPF_FUNC_xdp_adjust_meta()` | 4.15 | [`de8f3a83b0a0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=de8f3a83b0a0fddb2cf56e7a718127e9619ea3da)
`BPF_FUNC_override_return()` | 4.16 | [`9802d86585db`](https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=9802d86585db91655c7d1929a4f6bbe0952ea88e)
+`BPF_FUNC_sock_ops_cb_flags_set()` | 4.16 | [`b13d88072172`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b13d880721729384757f235166068c315326f4a1)
diff --git a/scripts/check-helpers.sh b/scripts/check-helpers.sh
new file mode 100755
index 0000000..9350aed
--- /dev/null
+++ b/scripts/check-helpers.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+ret=0
+
+grep -oP '(?<={")\w+(?=", "\d\.\d+")' src/cc/libbpf.c | sort > /tmp/libbpf.txt
+grep -oP "(?<=BPF_FUNC_)\w+" docs/kernel-versions.md | sort > /tmp/doc.txt
+dif=`diff /tmp/libbpf.txt /tmp/doc.txt`
+if [ $? -ne 0 ]; then
+ echo "The lists of helpers in src/cc/libbpf.c and docs/kernel-versions.md differ:"
+ echo -e "$dif\n"
+ ((ret++))
+fi
+
+grep -oP "(?<=^\sFN\()\w+" src/cc/compat/linux/bpf.h | tail -n +2 | sort > /tmp/compat.txt
+dif=`diff /tmp/doc.txt /tmp/compat.txt`
+if [ $? -ne 0 ]; then
+ echo "The lists of helpers in docs/kernel-versions.md and src/cc/compat/linux/bpf.h differ:"
+ echo -e "$dif\n"
+ ((ret++))
+fi
+
+grep -oP "(?<=BPF_FUNC_)\w+" src/cc/export/helpers.h | sort -u > /tmp/export.txt
+dif=`diff /tmp/compat.txt /tmp/export.txt`
+if [ $? -ne 0 ]; then
+ echo "The lists of helpers in src/cc/compat/linux/bpf.h and src/cc/export/helpers.h differ:"
+ echo "$dif"
+ ((ret++))
+fi
+
+exit $ret
diff --git a/src/cc/compat/linux/bpf.h b/src/cc/compat/linux/bpf.h
index b24df88..3f3ff80 100644
--- a/src/cc/compat/linux/bpf.h
+++ b/src/cc/compat/linux/bpf.h
@@ -642,6 +642,14 @@
* @optlen: length of optval in bytes
* Return: 0 or negative error
*
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
* int bpf_skb_adjust_room(skb, len_diff, mode, flags)
* Grow or shrink room in sk_buff.
* @skb: pointer to skb
@@ -748,7 +756,8 @@
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
FN(getsockopt), \
- FN(override_return),
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -952,8 +961,9 @@
struct bpf_sock_ops {
__u32 op;
union {
- __u32 reply;
- __u32 replylong[4];
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
};
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
@@ -968,8 +978,39 @@
*/
__u32 snd_cwnd;
__u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
};
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -1003,8 +1044,46 @@
* a congestion threshold. RTTs above
* this indicate congestion
*/
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
};
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
+};
+
+
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h
index ac0eca1..469a421 100644
--- a/src/cc/compat/linux/virtual_bpf.h
+++ b/src/cc/compat/linux/virtual_bpf.h
@@ -641,6 +641,14 @@
* @optlen: length of optval in bytes
* Return: 0 or negative error
*
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
* int bpf_skb_adjust_room(skb, len_diff, mode, flags)
* Grow or shrink room in sk_buff.
* @skb: pointer to skb
@@ -747,7 +755,8 @@
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
FN(getsockopt), \
- FN(override_return),
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -951,8 +960,9 @@
struct bpf_sock_ops {
__u32 op;
union {
- __u32 reply;
- __u32 replylong[4];
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
};
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
@@ -967,8 +977,39 @@
*/
__u32 snd_cwnd;
__u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
};
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -1002,6 +1043,43 @@
* a congestion threshold. RTTs above
* this indicate congestion
*/
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
};
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
diff --git a/src/cc/export/helpers.h b/src/cc/export/helpers.h
index 1ffc0d6..cb72552 100644
--- a/src/cc/export/helpers.h
+++ b/src/cc/export/helpers.h
@@ -328,6 +328,8 @@
(void *) BPF_FUNC_xdp_adjust_head;
static int (*bpf_override_return)(void *pt_regs, unsigned long rc) =
(void *) BPF_FUNC_override_return;
+static int (*bpf_sock_ops_cb_flags_set)(void *skops, int flags) =
+ (void *)BPF_FUNC_sock_ops_cb_flags_set;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
index 41f1560..ea4b101 100644
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -148,6 +148,7 @@
{"perf_prog_read_value", "4.15"},
{"getsockopt", "4.15"},
{"override_return", "4.16"},
+ {"sock_ops_cb_flags_set", "4.16"},
};
static int probe_perf_reader_page_cnt = 8;