Merge pull request #1593 from netgroup-polito/bpf_prog_table_delete_element

Allow to delete elements from prog tables
diff --git a/.travis.yml b/.travis.yml
index 736ddad..dd36669 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,4 +3,5 @@
   - sudo apt-get install -y python-pip
   - sudo pip install pep8
 script:
+  - ./scripts/check-helpers.sh
   - find tools/ -type f -name "*.py" | xargs pep8 -r --show-source --ignore=E123,E125,E126,E127,E128,E302
diff --git a/docs/kernel-versions.md b/docs/kernel-versions.md
index 833ba91..3e7cbe5 100644
--- a/docs/kernel-versions.md
+++ b/docs/kernel-versions.md
@@ -164,3 +164,4 @@
 `BPF_FUNC_xdp_adjust_head()` | 4.10 | [`17bedab27231`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=17bedab2723145d17b14084430743549e6943d03)
 `BPF_FUNC_xdp_adjust_meta()` | 4.15 | [`de8f3a83b0a0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=de8f3a83b0a0fddb2cf56e7a718127e9619ea3da)
 `BPF_FUNC_override_return()` | 4.16 | [`9802d86585db`](https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/?id=9802d86585db91655c7d1929a4f6bbe0952ea88e)
+`BPF_FUNC_sock_ops_cb_flags_set()` | 4.16 | [`b13d88072172`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b13d880721729384757f235166068c315326f4a1)
diff --git a/scripts/check-helpers.sh b/scripts/check-helpers.sh
new file mode 100755
index 0000000..9350aed
--- /dev/null
+++ b/scripts/check-helpers.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+ret=0
+
+grep -oP '(?<={")\w+(?=", "\d\.\d+")' src/cc/libbpf.c | sort > /tmp/libbpf.txt
+grep -oP "(?<=BPF_FUNC_)\w+" docs/kernel-versions.md | sort > /tmp/doc.txt
+dif=`diff /tmp/libbpf.txt /tmp/doc.txt`
+if [ $? -ne 0 ]; then
+	echo "The lists of helpers in src/cc/libbpf.c and docs/kernel-versions.md differ:"
+	echo -e "$dif\n"
+	((ret++))
+fi
+
+grep -oP "(?<=^\sFN\()\w+" src/cc/compat/linux/bpf.h | tail -n +2 | sort > /tmp/compat.txt
+dif=`diff /tmp/doc.txt /tmp/compat.txt`
+if [ $? -ne 0 ]; then
+	echo "The lists of helpers in docs/kernel-versions.md and src/cc/compat/linux/bpf.h differ:"
+	echo -e "$dif\n"
+	((ret++))
+fi
+
+grep -oP "(?<=BPF_FUNC_)\w+" src/cc/export/helpers.h | sort -u > /tmp/export.txt
+dif=`diff /tmp/compat.txt /tmp/export.txt`
+if [ $? -ne 0 ]; then
+	echo "The lists of helpers in src/cc/compat/linux/bpf.h and src/cc/export/helpers.h differ:"
+	echo "$dif"
+	((ret++))
+fi
+
+exit $ret
diff --git a/src/cc/compat/linux/bpf.h b/src/cc/compat/linux/bpf.h
index b24df88..3f3ff80 100644
--- a/src/cc/compat/linux/bpf.h
+++ b/src/cc/compat/linux/bpf.h
@@ -642,6 +642,14 @@
  *     @optlen: length of optval in bytes
  *     Return: 0 or negative error
  *
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ *     Set callback flags for sock_ops
+ *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ *     @flags: flags value
+ *     Return: 0 for no error
+ *             -EINVAL if there is no full tcp socket
+ *             bits in flags that are not supported by current kernel
+ *
  * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
  *     Grow or shrink room in sk_buff.
  *     @skb: pointer to skb
@@ -748,7 +756,8 @@
 	FN(perf_event_read_value),	\
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
-	FN(override_return),
+	FN(override_return),		\
+	FN(sock_ops_cb_flags_set),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -952,8 +961,9 @@
 struct bpf_sock_ops {
 	__u32 op;
 	union {
-		__u32 reply;
-		__u32 replylong[4];
+		__u32 args[4];		/* Optionally passed to bpf program */
+		__u32 reply;		/* Returned by bpf program */
+		__u32 replylong[4];	/* Optionally returned by bpf prog */
 	};
 	__u32 family;
 	__u32 remote_ip4;	/* Stored in network byte order */
@@ -968,8 +978,39 @@
 				 */
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
+	__u32 bpf_sock_ops_cb_flags;	/* flags defined in uapi/linux/tcp.h */
+	__u32 state;
+	__u32 rtt_min;
+	__u32 snd_ssthresh;
+	__u32 rcv_nxt;
+	__u32 snd_nxt;
+	__u32 snd_una;
+	__u32 mss_cache;
+	__u32 ecn_flags;
+	__u32 rate_delivered;
+	__u32 rate_interval_us;
+	__u32 packets_out;
+	__u32 retrans_out;
+	__u32 total_retrans;
+	__u32 segs_in;
+	__u32 data_segs_in;
+	__u32 segs_out;
+	__u32 data_segs_out;
+	__u32 lost_out;
+	__u32 sacked_out;
+	__u32 sk_txhash;
+	__u64 bytes_received;
+	__u64 bytes_acked;
 };
 
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS	0x7	/* Mask of all currently
+						 * supported cb flags
+						 */
+
 /* List of known BPF sock_ops operators.
  * New entries can only be added at the end
  */
@@ -1003,8 +1044,46 @@
 					 * a congestion threshold. RTTs above
 					 * this indicate congestion
 					 */
+	BPF_SOCK_OPS_RTO_CB,		/* Called when an RTO has triggered.
+					 * Arg1: value of icsk_retransmits
+					 * Arg2: value of icsk_rto
+					 * Arg3: whether RTO has expired
+					 */
+	BPF_SOCK_OPS_RETRANS_CB,	/* Called when skb is retransmitted.
+					 * Arg1: sequence number of 1st byte
+					 * Arg2: # segments
+					 * Arg3: return value of
+					 *       tcp_transmit_skb (0 => success)
+					 */
+	BPF_SOCK_OPS_STATE_CB,		/* Called when TCP changes state.
+					 * Arg1: old_state
+					 * Arg2: new_state
+					 */
 };
 
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+	BPF_TCP_ESTABLISHED = 1,
+	BPF_TCP_SYN_SENT,
+	BPF_TCP_SYN_RECV,
+	BPF_TCP_FIN_WAIT1,
+	BPF_TCP_FIN_WAIT2,
+	BPF_TCP_TIME_WAIT,
+	BPF_TCP_CLOSE,
+	BPF_TCP_CLOSE_WAIT,
+	BPF_TCP_LAST_ACK,
+	BPF_TCP_LISTEN,
+	BPF_TCP_CLOSING,        /* Now a valid state */
+	BPF_TCP_NEW_SYN_RECV,
+
+	BPF_TCP_MAX_STATES      /* Leave at the end! */
+};
+
+
 #define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
 #define TCP_BPF_SNDCWND_CLAMP	1002	/* Set sndcwnd_clamp */
 
diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h
index ac0eca1..469a421 100644
--- a/src/cc/compat/linux/virtual_bpf.h
+++ b/src/cc/compat/linux/virtual_bpf.h
@@ -641,6 +641,14 @@
  *     @optlen: length of optval in bytes
  *     Return: 0 or negative error
  *
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ *     Set callback flags for sock_ops
+ *     @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ *     @flags: flags value
+ *     Return: 0 for no error
+ *             -EINVAL if there is no full tcp socket
+ *             bits in flags that are not supported by current kernel
+ *
  * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
  *     Grow or shrink room in sk_buff.
  *     @skb: pointer to skb
@@ -747,7 +755,8 @@
 	FN(perf_event_read_value),	\
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
-	FN(override_return),
+	FN(override_return),		\
+	FN(sock_ops_cb_flags_set),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -951,8 +960,9 @@
 struct bpf_sock_ops {
 	__u32 op;
 	union {
-		__u32 reply;
-		__u32 replylong[4];
+		__u32 args[4];		/* Optionally passed to bpf program */
+		__u32 reply;		/* Returned by bpf program */
+		__u32 replylong[4];	/* Optionally returned by bpf prog */
 	};
 	__u32 family;
 	__u32 remote_ip4;	/* Stored in network byte order */
@@ -967,8 +977,39 @@
 				 */
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
+	__u32 bpf_sock_ops_cb_flags;    /* flags defined in uapi/linux/tcp.h */
+	__u32 state;
+	__u32 rtt_min;
+	__u32 snd_ssthresh;
+	__u32 rcv_nxt;
+	__u32 snd_nxt;
+	__u32 snd_una;
+	__u32 mss_cache;
+	__u32 ecn_flags;
+	__u32 rate_delivered;
+	__u32 rate_interval_us;
+	__u32 packets_out;
+	__u32 retrans_out;
+	__u32 total_retrans;
+	__u32 segs_in;
+	__u32 data_segs_in;
+	__u32 segs_out;
+	__u32 data_segs_out;
+	__u32 lost_out;
+	__u32 sacked_out;
+	__u32 sk_txhash;
+	__u64 bytes_received;
+	__u64 bytes_acked;
 };
 
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS	0x7	/* Mask of all currently
+						 * supported cb flags
+						 */
+
 /* List of known BPF sock_ops operators.
  * New entries can only be added at the end
  */
@@ -1002,6 +1043,43 @@
 					 * a congestion threshold. RTTs above
 					 * this indicate congestion
 					 */
+	BPF_SOCK_OPS_RTO_CB,		/* Called when an RTO has triggered.
+					 * Arg1: value of icsk_retransmits
+					 * Arg2: value of icsk_rto
+					 * Arg3: whether RTO has expired
+					 */
+	BPF_SOCK_OPS_RETRANS_CB,	/* Called when skb is retransmitted.
+					 * Arg1: sequence number of 1st byte
+					 * Arg2: # segments
+					 * Arg3: return value of
+					 *       tcp_transmit_skb (0 => success)
+					 */
+	BPF_SOCK_OPS_STATE_CB,		/* Called when TCP changes state.
+					 * Arg1: old_state
+					 * Arg2: new_state
+					 */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+	BPF_TCP_ESTABLISHED = 1,
+	BPF_TCP_SYN_SENT,
+	BPF_TCP_SYN_RECV,
+	BPF_TCP_FIN_WAIT1,
+	BPF_TCP_FIN_WAIT2,
+	BPF_TCP_TIME_WAIT,
+	BPF_TCP_CLOSE,
+	BPF_TCP_CLOSE_WAIT,
+	BPF_TCP_LAST_ACK,
+	BPF_TCP_LISTEN,
+	BPF_TCP_CLOSING,	/* Now a valid state */
+	BPF_TCP_NEW_SYN_RECV,
+
+	BPF_TCP_MAX_STATES	/* Leave at the end! */
 };
 
 #define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
diff --git a/src/cc/export/helpers.h b/src/cc/export/helpers.h
index 1ffc0d6..cb72552 100644
--- a/src/cc/export/helpers.h
+++ b/src/cc/export/helpers.h
@@ -328,6 +328,8 @@
   (void *) BPF_FUNC_xdp_adjust_head;
 static int (*bpf_override_return)(void *pt_regs, unsigned long rc) =
   (void *) BPF_FUNC_override_return;
+static int (*bpf_sock_ops_cb_flags_set)(void *skops, int flags) =
+  (void *)BPF_FUNC_sock_ops_cb_flags_set;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
index 41f1560..ea4b101 100644
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -148,6 +148,7 @@
   {"perf_prog_read_value", "4.15"},
   {"getsockopt", "4.15"},
   {"override_return", "4.16"},
+  {"sock_ops_cb_flags_set", "4.16"},
 };
 
 static int probe_perf_reader_page_cnt = 8;