1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2020 Facebook */
3#include "bpf_iter.h"
4#include "bpf_tracing_net.h"
5#include <bpf/bpf_helpers.h>
6#include <bpf/bpf_endian.h>
7
8char _license[] SEC("license") = "GPL";
9
10static int hlist_unhashed_lockless(const struct hlist_node *h)
11{
12        return !(h->pprev);
13}
14
15static int timer_pending(const struct timer_list * timer)
16{
17	return !hlist_unhashed_lockless(&timer->entry);
18}
19
20extern unsigned CONFIG_HZ __kconfig;
21
22#define USER_HZ		100
23#define NSEC_PER_SEC	1000000000ULL
24static clock_t jiffies_to_clock_t(unsigned long x)
25{
26	/* The implementation here tailored to a particular
27	 * setting of USER_HZ.
28	 */
29	u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
30	u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
31
32	if ((tick_nsec % user_hz_nsec) == 0) {
33		if (CONFIG_HZ < USER_HZ)
34			return x * (USER_HZ / CONFIG_HZ);
35		else
36			return x / (CONFIG_HZ / USER_HZ);
37	}
38	return x * tick_nsec/user_hz_nsec;
39}
40
41static clock_t jiffies_delta_to_clock_t(long delta)
42{
43	if (delta <= 0)
44		return 0;
45
46	return jiffies_to_clock_t(delta);
47}
48
49static long sock_i_ino(const struct sock *sk)
50{
51	const struct socket *sk_socket = sk->sk_socket;
52	const struct inode *inode;
53	unsigned long ino;
54
55	if (!sk_socket)
56		return 0;
57
58	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
59	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
60	return ino;
61}
62
63static bool
64inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
65{
66	return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
67}
68
69static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
70{
71	return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
72}
73
74static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
75			 uid_t uid, __u32 seq_num)
76{
77	const struct inet_connection_sock *icsk;
78	const struct fastopen_queue *fastopenq;
79	const struct in6_addr *dest, *src;
80	const struct inet_sock *inet;
81	unsigned long timer_expires;
82	const struct sock *sp;
83	__u16 destp, srcp;
84	int timer_active;
85	int rx_queue;
86	int state;
87
88	icsk = &tp->tcp.inet_conn;
89	inet = &icsk->icsk_inet;
90	sp = &inet->sk;
91	fastopenq = &icsk->icsk_accept_queue.fastopenq;
92
93	dest = &sp->sk_v6_daddr;
94	src = &sp->sk_v6_rcv_saddr;
95	destp = bpf_ntohs(inet->inet_dport);
96	srcp = bpf_ntohs(inet->inet_sport);
97
98	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
99	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
100	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
101		timer_active = 1;
102		timer_expires = icsk->icsk_timeout;
103	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
104		timer_active = 4;
105		timer_expires = icsk->icsk_timeout;
106	} else if (timer_pending(&sp->sk_timer)) {
107		timer_active = 2;
108		timer_expires = sp->sk_timer.expires;
109	} else {
110		timer_active = 0;
111		timer_expires = bpf_jiffies64();
112	}
113
114	state = sp->sk_state;
115	if (state == TCP_LISTEN) {
116		rx_queue = sp->sk_ack_backlog;
117	} else {
118		rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq;
119		if (rx_queue < 0)
120			rx_queue = 0;
121	}
122
123	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
124		       seq_num,
125		       src->s6_addr32[0], src->s6_addr32[1],
126		       src->s6_addr32[2], src->s6_addr32[3], srcp,
127		       dest->s6_addr32[0], dest->s6_addr32[1],
128		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
129	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
130		       state,
131		       tp->tcp.write_seq - tp->tcp.snd_una, rx_queue,
132		       timer_active,
133		       jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
134		       icsk->icsk_retransmits, uid,
135		       icsk->icsk_probes_out,
136		       sock_i_ino(sp),
137		       sp->sk_refcnt.refs.counter);
138	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
139		       tp,
140		       jiffies_to_clock_t(icsk->icsk_rto),
141		       jiffies_to_clock_t(icsk->icsk_ack.ato),
142		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
143		       tp->tcp.snd_cwnd,
144		       state == TCP_LISTEN ? fastopenq->max_qlen
145				: (tcp_in_initial_slowstart(&tp->tcp) ? -1
146								      : tp->tcp.snd_ssthresh)
147		      );
148
149	return 0;
150}
151
152static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
153			uid_t uid, __u32 seq_num)
154{
155	struct inet_timewait_sock *tw = &ttw->tw_sk;
156	const struct in6_addr *dest, *src;
157	__u16 destp, srcp;
158	long delta;
159
160	delta = tw->tw_timer.expires - bpf_jiffies64();
161	dest = &tw->tw_v6_daddr;
162	src  = &tw->tw_v6_rcv_saddr;
163	destp = bpf_ntohs(tw->tw_dport);
164	srcp  = bpf_ntohs(tw->tw_sport);
165
166	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
167		       seq_num,
168		       src->s6_addr32[0], src->s6_addr32[1],
169		       src->s6_addr32[2], src->s6_addr32[3], srcp,
170		       dest->s6_addr32[0], dest->s6_addr32[1],
171		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
172
173	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
174		       tw->tw_substate, 0, 0,
175		       3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
176		       tw->tw_refcnt.refs.counter, tw);
177
178	return 0;
179}
180
181static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
182			 uid_t uid, __u32 seq_num)
183{
184	struct inet_request_sock *irsk = &treq->req;
185	struct request_sock *req = &irsk->req;
186	struct in6_addr *src, *dest;
187	long ttd;
188
189	ttd = req->rsk_timer.expires - bpf_jiffies64();
190	src = &irsk->ir_v6_loc_addr;
191	dest = &irsk->ir_v6_rmt_addr;
192
193	if (ttd < 0)
194		ttd = 0;
195
196	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
197		       seq_num,
198		       src->s6_addr32[0], src->s6_addr32[1],
199		       src->s6_addr32[2], src->s6_addr32[3],
200		       irsk->ir_num,
201		       dest->s6_addr32[0], dest->s6_addr32[1],
202		       dest->s6_addr32[2], dest->s6_addr32[3],
203		       bpf_ntohs(irsk->ir_rmt_port));
204	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
205		       TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
206		       req->num_timeout, uid, 0, 0, 0, req);
207
208	return 0;
209}
210
211SEC("iter/tcp")
212int dump_tcp6(struct bpf_iter__tcp *ctx)
213{
214	struct sock_common *sk_common = ctx->sk_common;
215	struct seq_file *seq = ctx->meta->seq;
216	struct tcp_timewait_sock *tw;
217	struct tcp_request_sock *req;
218	struct tcp6_sock *tp;
219	uid_t uid = ctx->uid;
220	__u32 seq_num;
221
222	if (sk_common == (void *)0)
223		return 0;
224
225	seq_num = ctx->meta->seq_num;
226	if (seq_num == 0)
227		BPF_SEQ_PRINTF(seq, "  sl  "
228				    "local_address                         "
229				    "remote_address                        "
230				    "st tx_queue rx_queue tr tm->when retrnsmt"
231				    "   uid  timeout inode\n");
232
233	if (sk_common->skc_family != AF_INET6)
234		return 0;
235
236	tp = bpf_skc_to_tcp6_sock(sk_common);
237	if (tp)
238		return dump_tcp6_sock(seq, tp, uid, seq_num);
239
240	tw = bpf_skc_to_tcp_timewait_sock(sk_common);
241	if (tw)
242		return dump_tw_sock(seq, tw, uid, seq_num);
243
244	req = bpf_skc_to_tcp_request_sock(sk_common);
245	if (req)
246		return dump_req_sock(seq, req, uid, seq_num);
247
248	return 0;
249}
250