1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2012 Chelsio Communications, Inc.
5 * All rights reserved.
6 * Written by: Navdeep Parhar <np@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include "opt_inet.h"
34#include "opt_inet6.h"
35
36#include <sys/param.h>
37#include <sys/eventhandler.h>
38#include <sys/kernel.h>
39#include <sys/systm.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/module.h>
43#include <sys/types.h>
44#include <sys/sockopt.h>
45#include <sys/sysctl.h>
46#include <sys/socket.h>
47
48#include <net/ethernet.h>
49#include <net/if.h>
50#include <net/if_var.h>
51#include <net/if_types.h>
52#include <net/if_vlan_var.h>
53#include <net/if_llatbl.h>
54#include <net/route.h>
55
56#include <netinet/if_ether.h>
57#include <netinet/in.h>
58#include <netinet/in_pcb.h>
59#include <netinet/in_var.h>
60#include <netinet6/in6_var.h>
61#include <netinet6/in6_pcb.h>
62#include <netinet6/nd6.h>
63#define TCPSTATES
64#include <netinet/tcp.h>
65#include <netinet/tcp_fsm.h>
66#include <netinet/tcp_timer.h>
67#include <netinet/tcp_var.h>
68#include <netinet/tcp_syncache.h>
69#include <netinet/tcp_offload.h>
70#include <netinet/toecore.h>
71
/*
 * toedev_list holds the TOE devices added by register_toedev().  Apart from
 * the TAILQ_INIT() at module load, every walk or modification of the list in
 * this file is done with toedev_lock held.
 */
static struct mtx toedev_lock;
static TAILQ_HEAD(, toedev) toedev_list;
/*
 * Tags returned by EVENTHANDLER_REGISTER() in toecore_load(); they are kept
 * so that toecore_unload() can deregister the same handlers.
 */
static eventhandler_tag listen_start_eh;
static eventhandler_tag listen_stop_eh;
static eventhandler_tag lle_event_eh;
77
/*
 * Default tod_connect method installed by init_toedev().  It ignores all of
 * its arguments and always returns ENOTSUP.
 */
static int
toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
    struct nhop_object *nh __unused, struct sockaddr *nam __unused)
{

	return (ENOTSUP);
}
85
/*
 * Default tod_listen_start method installed by init_toedev().  It ignores
 * its arguments and always returns ENOTSUP.
 */
static int
toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
{

	return (ENOTSUP);
}
92
/*
 * Default tod_listen_stop method installed by init_toedev().  It ignores
 * its arguments and always returns ENOTSUP.
 */
static int
toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
{

	return (ENOTSUP);
}
99
100static void
101toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
102    struct mbuf *m)
103{
104
105	m_freem(m);
106	return;
107}
108
109static void
110toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
111{
112
113	return;
114}
115
/*
 * Default output method.  init_toedev() installs this same function as
 * tod_output, tod_send_rst and tod_send_fin.  It ignores its arguments and
 * always returns ENOTSUP.
 */
static int
toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
{

	return (ENOTSUP);
}
122
123static void
124toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
125{
126
127	return;
128}
129
130static void
131toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
132    struct sockaddr *sa __unused, uint8_t *lladdr __unused,
133    uint16_t vtag __unused)
134{
135
136	return;
137}
138
139static void
140toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
141    struct nhop_object *nh0 __unused, struct nhop_object *nh1 __unused)
142{
143
144	return;
145}
146
147static void
148toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
149{
150
151	return;
152}
153
154static void
155toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
156{
157
158	return;
159}
160
/*
 * Default tod_syncache_respond method installed by init_toedev().  The mbuf
 * is released with m_freem() and 0 is returned; the device and context
 * arguments are ignored.
 */
static int
toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
    struct mbuf *m)
{

	m_freem(m);
	return (0);
}
169
170static void
171toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
172    struct socket *so __unused)
173{
174
175	return;
176}
177
178static void
179toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
180    int sopt_dir __unused, int sopt_name __unused)
181{
182
183	return;
184}
185
186static void
187toedev_tcp_info(struct toedev *tod __unused, struct tcpcb *tp __unused,
188    struct tcp_info *ti __unused)
189{
190
191	return;
192}
193
/*
 * Default tod_alloc_tls_session method installed by init_toedev().  It
 * ignores its arguments and always returns EINVAL (note: not ENOTSUP like the
 * other int-returning defaults in this file).
 */
static int
toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused,
    struct ktls_session *tls __unused, int direction __unused)
{

	return (EINVAL);
}
201
202/*
203 * Inform one or more TOE devices about a listening socket.
204 */
205static void
206toe_listen_start(struct inpcb *inp, void *arg)
207{
208	struct toedev *t, *tod;
209	struct tcpcb *tp;
210
211	INP_WLOCK_ASSERT(inp);
212	KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
213	    ("%s: inp is not a TCP inp", __func__));
214
215	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
216		return;
217
218	tp = intotcpcb(inp);
219	if (tp->t_state != TCPS_LISTEN)
220		return;
221
222	t = arg;
223	mtx_lock(&toedev_lock);
224	TAILQ_FOREACH(tod, &toedev_list, link) {
225		if (t == NULL || t == tod)
226			tod->tod_listen_start(tod, tp);
227	}
228	mtx_unlock(&toedev_lock);
229}
230
231static void
232toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
233{
234	struct inpcb *inp = tp->t_inpcb;
235
236	INP_WLOCK_ASSERT(inp);
237	KASSERT(tp->t_state == TCPS_LISTEN,
238	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
239
240	toe_listen_start(inp, NULL);
241}
242
243static void
244toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
245{
246	struct toedev *tod;
247#ifdef INVARIANTS
248	struct inpcb *inp = tp->t_inpcb;
249#endif
250
251	INP_WLOCK_ASSERT(inp);
252	KASSERT(tp->t_state == TCPS_LISTEN,
253	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
254
255	mtx_lock(&toedev_lock);
256	TAILQ_FOREACH(tod, &toedev_list, link)
257	    tod->tod_listen_stop(tod, tp);
258	mtx_unlock(&toedev_lock);
259}
260
261/*
262 * Fill up a freshly allocated toedev struct with reasonable defaults.
263 */
264void
265init_toedev(struct toedev *tod)
266{
267
268	tod->tod_softc = NULL;
269
270	/*
271	 * Provide no-op defaults so that the kernel can call any toedev
272	 * function without having to check whether the TOE driver supplied one
273	 * or not.
274	 */
275	tod->tod_connect = toedev_connect;
276	tod->tod_listen_start = toedev_listen_start;
277	tod->tod_listen_stop = toedev_listen_stop;
278	tod->tod_input = toedev_input;
279	tod->tod_rcvd = toedev_rcvd;
280	tod->tod_output = toedev_output;
281	tod->tod_send_rst = toedev_output;
282	tod->tod_send_fin = toedev_output;
283	tod->tod_pcb_detach = toedev_pcb_detach;
284	tod->tod_l2_update = toedev_l2_update;
285	tod->tod_route_redirect = toedev_route_redirect;
286	tod->tod_syncache_added = toedev_syncache_added;
287	tod->tod_syncache_removed = toedev_syncache_removed;
288	tod->tod_syncache_respond = toedev_syncache_respond;
289	tod->tod_offload_socket = toedev_offload_socket;
290	tod->tod_ctloutput = toedev_ctloutput;
291	tod->tod_tcp_info = toedev_tcp_info;
292	tod->tod_alloc_tls_session = toedev_alloc_tls_session;
293}
294
295/*
296 * Register an active TOE device with the system.  This allows it to receive
297 * notifications from the kernel.
298 */
299int
300register_toedev(struct toedev *tod)
301{
302	struct toedev *t;
303
304	mtx_lock(&toedev_lock);
305	TAILQ_FOREACH(t, &toedev_list, link) {
306		if (t == tod) {
307			mtx_unlock(&toedev_lock);
308			return (EEXIST);
309		}
310	}
311
312	TAILQ_INSERT_TAIL(&toedev_list, tod, link);
313	registered_toedevs++;
314	mtx_unlock(&toedev_lock);
315
316	inp_apply_all(toe_listen_start, tod);
317
318	return (0);
319}
320
321/*
322 * Remove the TOE device from the global list of active TOE devices.  It is the
323 * caller's responsibility to ensure that the TOE device is quiesced prior to
324 * this call.
325 */
326int
327unregister_toedev(struct toedev *tod)
328{
329	struct toedev *t, *t2;
330	int rc = ENODEV;
331
332	mtx_lock(&toedev_lock);
333	TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
334		if (t == tod) {
335			TAILQ_REMOVE(&toedev_list, tod, link);
336			registered_toedevs--;
337			rc = 0;
338			break;
339		}
340	}
341	KASSERT(registered_toedevs >= 0,
342	    ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
343	mtx_unlock(&toedev_lock);
344	return (rc);
345}
346
347void
348toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
349    struct inpcb *inp, void *tod, void *todctx, uint8_t iptos)
350{
351	struct socket *lso = inp->inp_socket;
352
353	INP_WLOCK_ASSERT(inp);
354
355	syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx, iptos, htons(0));
356}
357
/*
 * Wrapper around syncache_expand() for use by TOE drivers.  Must be called
 * inside the network epoch.  NULL and htons(0) are supplied for the two
 * trailing arguments the wrapper does not expose; the result of
 * syncache_expand() is returned unchanged.
 */
int
toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
    struct tcphdr *th, struct socket **lsop)
{
	int rc;

	NET_EPOCH_ASSERT();

	rc = syncache_expand(inc, to, th, lsop, NULL, htons(0));
	return (rc);
}
367
368/*
369 * General purpose check to see if a 4-tuple is in use by the kernel.  If a TCP
370 * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
371 * in TIME_WAIT may be assassinated freeing it up for re-use.
372 *
373 * Note that the TCP header must have been run through tcp_fields_to_host() or
374 * equivalent.
375 */
376int
377toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
378{
379	struct inpcb *inp;
380
381	if (inc->inc_flags & INC_ISIPV6) {
382		inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
383		    inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
384		    INPLOOKUP_WLOCKPCB, ifp);
385	} else {
386		inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
387		    inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
388	}
389	if (inp != NULL) {
390		INP_WLOCK_ASSERT(inp);
391
392		if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
393			if (!tcp_twcheck(inp, NULL, th, NULL, 0))
394				return (EADDRINUSE);
395		} else {
396			INP_WUNLOCK(inp);
397			return (EADDRINUSE);
398		}
399	}
400
401	return (0);
402}
403
404static void
405toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
406{
407	struct toedev *tod;
408	struct ifnet *ifp;
409	struct sockaddr *sa;
410	uint8_t *lladdr;
411	uint16_t vid, pcp;
412	int family;
413	struct sockaddr_in6 sin6;
414
415	LLE_WLOCK_ASSERT(lle);
416
417	ifp = lltable_get_ifp(lle->lle_tbl);
418	family = lltable_get_af(lle->lle_tbl);
419
420	if (family != AF_INET && family != AF_INET6)
421		return;
422	/*
423	 * Not interested if the interface's TOE capability is not enabled.
424	 */
425	if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
426	    (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
427		return;
428
429	tod = TOEDEV(ifp);
430	if (tod == NULL)
431		return;
432
433	sa = (struct sockaddr *)&sin6;
434	lltable_fill_sa_entry(lle, sa);
435
436	vid = 0xfff;
437	pcp = 0;
438	if (evt != LLENTRY_RESOLVED) {
439		/*
440		 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
441		 * this entry is going to be deleted.
442		 */
443
444		lladdr = NULL;
445	} else {
446		KASSERT(lle->la_flags & LLE_VALID,
447		    ("%s: %p resolved but not valid?", __func__, lle));
448
449		lladdr = (uint8_t *)lle->ll_addr;
450		VLAN_TAG(ifp, &vid);
451		VLAN_PCP(ifp, &pcp);
452	}
453
454	tod->tod_l2_update(tod, ifp, sa, lladdr, EVL_MAKETAG(vid, pcp, 0));
455}
456
457/*
458 * Returns 0 or EWOULDBLOCK on success (any other value is an error).  0 means
459 * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
460 * tod_l2_update will be called later, when the entry is resolved or times out.
461 */
462int
463toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
464    uint8_t *lladdr, uint16_t *vtag)
465{
466	int rc;
467	uint16_t vid, pcp;
468
469	switch (sa->sa_family) {
470#ifdef INET
471	case AF_INET:
472		rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
473		break;
474#endif
475#ifdef INET6
476	case AF_INET6:
477		rc = nd6_resolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
478		break;
479#endif
480	default:
481		return (EPROTONOSUPPORT);
482	}
483
484	if (rc == 0) {
485		vid = 0xfff;
486		pcp = 0;
487		if (ifp->if_type == IFT_L2VLAN) {
488			VLAN_TAG(ifp, &vid);
489			VLAN_PCP(ifp, &pcp);
490		} else if (ifp->if_pcp != IFNET_PCP_NONE) {
491			vid = 0;
492			pcp = ifp->if_pcp;
493		}
494		*vtag = EVL_MAKETAG(vid, pcp, 0);
495	}
496
497	return (rc);
498}
499
500void
501toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
502{
503
504	NET_EPOCH_ASSERT();
505	INP_WLOCK_ASSERT(inp);
506
507	if (!(inp->inp_flags & INP_DROPPED)) {
508		struct tcpcb *tp = intotcpcb(inp);
509
510		KASSERT(tp->t_flags & TF_TOE,
511		    ("%s: tp %p not offloaded.", __func__, tp));
512
513		if (err == EAGAIN) {
514			/*
515			 * Temporary failure during offload, take this PCB back.
516			 * Detach from the TOE driver and do the rest of what
517			 * TCP's pru_connect would have done if the connection
518			 * wasn't offloaded.
519			 */
520
521			tod->tod_pcb_detach(tod, tp);
522			KASSERT(!(tp->t_flags & TF_TOE),
523			    ("%s: tp %p still offloaded.", __func__, tp));
524			tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
525			(void) tp->t_fb->tfb_tcp_output(tp);
526		} else {
527			tp = tcp_drop(tp, err);
528			if (tp == NULL)
529				INP_WLOCK(inp);	/* re-acquire */
530		}
531	}
532	INP_WLOCK_ASSERT(inp);
533}
534
/*
 * MOD_LOAD handler.  Initializes the toedev lock and list and then hooks the
 * listen start/stop and link-layer entry events.  Always returns 0.
 */
static int
toecore_load(void)
{

	/*
	 * The lock and the list are set up first: the event handlers
	 * registered below use both as soon as they run.
	 */
	mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
	TAILQ_INIT(&toedev_list);

	/* Keep the tags; toecore_unload() needs them to deregister. */
	listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
	    toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
	listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
	    toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
	lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
	    EVENTHANDLER_PRI_ANY);

	return (0);
}
551
/*
 * MOD_UNLOAD handler.  Refuses with EBUSY while any TOE device is still
 * registered; otherwise deregisters the three event handlers, destroys the
 * toedev lock and returns 0.
 */
static int
toecore_unload(void)
{

	mtx_lock(&toedev_lock);
	if (!TAILQ_EMPTY(&toedev_list)) {
		mtx_unlock(&toedev_lock);
		return (EBUSY);
	}

	/*
	 * NOTE(review): the handlers are deregistered with toedev_lock held,
	 * and the listen start/stop handlers take toedev_lock themselves.
	 * Confirm that EVENTHANDLER_DEREGISTER cannot block here waiting for a
	 * handler that is still running.
	 */
	EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
	EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
	EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);

	/* A mutex is released before it is destroyed. */
	mtx_unlock(&toedev_lock);
	mtx_destroy(&toedev_lock);

	return (0);
}
571
572static int
573toecore_mod_handler(module_t mod, int cmd, void *arg)
574{
575
576	if (cmd == MOD_LOAD)
577		return (toecore_load());
578
579	if (cmd == MOD_UNLOAD)
580		return (toecore_unload());
581
582	return (EOPNOTSUPP);
583}
584
/*
 * Module glue: the module is named "toecore", its events are handled by
 * toecore_mod_handler(), and no extra data is attached (third member is 0).
 */
static moduledata_t mod_data= {
	"toecore",
	toecore_mod_handler,
	0
};

/* Export version 1 of "toecore" and declare the module to the kernel. */
MODULE_VERSION(toecore, 1);
DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
593