1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2012 Chelsio Communications, Inc.
5 * All rights reserved.
6 * Written by: Navdeep Parhar <np@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include "opt_inet.h"
34#include "opt_inet6.h"
35
36#include <sys/param.h>
37#include <sys/kernel.h>
38#include <sys/systm.h>
39#include <sys/malloc.h>
40#include <sys/mbuf.h>
41#include <sys/module.h>
42#include <sys/types.h>
43#include <sys/sockopt.h>
44#include <sys/sysctl.h>
45#include <sys/socket.h>
46
47#include <net/ethernet.h>
48#include <net/if.h>
49#include <net/if_var.h>
50#include <net/if_types.h>
51#include <net/if_vlan_var.h>
52#include <net/if_llatbl.h>
53#include <net/route.h>
54
55#include <netinet/if_ether.h>
56#include <netinet/in.h>
57#include <netinet/in_pcb.h>
58#include <netinet/in_var.h>
59#include <netinet6/in6_var.h>
60#include <netinet6/in6_pcb.h>
61#include <netinet6/nd6.h>
62#define TCPSTATES
63#include <netinet/tcp.h>
64#include <netinet/tcp_fsm.h>
65#include <netinet/tcp_timer.h>
66#include <netinet/tcp_var.h>
67#include <netinet/tcp_syncache.h>
68#include <netinet/tcp_offload.h>
69#include <netinet/toecore.h>
70
71static struct mtx toedev_lock;
72static TAILQ_HEAD(, toedev) toedev_list;
73static eventhandler_tag listen_start_eh;
74static eventhandler_tag listen_stop_eh;
75static eventhandler_tag lle_event_eh;
76
77static int
78toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
79    struct rtentry *rt __unused, struct sockaddr *nam __unused)
80{
81
82	return (ENOTSUP);
83}
84
85static int
86toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
87{
88
89	return (ENOTSUP);
90}
91
92static int
93toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
94{
95
96	return (ENOTSUP);
97}
98
99static void
100toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
101    struct mbuf *m)
102{
103
104	m_freem(m);
105	return;
106}
107
108static void
109toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
110{
111
112	return;
113}
114
115static int
116toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
117{
118
119	return (ENOTSUP);
120}
121
122static void
123toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
124{
125
126	return;
127}
128
129static void
130toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
131    struct sockaddr *sa __unused, uint8_t *lladdr __unused,
132    uint16_t vtag __unused)
133{
134
135	return;
136}
137
138static void
139toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
140    struct rtentry *rt0 __unused, struct rtentry *rt1 __unused)
141{
142
143	return;
144}
145
146static void
147toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
148{
149
150	return;
151}
152
153static void
154toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
155{
156
157	return;
158}
159
160static int
161toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
162    struct mbuf *m)
163{
164
165	m_freem(m);
166	return (0);
167}
168
169static void
170toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
171    struct socket *so __unused)
172{
173
174	return;
175}
176
177static void
178toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
179    int sopt_dir __unused, int sopt_name __unused)
180{
181
182	return;
183}
184
185static void
186toedev_tcp_info(struct toedev *tod __unused, struct tcpcb *tp __unused,
187    struct tcp_info *ti __unused)
188{
189
190	return;
191}
192
193/*
194 * Inform one or more TOE devices about a listening socket.
195 */
196static void
197toe_listen_start(struct inpcb *inp, void *arg)
198{
199	struct toedev *t, *tod;
200	struct tcpcb *tp;
201
202	INP_WLOCK_ASSERT(inp);
203	KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
204	    ("%s: inp is not a TCP inp", __func__));
205
206	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
207		return;
208
209	tp = intotcpcb(inp);
210	if (tp->t_state != TCPS_LISTEN)
211		return;
212
213	t = arg;
214	mtx_lock(&toedev_lock);
215	TAILQ_FOREACH(tod, &toedev_list, link) {
216		if (t == NULL || t == tod)
217			tod->tod_listen_start(tod, tp);
218	}
219	mtx_unlock(&toedev_lock);
220}
221
222static void
223toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
224{
225	struct inpcb *inp = tp->t_inpcb;
226
227	INP_WLOCK_ASSERT(inp);
228	KASSERT(tp->t_state == TCPS_LISTEN,
229	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
230
231	toe_listen_start(inp, NULL);
232}
233
234static void
235toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
236{
237	struct toedev *tod;
238#ifdef INVARIANTS
239	struct inpcb *inp = tp->t_inpcb;
240#endif
241
242	INP_WLOCK_ASSERT(inp);
243	KASSERT(tp->t_state == TCPS_LISTEN,
244	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
245
246	mtx_lock(&toedev_lock);
247	TAILQ_FOREACH(tod, &toedev_list, link)
248	    tod->tod_listen_stop(tod, tp);
249	mtx_unlock(&toedev_lock);
250}
251
252/*
253 * Fill up a freshly allocated toedev struct with reasonable defaults.
254 */
255void
256init_toedev(struct toedev *tod)
257{
258
259	tod->tod_softc = NULL;
260
261	/*
262	 * Provide no-op defaults so that the kernel can call any toedev
263	 * function without having to check whether the TOE driver supplied one
264	 * or not.
265	 */
266	tod->tod_connect = toedev_connect;
267	tod->tod_listen_start = toedev_listen_start;
268	tod->tod_listen_stop = toedev_listen_stop;
269	tod->tod_input = toedev_input;
270	tod->tod_rcvd = toedev_rcvd;
271	tod->tod_output = toedev_output;
272	tod->tod_send_rst = toedev_output;
273	tod->tod_send_fin = toedev_output;
274	tod->tod_pcb_detach = toedev_pcb_detach;
275	tod->tod_l2_update = toedev_l2_update;
276	tod->tod_route_redirect = toedev_route_redirect;
277	tod->tod_syncache_added = toedev_syncache_added;
278	tod->tod_syncache_removed = toedev_syncache_removed;
279	tod->tod_syncache_respond = toedev_syncache_respond;
280	tod->tod_offload_socket = toedev_offload_socket;
281	tod->tod_ctloutput = toedev_ctloutput;
282	tod->tod_tcp_info = toedev_tcp_info;
283}
284
285/*
286 * Register an active TOE device with the system.  This allows it to receive
287 * notifications from the kernel.
288 */
289int
290register_toedev(struct toedev *tod)
291{
292	struct toedev *t;
293
294	mtx_lock(&toedev_lock);
295	TAILQ_FOREACH(t, &toedev_list, link) {
296		if (t == tod) {
297			mtx_unlock(&toedev_lock);
298			return (EEXIST);
299		}
300	}
301
302	TAILQ_INSERT_TAIL(&toedev_list, tod, link);
303	registered_toedevs++;
304	mtx_unlock(&toedev_lock);
305
306	inp_apply_all(toe_listen_start, tod);
307
308	return (0);
309}
310
311/*
312 * Remove the TOE device from the global list of active TOE devices.  It is the
313 * caller's responsibility to ensure that the TOE device is quiesced prior to
314 * this call.
315 */
316int
317unregister_toedev(struct toedev *tod)
318{
319	struct toedev *t, *t2;
320	int rc = ENODEV;
321
322	mtx_lock(&toedev_lock);
323	TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
324		if (t == tod) {
325			TAILQ_REMOVE(&toedev_list, tod, link);
326			registered_toedevs--;
327			rc = 0;
328			break;
329		}
330	}
331	KASSERT(registered_toedevs >= 0,
332	    ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
333	mtx_unlock(&toedev_lock);
334	return (rc);
335}
336
337void
338toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
339    struct inpcb *inp, void *tod, void *todctx, uint8_t iptos)
340{
341	struct socket *lso = inp->inp_socket;
342
343	INP_WLOCK_ASSERT(inp);
344
345	syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx, iptos);
346}
347
348int
349toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
350    struct tcphdr *th, struct socket **lsop)
351{
352
353	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
354
355	return (syncache_expand(inc, to, th, lsop, NULL));
356}
357
358/*
359 * General purpose check to see if a 4-tuple is in use by the kernel.  If a TCP
360 * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
361 * in TIME_WAIT may be assassinated freeing it up for re-use.
362 *
363 * Note that the TCP header must have been run through tcp_fields_to_host() or
364 * equivalent.
365 */
366int
367toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
368{
369	struct inpcb *inp;
370
371	if (inc->inc_flags & INC_ISIPV6) {
372		inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
373		    inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
374		    INPLOOKUP_WLOCKPCB, ifp);
375	} else {
376		inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
377		    inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
378	}
379	if (inp != NULL) {
380		INP_WLOCK_ASSERT(inp);
381
382		if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
383
384			INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
385			if (!tcp_twcheck(inp, NULL, th, NULL, 0))
386				return (EADDRINUSE);
387		} else {
388			INP_WUNLOCK(inp);
389			return (EADDRINUSE);
390		}
391	}
392
393	return (0);
394}
395
396static void
397toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
398{
399	struct toedev *tod;
400	struct ifnet *ifp;
401	struct sockaddr *sa;
402	uint8_t *lladdr;
403	uint16_t vid, pcp;
404	int family;
405	struct sockaddr_in6 sin6;
406
407	LLE_WLOCK_ASSERT(lle);
408
409	ifp = lltable_get_ifp(lle->lle_tbl);
410	family = lltable_get_af(lle->lle_tbl);
411
412	if (family != AF_INET && family != AF_INET6)
413		return;
414	/*
415	 * Not interested if the interface's TOE capability is not enabled.
416	 */
417	if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
418	    (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
419		return;
420
421	tod = TOEDEV(ifp);
422	if (tod == NULL)
423		return;
424
425	sa = (struct sockaddr *)&sin6;
426	lltable_fill_sa_entry(lle, sa);
427
428	vid = 0xfff;
429	pcp = 0;
430	if (evt != LLENTRY_RESOLVED) {
431
432		/*
433		 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
434		 * this entry is going to be deleted.
435		 */
436
437		lladdr = NULL;
438	} else {
439
440		KASSERT(lle->la_flags & LLE_VALID,
441		    ("%s: %p resolved but not valid?", __func__, lle));
442
443		lladdr = (uint8_t *)lle->ll_addr;
444		VLAN_TAG(ifp, &vid);
445		VLAN_PCP(ifp, &pcp);
446	}
447
448	tod->tod_l2_update(tod, ifp, sa, lladdr, EVL_MAKETAG(vid, pcp, 0));
449}
450
451/*
452 * Returns 0 or EWOULDBLOCK on success (any other value is an error).  0 means
453 * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
454 * tod_l2_update will be called later, when the entry is resolved or times out.
455 */
456int
457toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
458    uint8_t *lladdr, uint16_t *vtag)
459{
460	int rc;
461	uint16_t vid, pcp;
462
463	switch (sa->sa_family) {
464#ifdef INET
465	case AF_INET:
466		rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
467		break;
468#endif
469#ifdef INET6
470	case AF_INET6:
471		rc = nd6_resolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
472		break;
473#endif
474	default:
475		return (EPROTONOSUPPORT);
476	}
477
478	if (rc == 0) {
479		vid = 0xfff;
480		pcp = 0;
481		if (ifp->if_type == IFT_L2VLAN) {
482			VLAN_TAG(ifp, &vid);
483			VLAN_PCP(ifp, &pcp);
484		} else if (ifp->if_pcp != IFNET_PCP_NONE) {
485			vid = 0;
486			pcp = ifp->if_pcp;
487		}
488		*vtag = EVL_MAKETAG(vid, pcp, 0);
489	}
490
491	return (rc);
492}
493
494void
495toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
496{
497
498	INP_WLOCK_ASSERT(inp);
499
500	if (!(inp->inp_flags & INP_DROPPED)) {
501		struct tcpcb *tp = intotcpcb(inp);
502
503		KASSERT(tp->t_flags & TF_TOE,
504		    ("%s: tp %p not offloaded.", __func__, tp));
505
506		if (err == EAGAIN) {
507
508			/*
509			 * Temporary failure during offload, take this PCB back.
510			 * Detach from the TOE driver and do the rest of what
511			 * TCP's pru_connect would have done if the connection
512			 * wasn't offloaded.
513			 */
514
515			tod->tod_pcb_detach(tod, tp);
516			KASSERT(!(tp->t_flags & TF_TOE),
517			    ("%s: tp %p still offloaded.", __func__, tp));
518			tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
519			(void) tp->t_fb->tfb_tcp_output(tp);
520		} else {
521
522			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
523			tp = tcp_drop(tp, err);
524			if (tp == NULL)
525				INP_WLOCK(inp);	/* re-acquire */
526		}
527	}
528	INP_WLOCK_ASSERT(inp);
529}
530
531static int
532toecore_load(void)
533{
534
535	mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
536	TAILQ_INIT(&toedev_list);
537
538	listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
539	    toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
540	listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
541	    toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
542	lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
543	    EVENTHANDLER_PRI_ANY);
544
545	return (0);
546}
547
548static int
549toecore_unload(void)
550{
551
552	mtx_lock(&toedev_lock);
553	if (!TAILQ_EMPTY(&toedev_list)) {
554		mtx_unlock(&toedev_lock);
555		return (EBUSY);
556	}
557
558	EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
559	EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
560	EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
561
562	mtx_unlock(&toedev_lock);
563	mtx_destroy(&toedev_lock);
564
565	return (0);
566}
567
568static int
569toecore_mod_handler(module_t mod, int cmd, void *arg)
570{
571
572	if (cmd == MOD_LOAD)
573		return (toecore_load());
574
575	if (cmd == MOD_UNLOAD)
576		return (toecore_unload());
577
578	return (EOPNOTSUPP);
579}
580
581static moduledata_t mod_data= {
582	"toecore",
583	toecore_mod_handler,
584	0
585};
586
587MODULE_VERSION(toecore, 1);
588DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
589