/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
27124723Snectar
28117120Sru#include <sys/cdefs.h>
29211774Simp__FBSDID("$FreeBSD: releng/10.3/sys/netinet/toecore.c 245932 2013-01-26 00:57:29Z np $");
30235720Sgleb
31189765Sgabor#include "opt_inet.h"
32235720Sgleb#include "opt_inet6.h"
331573Srgrimes
34136910Sru#include <sys/param.h>
35136910Sru#include <sys/kernel.h>
361573Srgrimes#include <sys/systm.h>
37213153Sdavidxu#include <sys/mbuf.h>
38213153Sdavidxu#include <sys/module.h>
39213153Sdavidxu#include <sys/types.h>
40213153Sdavidxu#include <sys/sockopt.h>
41213153Sdavidxu#include <sys/sysctl.h>
42169720Skan#include <sys/socket.h>
43169720Skan
44169720Skan#include <net/ethernet.h>
45172401Sru#include <net/if.h>
46169771Skan#include <net/if_types.h>
47235653Smarcel#include <net/if_vlan_var.h>
48169720Skan#include <net/if_llatbl.h>
49235653Smarcel#include <net/route.h>
50235653Smarcel
51235653Smarcel#include <netinet/if_ether.h>
52235653Smarcel#include <netinet/in.h>
53258750Sgjb#include <netinet/in_pcb.h>
54258750Sgjb#include <netinet/in_var.h>
55258750Sgjb#include <netinet6/in6_var.h>
56107052Sru#include <netinet6/in6_pcb.h>
57107052Sru#include <netinet6/nd6.h>
58107052Sru#define TCPSTATES
59107052Sru#include <netinet/tcp.h>
60107052Sru#include <netinet/tcp_fsm.h>
61107052Sru#include <netinet/tcp_timer.h>
62107052Sru#include <netinet/tcp_var.h>
63107052Sru#include <netinet/tcp_syncache.h>
64211774Simp#include <netinet/tcp_offload.h>
65107052Sru#include <netinet/toecore.h>
66107052Sru
67112202Sobrienstatic struct mtx toedev_lock;
68107052Srustatic TAILQ_HEAD(, toedev) toedev_list;
69107052Srustatic eventhandler_tag listen_start_eh;
70219019Sgaborstatic eventhandler_tag listen_stop_eh;
71219019Sgaborstatic eventhandler_tag lle_event_eh;
72219019Sgaborstatic eventhandler_tag route_redirect_eh;
73156960Sume
74156960Sumestatic int
75107052Srutoedev_connect(struct toedev *tod __unused, struct socket *so __unused,
76156960Sume    struct rtentry *rt __unused, struct sockaddr *nam __unused)
77107052Sru{
78107052Sru
79107052Sru	return (ENOTSUP);
80211774Simp}
81211774Simp
82211774Simpstatic int
83211774Simptoedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
84217942Sjchandra{
85217123Simp
86107052Sru	return (ENOTSUP);
87107052Sru}
88107052Sru
89156960Sumestatic int
90107052Srutoedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
91107052Sru{
92234370Sjasone
93107052Sru	return (ENOTSUP);
94107052Sru}
95107052Sru
96107052Srustatic void
97107052Srutoedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
98107052Sru    struct mbuf *m)
99211774Simp{
100129202Scognet
101129202Scognet	m_freem(m);
102156813Sru	return;
103107052Sru}
104107052Sru
105107052Srustatic void
106255219Spjdtoedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
107156813Sru{
108107052Sru
109107052Sru	return;
110156813Sru}
111128820Sdas
112128820Sdasstatic int
113158115Sumetoedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
114158115Sume{
115158115Sume
116167199Ssimon	return (ENOTSUP);
117167199Ssimon}
118167199Ssimon
119107052Srustatic void
120258750Sgjbtoedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
121258750Sgjb{
122156773Sdeischen
123156773Sdeischen	return;
124156773Sdeischen}
125156773Sdeischen
126107052Srustatic void
127107052Srutoedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
128107052Sru    struct sockaddr *sa __unused, uint8_t *lladdr __unused,
129107052Sru    uint16_t vtag __unused)
130107052Sru{
131107052Sru
132107052Sru	return;
133107052Sru}
134107052Sru
135107052Srustatic void
136107052Srutoedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
137107052Sru    struct rtentry *rt0 __unused, struct rtentry *rt1 __unused)
138107052Sru{
139107052Sru
140107052Sru	return;
1411573Srgrimes}
1421573Srgrimes
1431573Srgrimesstatic void
144229368Sedtoedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
145229368Sed{
1461573Srgrimes
147211774Simp	return;
1481573Srgrimes}
1491573Srgrimes
15026047Sasamistatic void
1511573Srgrimestoedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
152211774Simp{
1531573Srgrimes
154211774Simp	return;
1551573Srgrimes}
156211704Skib
1571573Srgrimesstatic int
158124374Srutoedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
159124374Sru    struct mbuf *m)
160210731Srpaulo{
161180012Sru
162180012Sru	m_freem(m);
163180012Sru	return (0);
164180012Sru}
165180012Sru
166180012Srustatic void
167213153Sdavidxutoedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
168213153Sdavidxu    struct socket *so __unused)
169{
170
171	return;
172}
173
174static void
175toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
176    int sopt_dir __unused, int sopt_name __unused)
177{
178
179	return;
180}
181
182/*
183 * Inform one or more TOE devices about a listening socket.
184 */
185static void
186toe_listen_start(struct inpcb *inp, void *arg)
187{
188	struct toedev *t, *tod;
189	struct tcpcb *tp;
190
191	INP_WLOCK_ASSERT(inp);
192	KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
193	    ("%s: inp is not a TCP inp", __func__));
194
195	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
196		return;
197
198	tp = intotcpcb(inp);
199	if (tp->t_state != TCPS_LISTEN)
200		return;
201
202	t = arg;
203	mtx_lock(&toedev_lock);
204	TAILQ_FOREACH(tod, &toedev_list, link) {
205		if (t == NULL || t == tod)
206			tod->tod_listen_start(tod, tp);
207	}
208	mtx_unlock(&toedev_lock);
209}
210
211static void
212toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
213{
214	struct inpcb *inp = tp->t_inpcb;
215
216	INP_WLOCK_ASSERT(inp);
217	KASSERT(tp->t_state == TCPS_LISTEN,
218	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
219
220	toe_listen_start(inp, NULL);
221}
222
223static void
224toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
225{
226	struct toedev *tod;
227#ifdef INVARIANTS
228	struct inpcb *inp = tp->t_inpcb;
229#endif
230
231	INP_WLOCK_ASSERT(inp);
232	KASSERT(tp->t_state == TCPS_LISTEN,
233	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
234
235	mtx_lock(&toedev_lock);
236	TAILQ_FOREACH(tod, &toedev_list, link)
237	    tod->tod_listen_stop(tod, tp);
238	mtx_unlock(&toedev_lock);
239}
240
241/*
242 * Fill up a freshly allocated toedev struct with reasonable defaults.
243 */
244void
245init_toedev(struct toedev *tod)
246{
247
248	tod->tod_softc = NULL;
249
250	/*
251	 * Provide no-op defaults so that the kernel can call any toedev
252	 * function without having to check whether the TOE driver supplied one
253	 * or not.
254	 */
255	tod->tod_connect = toedev_connect;
256	tod->tod_listen_start = toedev_listen_start;
257	tod->tod_listen_stop = toedev_listen_stop;
258	tod->tod_input = toedev_input;
259	tod->tod_rcvd = toedev_rcvd;
260	tod->tod_output = toedev_output;
261	tod->tod_send_rst = toedev_output;
262	tod->tod_send_fin = toedev_output;
263	tod->tod_pcb_detach = toedev_pcb_detach;
264	tod->tod_l2_update = toedev_l2_update;
265	tod->tod_route_redirect = toedev_route_redirect;
266	tod->tod_syncache_added = toedev_syncache_added;
267	tod->tod_syncache_removed = toedev_syncache_removed;
268	tod->tod_syncache_respond = toedev_syncache_respond;
269	tod->tod_offload_socket = toedev_offload_socket;
270	tod->tod_ctloutput = toedev_ctloutput;
271}
272
273/*
274 * Register an active TOE device with the system.  This allows it to receive
275 * notifications from the kernel.
276 */
277int
278register_toedev(struct toedev *tod)
279{
280	struct toedev *t;
281
282	mtx_lock(&toedev_lock);
283	TAILQ_FOREACH(t, &toedev_list, link) {
284		if (t == tod) {
285			mtx_unlock(&toedev_lock);
286			return (EEXIST);
287		}
288	}
289
290	TAILQ_INSERT_TAIL(&toedev_list, tod, link);
291	registered_toedevs++;
292	mtx_unlock(&toedev_lock);
293
294	inp_apply_all(toe_listen_start, tod);
295
296	return (0);
297}
298
299/*
300 * Remove the TOE device from the global list of active TOE devices.  It is the
301 * caller's responsibility to ensure that the TOE device is quiesced prior to
302 * this call.
303 */
304int
305unregister_toedev(struct toedev *tod)
306{
307	struct toedev *t, *t2;
308	int rc = ENODEV;
309
310	mtx_lock(&toedev_lock);
311	TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
312		if (t == tod) {
313			TAILQ_REMOVE(&toedev_list, tod, link);
314			registered_toedevs--;
315			rc = 0;
316			break;
317		}
318	}
319	KASSERT(registered_toedevs >= 0,
320	    ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
321	mtx_unlock(&toedev_lock);
322	return (rc);
323}
324
325void
326toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
327    struct inpcb *inp, void *tod, void *todctx)
328{
329	struct socket *lso = inp->inp_socket;
330
331	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
332	INP_WLOCK_ASSERT(inp);
333
334	syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
335}
336
337int
338toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
339    struct tcphdr *th, struct socket **lsop)
340{
341
342	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
343
344	return (syncache_expand(inc, to, th, lsop, NULL));
345}
346
347/*
348 * General purpose check to see if a 4-tuple is in use by the kernel.  If a TCP
349 * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
350 * in TIME_WAIT may be assassinated freeing it up for re-use.
351 *
352 * Note that the TCP header must have been run through tcp_fields_to_host() or
353 * equivalent.
354 */
355int
356toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
357{
358	struct inpcb *inp;
359
360	if (inc->inc_flags & INC_ISIPV6) {
361		inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
362		    inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
363		    INPLOOKUP_WLOCKPCB, ifp);
364	} else {
365		inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
366		    inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
367	}
368	if (inp != NULL) {
369		INP_WLOCK_ASSERT(inp);
370
371		if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
372
373			INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
374			if (!tcp_twcheck(inp, NULL, th, NULL, 0))
375				return (EADDRINUSE);
376		} else {
377			INP_WUNLOCK(inp);
378			return (EADDRINUSE);
379		}
380	}
381
382	return (0);
383}
384
385static void
386toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
387{
388	struct toedev *tod;
389	struct ifnet *ifp;
390	struct sockaddr *sa;
391	uint8_t *lladdr;
392	uint16_t vtag;
393
394	LLE_WLOCK_ASSERT(lle);
395
396	ifp = lle->lle_tbl->llt_ifp;
397	sa = L3_ADDR(lle);
398
399	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
400	    ("%s: lle_event %d for lle %p but sa %p !INET && !INET6",
401	    __func__, evt, lle, sa));
402
403	/*
404	 * Not interested if the interface's TOE capability is not enabled.
405	 */
406	if ((sa->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
407	    (sa->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
408		return;
409
410	tod = TOEDEV(ifp);
411	if (tod == NULL)
412		return;
413
414	vtag = 0xfff;
415	if (evt != LLENTRY_RESOLVED) {
416
417		/*
418		 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
419		 * this entry is going to be deleted.
420		 */
421
422		lladdr = NULL;
423	} else {
424
425		KASSERT(lle->la_flags & LLE_VALID,
426		    ("%s: %p resolved but not valid?", __func__, lle));
427
428		lladdr = (uint8_t *)&lle->ll_addr;
429#ifdef VLAN_TAG
430		VLAN_TAG(ifp, &vtag);
431#endif
432	}
433
434	tod->tod_l2_update(tod, ifp, sa, lladdr, vtag);
435}
436
437/*
438 * XXX: implement.
439 */
440static void
441toe_route_redirect_event(void *arg __unused, struct rtentry *rt0,
442    struct rtentry *rt1, struct sockaddr *sa)
443{
444
445	return;
446}
447
#ifdef INET6
/*
 * Look up, or start resolving, the IPv6 neighbor cache entry for sa on ifp.
 *
 * Returns 0 with the link-layer address copied into lladdr when a valid entry
 * exists, EWOULDBLOCK when no valid address is available yet, and ENOMEM when
 * a new entry could not be created.
 *
 * XXX: no checks to verify that sa is really a neighbor because we assume it is
 * the result of a route lookup and is on-link on the given ifp.
 */
static int
toe_nd6_resolve(struct ifnet *ifp, struct sockaddr *sa, uint8_t *lladdr)
{
	struct sockaddr_in6 *sin6 = (void *)sa;
	struct llentry *lle;
	int lkflags = 0;
	int rc;

restart:
	IF_AFDATA_RLOCK(ifp);
	lle = lla_lookup(LLTABLE6(ifp), lkflags, sa);
	IF_AFDATA_RUNLOCK(ifp);

	if (lle == NULL) {
		/*
		 * No entry yet: create one (returned write-locked), mark it
		 * incomplete, arm its timer and send a neighbor solicitation.
		 */
		IF_AFDATA_LOCK(ifp);
		lle = nd6_lookup(&sin6->sin6_addr, ND6_CREATE | ND6_EXCLUSIVE,
		    ifp);
		IF_AFDATA_UNLOCK(ifp);
		if (lle == NULL)
			return (ENOMEM); /* Couldn't create entry in cache. */

		lle->ln_state = ND6_LLINFO_INCOMPLETE;
		nd6_llinfo_settimer_locked(lle,
		    (long)ND_IFINFO(ifp)->retrans * hz / 1000);
		LLE_WUNLOCK(lle);

		nd6_ns_output(ifp, NULL, &sin6->sin6_addr, NULL, 0);

		return (EWOULDBLOCK);
	}

	if (lle->ln_state == ND6_LLINFO_STALE) {
		if ((lkflags & LLE_EXCLUSIVE) == 0) {
			/*
			 * The entry is about to be modified, which needs the
			 * write lock; drop the read lock and redo the lookup
			 * exclusively.
			 */
			LLE_RUNLOCK(lle);
			lkflags |= LLE_EXCLUSIVE;
			goto restart;
		}

		LLE_WLOCK_ASSERT(lle);

		lle->la_asked = 0;
		lle->ln_state = ND6_LLINFO_DELAY;
		nd6_llinfo_settimer_locked(lle, (long)V_nd6_delay * hz);
	}

	rc = EWOULDBLOCK;
	if ((lle->la_flags & LLE_VALID) != 0) {
		memcpy(lladdr, &lle->ll_addr, ifp->if_addrlen);
		rc = 0;
	}

	/* Release whichever flavor of lock the lookup handed back. */
	if ((lkflags & LLE_EXCLUSIVE) != 0)
		LLE_WUNLOCK(lle);
	else
		LLE_RUNLOCK(lle);

	return (rc);
}
#endif
509
510/*
511 * Returns 0 or EWOULDBLOCK on success (any other value is an error).  0 means
512 * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
513 * tod_l2_update will be called later, when the entry is resolved or times out.
514 */
515int
516toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
517    uint8_t *lladdr, uint16_t *vtag)
518{
519#ifdef INET
520	struct llentry *lle;
521#endif
522	int rc;
523
524	switch (sa->sa_family) {
525#ifdef INET
526	case AF_INET:
527		rc = arpresolve(ifp, NULL, NULL, sa, lladdr, &lle);
528		break;
529#endif
530#ifdef INET6
531	case AF_INET6:
532		rc = toe_nd6_resolve(ifp, sa, lladdr);
533		break;
534#endif
535	default:
536		return (EPROTONOSUPPORT);
537	}
538
539	if (rc == 0) {
540#ifdef VLAN_TAG
541		if (VLAN_TAG(ifp, vtag) != 0)
542#endif
543			*vtag = 0xfff;
544	}
545
546	return (rc);
547}
548
549void
550toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
551{
552
553	INP_WLOCK_ASSERT(inp);
554
555	if (!(inp->inp_flags & INP_DROPPED)) {
556		struct tcpcb *tp = intotcpcb(inp);
557
558		KASSERT(tp->t_flags & TF_TOE,
559		    ("%s: tp %p not offloaded.", __func__, tp));
560
561		if (err == EAGAIN) {
562
563			/*
564			 * Temporary failure during offload, take this PCB back.
565			 * Detach from the TOE driver and do the rest of what
566			 * TCP's pru_connect would have done if the connection
567			 * wasn't offloaded.
568			 */
569
570			tod->tod_pcb_detach(tod, tp);
571			KASSERT(!(tp->t_flags & TF_TOE),
572			    ("%s: tp %p still offloaded.", __func__, tp));
573			tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
574			(void) tcp_output(tp);
575		} else {
576
577			INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
578			tp = tcp_drop(tp, err);
579			if (tp == NULL)
580				INP_WLOCK(inp);	/* re-acquire */
581		}
582	}
583	INP_WLOCK_ASSERT(inp);
584}
585
586static int
587toecore_load(void)
588{
589
590	mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
591	TAILQ_INIT(&toedev_list);
592
593	listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
594	    toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
595	listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
596	    toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
597	lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
598	    EVENTHANDLER_PRI_ANY);
599	route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event,
600	    toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY);
601
602	return (0);
603}
604
605static int
606toecore_unload(void)
607{
608
609	mtx_lock(&toedev_lock);
610	if (!TAILQ_EMPTY(&toedev_list)) {
611		mtx_unlock(&toedev_lock);
612		return (EBUSY);
613	}
614
615	EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
616	EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
617	EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
618	EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh);
619
620	mtx_unlock(&toedev_lock);
621	mtx_destroy(&toedev_lock);
622
623	return (0);
624}
625
626static int
627toecore_mod_handler(module_t mod, int cmd, void *arg)
628{
629
630	if (cmd == MOD_LOAD)
631		return (toecore_load());
632
633	if (cmd == MOD_UNLOAD)
634		return (toecore_unload());
635
636	return (EOPNOTSUPP);
637}
638
639static moduledata_t mod_data= {
640	"toecore",
641	toecore_mod_handler,
642	0
643};
644
645MODULE_VERSION(toecore, 1);
646DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
647