/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/types.h>
#include <sys/sockopt.h>
#include <sys/sysctl.h>
#include <sys/socket.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/if_llatbl.h>
#include <net/route.h>

#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet6/in6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/nd6.h>
/*
 * TCPSTATES is defined ahead of the TCP headers; the tcpstates[] table is
 * used by the KASSERT messages in the listen event handlers below.
 */
#define TCPSTATES
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_offload.h>
#include <netinet/toecore.h>

/*
 * Module state.  toedev_lock is held around every walk of, insertion into, and
 * removal from toedev_list in this file.  The three tags identify the event
 * handlers installed by toecore_load() so toecore_unload() can remove them.
 */
static struct mtx toedev_lock;
static TAILQ_HEAD(, toedev) toedev_list;
static eventhandler_tag listen_start_eh;
static eventhandler_tag listen_stop_eh;
static eventhandler_tag lle_event_eh;

/*
 * Default implementations of the toedev methods.  init_toedev() installs them
 * in a toedev, so every method pointer is callable even when the TOE driver
 * does not supply its own version.
 */

/* Default tod_connect: ignores its arguments and reports ENOTSUP. */
static int
toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
    struct nhop_object *nh __unused, struct sockaddr *nam __unused)
{

	return (ENOTSUP);
}

/* Default tod_listen_start: reports ENOTSUP. */
static int
toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
{

	return (ENOTSUP);
}

/* Default tod_listen_stop: reports ENOTSUP. */
static int
toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
{

	return (ENOTSUP);
}

/* Default tod_input: frees the mbuf it was handed and does nothing else. */
static void
toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
    struct mbuf *m)
{

	m_freem(m);
	return;
}

/* Default tod_rcvd: no-op. */
static void
toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
{

	return;
}

/*
 * Default for tod_output; init_toedev() also installs it as the default
 * tod_send_rst and tod_send_fin.  Reports ENOTSUP.
 */
static int
toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
{

	return (ENOTSUP);
}

/* Default tod_pcb_detach: no-op. */
static void
toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
{

	return;
}

/* Default tod_l2_update: no-op. */
static void
toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
    struct sockaddr *sa __unused, uint8_t *lladdr __unused,
    uint16_t vtag __unused)
{

	return;
}

/* Default tod_route_redirect: no-op. */
static void
toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
    struct nhop_object *nh0 __unused, struct nhop_object *nh1 __unused)
{

	return;
}

/* Default tod_syncache_added: no-op. */
static void
toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
{

	return;
}

/* Default tod_syncache_removed: no-op. */
static void
toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
{

	return;
}

/* Default tod_syncache_respond: frees the mbuf and returns 0. */
static int
toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
    struct mbuf *m)
{

	m_freem(m);
	return (0);
}

/* Default tod_offload_socket: no-op. */
static void
toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
    struct socket *so __unused)
{

	return;
}

/* Default tod_ctloutput: no-op. */
static void
toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
    int sopt_dir __unused, int sopt_name __unused)
{

	return;
}

/* Default tod_tcp_info: no-op; *ti is left untouched. */
static void
toedev_tcp_info(struct toedev *tod __unused, const struct tcpcb *tp __unused,
    struct tcp_info *ti __unused)
{

	return;
}

/* Default tod_alloc_tls_session: reports EINVAL. */
static int
toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused,
    struct ktls_session *tls __unused, int direction __unused)
{

	return (EINVAL);
}

/* Default tod_pmtu_update: no-op. */
static void
toedev_pmtu_update(struct toedev *tod __unused, struct tcpcb *tp __unused,
    tcp_seq seq __unused, int mtu __unused)
{

	return;
}

/*
 * Inform one or more TOE devices about a listening socket.
 *
 * inp must be write-locked (asserted).  Nothing happens if the inp has
 * INP_DROPPED set or its tcpcb is not in TCPS_LISTEN.  arg selects the
 * audience: NULL notifies every registered toedev, a toedev pointer notifies
 * only that device (if it is on the list).  The return value of
 * tod_listen_start is ignored.
 *
 * Used directly by toe_listen_start_event() and as the inp_apply_all()
 * callback in register_toedev().
 */
static void
toe_listen_start(struct inpcb *inp, void *arg)
{
	struct toedev *t, *tod;
	struct tcpcb *tp;

	INP_WLOCK_ASSERT(inp);
	KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
	    ("%s: inp is not a TCP inp", __func__));

	if (inp->inp_flags & INP_DROPPED)
		return;

	tp = intotcpcb(inp);
	if (tp->t_state != TCPS_LISTEN)
		return;

	t = arg;
	mtx_lock(&toedev_lock);
	TAILQ_FOREACH(tod, &toedev_list, link) {
		if (t == NULL || t == tod)
			tod->tod_listen_start(tod, tp);
	}
	mtx_unlock(&toedev_lock);
}

/*
 * Handler for the tcp_offload_listen_start event (installed by
 * toecore_load()).  Forwards the listening tcpcb's inp to toe_listen_start()
 * with a NULL arg, i.e. all registered toedevs are notified.
 */
static void
toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
{
	struct inpcb *inp = tptoinpcb(tp);

	INP_WLOCK_ASSERT(inp);
	KASSERT(tp->t_state == TCPS_LISTEN,
	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));

	toe_listen_start(inp, NULL);
}

/*
 * Handler for the tcp_offload_listen_stop event (installed by
 * toecore_load()).  Calls tod_listen_stop on every registered toedev while
 * holding toedev_lock; the methods' return values are ignored.
 */
static void
toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
{
	struct toedev *tod;
	/*
	 * inp is only referenced by the lock assertion below, hence the
	 * INVARIANTS guard.  Presumably INP_WLOCK_ASSERT() expands to nothing
	 * when INVARIANTS is not defined -- confirm against in_pcb.h.
	 */
#ifdef INVARIANTS
	struct inpcb *inp = tptoinpcb(tp);
#endif

	INP_WLOCK_ASSERT(inp);
	KASSERT(tp->t_state == TCPS_LISTEN,
	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));

	mtx_lock(&toedev_lock);
	TAILQ_FOREACH(tod, &toedev_list, link)
		tod->tod_listen_stop(tod, tp);
	mtx_unlock(&toedev_lock);
}

/*
 * Fill up a freshly allocated toedev struct with reasonable defaults.
 *
 * tod_softc is cleared and every method pointer listed below is pointed at
 * the corresponding default from the top of this file.  No other field of
 * *tod is written here.
 */
void
init_toedev(struct toedev *tod)
{

	tod->tod_softc = NULL;

	/*
	 * Provide no-op defaults so that the kernel can call any toedev
	 * function without having to check whether the TOE driver supplied one
	 * or not.
	 */
	tod->tod_connect = toedev_connect;
	tod->tod_listen_start = toedev_listen_start;
	tod->tod_listen_stop = toedev_listen_stop;
	tod->tod_input = toedev_input;
	tod->tod_rcvd = toedev_rcvd;
	tod->tod_output = toedev_output;
	/* send_rst and send_fin share the ENOTSUP default used for output. */
	tod->tod_send_rst = toedev_output;
	tod->tod_send_fin = toedev_output;
	tod->tod_pcb_detach = toedev_pcb_detach;
	tod->tod_l2_update = toedev_l2_update;
	tod->tod_route_redirect = toedev_route_redirect;
	tod->tod_syncache_added = toedev_syncache_added;
	tod->tod_syncache_removed = toedev_syncache_removed;
	tod->tod_syncache_respond = toedev_syncache_respond;
	tod->tod_offload_socket = toedev_offload_socket;
	tod->tod_ctloutput = toedev_ctloutput;
	tod->tod_tcp_info = toedev_tcp_info;
	tod->tod_alloc_tls_session = toedev_alloc_tls_session;
	tod->tod_pmtu_update = toedev_pmtu_update;
}

/*
 * Register an active TOE device with the system.  This allows it to receive
 * notifications from the kernel.
 *
 * Returns EEXIST if tod is already on the list, 0 otherwise.  On success the
 * device is appended to toedev_list and registered_toedevs is incremented
 * under toedev_lock; then, with the lock dropped, toe_listen_start() is
 * applied to every inp in V_tcbinfo with tod as its argument, so only the new
 * device is told about listeners.
 *
 * registered_toedevs is not defined in this file; it presumably comes from
 * one of the TCP offload headers included above.
 */
int
register_toedev(struct toedev *tod)
{
	struct toedev *t;

	mtx_lock(&toedev_lock);
	TAILQ_FOREACH(t, &toedev_list, link) {
		if (t == tod) {
			mtx_unlock(&toedev_lock);
			return (EEXIST);
		}
	}

	TAILQ_INSERT_TAIL(&toedev_list, tod, link);
	registered_toedevs++;
	mtx_unlock(&toedev_lock);

	inp_apply_all(&V_tcbinfo, toe_listen_start, tod);

	return (0);
}

/*
 * Remove the TOE device from the global list of active TOE devices.  It is the
 * caller's responsibility to ensure that the TOE device is quiesced prior to
 * this call.
 *
 * Returns 0 if tod was found and removed (registered_toedevs is decremented),
 * ENODEV if it was not on the list.
 */
int
unregister_toedev(struct toedev *tod)
{
	struct toedev *t, *t2;
	int rc = ENODEV;

	mtx_lock(&toedev_lock);
	TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
		if (t == tod) {
			TAILQ_REMOVE(&toedev_list, tod, link);
			registered_toedevs--;
			rc = 0;
			break;
		}
	}
	KASSERT(registered_toedevs >= 0,
	    ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
	mtx_unlock(&toedev_lock);
	return (rc);
}

/*
 * Add a syncache entry on behalf of a TOE driver.  Thin wrapper around
 * syncache_add(): the socket comes from inp->inp_socket, and the two
 * arguments the caller does not supply are passed as NULL and htons(0).
 * inp must be at least read-locked (asserted).  syncache_add()'s return value
 * is deliberately discarded.
 */
void
toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
    struct inpcb *inp, void *tod, void *todctx, uint8_t iptos)
{

	INP_RLOCK_ASSERT(inp);

	(void )syncache_add(inc, to, th, inp, inp->inp_socket, NULL, tod,
	    todctx, iptos, htons(0));
}

/*
 * Expand a syncache entry on behalf of a TOE driver.  Thin wrapper around
 * syncache_expand() that passes NULL and htons(0) for the arguments the
 * caller does not supply, and returns its result unchanged.  Must be called
 * inside the network epoch (asserted).
 */
int
toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
    struct tcphdr *th, struct socket **lsop)
{

	NET_EPOCH_ASSERT();

	return (syncache_expand(inc, to, th, lsop, NULL, htons(0)));
}

/*
 * General purpose check to see if a 4-tuple is in use by the kernel.  If a TCP
 * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
 * in TIME_WAIT may be assassinated freeing it up for re-use.
 *
 * Note that the TCP header must have been run through tcp_fields_to_host() or
 * equivalent.
 *
 * Returns 0 if the 4-tuple is free (or was freed), EADDRINUSE otherwise.
 */
int
toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
{
	struct inpcb *inp;
	struct tcpcb *tp;

	/* A successful lookup returns the inp read-locked (asserted below). */
	if (inc->inc_flags & INC_ISIPV6) {
		inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
		    inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
		    INPLOOKUP_RLOCKPCB, ifp);
	} else {
		inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
		    inc->inc_laddr, inc->inc_lport, INPLOOKUP_RLOCKPCB, ifp);
	}
	if (inp != NULL) {
		INP_RLOCK_ASSERT(inp);

		tp = intotcpcb(inp);
		if (tp->t_state == TCPS_TIME_WAIT && th != NULL) {
			/*
			 * No explicit unlock on this path: tcp_twcheck() is
			 * presumably responsible for releasing the inp lock
			 * whichever value it returns -- confirm.
			 */
			if (!tcp_twcheck(inp, NULL, th, NULL, 0))
				return (EADDRINUSE);
		} else {
			INP_RUNLOCK(inp);
			return (EADDRINUSE);
		}
	}

	return (0);
}

/*
 * Handler for lle_event (installed by toecore_load()).  Relays a link-layer
 * entry event to the TOE device behind the entry's interface by calling its
 * tod_l2_update method.
 *
 * The event is ignored unless the entry's address family is AF_INET or
 * AF_INET6, the interface has the matching IFCAP_TOE4/IFCAP_TOE6 capability
 * enabled, and TOEDEV(ifp) is non-NULL.  lle must be write-locked (asserted).
 */
static void
toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
{
	struct toedev *tod;
	struct ifnet *ifp;
	struct sockaddr *sa;
	uint8_t *lladdr;
	uint16_t vid, pcp;
	int family;
	struct sockaddr_in6 sin6;

	LLE_WLOCK_ASSERT(lle);

	ifp = lltable_get_ifp(lle->lle_tbl);
	family = lltable_get_af(lle->lle_tbl);

	if (family != AF_INET && family != AF_INET6)
		return;
	/*
	 * Not interested if the interface's TOE capability is not enabled.
	 */
	if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
	    (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
		return;

	tod = TOEDEV(ifp);
	if (tod == NULL)
		return;

	/* sin6 is the backing storage for sa in both address families. */
	sa = (struct sockaddr *)&sin6;
	lltable_fill_sa_entry(lle, sa);

	/* Tag fields reported unless the resolved branch below overrides them. */
	vid = 0xfff;
	pcp = 0;
	if (evt != LLENTRY_RESOLVED) {
		/*
		 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
		 * this entry is going to be deleted.
		 */

		lladdr = NULL;
	} else {
		KASSERT(lle->la_flags & LLE_VALID,
		    ("%s: %p resolved but not valid?", __func__, lle));

		lladdr = (uint8_t *)lle->ll_addr;
		/*
		 * NOTE(review): toe_l2_resolve() additionally falls back to
		 * ifp->if_pcp when the interface is not IFT_L2VLAN; this path
		 * does not.  Confirm whether the difference is intentional.
		 */
		VLAN_TAG(ifp, &vid);
		VLAN_PCP(ifp, &pcp);
	}

	tod->tod_l2_update(tod, ifp, sa, lladdr, EVL_MAKETAG(vid, pcp, 0));
}

/*
 * Returns 0 or EWOULDBLOCK on success (any other value is an error).  0 means
 * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
 * tod_l2_update will be called later, when the entry is resolved or times out.
 *
 * Resolution goes through arpresolve() for AF_INET and nd6_resolve() for
 * AF_INET6, each only when the kernel is built with the corresponding option;
 * any other family yields EPROTONOSUPPORT.  *vtag is written only when the
 * resolver returned 0.  The tod argument is not used by this function.
 */
int
toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
    uint8_t *lladdr, uint16_t *vtag)
{
	int rc;
	uint16_t vid, pcp;

	switch (sa->sa_family) {
#ifdef INET
	case AF_INET:
		rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		rc = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), NULL, sa, lladdr,
		    NULL, NULL);
		break;
#endif
	default:
		return (EPROTONOSUPPORT);
	}

	if (rc == 0) {
		/*
		 * Build the tag: a VLAN interface supplies both vid and pcp;
		 * otherwise an interface with a configured if_pcp yields
		 * vid 0 with that pcp; otherwise vid 0xfff and pcp 0.
		 */
		vid = 0xfff;
		pcp = 0;
		if (ifp->if_type == IFT_L2VLAN) {
			VLAN_TAG(ifp, &vid);
			VLAN_PCP(ifp, &pcp);
		} else if (ifp->if_pcp != IFNET_PCP_NONE) {
			vid = 0;
			pcp = ifp->if_pcp;
		}
		*vtag = EVL_MAKETAG(vid, pcp, 0);
	}

	return (rc);
}

/*
 * Deal with a failed offloaded connect.
 *
 * Must be called inside the network epoch with inp write-locked (both
 * asserted); inp is write-locked again on return (asserted at the end).
 * Nothing is done if the inp already has INP_DROPPED set.  Otherwise EAGAIN
 * hands the connection back to the regular TCP code path and any other error
 * drops the connection with that error.
 */
void
toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
{

	NET_EPOCH_ASSERT();
	INP_WLOCK_ASSERT(inp);

	if (!(inp->inp_flags & INP_DROPPED)) {
		struct tcpcb *tp = intotcpcb(inp);

		KASSERT(tp->t_flags & TF_TOE,
		    ("%s: tp %p not offloaded.", __func__, tp));

		if (err == EAGAIN) {
			/*
			 * Temporary failure during offload, take this PCB back.
			 * Detach from the TOE driver and do the rest of what
			 * TCP's pru_connect would have done if the connection
			 * wasn't offloaded.
			 */

			tod->tod_pcb_detach(tod, tp);
			KASSERT(!(tp->t_flags & TF_TOE),
			    ("%s: tp %p still offloaded.", __func__, tp));
			tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
			/*
			 * A negative return from tcp_output() apparently means
			 * the inp lock was released underneath us; take it
			 * again so the exit assertion holds.
			 */
			if (tcp_output(tp) < 0)
				INP_WLOCK(inp);	/* re-acquire */
		} else {
			/*
			 * Likewise, a NULL return from tcp_drop() apparently
			 * means the inp lock is no longer held.
			 */
			tp = tcp_drop(tp, err);
			if (tp == NULL)
				INP_WLOCK(inp);	/* re-acquire */
		}
	}
	INP_WLOCK_ASSERT(inp);
}

/*
 * MOD_LOAD: initialize the toedev lock and list, then install the handlers
 * for the tcp_offload_listen_start, tcp_offload_listen_stop and lle_event
 * events.  Always returns 0.
 */
static int
toecore_load(void)
{

	mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
	TAILQ_INIT(&toedev_list);

	listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
	    toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
	listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
	    toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
	lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
	    EVENTHANDLER_PRI_ANY);

	return (0);
}

/*
 * MOD_UNLOAD: refuse with EBUSY while any toedev is still registered;
 * otherwise remove the three event handlers and destroy the lock.
 */
static int
toecore_unload(void)
{

	mtx_lock(&toedev_lock);
	if (!TAILQ_EMPTY(&toedev_list)) {
		mtx_unlock(&toedev_lock);
		return (EBUSY);
	}

	/*
	 * NOTE(review): the handlers are deregistered while toedev_lock is
	 * still held, and the listen handlers above acquire toedev_lock
	 * themselves.  Confirm that EVENTHANDLER_DEREGISTER cannot block here
	 * waiting for a handler that is currently running.
	 */
	EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
	EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
	EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);

	mtx_unlock(&toedev_lock);
	mtx_destroy(&toedev_lock);

	return (0);
}

/*
 * Module event handler: dispatches MOD_LOAD and MOD_UNLOAD to the functions
 * above; every other command is answered with EOPNOTSUPP.
 */
static int
toecore_mod_handler(module_t mod, int cmd, void *arg)
{

	if (cmd == MOD_LOAD)
		return (toecore_load());

	if (cmd == MOD_UNLOAD)
		return (toecore_unload());

	return (EOPNOTSUPP);
}

/* Module description: name, event handler, and no extra argument. */
static moduledata_t mod_data= {
	"toecore",
	toecore_mod_handler,
	0
};

MODULE_VERSION(toecore, 1);
DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);