if_pfsync.c revision 314667
1/*- 2 * Copyright (c) 2002 Michael Shalayeff 3 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 25 * THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28/*- 29 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 30 * 31 * Permission to use, copy, modify, and distribute this software for any 32 * purpose with or without fee is hereby granted, provided that the above 33 * copyright notice and this permission notice appear in all copies. 34 * 35 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 36 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 37 * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 38 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 39 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 40 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 41 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 42 */ 43 44/* 45 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 46 * 47 * Revisions picked from OpenBSD after revision 1.110 import: 48 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 49 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 50 * 1.120, 1.175 - use monotonic time_uptime 51 * 1.122 - reduce number of updates for non-TCP sessions 52 * 1.125, 1.127 - rewrite merge or stale processing 53 * 1.128 - cleanups 54 * 1.146 - bzero() mbuf before sparsely filling it with data 55 * 1.170 - SIOCSIFMTU checks 56 * 1.126, 1.142 - deferred packets processing 57 * 1.173 - correct expire time processing 58 */ 59 60#include <sys/cdefs.h> 61__FBSDID("$FreeBSD: stable/10/sys/netpfil/pf/if_pfsync.c 314667 2017-03-04 13:03:31Z avg $"); 62 63#include "opt_inet.h" 64#include "opt_inet6.h" 65#include "opt_pf.h" 66 67#include <sys/param.h> 68#include <sys/bus.h> 69#include <sys/endian.h> 70#include <sys/interrupt.h> 71#include <sys/kernel.h> 72#include <sys/lock.h> 73#include <sys/mbuf.h> 74#include <sys/module.h> 75#include <sys/mutex.h> 76#include <sys/priv.h> 77#include <sys/protosw.h> 78#include <sys/socket.h> 79#include <sys/sockio.h> 80#include <sys/sysctl.h> 81 82#include <net/bpf.h> 83#include <net/if.h> 84#include <net/if_clone.h> 85#include <net/if_types.h> 86#include <net/pfvar.h> 87#include <net/if_pfsync.h> 88 89#include <netinet/if_ether.h> 90#include <netinet/in.h> 91#include <netinet/in_var.h> 92#include <netinet/ip.h> 93#include <netinet/ip_carp.h> 94#include <netinet/ip_var.h> 95#include <netinet/tcp.h> 96#include <netinet/tcp_fsm.h> 97#include <netinet/tcp_seq.h> 98 99#define 
PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header) + \
	sizeof(struct pfsync_subheader) )
/* Smallest possible pfsync packet: IP + pfsync header + one subheader. */

/* Per-packet context handed to every pfsync_in_*() input handler. */
struct pfsync_pkt {
	struct ip *ip;		/* IP header of the received packet */
	struct in_addr src;	/* sender address, saved before mbuf games */
	u_int8_t flags;		/* PFSYNC_SI_* flags (e.g. checksum matched) */
};

static int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
		    struct pfsync_state_peer *);
static int	pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int);
static int	pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int);

/*
 * Input dispatch table, indexed by the PFSYNC_ACT_* value found in each
 * subheader.  Order must match the PFSYNC_ACT_* definitions.
 */
static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = {
	pfsync_in_clr,			/* PFSYNC_ACT_CLR */
	pfsync_in_ins,			/* PFSYNC_ACT_INS */
	pfsync_in_iack,			/* PFSYNC_ACT_INS_ACK */
	pfsync_in_upd,			/* PFSYNC_ACT_UPD */
	pfsync_in_upd_c,		/* PFSYNC_ACT_UPD_C */
	pfsync_in_ureq,			/* PFSYNC_ACT_UPD_REQ */
	pfsync_in_del,			/* PFSYNC_ACT_DEL */
	pfsync_in_del_c,		/* PFSYNC_ACT_DEL_C */
	pfsync_in_error,		/* PFSYNC_ACT_INS_F */
	pfsync_in_error,		/* PFSYNC_ACT_DEL_F */
	pfsync_in_bus,			/* PFSYNC_ACT_BUS */
	pfsync_in_tdb,			/* PFSYNC_ACT_TDB */
	pfsync_in_eof			/* PFSYNC_ACT_EOF */
};

/* Descriptor for one outgoing queue: serializer, message size, action. */
struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
static void	pfsync_out_state(struct pf_state *, void *);
static void	pfsync_out_iack(struct pf_state *, void *);
static void	pfsync_out_upd_c(struct pf_state *, void *);
static void	pfsync_out_del(struct pf_state *, void *);

static struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C }
};

static void	pfsync_q_ins(struct pf_state *, int);
static void	pfsync_q_del(struct pf_state *);

static void	pfsync_update_state(struct pf_state *);

/* A queued "please send me this state" request from a peer. */
struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};

/*
 * A packet whose transmission is deferred until the peer acknowledges the
 * state insertion (or the timeout in pd_tmo fires).
 */
struct pfsync_deferral {
	struct pfsync_softc		*pd_sc;
	TAILQ_ENTRY(pfsync_deferral)	pd_entry;
	u_int				pd_refs;
	struct callout			pd_tmo;

	struct pf_state			*pd_st;
	struct mbuf			*pd_m;
};

struct pfsync_softc {
	/* Configuration */
	struct ifnet		*sc_ifp;
	struct ifnet		*sc_sync_if;
	struct ip_moptions	sc_imo;
	struct in_addr		sc_sync_peer;
	uint32_t		sc_flags;
#define	PFSYNCF_OK		0x00000001
#define	PFSYNCF_DEFER		0x00000002
#define	PFSYNCF_PUSH		0x00000004
	uint8_t			sc_maxupdates;
	struct ip		sc_template;
	struct callout		sc_tmo;
	struct mtx		sc_mtx;

	/* Queued data */
	size_t			sc_len;
	TAILQ_HEAD(, pf_state)			sc_qs[PFSYNC_S_COUNT];
	TAILQ_HEAD(, pfsync_upd_req_item)	sc_upd_req_list;
	TAILQ_HEAD(, pfsync_deferral)		sc_deferrals;
	u_int			sc_deferred;
	void			*sc_plus;
	size_t			sc_pluslen;

	/* Bulk update info */
	struct mtx		sc_bulk_mtx;
	uint32_t		sc_ureq_sent;
	int			sc_bulk_tries;
	uint32_t		sc_ureq_received;
	int			sc_bulk_hashid;
	uint64_t		sc_bulk_stateid;
	uint32_t		sc_bulk_creatorid;
	struct callout		sc_bulk_tmo;
	struct callout		sc_bulkfail_tmo;
};

/* General softc lock, protects the queued-data section above. */
#define	PFSYNC_LOCK(sc)		mtx_lock(&(sc)->sc_mtx)
#define	PFSYNC_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
#define	PFSYNC_LOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_mtx, MA_OWNED)

/* Separate lock for the bulk-update state machine. */
#define	PFSYNC_BLOCK(sc)	mtx_lock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BUNLOCK(sc)	mtx_unlock(&(sc)->sc_bulk_mtx)
#define	PFSYNC_BLOCK_ASSERT(sc)	mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)

static const char pfsyncname[] = "pfsync";
static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data");
static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL;
#define	V_pfsyncif		VNET(pfsyncif)
static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL;
#define	V_pfsync_swi_cookie	VNET(pfsync_swi_cookie)
static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
#define	V_pfsyncstats		VNET(pfsyncstats)
static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
#define	V_pfsync_carp_adj	VNET(pfsync_carp_adj)

static void	pfsync_timeout(void *);
static void	pfsync_push(struct pfsync_softc *);
static void	pfsyncintr(void *);
static int	pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
		    void *);
static void	pfsync_multicast_cleanup(struct pfsync_softc *);
static void	pfsync_pointers_init(void);
static void	pfsync_pointers_uninit(void);
static int	pfsync_init(void);
static void	pfsync_uninit(void);

SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW,
    &VNET_NAME(pfsyncstats), pfsyncstats,
    "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
    &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");

static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
static void	pfsync_clone_destroy(struct ifnet *);
static int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
		    struct pf_state_peer *);
static int	pfsyncoutput(struct ifnet *, struct mbuf *,
		    const struct sockaddr *, struct route *);
static int	pfsyncioctl(struct ifnet *, u_long, caddr_t);

static int	pfsync_defer(struct pf_state *, struct mbuf *);
static void	pfsync_undefer(struct pfsync_deferral *, int);
static void	pfsync_undefer_state(struct pf_state *, int);
static void	pfsync_defer_tmo(void *);

static void	pfsync_request_update(u_int32_t, u_int64_t);
static void	pfsync_update_state_req(struct pf_state *);

static void	pfsync_drop(struct pfsync_softc *);
static void	pfsync_sendout(int);
static void	pfsync_send_plus(void *, size_t);

static void	pfsync_bulk_start(void);
static void	pfsync_bulk_status(u_int8_t);
static void	pfsync_bulk_update(void *);
static void	pfsync_bulk_fail(void *);

#ifdef IPSEC
static void	pfsync_update_net_tdb(struct pfsync_tdb *);
#endif

#define PFSYNC_MAX_BULKTRIES	12

VNET_DEFINE(struct if_clone *, pfsync_cloner);
#define	V_pfsync_cloner	VNET(pfsync_cloner)

/*
 * if_clone create handler: allocate and attach the single pfsync0
 * interface.  Only unit 0 is allowed; 'param' is unused.
 * Returns 0 on success, EINVAL for unit != 0, ENOSPC if if_alloc() fails.
 */
static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
	sc->sc_flags |= PFSYNCF_OK;

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);

	/* An empty packet still carries the fixed headers. */
	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
	if (ifp == NULL) {
		free(sc, M_PFSYNC);
		return (ENOSPC);
	}
	if_initname(ifp, pfsyncname, unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = ETHERMTU;
	mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
	mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
	callout_init(&sc->sc_tmo, 1);
	/* Bulk callouts run with sc_bulk_mtx held. */
	callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
	callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);

	if_attach(ifp);

	bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);

	V_pfsyncif = sc;

	return (0);
}

/*
 * if_clone destroy handler: tear down the pfsync interface.
 * Frees pending deferrals, drains all callouts, detaches the ifnet and
 * releases the softc.
 */
static void
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;

	/*
	 * At this stage, everything should have already been
	 * cleared by pfsync_uninit(), and we have only to
	 * drain callouts.
	 */
	while (sc->sc_deferred > 0) {
		struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals);

		TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
		sc->sc_deferred--;
		if (callout_stop(&pd->pd_tmo)) {
			/* Callout never ran: we own the state and mbuf. */
			pf_release_state(pd->pd_st);
			m_freem(pd->pd_m);
			free(pd, M_PFSYNC);
		} else {
			/* Callout is running; wait for it to finish. */
			pd->pd_refs++;
			callout_drain(&pd->pd_tmo);
			free(pd, M_PFSYNC);
		}
	}

	callout_drain(&sc->sc_tmo);
	callout_drain(&sc->sc_bulkfail_tmo);
	callout_drain(&sc->sc_bulk_tmo);

	if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
		(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
	bpfdetach(ifp);
	if_detach(ifp);

	pfsync_drop(sc);

	if_free(ifp);
	if (sc->sc_imo.imo_membership)
		pfsync_multicast_cleanup(sc);
	mtx_destroy(&sc->sc_mtx);
	mtx_destroy(&sc->sc_bulk_mtx);
	free(sc, M_PFSYNC);

	V_pfsyncif = NULL;
}

/*
 * Allocate scrub state for peer 'd' if the wire image 's' says scrubbing
 * is active and none is allocated yet.  Returns ENOMEM on allocation
 * failure, 0 otherwise.
 */
static int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}
	return (0);
}


/*
 * Import a state received from a peer (or via the DIOCADDSTATE ioctl)
 * into the local state table.
 *
 * sp     - wire-format state, all multi-byte fields in network order.
 * flags  - PFSYNC_SI_* flags; PFSYNC_SI_IOCTL marks the ioctl path,
 *          PFSYNC_SI_CKSUM means the sender's ruleset checksum matched.
 *
 * Returns 0 on success (or when silently skipping a state from an
 * unknown interface), EINVAL on bad input, ENOMEM on allocation failure.
 * Caller must hold the pf rules read lock (PF_RULES_RASSERT below).
 */
static int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pfsync_softc *sc = V_pfsyncif;
#ifndef	__NO_STRICT_ALIGNMENT
	struct pfsync_state_key key[2];
#endif
	struct pfsync_state_key *kw, *ks;
	struct pf_state	*st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif	*kif;
	int error;

	PF_RULES_RASSERT();

	if (sp->creatorid == 0) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: invalid creator id: %08x\n", __func__,
			    ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_find(sp->ifname)) == NULL) {
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("%s: unknown interface: %s\n", __func__,
			    sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
		r = pf_main_ruleset.rules[
		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
	else
		r = &V_pf_default_rule;

	/* Respect the rule's state limit. */
	if ((r->max_states &&
	    counter_u64_fetch(r->states_cur) >= r->max_states))
		goto cleanup;

	/*
	 * XXXGL: consider M_WAITOK in ioctl path after.
	 */
	if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL)
		goto cleanup;

	if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL)
		goto cleanup;

#ifndef	__NO_STRICT_ALIGNMENT
	/* Copy keys out of the packet so aligned accesses are safe. */
	bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2);
	kw = &key[PF_SK_WIRE];
	ks = &key[PF_SK_STACK];
#else
	kw = &sp->key[PF_SK_WIRE];
	ks = &sp->key[PF_SK_STACK];
#endif

	/*
	 * Only allocate a distinct stack key when wire and stack keys
	 * differ (i.e. the state was NATed); otherwise share one key.
	 */
	if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) ||
	    PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) ||
	    kw->port[0] != ks->port[0] ||
	    kw->port[1] != ks->port[1]) {
		sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
		if (sks == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* Copy to state key(s). */
	skw->addr[0] = kw->addr[0];
	skw->addr[1] = kw->addr[1];
	skw->port[0] = kw->port[0];
	skw->port[1] = kw->port[1];
	skw->proto = sp->proto;
	skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = ks->addr[0];
		sks->addr[1] = ks->addr[1];
		sks->port[0] = ks->port[0];
		sks->port[1] = ks->port[1];
		sks->proto = sp->proto;
		sks->af = sp->af;
	}

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_uptime - ntohl(sp->creation);
	st->expire = time_uptime;
	if (sp->expire) {
		uint32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = V_pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = sp->state_flags;

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->nat_rule.ptr = NULL;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* Keep the state from being re-announced while we insert it. */
	if (!(flags & PFSYNC_SI_IOCTL))
		st->state_flags |= PFSTATE_NOSYNC;

	if ((error = pf_state_insert(kif, skw, sks, st)) != 0)
		goto cleanup_state;

	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
	counter_u64_add(r->states_cur, 1);
	counter_u64_add(r->states_tot, 1);

	if (!(flags & PFSYNC_SI_IOCTL)) {
		st->state_flags &= ~PFSTATE_NOSYNC;
		if (st->state_flags & PFSTATE_ACK) {
			/* Sender asked for an insert acknowledgement. */
			pfsync_q_ins(st, PFSYNC_S_IACK);
			pfsync_push(sc);
		}
	}
	st->state_flags &= ~PFSTATE_ACK;
	PF_STATE_UNLOCK(st);

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		uma_zfree(V_pf_state_key_z, skw);
	if (sks != NULL)
		uma_zfree(V_pf_state_key_z, sks);

cleanup_state:	/* pf_state_insert() frees the state keys. */
	if (st) {
		if (st->dst.scrub)
			uma_zfree(V_pf_state_scrub_z, st->dst.scrub);
		if (st->src.scrub)
			uma_zfree(V_pf_state_scrub_z, st->src.scrub);
		uma_zfree(V_pf_state_z, st);
	}
	return (error);
}

/*
 * ipproto input handler for pfsync packets.  Validates the IP/pfsync
 * headers, then walks the subheaders and dispatches each batch of
 * actions through pfsync_acts[].  Consumes the mbuf.
 */
static void
pfsync_input(struct mbuf *m, __unused int off)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_pkt pkt;
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, len;
	int rv;
	uint16_t count;

	V_pfsyncstats.pfsyncs_ipackets++;

	/* Verify that we have a sync interface configured.
*/ 587 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 588 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 589 goto done; 590 591 /* verify that the packet came in on the right interface */ 592 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 593 V_pfsyncstats.pfsyncs_badif++; 594 goto done; 595 } 596 597 sc->sc_ifp->if_ipackets++; 598 sc->sc_ifp->if_ibytes += m->m_pkthdr.len; 599 /* verify that the IP TTL is 255. */ 600 if (ip->ip_ttl != PFSYNC_DFLTTL) { 601 V_pfsyncstats.pfsyncs_badttl++; 602 goto done; 603 } 604 605 offset = ip->ip_hl << 2; 606 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 607 V_pfsyncstats.pfsyncs_hdrops++; 608 goto done; 609 } 610 611 if (offset + sizeof(*ph) > m->m_len) { 612 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 613 V_pfsyncstats.pfsyncs_hdrops++; 614 return; 615 } 616 ip = mtod(m, struct ip *); 617 } 618 ph = (struct pfsync_header *)((char *)ip + offset); 619 620 /* verify the version */ 621 if (ph->version != PFSYNC_VERSION) { 622 V_pfsyncstats.pfsyncs_badver++; 623 goto done; 624 } 625 626 len = ntohs(ph->len) + offset; 627 if (m->m_pkthdr.len < len) { 628 V_pfsyncstats.pfsyncs_badlen++; 629 goto done; 630 } 631 632 /* Cheaper to grab this now than having to mess with mbufs later */ 633 pkt.ip = ip; 634 pkt.src = ip->ip_src; 635 pkt.flags = 0; 636 637 /* 638 * Trusting pf_chksum during packet processing, as well as seeking 639 * in interface name tree, require holding PF_RULES_RLOCK(). 
640 */ 641 PF_RULES_RLOCK(); 642 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 643 pkt.flags |= PFSYNC_SI_CKSUM; 644 645 offset += sizeof(*ph); 646 while (offset <= len - sizeof(subh)) { 647 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 648 offset += sizeof(subh); 649 650 if (subh.action >= PFSYNC_ACT_MAX) { 651 V_pfsyncstats.pfsyncs_badact++; 652 PF_RULES_RUNLOCK(); 653 goto done; 654 } 655 656 count = ntohs(subh.count); 657 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 658 rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); 659 if (rv == -1) { 660 PF_RULES_RUNLOCK(); 661 return; 662 } 663 664 offset += rv; 665 } 666 PF_RULES_RUNLOCK(); 667 668done: 669 m_freem(m); 670} 671 672static int 673pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 674{ 675 struct pfsync_clr *clr; 676 struct mbuf *mp; 677 int len = sizeof(*clr) * count; 678 int i, offp; 679 u_int32_t creatorid; 680 681 mp = m_pulldown(m, offset, len, &offp); 682 if (mp == NULL) { 683 V_pfsyncstats.pfsyncs_badlen++; 684 return (-1); 685 } 686 clr = (struct pfsync_clr *)(mp->m_data + offp); 687 688 for (i = 0; i < count; i++) { 689 creatorid = clr[i].creatorid; 690 691 if (clr[i].ifname[0] != '\0' && 692 pfi_kif_find(clr[i].ifname) == NULL) 693 continue; 694 695 for (int i = 0; i <= pf_hashmask; i++) { 696 struct pf_idhash *ih = &V_pf_idhash[i]; 697 struct pf_state *s; 698relock: 699 PF_HASHROW_LOCK(ih); 700 LIST_FOREACH(s, &ih->states, entry) { 701 if (s->creatorid == creatorid) { 702 s->state_flags |= PFSTATE_NOSYNC; 703 pf_unlink_state(s, PF_ENTER_LOCKED); 704 goto relock; 705 } 706 } 707 PF_HASHROW_UNLOCK(ih); 708 } 709 } 710 711 return (len); 712} 713 714static int 715pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 716{ 717 struct mbuf *mp; 718 struct pfsync_state *sa, *sp; 719 int len = sizeof(*sp) * count; 720 int i, offp; 721 722 mp = m_pulldown(m, offset, len, &offp); 723 if (mp == NULL) { 724 
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* Check for invalid values. */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (sp->af != AF_INET && sp->af != AF_INET6)) {
			if (V_pf_status.debug >= PF_DEBUG_MISC)
				printf("%s: invalid value\n", __func__);
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, pkt->flags) == ENOMEM)
			/* Drop out, but process the rest of the actions. */
			break;
	}

	return (len);
}

/*
 * PFSYNC_ACT_INS_ACK handler: the peer acknowledged states we inserted;
 * release any packets we deferred while waiting for the ack.
 * Returns bytes consumed, or -1 on a truncated message.
 */
static int
pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_ins_ack *ia, *iaa;
	struct pf_state *st;

	struct mbuf *mp;
	int len = count * sizeof(*ia);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	iaa = (struct pfsync_ins_ack *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ia = &iaa[i];

		/* pf_find_state_byid() returns the state locked. */
		st = pf_find_state_byid(ia->id, ia->creatorid);
		if (st == NULL)
			continue;

		if (st->state_flags & PFSTATE_ACK) {
			PFSYNC_LOCK(V_pfsyncif);
			pfsync_undefer_state(st, 0);
			PFSYNC_UNLOCK(V_pfsyncif);
		}
		PF_STATE_UNLOCK(st);
	}
	/*
	 * XXX this is not yet implemented, but we know the size of the
	 * message so we can skip it.
	 */

	return (count * sizeof(struct pfsync_ins_ack));
}

/*
 * Merge the peer's view of a TCP state into ours.  Counts in 'sync'
 * how many of our two peers are ahead of the update (i.e. the update
 * is stale from our point of view); callers treat sync > 0 as "tell
 * the peer about our newer state".  Caller holds the state lock.
 */
static int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	PF_STATE_LOCK_ASSERT(st);

	/*
	 * The state should never go backwards except
	 * for syn-proxy states. Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}

/*
 * PFSYNC_ACT_UPD handler: apply full state updates from the peer.
 * Unknown states are imported; known states are merged, and if our
 * copy is newer (sync != 0) we schedule an update back to the peer.
 * Returns bytes consumed, or -1 on a truncated message.
 */
static int
pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_state *sa, *sp;
	struct pf_state *st;
	int sync;

	struct mbuf *mp;
	int len = count * sizeof(*sp);
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	sa = (struct pfsync_state *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		sp = &sa[i];

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: PFSYNC_ACT_UPD: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(sp->id, sp->creatorid);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, 0))
				V_pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (st->state_flags & PFSTATE_ACK) {
			PFSYNC_LOCK(sc);
			pfsync_undefer_state(st, 1);
			PFSYNC_UNLOCK(sc);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machine always go
			 * forwards
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src,
				    &st->src);
			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}
		/* Accept the update unless both directions were stale. */
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			/* Our copy is newer: push an update to the peer. */
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			PFSYNC_LOCK(sc);
			pfsync_push(sc);
			PFSYNC_UNLOCK(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (len);
}

/*
 * PFSYNC_ACT_UPD_C handler: apply compressed state updates.  Like
 * pfsync_in_upd(), but the message carries only the state id, so an
 * unknown state triggers a request for a full update rather than an
 * import.  Returns bytes consumed, or -1 on a truncated message.
 */
static int
pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_upd_c *ua, *up;
	struct pf_state *st;
	int len = count * sizeof(*up);
	int sync;
	struct mbuf *mp;
	int offp, i;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ua = (struct pfsync_upd_c *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		up = &ua[i];

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			if (V_pf_status.debug >= PF_DEBUG_MISC) {
				printf("pfsync_input: "
				    "PFSYNC_ACT_UPD_C: "
				    "invalid value\n");
			}
			V_pfsyncstats.pfsyncs_badval++;
			continue;
		}

		st = pf_find_state_byid(up->id, up->creatorid);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			PFSYNC_LOCK(sc);
			pfsync_request_update(up->creatorid, up->id);
			PFSYNC_UNLOCK(sc);
			continue;
		}

		if (st->state_flags & PFSTATE_ACK) {
			PFSYNC_LOCK(sc);
			pfsync_undefer_state(st, 1);
			PFSYNC_UNLOCK(sc);
		}

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machine always go
			 * forwards
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);
			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		/* Accept the update unless both directions were stale. */
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = time_uptime;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			/* Our copy is newer: push an update to the peer. */
			V_pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			PF_STATE_UNLOCK(st);
			PFSYNC_LOCK(sc);
			pfsync_push(sc);
			PFSYNC_UNLOCK(sc);
			continue;
		}
		PF_STATE_UNLOCK(st);
	}

	return (len);
}

/*
 * PFSYNC_ACT_UPD_REQ handler: the peer asked for updates.  An all-zero
 * id/creatorid requests a full bulk update; otherwise queue an update
 * for the named state.  Returns bytes consumed, or -1 on a truncated
 * message.
 */
static int
pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
{
	struct pfsync_upd_req *ur, *ura;
	struct mbuf *mp;
	int len = count * sizeof(*ur);
	int i, offp;

	struct pf_state *st;

	mp = m_pulldown(m, offset, len, &offp);
	if (mp == NULL) {
		V_pfsyncstats.pfsyncs_badlen++;
		return (-1);
	}
	ura = (struct pfsync_upd_req *)(mp->m_data + offp);

	for (i = 0; i < count; i++) {
		ur = &ura[i];

		if (ur->id == 0 && ur->creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(ur->id, ur->creatorid);
			if (st == NULL) {
				V_pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			/* Never announce states flagged no-sync. */
			if (st->state_flags & PFSTATE_NOSYNC) {
				PF_STATE_UNLOCK(st);
				continue;
			}

pfsync_update_state_req(st); 1040 PF_STATE_UNLOCK(st); 1041 } 1042 } 1043 1044 return (len); 1045} 1046 1047static int 1048pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1049{ 1050 struct mbuf *mp; 1051 struct pfsync_state *sa, *sp; 1052 struct pf_state *st; 1053 int len = count * sizeof(*sp); 1054 int offp, i; 1055 1056 mp = m_pulldown(m, offset, len, &offp); 1057 if (mp == NULL) { 1058 V_pfsyncstats.pfsyncs_badlen++; 1059 return (-1); 1060 } 1061 sa = (struct pfsync_state *)(mp->m_data + offp); 1062 1063 for (i = 0; i < count; i++) { 1064 sp = &sa[i]; 1065 1066 st = pf_find_state_byid(sp->id, sp->creatorid); 1067 if (st == NULL) { 1068 V_pfsyncstats.pfsyncs_badstate++; 1069 continue; 1070 } 1071 st->state_flags |= PFSTATE_NOSYNC; 1072 pf_unlink_state(st, PF_ENTER_LOCKED); 1073 } 1074 1075 return (len); 1076} 1077 1078static int 1079pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1080{ 1081 struct mbuf *mp; 1082 struct pfsync_del_c *sa, *sp; 1083 struct pf_state *st; 1084 int len = count * sizeof(*sp); 1085 int offp, i; 1086 1087 mp = m_pulldown(m, offset, len, &offp); 1088 if (mp == NULL) { 1089 V_pfsyncstats.pfsyncs_badlen++; 1090 return (-1); 1091 } 1092 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1093 1094 for (i = 0; i < count; i++) { 1095 sp = &sa[i]; 1096 1097 st = pf_find_state_byid(sp->id, sp->creatorid); 1098 if (st == NULL) { 1099 V_pfsyncstats.pfsyncs_badstate++; 1100 continue; 1101 } 1102 1103 st->state_flags |= PFSTATE_NOSYNC; 1104 pf_unlink_state(st, PF_ENTER_LOCKED); 1105 } 1106 1107 return (len); 1108} 1109 1110static int 1111pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1112{ 1113 struct pfsync_softc *sc = V_pfsyncif; 1114 struct pfsync_bus *bus; 1115 struct mbuf *mp; 1116 int len = count * sizeof(*bus); 1117 int offp; 1118 1119 PFSYNC_BLOCK(sc); 1120 1121 /* If we're not waiting for a bulk update, who cares. 
*/ 1122 if (sc->sc_ureq_sent == 0) { 1123 PFSYNC_BUNLOCK(sc); 1124 return (len); 1125 } 1126 1127 mp = m_pulldown(m, offset, len, &offp); 1128 if (mp == NULL) { 1129 PFSYNC_BUNLOCK(sc); 1130 V_pfsyncstats.pfsyncs_badlen++; 1131 return (-1); 1132 } 1133 bus = (struct pfsync_bus *)(mp->m_data + offp); 1134 1135 switch (bus->status) { 1136 case PFSYNC_BUS_START: 1137 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1138 V_pf_limits[PF_LIMIT_STATES].limit / 1139 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1140 sizeof(struct pfsync_state)), 1141 pfsync_bulk_fail, sc); 1142 if (V_pf_status.debug >= PF_DEBUG_MISC) 1143 printf("pfsync: received bulk update start\n"); 1144 break; 1145 1146 case PFSYNC_BUS_END: 1147 if (time_uptime - ntohl(bus->endtime) >= 1148 sc->sc_ureq_sent) { 1149 /* that's it, we're happy */ 1150 sc->sc_ureq_sent = 0; 1151 sc->sc_bulk_tries = 0; 1152 callout_stop(&sc->sc_bulkfail_tmo); 1153 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1154 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1155 "pfsync bulk done"); 1156 sc->sc_flags |= PFSYNCF_OK; 1157 if (V_pf_status.debug >= PF_DEBUG_MISC) 1158 printf("pfsync: received valid " 1159 "bulk update end\n"); 1160 } else { 1161 if (V_pf_status.debug >= PF_DEBUG_MISC) 1162 printf("pfsync: received invalid " 1163 "bulk update end: bad timestamp\n"); 1164 } 1165 break; 1166 } 1167 PFSYNC_BUNLOCK(sc); 1168 1169 return (len); 1170} 1171 1172static int 1173pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1174{ 1175 int len = count * sizeof(struct pfsync_tdb); 1176 1177#if defined(IPSEC) 1178 struct pfsync_tdb *tp; 1179 struct mbuf *mp; 1180 int offp; 1181 int i; 1182 int s; 1183 1184 mp = m_pulldown(m, offset, len, &offp); 1185 if (mp == NULL) { 1186 V_pfsyncstats.pfsyncs_badlen++; 1187 return (-1); 1188 } 1189 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1190 1191 for (i = 0; i < count; i++) 1192 pfsync_update_net_tdb(&tp[i]); 1193#endif 1194 1195 return (len); 1196} 1197 1198#if 
defined(IPSEC) 1199/* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1200static void 1201pfsync_update_net_tdb(struct pfsync_tdb *pt) 1202{ 1203 struct tdb *tdb; 1204 int s; 1205 1206 /* check for invalid values */ 1207 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1208 (pt->dst.sa.sa_family != AF_INET && 1209 pt->dst.sa.sa_family != AF_INET6)) 1210 goto bad; 1211 1212 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1213 if (tdb) { 1214 pt->rpl = ntohl(pt->rpl); 1215 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1216 1217 /* Neither replay nor byte counter should ever decrease. */ 1218 if (pt->rpl < tdb->tdb_rpl || 1219 pt->cur_bytes < tdb->tdb_cur_bytes) { 1220 goto bad; 1221 } 1222 1223 tdb->tdb_rpl = pt->rpl; 1224 tdb->tdb_cur_bytes = pt->cur_bytes; 1225 } 1226 return; 1227 1228bad: 1229 if (V_pf_status.debug >= PF_DEBUG_MISC) 1230 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1231 "invalid value\n"); 1232 V_pfsyncstats.pfsyncs_badstate++; 1233 return; 1234} 1235#endif 1236 1237 1238static int 1239pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1240{ 1241 /* check if we are at the right place in the packet */ 1242 if (offset != m->m_pkthdr.len) 1243 V_pfsyncstats.pfsyncs_badlen++; 1244 1245 /* we're done. 
free and let the caller return */ 1246 m_freem(m); 1247 return (-1); 1248} 1249 1250static int 1251pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1252{ 1253 V_pfsyncstats.pfsyncs_badact++; 1254 1255 m_freem(m); 1256 return (-1); 1257} 1258 1259static int 1260pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1261 struct route *rt) 1262{ 1263 m_freem(m); 1264 return (0); 1265} 1266 1267/* ARGSUSED */ 1268static int 1269pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1270{ 1271 struct pfsync_softc *sc = ifp->if_softc; 1272 struct ifreq *ifr = (struct ifreq *)data; 1273 struct pfsyncreq pfsyncr; 1274 int error; 1275 1276 switch (cmd) { 1277 case SIOCSIFFLAGS: 1278 PFSYNC_LOCK(sc); 1279 if (ifp->if_flags & IFF_UP) { 1280 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1281 PFSYNC_UNLOCK(sc); 1282 pfsync_pointers_init(); 1283 } else { 1284 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1285 PFSYNC_UNLOCK(sc); 1286 pfsync_pointers_uninit(); 1287 } 1288 break; 1289 case SIOCSIFMTU: 1290 if (!sc->sc_sync_if || 1291 ifr->ifr_mtu <= PFSYNC_MINPKT || 1292 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1293 return (EINVAL); 1294 if (ifr->ifr_mtu < ifp->if_mtu) { 1295 PFSYNC_LOCK(sc); 1296 if (sc->sc_len > PFSYNC_MINPKT) 1297 pfsync_sendout(1); 1298 PFSYNC_UNLOCK(sc); 1299 } 1300 ifp->if_mtu = ifr->ifr_mtu; 1301 break; 1302 case SIOCGETPFSYNC: 1303 bzero(&pfsyncr, sizeof(pfsyncr)); 1304 PFSYNC_LOCK(sc); 1305 if (sc->sc_sync_if) { 1306 strlcpy(pfsyncr.pfsyncr_syncdev, 1307 sc->sc_sync_if->if_xname, IFNAMSIZ); 1308 } 1309 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1310 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1311 pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == 1312 (sc->sc_flags & PFSYNCF_DEFER)); 1313 PFSYNC_UNLOCK(sc); 1314 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); 1315 1316 case SIOCSETPFSYNC: 1317 { 1318 struct ip_moptions *imo = &sc->sc_imo; 1319 struct ifnet *sifp; 1320 struct ip *ip; 1321 void *mship = NULL; 
		if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		if (pfsyncr.pfsyncr_maxupdates > 255)
			return (EINVAL);

		if (pfsyncr.pfsyncr_syncdev[0] == 0)
			sifp = NULL;
		else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL)
			return (EINVAL);

		/* Preallocate multicast state before taking the lock. */
		if (sifp != NULL && (
		    pfsyncr.pfsyncr_syncpeer.s_addr == 0 ||
		    pfsyncr.pfsyncr_syncpeer.s_addr ==
		    htonl(INADDR_PFSYNC_GROUP)))
			mship = malloc((sizeof(struct in_multi *) *
			    IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO);

		PFSYNC_LOCK(sc);
		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
		if (pfsyncr.pfsyncr_defer) {
			sc->sc_flags |= PFSYNCF_DEFER;
			pfsync_defer_ptr = pfsync_defer;
		} else {
			sc->sc_flags &= ~PFSYNCF_DEFER;
			pfsync_defer_ptr = NULL;
		}

		if (sifp == NULL) {
			/* No syncdev: detach from the old one and stop. */
			if (sc->sc_sync_if)
				if_rele(sc->sc_sync_if);
			sc->sc_sync_if = NULL;
			if (imo->imo_membership)
				pfsync_multicast_cleanup(sc);
			PFSYNC_UNLOCK(sc);
			break;
		}

		/* Flush queued data that might not fit the new device. */
		if (sc->sc_len > PFSYNC_MINPKT &&
		    (sifp->if_mtu < sc->sc_ifp->if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
			pfsync_sendout(1);

		if (imo->imo_membership)
			pfsync_multicast_cleanup(sc);

		if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) {
			error = pfsync_multicast_setup(sc, sifp, mship);
			if (error) {
				/*
				 * NOTE(review): this path returns with the
				 * pfsync lock still held (no PFSYNC_UNLOCK
				 * before return) -- looks like a lock leak;
				 * confirm against later revisions.
				 */
				if_rele(sifp);
				free(mship, M_PFSYNC);
				return (error);
			}
		}
		if (sc->sc_sync_if)
			if_rele(sc->sc_sync_if);
		sc->sc_sync_if = sifp;

		/* Prebuild the IP header used for every outgoing packet. */
		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later. */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		/* Request a full state table update. */
		if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(V_pfsync_carp_adj,
			    "pfsync bulk start");
		sc->sc_flags &= ~PFSYNCF_OK;
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: requesting bulk update\n");
		pfsync_request_update(0, 0);
		PFSYNC_UNLOCK(sc);
		PFSYNC_BLOCK(sc);
		sc->sc_ureq_sent = time_uptime;
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
		    sc);
		PFSYNC_BUNLOCK(sc);

		break;
	    }
	default:
		return (ENOTTY);
	}

	return (0);
}

/* Queue write method: serialize a full state export. */
static void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

/* Queue write method: serialize an insert acknowledgement. */
static void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

/* Queue write method: serialize a compressed update. */
static void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;
	up->timeout = st->timeout;
}

/* Queue write method: serialize a compressed delete and mark NOSYNC. */
static void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;
	st->state_flags |= PFSTATE_NOSYNC;
}

/*
 * Throw away everything queued for transmission: release all queued
 * state references, free pending update requests and reset the packet
 * length back to the bare minimum.
 */
static void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st, *next;
	struct pfsync_upd_req_item *ur;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) {
			KASSERT(st->sync_state == q,
				("%s: st->sync_state == q",
					__func__));
			st->sync_state = PFSYNC_S_NONE;
			pf_release_state(st);
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		free(ur, M_PFSYNC);
	}

	sc->sc_plus = NULL;
	sc->sc_len = PFSYNC_MINPKT;
}

/*
 * Build one pfsync datagram out of everything currently queued (state
 * queues, update requests, the "plus" region and a trailing EOF subheader),
 * tap it to bpf and enqueue it on the sync interface.  If schedswi is set,
 * kick the software interrupt to transmit immediately.
 */
static void
pfsync_sendout(int schedswi)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct ifnet *ifp = sc->sc_ifp;
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	int offset;
	int q, count = 0;

	KASSERT(sc != NULL, ("%s: null sc", __func__));
	KASSERT(sc->sc_len > PFSYNC_MINPKT,
	    ("%s: sc_len %zu", __func__, sc->sc_len));
	PFSYNC_LOCK_ASSERT(sc);

	if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
		/* Nobody is listening; discard everything queued. */
		pfsync_drop(sc);
		return;
	}

	m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL) {
		sc->sc_ifp->if_oerrors++;
		V_pfsyncstats.pfsyncs_onomem++;
		return;
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = (struct ip *)m->m_data;
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
			KASSERT(st->sync_state == q,
				("%s: st->sync_state == q",
					__func__));
			/*
			 * XXXGL: some of write methods do unlocked reads
			 * of state data :(
			 */
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;
			st->sync_state = PFSYNC_S_NONE;
			pf_release_state(st);
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->count = htons(count);
		V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
	}

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);
			free(ur, M_PFSYNC);
			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
		V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count;
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	subh = (struct pfsync_subheader *)(m->m_data + offset);
	offset += sizeof(*subh);

	bzero(subh, sizeof(*subh));
	subh->action = PFSYNC_ACT_EOF;
	subh->count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++;

	/* we're done, let's put it on the wire */
	if (ifp->if_bpf) {
		/* bpf sees the packet without the IP header. */
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		BPF_MTAP(ifp, m);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}

	sc->sc_ifp->if_opackets++;
	sc->sc_ifp->if_obytes += m->m_pkthdr.len;
	sc->sc_len = PFSYNC_MINPKT;

	if (!_IF_QFULL(&sc->sc_ifp->if_snd))
		_IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
	else {
		m_freem(m);
		sc->sc_ifp->if_snd.ifq_drops++;
	}
	if (schedswi)
		swi_sched(V_pfsync_swi_cookie, 0);
}

/*
 * pf hook: a new state was inserted; queue an insert message for it
 * unless the state (or its rule) is excluded from syncing.
 */
static void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (st->state_flags & PFSTATE_NOSYNC)
		return;

	if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		st->state_flags |= PFSTATE_NOSYNC;
		return;
	}

	KASSERT(st->sync_state == PFSYNC_S_NONE,
	    ("%s: st->sync_state %u", __func__, st->sync_state));

	PFSYNC_LOCK(sc);
	if (sc->sc_len == PFSYNC_MINPKT)
		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);

	pfsync_q_ins(st, PFSYNC_S_INS);
	PFSYNC_UNLOCK(sc);

	st->sync_updates = 0;
}

/*
 * pf hook: defer transmission of the initial packet of a new state
 * until the peer has acknowledged the state (or a timeout fires).
 * Returns 1 if the packet was taken over, 0 if pf should send it.
 */
static int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct pfsync_deferral *pd;
1676 1677 if (m->m_flags & (M_BCAST|M_MCAST)) 1678 return (0); 1679 1680 PFSYNC_LOCK(sc); 1681 1682 if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || 1683 !(sc->sc_flags & PFSYNCF_DEFER)) { 1684 PFSYNC_UNLOCK(sc); 1685 return (0); 1686 } 1687 1688 if (sc->sc_deferred >= 128) 1689 pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); 1690 1691 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1692 if (pd == NULL) 1693 return (0); 1694 sc->sc_deferred++; 1695 1696 m->m_flags |= M_SKIP_FIREWALL; 1697 st->state_flags |= PFSTATE_ACK; 1698 1699 pd->pd_sc = sc; 1700 pd->pd_refs = 0; 1701 pd->pd_st = st; 1702 pf_ref_state(st); 1703 pd->pd_m = m; 1704 1705 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1706 callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1707 callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); 1708 1709 pfsync_push(sc); 1710 1711 return (1); 1712} 1713 1714static void 1715pfsync_undefer(struct pfsync_deferral *pd, int drop) 1716{ 1717 struct pfsync_softc *sc = pd->pd_sc; 1718 struct mbuf *m = pd->pd_m; 1719 struct pf_state *st = pd->pd_st; 1720 1721 PFSYNC_LOCK_ASSERT(sc); 1722 1723 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1724 sc->sc_deferred--; 1725 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1726 free(pd, M_PFSYNC); 1727 pf_release_state(st); 1728 1729 if (drop) 1730 m_freem(m); 1731 else { 1732 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1733 pfsync_push(sc); 1734 } 1735} 1736 1737static void 1738pfsync_defer_tmo(void *arg) 1739{ 1740 struct pfsync_deferral *pd = arg; 1741 struct pfsync_softc *sc = pd->pd_sc; 1742 struct mbuf *m = pd->pd_m; 1743 struct pf_state *st = pd->pd_st; 1744 1745 PFSYNC_LOCK_ASSERT(sc); 1746 1747 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 1748 1749 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1750 sc->sc_deferred--; 1751 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! 
*/ 1752 if (pd->pd_refs == 0) 1753 free(pd, M_PFSYNC); 1754 PFSYNC_UNLOCK(sc); 1755 1756 ip_output(m, NULL, NULL, 0, NULL, NULL); 1757 1758 pf_release_state(st); 1759 1760 CURVNET_RESTORE(); 1761} 1762 1763static void 1764pfsync_undefer_state(struct pf_state *st, int drop) 1765{ 1766 struct pfsync_softc *sc = V_pfsyncif; 1767 struct pfsync_deferral *pd; 1768 1769 PFSYNC_LOCK_ASSERT(sc); 1770 1771 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 1772 if (pd->pd_st == st) { 1773 if (callout_stop(&pd->pd_tmo)) 1774 pfsync_undefer(pd, drop); 1775 return; 1776 } 1777 } 1778 1779 panic("%s: unable to find deferred state", __func__); 1780} 1781 1782static void 1783pfsync_update_state(struct pf_state *st) 1784{ 1785 struct pfsync_softc *sc = V_pfsyncif; 1786 int sync = 0; 1787 1788 PF_STATE_LOCK_ASSERT(st); 1789 PFSYNC_LOCK(sc); 1790 1791 if (st->state_flags & PFSTATE_ACK) 1792 pfsync_undefer_state(st, 0); 1793 if (st->state_flags & PFSTATE_NOSYNC) { 1794 if (st->sync_state != PFSYNC_S_NONE) 1795 pfsync_q_del(st); 1796 PFSYNC_UNLOCK(sc); 1797 return; 1798 } 1799 1800 if (sc->sc_len == PFSYNC_MINPKT) 1801 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1802 1803 switch (st->sync_state) { 1804 case PFSYNC_S_UPD_C: 1805 case PFSYNC_S_UPD: 1806 case PFSYNC_S_INS: 1807 /* we're already handling it */ 1808 1809 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1810 st->sync_updates++; 1811 if (st->sync_updates >= sc->sc_maxupdates) 1812 sync = 1; 1813 } 1814 break; 1815 1816 case PFSYNC_S_IACK: 1817 pfsync_q_del(st); 1818 case PFSYNC_S_NONE: 1819 pfsync_q_ins(st, PFSYNC_S_UPD_C); 1820 st->sync_updates = 0; 1821 break; 1822 1823 default: 1824 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1825 } 1826 1827 if (sync || (time_uptime - st->pfsync_time) < 2) 1828 pfsync_push(sc); 1829 1830 PFSYNC_UNLOCK(sc); 1831} 1832 1833static void 1834pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1835{ 1836 struct pfsync_softc *sc = V_pfsyncif; 1837 
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	PFSYNC_LOCK_ASSERT(sc);

	/*
	 * This code does a bit to prevent multiple update requests for the
	 * same state being generated. It searches current subheader queue,
	 * but it doesn't lookup into queue of already packed datagrams.
	 */
	TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry)
		if (item->ur_msg.id == id &&
		    item->ur_msg.creatorid == creatorid)
			return;

	item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT);
	if (item == NULL)
		return; /* XXX stats */

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	/* The first request on the queue also needs a subheader. */
	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
		/* Packet full: flush, then account for a fresh subheader. */
		pfsync_sendout(1);

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;
}

/*
 * Queue a full (uncompressed) update for a state, typically in response
 * to a peer's update request or during a bulk transfer.
 */
static void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;

	PF_STATE_LOCK_ASSERT(st);
	PFSYNC_LOCK(sc);

	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		PFSYNC_UNLOCK(sc);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		pfsync_push(sc);
		break;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}

	PFSYNC_UNLOCK(sc);
}

/*
 * pf hook: a state is being removed; queue a delete message (or simply
 * forget the state if its insert was never sent).
 */
static void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;

	PFSYNC_LOCK(sc);
	if (st->state_flags & PFSTATE_ACK)
		pfsync_undefer_state(st, 1);
	if (st->state_flags & PFSTATE_NOSYNC) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		PFSYNC_UNLOCK(sc);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* We never got to tell the world so just forget about it. */
		pfsync_q_del(st);
		break;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		break;

	default:
		panic("%s: unexpected sync state %d", __func__, st->sync_state);
	}
	PFSYNC_UNLOCK(sc);
}

/*
 * pf hook: tell the peer to flush all states created by `creatorid'
 * on interface `ifname' via a CLR message sent through the "plus"
 * region of the next packet.
 */
static void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = V_pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++;

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	PFSYNC_LOCK(sc);
	pfsync_send_plus(&r, sizeof(r));
	PFSYNC_UNLOCK(sc);
}

/*
 * Put a referenced state on queue q, flushing the pending packet first
 * if the addition would exceed the MTU.
 */
static void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = V_pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	PFSYNC_LOCK_ASSERT(sc);

	KASSERT(st->sync_state == PFSYNC_S_NONE,
	    ("%s: st->sync_state %u", __func__, st->sync_state));
	KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
	    sc->sc_len));

	/* The first entry on a queue also needs a subheader. */
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
		pfsync_sendout(1);
		/* After the flush this entry starts a fresh subheader. */
		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
	pf_ref_state(st);
}

/*
 * Remove a state from its sync queue, dropping the queue's reference
 * and the subheader accounting if the queue becomes empty.
 */
static void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = V_pfsyncif;
	int q = st->sync_state;

	PFSYNC_LOCK_ASSERT(sc);
	KASSERT(st->sync_state != PFSYNC_S_NONE,
	    ("%s: st->sync_state != PFSYNC_S_NONE", __func__));

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;
	pf_release_state(st);

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

/*
 * Start servicing a peer's bulk update request: announce BUS_START and
 * schedule pfsync_bulk_update() to walk the state table.
 */
static void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = V_pfsyncif;

	if (V_pf_status.debug >= PF_DEBUG_MISC)
		printf("pfsync: received bulk update request\n");

	PFSYNC_BLOCK(sc);

	sc->sc_ureq_received = time_uptime;
	sc->sc_bulk_hashid = 0;
	sc->sc_bulk_stateid = 0;
	pfsync_bulk_status(PFSYNC_BUS_START);
	callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc);
	PFSYNC_BUNLOCK(sc);
}

/*
 * One round of a bulk transfer: walk the id hash from where the previous
 * round stopped, queueing a full update for every eligible state, and
 * reschedule ourselves once the packet fills up.  Announces BUS_END when
 * the whole table has been walked.
 */
static void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *s;
	int i, sent = 0;

	PFSYNC_BLOCK_ASSERT(sc);
	CURVNET_SET(sc->sc_ifp->if_vnet);

	/*
	 * Start with last state from previous invocation.
	 * It may had gone, in this case start from the
	 * hash slot.
	 */
	s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid);

	if (s != NULL)
		i = PF_IDHASH(s);
	else
		i = sc->sc_bulk_hashid;

	for (; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		if (s != NULL)
			PF_HASHROW_ASSERT(ih);
		else {
			PF_HASHROW_LOCK(ih);
			s = LIST_FIRST(&ih->states);
		}

		for (; s; s = LIST_NEXT(s, entry)) {

			if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) <
			    sizeof(struct pfsync_state)) {
				/* We've filled a packet. */
				sc->sc_bulk_hashid = i;
				sc->sc_bulk_stateid = s->id;
				sc->sc_bulk_creatorid = s->creatorid;
				PF_HASHROW_UNLOCK(ih);
				callout_reset(&sc->sc_bulk_tmo, 1,
				    pfsync_bulk_update, sc);
				goto full;
			}

			/*
			 * Skip states already queued, expired ones, and
			 * those updated since the bulk request arrived.
			 */
			if (s->sync_state == PFSYNC_S_NONE &&
			    s->timeout < PFTM_MAX &&
			    s->pfsync_time <= sc->sc_ureq_received) {
				pfsync_update_state_req(s);
				sent++;
			}
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* We're done. */
	pfsync_bulk_status(PFSYNC_BUS_END);

full:
	CURVNET_RESTORE();
}

/*
 * Send a bulk update status (BUS) message through the "plus" region,
 * stamping it with the elapsed time since the bulk request arrived.
 */
static void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = V_pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.count = htons(1);
	V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++;

	r.bus.creatorid = V_pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	PFSYNC_LOCK(sc);
	pfsync_send_plus(&r, sizeof(r));
	PFSYNC_UNLOCK(sc);
}

/*
 * Bulk update failure timeout: re-request up to PFSYNC_MAX_BULKTRIES
 * times, then give up and pretend the transfer succeeded so the node
 * stops demoting its CARP advskew.
 */
static void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_BLOCK_ASSERT(sc);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
		    pfsync_bulk_fail, V_pfsyncif);
		PFSYNC_LOCK(sc);
		pfsync_request_update(0, 0);
		PFSYNC_UNLOCK(sc);
	} else {
		/* Pretend like the transfer was ok. */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
		PFSYNC_LOCK(sc);
		if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p)
			(*carp_demote_adj_p)(-V_pfsync_carp_adj,
			    "pfsync bulk fail");
		sc->sc_flags |= PFSYNCF_OK;
		PFSYNC_UNLOCK(sc);
		if (V_pf_status.debug >= PF_DEBUG_MISC)
			printf("pfsync: failed to receive bulk update\n");
	}

	CURVNET_RESTORE();
}

/*
 * Attach a caller-built region to the next packet and transmit it
 * immediately.  `plus' must stay valid until pfsync_sendout() copies it.
 */
static void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = V_pfsyncif;

	PFSYNC_LOCK_ASSERT(sc);

	if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu)
		pfsync_sendout(1);

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout(1);
}

/* One-second timer: flush whatever has accumulated in the queues. */
static void
pfsync_timeout(void *arg)
{
	struct pfsync_softc *sc = arg;

	CURVNET_SET(sc->sc_ifp->if_vnet);
	PFSYNC_LOCK(sc);
	pfsync_push(sc);
	PFSYNC_UNLOCK(sc);
	CURVNET_RESTORE();
}

/* Request an immediate transmit from the software interrupt handler. */
static void
pfsync_push(struct pfsync_softc *sc)
{

	PFSYNC_LOCK_ASSERT(sc);

	sc->sc_flags |= PFSYNCF_PUSH;
	swi_sched(V_pfsync_swi_cookie, 0);
}

/*
 * Software interrupt handler: build a packet if a push was requested,
 * then drain the interface send queue onto the wire.
 */
static void
pfsyncintr(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct mbuf *m, *n;

	CURVNET_SET(sc->sc_ifp->if_vnet);

	PFSYNC_LOCK(sc);
	if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) {
		pfsync_sendout(0);
		sc->sc_flags &= ~PFSYNCF_PUSH;
	}
	_IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m);
	PFSYNC_UNLOCK(sc);

	for (; m != NULL; m = n) {

		n = m->m_nextpkt;
		m->m_nextpkt = NULL;

		/*
		 * We distinguish between a deferral packet and our
		 * own pfsync packet based on M_SKIP_FIREWALL
		 * flag. This is XXX.
		 */
		if (m->m_flags & M_SKIP_FIREWALL)
			ip_output(m, NULL, NULL, 0, NULL, NULL);
		else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
		    NULL) == 0)
			V_pfsyncstats.pfsyncs_opackets++;
		else
			V_pfsyncstats.pfsyncs_oerrors++;
	}
	CURVNET_RESTORE();
}

/*
 * Join the pfsync multicast group on the sync interface.  `mship' is
 * the caller-allocated membership array; ownership passes to sc_imo on
 * success, stays with the caller on error.
 */
static int
pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship)
{
	struct ip_moptions *imo = &sc->sc_imo;
	int error;

	if (!(ifp->if_flags & IFF_MULTICAST))
		return (EADDRNOTAVAIL);

	imo->imo_membership = (struct in_multi **)mship;
	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
	imo->imo_multicast_vif = -1;

	if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL,
	    &imo->imo_membership[0])) != 0) {
		imo->imo_membership = NULL;
		return (error);
	}
	imo->imo_num_memberships++;
	imo->imo_multicast_ifp = ifp;
	imo->imo_multicast_ttl = PFSYNC_DFLTTL;
	imo->imo_multicast_loop = 0;

	return (0);
}

/* Leave the pfsync multicast group and release the membership array. */
static void
pfsync_multicast_cleanup(struct pfsync_softc *sc)
{
	struct ip_moptions *imo = &sc->sc_imo;

	in_leavegroup(imo->imo_membership[0], NULL);
	free(imo->imo_membership, M_PFSYNC);
	imo->imo_membership = NULL;
	imo->imo_multicast_ifp = NULL;
}

#ifdef INET
extern struct domain inetdomain;
static struct protosw in_pfsync_protosw = {
	.pr_type =		SOCK_RAW,
	.pr_domain =		&inetdomain,
	.pr_protocol =		IPPROTO_PFSYNC,
	.pr_flags =		PR_ATOMIC|PR_ADDR,
	.pr_input =		pfsync_input,
	.pr_output =		(pr_output_t *)rip_output,
	.pr_ctloutput =		rip_ctloutput,
	.pr_usrreqs =		&rip_usrreqs
};
#endif

/* Install the pfsync hooks into pf. */
static void
pfsync_pointers_init()
{

	PF_RULES_WLOCK();
	pfsync_state_import_ptr = pfsync_state_import;
	pfsync_insert_state_ptr = pfsync_insert_state;
	pfsync_update_state_ptr = pfsync_update_state;
	pfsync_delete_state_ptr = pfsync_delete_state;
	pfsync_clear_states_ptr = pfsync_clear_states;
	pfsync_defer_ptr = pfsync_defer;
	PF_RULES_WUNLOCK();
}

/* Remove the pfsync hooks from pf. */
static void
pfsync_pointers_uninit()
{

	PF_RULES_WLOCK();
	pfsync_state_import_ptr = NULL;
	pfsync_insert_state_ptr = NULL;
	pfsync_update_state_ptr = NULL;
	pfsync_delete_state_ptr = NULL;
	pfsync_clear_states_ptr = NULL;
	pfsync_defer_ptr = NULL;
	PF_RULES_WUNLOCK();
}

/*
 * Module load: register the interface cloner and software interrupt in
 * every vnet, register the protocol, and hook into pf.  On failure the
 * partially set up vnets are torn down again.
 */
static int
pfsync_init()
{
	VNET_ITERATOR_DECL(vnet_iter);
	int error = 0;

	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		V_pfsync_cloner = if_clone_simple(pfsyncname,
		    pfsync_clone_create, pfsync_clone_destroy, 1);
		error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif,
		    SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie);
		CURVNET_RESTORE();
		if (error)
			goto fail_locked;
	}
	VNET_LIST_RUNLOCK();
#ifdef INET
	error = pf_proto_register(PF_INET, &in_pfsync_protosw);
	if (error)
		goto fail;
	error = ipproto_register(IPPROTO_PFSYNC);
	if (error) {
		pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
		goto fail;
	}
#endif
	pfsync_pointers_init();

	return (0);

fail:
	VNET_LIST_RLOCK();
fail_locked:
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		if (V_pfsync_swi_cookie) {
			swi_remove(V_pfsync_swi_cookie);
			if_clone_detach(V_pfsync_cloner);
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();

	return (error);
}

/* Module unload: undo everything pfsync_init() set up. */
static void
pfsync_uninit()
{
	VNET_ITERATOR_DECL(vnet_iter);

	pfsync_pointers_uninit();

	ipproto_unregister(IPPROTO_PFSYNC);
	pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW);
	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		if_clone_detach(V_pfsync_cloner);
		swi_remove(V_pfsync_swi_cookie);
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();
}

/* Standard module event handler; unloading is refused (EBUSY) on quiesce. */
static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		error = pfsync_init();
		break;
	case MOD_QUIESCE:
		/*
		 * Module should not be unloaded due to race conditions.
		 */
		error = EBUSY;
		break;
	case MOD_UNLOAD:
		pfsync_uninit();
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

static moduledata_t pfsync_mod = {
	pfsyncname,
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 1

DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER);