in_pcbgroup.c revision 263198
1222748Srwatson/*- 2222748Srwatson * Copyright (c) 2010-2011 Juniper Networks, Inc. 3222748Srwatson * All rights reserved. 4222748Srwatson * 5222748Srwatson * This software was developed by Robert N. M. Watson under contract 6222748Srwatson * to Juniper Networks, Inc. 7222748Srwatson * 8222748Srwatson * Redistribution and use in source and binary forms, with or without 9222748Srwatson * modification, are permitted provided that the following conditions 10222748Srwatson * are met: 11222748Srwatson * 1. Redistributions of source code must retain the above copyright 12222748Srwatson * notice, this list of conditions and the following disclaimer. 13222748Srwatson * 2. Redistributions in binary form must reproduce the above copyright 14222748Srwatson * notice, this list of conditions and the following disclaimer in the 15222748Srwatson * documentation and/or other materials provided with the distribution. 16222748Srwatson * 17222748Srwatson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18222748Srwatson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19222748Srwatson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20222748Srwatson * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21222748Srwatson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22222748Srwatson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23222748Srwatson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24222748Srwatson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25222748Srwatson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26222748Srwatson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27222748Srwatson * SUCH DAMAGE. 28222748Srwatson */ 29222748Srwatson 30222748Srwatson#include <sys/cdefs.h> 31222748Srwatson 32222748Srwatson__FBSDID("$FreeBSD: head/sys/netinet/in_pcbgroup.c 263198 2014-03-15 00:57:50Z rwatson $"); 33222748Srwatson 34222748Srwatson#include "opt_inet6.h" 35263198Srwatson#include "opt_rss.h" 36222748Srwatson 37222748Srwatson#include <sys/param.h> 38222748Srwatson#include <sys/lock.h> 39222748Srwatson#include <sys/malloc.h> 40222748Srwatson#include <sys/mbuf.h> 41222748Srwatson#include <sys/mutex.h> 42222748Srwatson#include <sys/smp.h> 43222748Srwatson#include <sys/socketvar.h> 44222748Srwatson 45222748Srwatson#include <netinet/in.h> 46222748Srwatson#include <netinet/in_pcb.h> 47263198Srwatson#include <netinet/in_rss.h> 48222748Srwatson#ifdef INET6 49222748Srwatson#include <netinet6/in6_pcb.h> 50222748Srwatson#endif /* INET6 */ 51222748Srwatson 52222748Srwatson/* 53222748Srwatson * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's 54222748Srwatson * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization 55222748Srwatson * Strategies in Modern Operating Systems". This implementation differs 56222748Srwatson * significantly from that described in the paper, in that it attempts to 57222748Srwatson * introduce not just notions of affinity for connections and distribute work 58222748Srwatson * so as to reduce lock contention, but also align those notions with 59222748Srwatson * hardware work distribution strategies such as RSS. In this construction, 60222748Srwatson * connection groups supplement, rather than replace, existing reservation 61222748Srwatson * tables for protocol 4-tuples, offering CPU-affine lookup tables with 62222748Srwatson * minimal cache line migration and lock contention during steady state 63222748Srwatson * operation. 64222748Srwatson * 65263198Srwatson * Hardware-offloaded checksums are often inefficient in software -- for 66263198Srwatson * example, Toeplitz, specified by RSS, introduced a significant overhead if 67263198Srwatson * performed during per-packge processing. It is therefore desirable to fall 68263198Srwatson * back on traditional reservation table lookups without affinity where 69263198Srwatson * hardware-offloaded checksums aren't available, such as for traffic over 70263198Srwatson * non-RSS interfaces. 71263198Srwatson * 72222748Srwatson * Internet protocols, such as UDP and TCP, register to use connection groups 73222748Srwatson * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this 74222748Srwatson * indicates to the connection group code whether a 2-tuple or 4-tuple is 75222748Srwatson * used as an argument to hashes that assign a connection to a particular 76222748Srwatson * group. This must be aligned with any hardware offloaded distribution 77222748Srwatson * model, such as RSS or similar approaches taken in embedded network boards. 78222748Srwatson * Wildcard sockets require special handling, as in Willman 2006, and are 79222748Srwatson * shared between connection groups -- while being protected by group-local 80222748Srwatson * locks. This means that connection establishment and teardown can be 81222748Srwatson * signficantly more expensive than without connection groups, but that 82222748Srwatson * steady-state processing can be significantly faster. 83222748Srwatson * 84263198Srwatson * When RSS is used, certain connection group parameters, such as the number 85263198Srwatson * of groups, are provided by the RSS implementation, found in in_rss.c. 86263198Srwatson * Otherwise, in_pcbgroup.c selects possible sensible parameters 87263198Srwatson * corresponding to the degree of parallelism exposed by netisr. 88263198Srwatson * 89222748Srwatson * Most of the implementation of connection groups is in this file; however, 90222748Srwatson * connection group lookup is implemented in in_pcb.c alongside reservation 91222748Srwatson * table lookups -- see in_pcblookup_group(). 92222748Srwatson * 93222748Srwatson * TODO: 94222748Srwatson * 95222748Srwatson * Implement dynamic rebalancing of buckets with connection groups; when 96222748Srwatson * load is unevenly distributed, search for more optimal balancing on 97222748Srwatson * demand. This might require scaling up the number of connection groups 98222748Srwatson * by <<1. 99222748Srwatson * 100222748Srwatson * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection 101222748Srwatson * groups for ip_input and ip6_input, allowing non-offloaded work 102222748Srwatson * distribution. 103222748Srwatson * 104222748Srwatson * Expose effective CPU affinity of connections to userspace using socket 105222748Srwatson * options. 106222748Srwatson * 107222748Srwatson * Investigate per-connection affinity overrides based on socket options; an 108222748Srwatson * option could be set, certainly resulting in work being distributed 109222748Srwatson * differently in software, and possibly propagated to supporting hardware 110222748Srwatson * with TCAMs or hardware hash tables. This might require connections to 111222748Srwatson * exist in more than one connection group at a time. 112222748Srwatson * 113222748Srwatson * Hook netisr thread reconfiguration events, and propagate those to RSS so 114222748Srwatson * that rebalancing can occur when the thread pool grows or shrinks. 115222748Srwatson * 116222748Srwatson * Expose per-pcbgroup statistics to userspace monitoring tools such as 117222748Srwatson * netstat, in order to allow better debugging and profiling. 118222748Srwatson */ 119222748Srwatson 120222748Srwatsonvoid 121222748Srwatsonin_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields, 122222748Srwatson int hash_nelements) 123222748Srwatson{ 124222748Srwatson struct inpcbgroup *pcbgroup; 125222748Srwatson u_int numpcbgroups, pgn; 126222748Srwatson 127222748Srwatson /* 128222748Srwatson * Only enable connection groups for a protocol if it has been 129222748Srwatson * specifically requested. 130222748Srwatson */ 131222748Srwatson if (hashfields == IPI_HASHFIELDS_NONE) 132222748Srwatson return; 133222748Srwatson 134222748Srwatson /* 135222748Srwatson * Connection groups are about multi-processor load distribution, 136222748Srwatson * lock contention, and connection CPU affinity. As such, no point 137222748Srwatson * in turning them on for a uniprocessor machine, it only wastes 138222748Srwatson * memory. 139222748Srwatson */ 140222748Srwatson if (mp_ncpus == 1) 141222748Srwatson return; 142222748Srwatson 143263198Srwatson#ifdef RSS 144222748Srwatson /* 145263198Srwatson * If we're using RSS, then RSS determines the number of connection 146263198Srwatson * groups to use: one connection group per RSS bucket. If for some 147263198Srwatson * reason RSS isn't able to provide a number of buckets, disable 148263198Srwatson * connection groups entirely. 149263198Srwatson * 150263198Srwatson * XXXRW: Can this ever happen? 151222748Srwatson */ 152263198Srwatson numpcbgroups = rss_getnumbuckets(); 153263198Srwatson if (numpcbgroups == 0) 154263198Srwatson return; 155263198Srwatson#else 156263198Srwatson /* 157263198Srwatson * Otherwise, we'll just use one per CPU for now. If we decide to 158263198Srwatson * do dynamic rebalancing a la RSS, we'll need similar logic here. 159263198Srwatson */ 160222748Srwatson numpcbgroups = mp_ncpus; 161263198Srwatson#endif 162222748Srwatson 163222748Srwatson pcbinfo->ipi_hashfields = hashfields; 164222748Srwatson pcbinfo->ipi_pcbgroups = malloc(numpcbgroups * 165222748Srwatson sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO); 166222748Srwatson pcbinfo->ipi_npcbgroups = numpcbgroups; 167222748Srwatson pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB, 168222748Srwatson &pcbinfo->ipi_wildmask); 169222748Srwatson for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) { 170222748Srwatson pcbgroup = &pcbinfo->ipi_pcbgroups[pgn]; 171222748Srwatson pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB, 172222748Srwatson &pcbgroup->ipg_hashmask); 173222748Srwatson INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup"); 174222748Srwatson 175222748Srwatson /* 176222748Srwatson * Initialise notional affinity of the pcbgroup -- for RSS, 177263198Srwatson * we want the same notion of affinity as NICs to be used. In 178263198Srwatson * the non-RSS case, just round robin for the time being. 179263198Srwatson * 180263198Srwatson * XXXRW: The notion of a bucket to CPU mapping is common at 181263198Srwatson * both pcbgroup and RSS layers -- does that mean that we 182263198Srwatson * should migrate it all from RSS to here, and just leave RSS 183263198Srwatson * responsible only for providing hashing and mapping funtions? 184222748Srwatson */ 185263198Srwatson#ifdef RSS 186263198Srwatson pcbgroup->ipg_cpu = rss_getcpu(pgn); 187263198Srwatson#else 188222748Srwatson pcbgroup->ipg_cpu = (pgn % mp_ncpus); 189263198Srwatson#endif 190222748Srwatson } 191222748Srwatson} 192222748Srwatson 193222748Srwatsonvoid 194222748Srwatsonin_pcbgroup_destroy(struct inpcbinfo *pcbinfo) 195222748Srwatson{ 196222748Srwatson struct inpcbgroup *pcbgroup; 197222748Srwatson u_int pgn; 198222748Srwatson 199222748Srwatson if (pcbinfo->ipi_npcbgroups == 0) 200222748Srwatson return; 201222748Srwatson 202222748Srwatson for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) { 203222748Srwatson pcbgroup = &pcbinfo->ipi_pcbgroups[pgn]; 204222748Srwatson KASSERT(LIST_EMPTY(pcbinfo->ipi_listhead), 205222748Srwatson ("in_pcbinfo_destroy: listhead not empty")); 206222748Srwatson INP_GROUP_LOCK_DESTROY(pcbgroup); 207222748Srwatson hashdestroy(pcbgroup->ipg_hashbase, M_PCB, 208222748Srwatson pcbgroup->ipg_hashmask); 209222748Srwatson } 210222748Srwatson hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask); 211222748Srwatson free(pcbinfo->ipi_pcbgroups, M_PCB); 212222748Srwatson pcbinfo->ipi_pcbgroups = NULL; 213222748Srwatson pcbinfo->ipi_npcbgroups = 0; 214222748Srwatson pcbinfo->ipi_hashfields = 0; 215222748Srwatson} 216222748Srwatson 217222748Srwatson/* 218222748Srwatson * Given a hash of whatever the covered tuple might be, return a pcbgroup 219263198Srwatson * index. Where RSS is supported, try to align bucket selection with RSS CPU 220263198Srwatson * affinity strategy. 221222748Srwatson */ 222222748Srwatsonstatic __inline u_int 223222748Srwatsonin_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash) 224222748Srwatson{ 225222748Srwatson 226263198Srwatson#ifdef RSS 227263198Srwatson return (rss_getbucket(hash)); 228263198Srwatson#else 229222748Srwatson return (hash % pcbinfo->ipi_npcbgroups); 230263198Srwatson#endif 231222748Srwatson} 232222748Srwatson 233222748Srwatson/* 234222748Srwatson * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash 235263198Srwatson * information is insufficient to identify the pcbgroup. This might occur if 236263198Srwatson * a TCP packet turns up with a 2-tuple hash, or if an RSS hash is present but 237263198Srwatson * RSS is not compiled into the kernel. 238222748Srwatson */ 239222748Srwatsonstruct inpcbgroup * 240222748Srwatsonin_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash) 241222748Srwatson{ 242222748Srwatson 243263198Srwatson#ifdef RSS 244263198Srwatson if ((pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE && 245263198Srwatson hashtype == M_HASHTYPE_RSS_TCP_IPV4) || 246263198Srwatson (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_2TUPLE && 247263198Srwatson hashtype == M_HASHTYPE_RSS_IPV4)) 248263198Srwatson return (&pcbinfo->ipi_pcbgroups[ 249263198Srwatson in_pcbgroup_getbucket(pcbinfo, hash)]); 250263198Srwatson#endif 251222748Srwatson return (NULL); 252222748Srwatson} 253222748Srwatson 254222748Srwatsonstatic struct inpcbgroup * 255222748Srwatsonin_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m) 256222748Srwatson{ 257222748Srwatson 258222748Srwatson return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), 259222748Srwatson m->m_pkthdr.flowid)); 260222748Srwatson} 261222748Srwatson 262222748Srwatsonstruct inpcbgroup * 263222748Srwatsonin_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr, 264222748Srwatson u_short lport, struct in_addr faddr, u_short fport) 265222748Srwatson{ 266222748Srwatson uint32_t hash; 267222748Srwatson 268263198Srwatson /* 269263198Srwatson * RSS note: we pass foreign addr/port as source, and local addr/port 270263198Srwatson * as destination, as we want to align with what the hardware is 271263198Srwatson * doing. 272263198Srwatson */ 273222748Srwatson switch (pcbinfo->ipi_hashfields) { 274222748Srwatson case IPI_HASHFIELDS_4TUPLE: 275263198Srwatson#ifdef RSS 276263198Srwatson hash = rss_hash_ip4_4tuple(faddr, fport, laddr, lport); 277263198Srwatson#else 278222748Srwatson hash = faddr.s_addr ^ fport; 279263198Srwatson#endif 280222748Srwatson break; 281222748Srwatson 282222748Srwatson case IPI_HASHFIELDS_2TUPLE: 283263198Srwatson#ifdef RSS 284263198Srwatson hash = rss_hash_ip4_2tuple(faddr, laddr); 285263198Srwatson#else 286222748Srwatson hash = faddr.s_addr ^ laddr.s_addr; 287263198Srwatson#endif 288222748Srwatson break; 289222748Srwatson 290222748Srwatson default: 291222748Srwatson hash = 0; 292222748Srwatson } 293222748Srwatson return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo, 294222748Srwatson hash)]); 295222748Srwatson} 296222748Srwatson 297222748Srwatsonstruct inpcbgroup * 298222748Srwatsonin_pcbgroup_byinpcb(struct inpcb *inp) 299222748Srwatson{ 300222748Srwatson 301222748Srwatson return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr, 302222748Srwatson inp->inp_lport, inp->inp_faddr, inp->inp_fport)); 303222748Srwatson} 304222748Srwatson 305222748Srwatsonstatic void 306222748Srwatsonin_pcbwild_add(struct inpcb *inp) 307222748Srwatson{ 308222748Srwatson struct inpcbinfo *pcbinfo; 309222748Srwatson struct inpcbhead *head; 310222748Srwatson u_int pgn; 311222748Srwatson 312222748Srwatson INP_WLOCK_ASSERT(inp); 313222748Srwatson KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD), 314222748Srwatson ("%s: is wild",__func__)); 315222748Srwatson 316222748Srwatson pcbinfo = inp->inp_pcbinfo; 317222748Srwatson for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) 318222748Srwatson INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]); 319222748Srwatson head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport, 320222748Srwatson 0, pcbinfo->ipi_wildmask)]; 321222748Srwatson LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild); 322222748Srwatson inp->inp_flags2 |= INP_PCBGROUPWILD; 323222748Srwatson for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) 324222748Srwatson INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]); 325222748Srwatson} 326222748Srwatson 327222748Srwatsonstatic void 328222748Srwatsonin_pcbwild_remove(struct inpcb *inp) 329222748Srwatson{ 330222748Srwatson struct inpcbinfo *pcbinfo; 331222748Srwatson u_int pgn; 332222748Srwatson 333222748Srwatson INP_WLOCK_ASSERT(inp); 334222748Srwatson KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD), 335222748Srwatson ("%s: not wild", __func__)); 336222748Srwatson 337222748Srwatson pcbinfo = inp->inp_pcbinfo; 338222748Srwatson for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) 339222748Srwatson INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]); 340222748Srwatson LIST_REMOVE(inp, inp_pcbgroup_wild); 341222748Srwatson for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) 342222748Srwatson INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]); 343222748Srwatson inp->inp_flags2 &= ~INP_PCBGROUPWILD; 344222748Srwatson} 345222748Srwatson 346222748Srwatsonstatic __inline int 347222748Srwatsonin_pcbwild_needed(struct inpcb *inp) 348222748Srwatson{ 349222748Srwatson 350222748Srwatson#ifdef INET6 351222748Srwatson if (inp->inp_vflag & INP_IPV6) 352222748Srwatson return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)); 353222748Srwatson else 354222748Srwatson#endif 355222748Srwatson return (inp->inp_faddr.s_addr == htonl(INADDR_ANY)); 356222748Srwatson} 357222748Srwatson 358222748Srwatsonstatic void 359222748Srwatsonin_pcbwild_update_internal(struct inpcb *inp) 360222748Srwatson{ 361222748Srwatson int wildcard_needed; 362222748Srwatson 363222748Srwatson wildcard_needed = in_pcbwild_needed(inp); 364222748Srwatson if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD)) 365222748Srwatson in_pcbwild_add(inp); 366222748Srwatson else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD)) 367222748Srwatson in_pcbwild_remove(inp); 368222748Srwatson} 369222748Srwatson 370222748Srwatson/* 371222748Srwatson * Update the pcbgroup of an inpcb, which might include removing an old 372222748Srwatson * pcbgroup reference and/or adding a new one. Wildcard processing is not 373222748Srwatson * performed here, although ideally we'll never install a pcbgroup for a 374222748Srwatson * wildcard inpcb (asserted below). 375222748Srwatson */ 376222748Srwatsonstatic void 377222748Srwatsonin_pcbgroup_update_internal(struct inpcbinfo *pcbinfo, 378222748Srwatson struct inpcbgroup *newpcbgroup, struct inpcb *inp) 379222748Srwatson{ 380222748Srwatson struct inpcbgroup *oldpcbgroup; 381222748Srwatson struct inpcbhead *pcbhash; 382222748Srwatson uint32_t hashkey_faddr; 383222748Srwatson 384222748Srwatson INP_WLOCK_ASSERT(inp); 385222748Srwatson 386222748Srwatson oldpcbgroup = inp->inp_pcbgroup; 387222748Srwatson if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) { 388222748Srwatson INP_GROUP_LOCK(oldpcbgroup); 389222748Srwatson LIST_REMOVE(inp, inp_pcbgrouphash); 390222748Srwatson inp->inp_pcbgroup = NULL; 391222748Srwatson INP_GROUP_UNLOCK(oldpcbgroup); 392222748Srwatson } 393222748Srwatson if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) { 394222748Srwatson#ifdef INET6 395222748Srwatson if (inp->inp_vflag & INP_IPV6) 396222748Srwatson hashkey_faddr = inp->in6p_faddr.s6_addr32[3]; /* XXX */ 397222748Srwatson else 398222748Srwatson#endif 399222748Srwatson hashkey_faddr = inp->inp_faddr.s_addr; 400222748Srwatson INP_GROUP_LOCK(newpcbgroup); 401222748Srwatson pcbhash = &newpcbgroup->ipg_hashbase[ 402222748Srwatson INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, 403222748Srwatson newpcbgroup->ipg_hashmask)]; 404222748Srwatson LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash); 405222748Srwatson inp->inp_pcbgroup = newpcbgroup; 406222748Srwatson INP_GROUP_UNLOCK(newpcbgroup); 407222748Srwatson } 408222748Srwatson 409222748Srwatson KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)), 410222748Srwatson ("%s: pcbgroup and wildcard!", __func__)); 411222748Srwatson} 412222748Srwatson 413222748Srwatson/* 414222748Srwatson * Two update paths: one in which the 4-tuple on an inpcb has been updated 415222748Srwatson * and therefore connection groups may need to change (or a wildcard entry 416222748Srwatson * may needed to be installed), and another in which the 4-tuple has been 417222748Srwatson * set as a result of a packet received, in which case we may be able to use 418222748Srwatson * the hash on the mbuf to avoid doing a software hash calculation for RSS. 419222748Srwatson * 420222748Srwatson * In each case: first, let the wildcard code have a go at placing it as a 421222748Srwatson * wildcard socket. If it was a wildcard, or if the connection has been 422222748Srwatson * dropped, then no pcbgroup is required (so potentially clear it); 423222748Srwatson * otherwise, calculate and update the pcbgroup for the inpcb. 424222748Srwatson */ 425222748Srwatsonvoid 426222748Srwatsonin_pcbgroup_update(struct inpcb *inp) 427222748Srwatson{ 428222748Srwatson struct inpcbinfo *pcbinfo; 429222748Srwatson struct inpcbgroup *newpcbgroup; 430222748Srwatson 431222748Srwatson INP_WLOCK_ASSERT(inp); 432222748Srwatson 433222748Srwatson pcbinfo = inp->inp_pcbinfo; 434222748Srwatson if (!in_pcbgroup_enabled(pcbinfo)) 435222748Srwatson return; 436222748Srwatson 437222748Srwatson in_pcbwild_update_internal(inp); 438222748Srwatson if (!(inp->inp_flags2 & INP_PCBGROUPWILD) && 439222748Srwatson !(inp->inp_flags & INP_DROPPED)) { 440222748Srwatson#ifdef INET6 441222748Srwatson if (inp->inp_vflag & INP_IPV6) 442222748Srwatson newpcbgroup = in6_pcbgroup_byinpcb(inp); 443222748Srwatson else 444222748Srwatson#endif 445222748Srwatson newpcbgroup = in_pcbgroup_byinpcb(inp); 446222748Srwatson } else 447222748Srwatson newpcbgroup = NULL; 448222748Srwatson in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp); 449222748Srwatson} 450222748Srwatson 451222748Srwatsonvoid 452222748Srwatsonin_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m) 453222748Srwatson{ 454222748Srwatson struct inpcbinfo *pcbinfo; 455222748Srwatson struct inpcbgroup *newpcbgroup; 456222748Srwatson 457222748Srwatson INP_WLOCK_ASSERT(inp); 458222748Srwatson 459222748Srwatson pcbinfo = inp->inp_pcbinfo; 460222748Srwatson if (!in_pcbgroup_enabled(pcbinfo)) 461222748Srwatson return; 462222748Srwatson 463222748Srwatson /* 464222748Srwatson * Possibly should assert !INP_PCBGROUPWILD rather than testing for 465222748Srwatson * it; presumably this function should never be called for anything 466222748Srwatson * other than non-wildcard socket? 467222748Srwatson */ 468222748Srwatson in_pcbwild_update_internal(inp); 469222748Srwatson if (!(inp->inp_flags2 & INP_PCBGROUPWILD) && 470222748Srwatson !(inp->inp_flags & INP_DROPPED)) { 471222748Srwatson newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m); 472222748Srwatson#ifdef INET6 473222748Srwatson if (inp->inp_vflag & INP_IPV6) { 474222748Srwatson if (newpcbgroup == NULL) 475222748Srwatson newpcbgroup = in6_pcbgroup_byinpcb(inp); 476222748Srwatson } else { 477222748Srwatson#endif 478222748Srwatson if (newpcbgroup == NULL) 479222748Srwatson newpcbgroup = in_pcbgroup_byinpcb(inp); 480222748Srwatson#ifdef INET6 481222748Srwatson } 482222748Srwatson#endif 483222748Srwatson } else 484222748Srwatson newpcbgroup = NULL; 485222748Srwatson in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp); 486222748Srwatson} 487222748Srwatson 488222748Srwatson/* 489222748Srwatson * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb. 490222748Srwatson */ 491222748Srwatsonvoid 492222748Srwatsonin_pcbgroup_remove(struct inpcb *inp) 493222748Srwatson{ 494222748Srwatson struct inpcbgroup *pcbgroup; 495222748Srwatson 496222748Srwatson INP_WLOCK_ASSERT(inp); 497222748Srwatson 498222748Srwatson if (!in_pcbgroup_enabled(inp->inp_pcbinfo)) 499222748Srwatson return; 500222748Srwatson 501222748Srwatson if (inp->inp_flags2 & INP_PCBGROUPWILD) 502222748Srwatson in_pcbwild_remove(inp); 503222748Srwatson 504222748Srwatson pcbgroup = inp->inp_pcbgroup; 505222748Srwatson if (pcbgroup != NULL) { 506222748Srwatson INP_GROUP_LOCK(pcbgroup); 507222748Srwatson LIST_REMOVE(inp, inp_pcbgrouphash); 508222748Srwatson inp->inp_pcbgroup = NULL; 509222748Srwatson INP_GROUP_UNLOCK(pcbgroup); 510222748Srwatson } 511222748Srwatson} 512222748Srwatson 513222748Srwatson/* 514222748Srwatson * Query whether or not it is appropriate to use pcbgroups to look up inpcbs 515222748Srwatson * for a protocol. 516222748Srwatson */ 517222748Srwatsonint 518222748Srwatsonin_pcbgroup_enabled(struct inpcbinfo *pcbinfo) 519222748Srwatson{ 520222748Srwatson 521222748Srwatson return (pcbinfo->ipi_npcbgroups > 0); 522222748Srwatson} 523