1/* SPDX-License-Identifier: GPL-2.0-or-later */ 2#ifndef _NET_RPS_H 3#define _NET_RPS_H 4 5#include <linux/types.h> 6#include <linux/static_key.h> 7#include <net/sock.h> 8#include <net/hotdata.h> 9 10#ifdef CONFIG_RPS 11 12extern struct static_key_false rps_needed; 13extern struct static_key_false rfs_needed; 14 15/* 16 * This structure holds an RPS map which can be of variable length. The 17 * map is an array of CPUs. 18 */ 19struct rps_map { 20 unsigned int len; 21 struct rcu_head rcu; 22 u16 cpus[]; 23}; 24#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) 25 26/* 27 * The rps_dev_flow structure contains the mapping of a flow to a CPU, the 28 * tail pointer for that CPU's input queue at the time of last enqueue, and 29 * a hardware filter index. 30 */ 31struct rps_dev_flow { 32 u16 cpu; 33 u16 filter; 34 unsigned int last_qtail; 35}; 36#define RPS_NO_FILTER 0xffff 37 38/* 39 * The rps_dev_flow_table structure contains a table of flow mappings. 40 */ 41struct rps_dev_flow_table { 42 unsigned int mask; 43 struct rcu_head rcu; 44 struct rps_dev_flow flows[]; 45}; 46#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ 47 ((_num) * sizeof(struct rps_dev_flow))) 48 49/* 50 * The rps_sock_flow_table contains mappings of flows to the last CPU 51 * on which they were processed by the application (set in recvmsg). 52 * Each entry is a 32bit value. Upper part is the high-order bits 53 * of flow hash, lower part is CPU number. 54 * rps_cpu_mask is used to partition the space, depending on number of 55 * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 56 * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, 57 * meaning we use 32-6=26 bits for the hash. 58 */ 59struct rps_sock_flow_table { 60 u32 mask; 61 62 u32 ents[] ____cacheline_aligned_in_smp; 63}; 64#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) 65 66#define RPS_NO_CPU 0xffff 67 68static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, 69 u32 hash) 70{ 71 unsigned int index = hash & table->mask; 72 u32 val = hash & ~net_hotdata.rps_cpu_mask; 73 74 /* We only give a hint, preemption can change CPU under us */ 75 val |= raw_smp_processor_id(); 76 77 /* The following WRITE_ONCE() is paired with the READ_ONCE() 78 * here, and another one in get_rps_cpu(). 79 */ 80 if (READ_ONCE(table->ents[index]) != val) 81 WRITE_ONCE(table->ents[index], val); 82} 83 84#endif /* CONFIG_RPS */ 85 86static inline void sock_rps_record_flow_hash(__u32 hash) 87{ 88#ifdef CONFIG_RPS 89 struct rps_sock_flow_table *sock_flow_table; 90 91 if (!hash) 92 return; 93 rcu_read_lock(); 94 sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); 95 if (sock_flow_table) 96 rps_record_sock_flow(sock_flow_table, hash); 97 rcu_read_unlock(); 98#endif 99} 100 101static inline void sock_rps_record_flow(const struct sock *sk) 102{ 103#ifdef CONFIG_RPS 104 if (static_branch_unlikely(&rfs_needed)) { 105 /* Reading sk->sk_rxhash might incur an expensive cache line 106 * miss. 107 * 108 * TCP_ESTABLISHED does cover almost all states where RFS 109 * might be useful, and is cheaper [1] than testing : 110 * IPv4: inet_sk(sk)->inet_daddr 111 * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) 112 * OR an additional socket flag 113 * [1] : sk_state and sk_prot are in the same cache line. 114 */ 115 if (sk->sk_state == TCP_ESTABLISHED) { 116 /* This READ_ONCE() is paired with the WRITE_ONCE() 117 * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). 118 */ 119 sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); 120 } 121 } 122#endif 123} 124 125#endif /* _NET_RPS_H */ 126