1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * IPVS:        Destination Hashing scheduling module
4 *
5 * Authors:     Wensong Zhang <wensong@gnuchina.org>
6 *
7 *              Inspired by the consistent hashing scheduler patch from
8 *              Thomas Proell <proellt@gmx.de>
9 *
10 * Changes:
11 */
12
13/*
 * The dh algorithm selects a server by the hash key of the destination
 * IP address. The pseudo code is as follows:
16 *
17 *       n <- servernode[dest_ip];
18 *       if (n is dead) OR
19 *          (n is overloaded) OR (n.weight <= 0) then
20 *                 return NULL;
21 *
22 *       return n;
23 *
 * Note that servernode is a hash table (256 buckets by default) that maps
 * the hash index derived from the packet destination IP address to the
 * current server array. If the dh scheduler is used in a cache cluster,
 * it is good to combine it with the cache_bypass feature. When the
 * statically assigned server is dead or overloaded, the load balancer can
 * bypass the cache server and send requests to the original server
 * directly.
30 *
31 */
32
/* Log prefix: pr_fmt() expands a format string to "IPVS: " followed by it. */
#define KMSG_COMPONENT	"IPVS"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
35
36#include <linux/ip.h>
37#include <linux/slab.h>
38#include <linux/module.h>
39#include <linux/kernel.h>
40#include <linux/skbuff.h>
41#include <linux/hash.h>
42
43#include <net/ip_vs.h>
44
45
46/*
47 *      IPVS DH bucket
48 */
49struct ip_vs_dh_bucket {
50	struct ip_vs_dest __rcu	*dest;	/* real server (cache) */
51};
52
/*
 *     Sizing of the IPVS DH hash table.
 *
 *     The table has 2^IP_VS_DH_TAB_BITS buckets; the bit count defaults
 *     to 8 (256 buckets) unless CONFIG_IP_VS_DH_TAB_BITS is already defined.
 */
#if !defined(CONFIG_IP_VS_DH_TAB_BITS)
#define CONFIG_IP_VS_DH_TAB_BITS        8
#endif
#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
62
63struct ip_vs_dh_state {
64	struct ip_vs_dh_bucket		buckets[IP_VS_DH_TAB_SIZE];
65	struct rcu_head			rcu_head;
66};
67
68/*
69 *	Returns hash value for IPVS DH entry
70 */
71static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
72{
73	__be32 addr_fold = addr->ip;
74
75#ifdef CONFIG_IP_VS_IPV6
76	if (af == AF_INET6)
77		addr_fold = addr->ip6[0]^addr->ip6[1]^
78			    addr->ip6[2]^addr->ip6[3];
79#endif
80	return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
81}
82
83
84/*
85 *      Get ip_vs_dest associated with supplied parameters.
86 */
87static inline struct ip_vs_dest *
88ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
89{
90	return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
91}
92
93
94/*
95 *      Assign all the hash buckets of the specified table with the service.
96 */
97static int
98ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
99{
100	int i;
101	struct ip_vs_dh_bucket *b;
102	struct list_head *p;
103	struct ip_vs_dest *dest;
104	bool empty;
105
106	b = &s->buckets[0];
107	p = &svc->destinations;
108	empty = list_empty(p);
109	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
110		dest = rcu_dereference_protected(b->dest, 1);
111		if (dest)
112			ip_vs_dest_put(dest);
113		if (empty)
114			RCU_INIT_POINTER(b->dest, NULL);
115		else {
116			if (p == &svc->destinations)
117				p = p->next;
118
119			dest = list_entry(p, struct ip_vs_dest, n_list);
120			ip_vs_dest_hold(dest);
121			RCU_INIT_POINTER(b->dest, dest);
122
123			p = p->next;
124		}
125		b++;
126	}
127	return 0;
128}
129
130
131/*
132 *      Flush all the hash buckets of the specified table.
133 */
134static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
135{
136	int i;
137	struct ip_vs_dh_bucket *b;
138	struct ip_vs_dest *dest;
139
140	b = &s->buckets[0];
141	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
142		dest = rcu_dereference_protected(b->dest, 1);
143		if (dest) {
144			ip_vs_dest_put(dest);
145			RCU_INIT_POINTER(b->dest, NULL);
146		}
147		b++;
148	}
149}
150
151
152static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
153{
154	struct ip_vs_dh_state *s;
155
156	/* allocate the DH table for this service */
157	s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
158	if (s == NULL)
159		return -ENOMEM;
160
161	svc->sched_data = s;
162	IP_VS_DBG(6, "DH hash table (memory=%zdbytes) allocated for "
163		  "current service\n",
164		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
165
166	/* assign the hash buckets with current dests */
167	ip_vs_dh_reassign(s, svc);
168
169	return 0;
170}
171
172
173static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
174{
175	struct ip_vs_dh_state *s = svc->sched_data;
176
177	/* got to clean up hash buckets here */
178	ip_vs_dh_flush(s);
179
180	/* release the table itself */
181	kfree_rcu(s, rcu_head);
182	IP_VS_DBG(6, "DH hash table (memory=%zdbytes) released\n",
183		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
184}
185
186
187static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
188				 struct ip_vs_dest *dest)
189{
190	struct ip_vs_dh_state *s = svc->sched_data;
191
192	/* assign the hash buckets with the updated service */
193	ip_vs_dh_reassign(s, svc);
194
195	return 0;
196}
197
198
199/*
200 *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
201 *      consider that the server is overloaded here.
202 */
203static inline int is_overloaded(struct ip_vs_dest *dest)
204{
205	return dest->flags & IP_VS_DEST_F_OVERLOAD;
206}
207
208
209/*
210 *      Destination hashing scheduling
211 */
212static struct ip_vs_dest *
213ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
214		  struct ip_vs_iphdr *iph)
215{
216	struct ip_vs_dest *dest;
217	struct ip_vs_dh_state *s;
218
219	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
220
221	s = (struct ip_vs_dh_state *) svc->sched_data;
222	dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
223	if (!dest
224	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
225	    || atomic_read(&dest->weight) <= 0
226	    || is_overloaded(dest)) {
227		ip_vs_scheduler_err(svc, "no destination available");
228		return NULL;
229	}
230
231	IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
232		      IP_VS_DBG_ADDR(svc->af, &iph->daddr),
233		      IP_VS_DBG_ADDR(dest->af, &dest->addr),
234		      ntohs(dest->port));
235
236	return dest;
237}
238
239
240/*
241 *      IPVS DH Scheduler structure
242 */
243static struct ip_vs_scheduler ip_vs_dh_scheduler =
244{
245	.name =			"dh",
246	.refcnt =		ATOMIC_INIT(0),
247	.module =		THIS_MODULE,
248	.n_list =		LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
249	.init_service =		ip_vs_dh_init_svc,
250	.done_service =		ip_vs_dh_done_svc,
251	.add_dest =		ip_vs_dh_dest_changed,
252	.del_dest =		ip_vs_dh_dest_changed,
253	.schedule =		ip_vs_dh_schedule,
254};
255
256
257static int __init ip_vs_dh_init(void)
258{
259	return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
260}
261
262
263static void __exit ip_vs_dh_cleanup(void)
264{
265	unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
266	synchronize_rcu();
267}
268
269
270module_init(ip_vs_dh_init);
271module_exit(ip_vs_dh_cleanup);
272MODULE_LICENSE("GPL");
273MODULE_DESCRIPTION("ipvs destination hashing scheduler");
274