1/*
2 * IPVS:        Source Hashing scheduling module
3 *
4 * Version:     $Id: ip_vs_sh.c,v 1.1.1.1 2007/08/03 18:53:52 Exp $
5 *
6 * Authors:     Wensong Zhang <wensong@gnuchina.org>
7 *
8 *              This program is free software; you can redistribute it and/or
9 *              modify it under the terms of the GNU General Public License
10 *              as published by the Free Software Foundation; either version
11 *              2 of the License, or (at your option) any later version.
12 *
13 * Changes:
14 *
15 */
16
17/*
18 * The sh algorithm is to select server by the hash key of source IP
19 * address. The pseudo code is as follows:
20 *
21 *       n <- servernode[src_ip];
 *       if (n is dead) OR
 *          (n is overloaded) OR (n.weight <= 0) then
24 *                 return NULL;
25 *
26 *       return n;
27 *
 * Note that servernode is a hash table (256 buckets by default) that maps
 * the hash index derived from the packet source IP address to the current
 * server array. If the sh scheduler is used in a cache cluster, it is good
 * to combine it with the cache_bypass feature. When the statically assigned
 * server is dead or overloaded, the load balancer can bypass the cache
 * server and send requests to the original server directly.
34 *
35 */
36
37#include <linux/ip.h>
38#include <linux/module.h>
39#include <linux/kernel.h>
40#include <linux/skbuff.h>
41
42#include <net/ip_vs.h>
43
44
/*
 *      IPVS SH bucket
 *
 *      One slot of the source-hash table.  A slot either points at the
 *      destination chosen for that hash value or is NULL when no
 *      destination has been assigned to it.
 */
struct ip_vs_sh_bucket {
	/* real server (cache) serving this hash slot, or NULL */
	struct ip_vs_dest	*dest;
};
51
52/*
53 *     for IPVS SH entry hash table
54 */
55#ifndef CONFIG_IP_VS_SH_TAB_BITS
56#define CONFIG_IP_VS_SH_TAB_BITS        8
57#endif
58#define IP_VS_SH_TAB_BITS               CONFIG_IP_VS_SH_TAB_BITS
59#define IP_VS_SH_TAB_SIZE               (1 << IP_VS_SH_TAB_BITS)
60#define IP_VS_SH_TAB_MASK               (IP_VS_SH_TAB_SIZE - 1)
61
62
63/*
64 *	Returns hash value for IPVS SH entry
65 */
66static inline unsigned ip_vs_sh_hashkey(__be32 addr)
67{
68	return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK;
69}
70
71
72/*
73 *      Get ip_vs_dest associated with supplied parameters.
74 */
75static inline struct ip_vs_dest *
76ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr)
77{
78	return (tbl[ip_vs_sh_hashkey(addr)]).dest;
79}
80
81
82/*
83 *      Assign all the hash buckets of the specified table with the service.
84 */
85static int
86ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
87{
88	int i;
89	struct ip_vs_sh_bucket *b;
90	struct list_head *p;
91	struct ip_vs_dest *dest;
92
93	b = tbl;
94	p = &svc->destinations;
95	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
96		if (list_empty(p)) {
97			b->dest = NULL;
98		} else {
99			if (p == &svc->destinations)
100				p = p->next;
101
102			dest = list_entry(p, struct ip_vs_dest, n_list);
103			atomic_inc(&dest->refcnt);
104			b->dest = dest;
105
106			p = p->next;
107		}
108		b++;
109	}
110	return 0;
111}
112
113
114/*
115 *      Flush all the hash buckets of the specified table.
116 */
117static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
118{
119	int i;
120	struct ip_vs_sh_bucket *b;
121
122	b = tbl;
123	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
124		if (b->dest) {
125			atomic_dec(&b->dest->refcnt);
126			b->dest = NULL;
127		}
128		b++;
129	}
130}
131
132
133static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
134{
135	struct ip_vs_sh_bucket *tbl;
136
137	/* allocate the SH table for this service */
138	tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
139		      GFP_ATOMIC);
140	if (tbl == NULL) {
141		IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n");
142		return -ENOMEM;
143	}
144	svc->sched_data = tbl;
145	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
146		  "current service\n",
147		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
148
149	/* assign the hash buckets with the updated service */
150	ip_vs_sh_assign(tbl, svc);
151
152	return 0;
153}
154
155
156static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
157{
158	struct ip_vs_sh_bucket *tbl = svc->sched_data;
159
160	/* got to clean up hash buckets here */
161	ip_vs_sh_flush(tbl);
162
163	/* release the table itself */
164	kfree(svc->sched_data);
165	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
166		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
167
168	return 0;
169}
170
171
172static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
173{
174	struct ip_vs_sh_bucket *tbl = svc->sched_data;
175
176	/* got to clean up hash buckets here */
177	ip_vs_sh_flush(tbl);
178
179	/* assign the hash buckets with the updated service */
180	ip_vs_sh_assign(tbl, svc);
181
182	return 0;
183}
184
185
186/*
187 *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
188 *      consider that the server is overloaded here.
189 */
190static inline int is_overloaded(struct ip_vs_dest *dest)
191{
192	return dest->flags & IP_VS_DEST_F_OVERLOAD;
193}
194
195
196/*
197 *      Source Hashing scheduling
198 */
199static struct ip_vs_dest *
200ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
201{
202	struct ip_vs_dest *dest;
203	struct ip_vs_sh_bucket *tbl;
204	struct iphdr *iph = ip_hdr(skb);
205
206	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
207
208	tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
209	dest = ip_vs_sh_get(tbl, iph->saddr);
210	if (!dest
211	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
212	    || atomic_read(&dest->weight) <= 0
213	    || is_overloaded(dest)) {
214		return NULL;
215	}
216
217	IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
218		  "--> server %u.%u.%u.%u:%d\n",
219		  NIPQUAD(iph->saddr),
220		  NIPQUAD(dest->addr),
221		  ntohs(dest->port));
222
223	return dest;
224}
225
226
227/*
228 *      IPVS SH Scheduler structure
229 */
230static struct ip_vs_scheduler ip_vs_sh_scheduler =
231{
232	.name =			"sh",
233	.refcnt =		ATOMIC_INIT(0),
234	.module =		THIS_MODULE,
235	.init_service =		ip_vs_sh_init_svc,
236	.done_service =		ip_vs_sh_done_svc,
237	.update_service =	ip_vs_sh_update_svc,
238	.schedule =		ip_vs_sh_schedule,
239};
240
241
242static int __init ip_vs_sh_init(void)
243{
244	INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list);
245	return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
246}
247
248
249static void __exit ip_vs_sh_cleanup(void)
250{
251	unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
252}
253
254
255module_init(ip_vs_sh_init);
256module_exit(ip_vs_sh_cleanup);
257MODULE_LICENSE("GPL");
258