1/* 2 * IPVS: Source Hashing scheduling module 3 * 4 * Version: $Id: ip_vs_sh.c,v 1.1.1.1 2007/08/03 18:53:52 Exp $ 5 * 6 * Authors: Wensong Zhang <wensong@gnuchina.org> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 * 13 * Changes: 14 * 15 */ 16 17/* 18 * The sh algorithm is to select server by the hash key of source IP 19 * address. The pseudo code is as follows: 20 * 21 * n <- servernode[src_ip]; 22 * if (n is dead) OR 23 * (n is overloaded) or (n.weight <= 0) then 24 * return NULL; 25 * 26 * return n; 27 * 28 * Notes that servernode is a 256-bucket hash table that maps the hash 29 * index derived from packet source IP address to the current server 30 * array. If the sh scheduler is used in cache cluster, it is good to 31 * combine it with cache_bypass feature. When the statically assigned 32 * server is dead or overloaded, the load balancer can bypass the cache 33 * server and send requests to the original server directly. 34 * 35 */ 36 37#include <linux/ip.h> 38#include <linux/module.h> 39#include <linux/kernel.h> 40#include <linux/skbuff.h> 41 42#include <net/ip_vs.h> 43 44 45/* 46 * IPVS SH bucket 47 */ 48struct ip_vs_sh_bucket { 49 struct ip_vs_dest *dest; /* real server (cache) */ 50}; 51 52/* 53 * for IPVS SH entry hash table 54 */ 55#ifndef CONFIG_IP_VS_SH_TAB_BITS 56#define CONFIG_IP_VS_SH_TAB_BITS 8 57#endif 58#define IP_VS_SH_TAB_BITS CONFIG_IP_VS_SH_TAB_BITS 59#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS) 60#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) 61 62 63/* 64 * Returns hash value for IPVS SH entry 65 */ 66static inline unsigned ip_vs_sh_hashkey(__be32 addr) 67{ 68 return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK; 69} 70 71 72/* 73 * Get ip_vs_dest associated with supplied parameters. 74 */ 75static inline struct ip_vs_dest * 76ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr) 77{ 78 return (tbl[ip_vs_sh_hashkey(addr)]).dest; 79} 80 81 82/* 83 * Assign all the hash buckets of the specified table with the service. 84 */ 85static int 86ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) 87{ 88 int i; 89 struct ip_vs_sh_bucket *b; 90 struct list_head *p; 91 struct ip_vs_dest *dest; 92 93 b = tbl; 94 p = &svc->destinations; 95 for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { 96 if (list_empty(p)) { 97 b->dest = NULL; 98 } else { 99 if (p == &svc->destinations) 100 p = p->next; 101 102 dest = list_entry(p, struct ip_vs_dest, n_list); 103 atomic_inc(&dest->refcnt); 104 b->dest = dest; 105 106 p = p->next; 107 } 108 b++; 109 } 110 return 0; 111} 112 113 114/* 115 * Flush all the hash buckets of the specified table. 116 */ 117static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) 118{ 119 int i; 120 struct ip_vs_sh_bucket *b; 121 122 b = tbl; 123 for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { 124 if (b->dest) { 125 atomic_dec(&b->dest->refcnt); 126 b->dest = NULL; 127 } 128 b++; 129 } 130} 131 132 133static int ip_vs_sh_init_svc(struct ip_vs_service *svc) 134{ 135 struct ip_vs_sh_bucket *tbl; 136 137 /* allocate the SH table for this service */ 138 tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, 139 GFP_ATOMIC); 140 if (tbl == NULL) { 141 IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n"); 142 return -ENOMEM; 143 } 144 svc->sched_data = tbl; 145 IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for " 146 "current service\n", 147 sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); 148 149 /* assign the hash buckets with the updated service */ 150 ip_vs_sh_assign(tbl, svc); 151 152 return 0; 153} 154 155 156static int ip_vs_sh_done_svc(struct ip_vs_service *svc) 157{ 158 struct ip_vs_sh_bucket *tbl = svc->sched_data; 159 160 /* got to clean up hash buckets here */ 161 ip_vs_sh_flush(tbl); 162 163 /* release the table itself */ 164 kfree(svc->sched_data); 165 IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", 166 sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); 167 168 return 0; 169} 170 171 172static int ip_vs_sh_update_svc(struct ip_vs_service *svc) 173{ 174 struct ip_vs_sh_bucket *tbl = svc->sched_data; 175 176 /* got to clean up hash buckets here */ 177 ip_vs_sh_flush(tbl); 178 179 /* assign the hash buckets with the updated service */ 180 ip_vs_sh_assign(tbl, svc); 181 182 return 0; 183} 184 185 186/* 187 * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, 188 * consider that the server is overloaded here. 189 */ 190static inline int is_overloaded(struct ip_vs_dest *dest) 191{ 192 return dest->flags & IP_VS_DEST_F_OVERLOAD; 193} 194 195 196/* 197 * Source Hashing scheduling 198 */ 199static struct ip_vs_dest * 200ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 201{ 202 struct ip_vs_dest *dest; 203 struct ip_vs_sh_bucket *tbl; 204 struct iphdr *iph = ip_hdr(skb); 205 206 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); 207 208 tbl = (struct ip_vs_sh_bucket *)svc->sched_data; 209 dest = ip_vs_sh_get(tbl, iph->saddr); 210 if (!dest 211 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 212 || atomic_read(&dest->weight) <= 0 213 || is_overloaded(dest)) { 214 return NULL; 215 } 216 217 IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u " 218 "--> server %u.%u.%u.%u:%d\n", 219 NIPQUAD(iph->saddr), 220 NIPQUAD(dest->addr), 221 ntohs(dest->port)); 222 223 return dest; 224} 225 226 227/* 228 * IPVS SH Scheduler structure 229 */ 230static struct ip_vs_scheduler ip_vs_sh_scheduler = 231{ 232 .name = "sh", 233 .refcnt = ATOMIC_INIT(0), 234 .module = THIS_MODULE, 235 .init_service = ip_vs_sh_init_svc, 236 .done_service = ip_vs_sh_done_svc, 237 .update_service = ip_vs_sh_update_svc, 238 .schedule = ip_vs_sh_schedule, 239}; 240 241 242static int __init ip_vs_sh_init(void) 243{ 244 INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list); 245 return register_ip_vs_scheduler(&ip_vs_sh_scheduler); 246} 247 248 249static void __exit ip_vs_sh_cleanup(void) 250{ 251 unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); 252} 253 254 255module_init(ip_vs_sh_init); 256module_exit(ip_vs_sh_cleanup); 257MODULE_LICENSE("GPL"); 258