/*
 * IPVS:        Destination Hashing scheduling module
 *
 * Version:     $Id: ip_vs_dh.c,v 1.1.1.1 2007/08/03 18:53:51 Exp $
 *
 * Authors:     Wensong Zhang <wensong@gnuchina.org>
 *
 *              Inspired by the consistent hashing scheduler patch from
 *              Thomas Proell <proellt@gmx.de>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */

/*
 * The dh algorithm is to select server by the hash key of destination IP
 * address. The pseudo code is as follows:
 *
 *       n <- servernode[dest_ip];
 *       if (n is dead) OR
 *          (n is overloaded) OR (n.weight <= 0) then
 *                 return NULL;
 *
 *       return n;
 *
 * Note that servernode is a 256-bucket hash table that maps the hash
 * index derived from packet destination IP address to the current server
 * array. If the dh scheduler is used in cache cluster, it is good to
 * combine it with cache_bypass feature. When the statically assigned
 * server is dead or overloaded, the load balancer can bypass the cache
 * server and send requests to the original server directly.
37 * 38 */ 39 40#include <linux/ip.h> 41#include <linux/module.h> 42#include <linux/kernel.h> 43#include <linux/skbuff.h> 44 45#include <net/ip_vs.h> 46 47 48/* 49 * IPVS DH bucket 50 */ 51struct ip_vs_dh_bucket { 52 struct ip_vs_dest *dest; /* real server (cache) */ 53}; 54 55/* 56 * for IPVS DH entry hash table 57 */ 58#ifndef CONFIG_IP_VS_DH_TAB_BITS 59#define CONFIG_IP_VS_DH_TAB_BITS 8 60#endif 61#define IP_VS_DH_TAB_BITS CONFIG_IP_VS_DH_TAB_BITS 62#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) 63#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) 64 65 66/* 67 * Returns hash value for IPVS DH entry 68 */ 69static inline unsigned ip_vs_dh_hashkey(__be32 addr) 70{ 71 return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK; 72} 73 74 75/* 76 * Get ip_vs_dest associated with supplied parameters. 77 */ 78static inline struct ip_vs_dest * 79ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr) 80{ 81 return (tbl[ip_vs_dh_hashkey(addr)]).dest; 82} 83 84 85/* 86 * Assign all the hash buckets of the specified table with the service. 87 */ 88static int 89ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) 90{ 91 int i; 92 struct ip_vs_dh_bucket *b; 93 struct list_head *p; 94 struct ip_vs_dest *dest; 95 96 b = tbl; 97 p = &svc->destinations; 98 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { 99 if (list_empty(p)) { 100 b->dest = NULL; 101 } else { 102 if (p == &svc->destinations) 103 p = p->next; 104 105 dest = list_entry(p, struct ip_vs_dest, n_list); 106 atomic_inc(&dest->refcnt); 107 b->dest = dest; 108 109 p = p->next; 110 } 111 b++; 112 } 113 return 0; 114} 115 116 117/* 118 * Flush all the hash buckets of the specified table. 
119 */ 120static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) 121{ 122 int i; 123 struct ip_vs_dh_bucket *b; 124 125 b = tbl; 126 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { 127 if (b->dest) { 128 atomic_dec(&b->dest->refcnt); 129 b->dest = NULL; 130 } 131 b++; 132 } 133} 134 135 136static int ip_vs_dh_init_svc(struct ip_vs_service *svc) 137{ 138 struct ip_vs_dh_bucket *tbl; 139 140 /* allocate the DH table for this service */ 141 tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, 142 GFP_ATOMIC); 143 if (tbl == NULL) { 144 IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n"); 145 return -ENOMEM; 146 } 147 svc->sched_data = tbl; 148 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " 149 "current service\n", 150 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); 151 152 /* assign the hash buckets with the updated service */ 153 ip_vs_dh_assign(tbl, svc); 154 155 return 0; 156} 157 158 159static int ip_vs_dh_done_svc(struct ip_vs_service *svc) 160{ 161 struct ip_vs_dh_bucket *tbl = svc->sched_data; 162 163 /* got to clean up hash buckets here */ 164 ip_vs_dh_flush(tbl); 165 166 /* release the table itself */ 167 kfree(svc->sched_data); 168 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", 169 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); 170 171 return 0; 172} 173 174 175static int ip_vs_dh_update_svc(struct ip_vs_service *svc) 176{ 177 struct ip_vs_dh_bucket *tbl = svc->sched_data; 178 179 /* got to clean up hash buckets here */ 180 ip_vs_dh_flush(tbl); 181 182 /* assign the hash buckets with the updated service */ 183 ip_vs_dh_assign(tbl, svc); 184 185 return 0; 186} 187 188 189/* 190 * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, 191 * consider that the server is overloaded here. 
192 */ 193static inline int is_overloaded(struct ip_vs_dest *dest) 194{ 195 return dest->flags & IP_VS_DEST_F_OVERLOAD; 196} 197 198 199/* 200 * Destination hashing scheduling 201 */ 202static struct ip_vs_dest * 203ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 204{ 205 struct ip_vs_dest *dest; 206 struct ip_vs_dh_bucket *tbl; 207 struct iphdr *iph = ip_hdr(skb); 208 209 IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); 210 211 tbl = (struct ip_vs_dh_bucket *)svc->sched_data; 212 dest = ip_vs_dh_get(tbl, iph->daddr); 213 if (!dest 214 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 215 || atomic_read(&dest->weight) <= 0 216 || is_overloaded(dest)) { 217 return NULL; 218 } 219 220 IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u " 221 "--> server %u.%u.%u.%u:%d\n", 222 NIPQUAD(iph->daddr), 223 NIPQUAD(dest->addr), 224 ntohs(dest->port)); 225 226 return dest; 227} 228 229 230/* 231 * IPVS DH Scheduler structure 232 */ 233static struct ip_vs_scheduler ip_vs_dh_scheduler = 234{ 235 .name = "dh", 236 .refcnt = ATOMIC_INIT(0), 237 .module = THIS_MODULE, 238 .init_service = ip_vs_dh_init_svc, 239 .done_service = ip_vs_dh_done_svc, 240 .update_service = ip_vs_dh_update_svc, 241 .schedule = ip_vs_dh_schedule, 242}; 243 244 245static int __init ip_vs_dh_init(void) 246{ 247 INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list); 248 return register_ip_vs_scheduler(&ip_vs_dh_scheduler); 249} 250 251 252static void __exit ip_vs_dh_cleanup(void) 253{ 254 unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); 255} 256 257 258module_init(ip_vs_dh_init); 259module_exit(ip_vs_dh_cleanup); 260MODULE_LICENSE("GPL"); 261