1219937Sedwin// SPDX-License-Identifier: GPL-2.0-or-later 2219937Sedwin/* 3219937Sedwin * ip_vs_proto.c: transport protocol load balancing support for IPVS 4219937Sedwin * 5219937Sedwin * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 6219937Sedwin * Julian Anastasov <ja@ssi.bg> 7219937Sedwin * 8219937Sedwin * Changes: 9219937Sedwin */ 10219937Sedwin 11219937Sedwin#define KMSG_COMPONENT "IPVS" 12219937Sedwin#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 13219937Sedwin 14#include <linux/module.h> 15#include <linux/kernel.h> 16#include <linux/skbuff.h> 17#include <linux/gfp.h> 18#include <linux/in.h> 19#include <linux/ip.h> 20#include <net/protocol.h> 21#include <net/tcp.h> 22#include <net/udp.h> 23#include <linux/stat.h> 24#include <linux/proc_fs.h> 25 26#include <net/ip_vs.h> 27 28 29/* 30 * IPVS protocols can only be registered/unregistered when the ipvs 31 * module is loaded/unloaded, so no lock is needed in accessing the 32 * ipvs protocol table. 33 */ 34 35#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ 36#define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1)) 37 38static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; 39 40/* States for conn templates: NONE or words separated with ",", max 15 chars */ 41static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = { 42 [IP_VS_CTPL_S_NONE] = "NONE", 43 [IP_VS_CTPL_S_ASSURED] = "ASSURED", 44}; 45 46/* 47 * register an ipvs protocol 48 */ 49static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) 50{ 51 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 52 53 pp->next = ip_vs_proto_table[hash]; 54 ip_vs_proto_table[hash] = pp; 55 56 if (pp->init != NULL) 57 pp->init(pp); 58 59 return 0; 60} 61 62/* 63 * register an ipvs protocols netns related data 64 */ 65static int 66register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp) 67{ 68 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 69 struct ip_vs_proto_data *pd = 70 kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL); 71 72 if (!pd) 73 return -ENOMEM; 74 75 pd->pp = pp; /* For speed issues */ 76 pd->next = ipvs->proto_data_table[hash]; 77 ipvs->proto_data_table[hash] = pd; 78 atomic_set(&pd->appcnt, 0); /* Init app counter */ 79 80 if (pp->init_netns != NULL) { 81 int ret = pp->init_netns(ipvs, pd); 82 if (ret) { 83 /* unlink an free proto data */ 84 ipvs->proto_data_table[hash] = pd->next; 85 kfree(pd); 86 return ret; 87 } 88 } 89 90 return 0; 91} 92 93/* 94 * unregister an ipvs protocol 95 */ 96static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) 97{ 98 struct ip_vs_protocol **pp_p; 99 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 100 101 pp_p = &ip_vs_proto_table[hash]; 102 for (; *pp_p; pp_p = &(*pp_p)->next) { 103 if (*pp_p == pp) { 104 *pp_p = pp->next; 105 if (pp->exit != NULL) 106 pp->exit(pp); 107 return 0; 108 } 109 } 110 111 return -ESRCH; 112} 113 114/* 115 * unregister an ipvs protocols netns data 116 */ 117static int 118unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) 119{ 120 struct ip_vs_proto_data **pd_p; 121 unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol); 122 123 pd_p = &ipvs->proto_data_table[hash]; 124 for (; *pd_p; pd_p = &(*pd_p)->next) { 125 if (*pd_p == pd) { 126 *pd_p = pd->next; 127 if (pd->pp->exit_netns != NULL) 128 pd->pp->exit_netns(ipvs, pd); 129 kfree(pd); 130 return 0; 131 } 132 } 133 134 return -ESRCH; 135} 136 137/* 138 * get ip_vs_protocol object by its proto. 139 */ 140struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) 141{ 142 struct ip_vs_protocol *pp; 143 unsigned int hash = IP_VS_PROTO_HASH(proto); 144 145 for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) { 146 if (pp->protocol == proto) 147 return pp; 148 } 149 150 return NULL; 151} 152EXPORT_SYMBOL(ip_vs_proto_get); 153 154/* 155 * get ip_vs_protocol object data by netns and proto 156 */ 157struct ip_vs_proto_data * 158ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) 159{ 160 struct ip_vs_proto_data *pd; 161 unsigned int hash = IP_VS_PROTO_HASH(proto); 162 163 for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { 164 if (pd->pp->protocol == proto) 165 return pd; 166 } 167 168 return NULL; 169} 170EXPORT_SYMBOL(ip_vs_proto_data_get); 171 172/* 173 * Propagate event for state change to all protocols 174 */ 175void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) 176{ 177 struct ip_vs_proto_data *pd; 178 int i; 179 180 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 181 for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { 182 if (pd->pp->timeout_change) 183 pd->pp->timeout_change(pd, flags); 184 } 185 } 186} 187 188 189int * 190ip_vs_create_timeout_table(int *table, int size) 191{ 192 return kmemdup(table, size, GFP_KERNEL); 193} 194 195 196const char *ip_vs_state_name(const struct ip_vs_conn *cp) 197{ 198 unsigned int state = cp->state; 199 struct ip_vs_protocol *pp; 200 201 if (cp->flags & IP_VS_CONN_F_TEMPLATE) { 202 203 if (state >= IP_VS_CTPL_S_LAST) 204 return "ERR!"; 205 return ip_vs_ctpl_state_name_table[state] ? : "?"; 206 } 207 pp = ip_vs_proto_get(cp->protocol); 208 if (pp == NULL || pp->state_name == NULL) 209 return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!"; 210 return pp->state_name(state); 211} 212 213 214static void 215ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, 216 const struct sk_buff *skb, 217 int offset, 218 const char *msg) 219{ 220 char buf[128]; 221 struct iphdr _iph, *ih; 222 223 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 224 if (ih == NULL) 225 sprintf(buf, "TRUNCATED"); 226 else if (ih->frag_off & htons(IP_OFFSET)) 227 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); 228 else { 229 __be16 _ports[2], *pptr; 230 231 pptr = skb_header_pointer(skb, offset + ih->ihl*4, 232 sizeof(_ports), _ports); 233 if (pptr == NULL) 234 sprintf(buf, "TRUNCATED %pI4->%pI4", 235 &ih->saddr, &ih->daddr); 236 else 237 sprintf(buf, "%pI4:%u->%pI4:%u", 238 &ih->saddr, ntohs(pptr[0]), 239 &ih->daddr, ntohs(pptr[1])); 240 } 241 242 pr_debug("%s: %s %s\n", msg, pp->name, buf); 243} 244 245#ifdef CONFIG_IP_VS_IPV6 246static void 247ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, 248 const struct sk_buff *skb, 249 int offset, 250 const char *msg) 251{ 252 char buf[192]; 253 struct ipv6hdr _iph, *ih; 254 255 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 256 if (ih == NULL) 257 sprintf(buf, "TRUNCATED"); 258 else if (ih->nexthdr == IPPROTO_FRAGMENT) 259 sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr); 260 else { 261 __be16 _ports[2], *pptr; 262 263 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), 264 sizeof(_ports), _ports); 265 if (pptr == NULL) 266 sprintf(buf, "TRUNCATED %pI6c->%pI6c", 267 &ih->saddr, &ih->daddr); 268 else 269 sprintf(buf, "%pI6c:%u->%pI6c:%u", 270 &ih->saddr, ntohs(pptr[0]), 271 &ih->daddr, ntohs(pptr[1])); 272 } 273 274 pr_debug("%s: %s %s\n", msg, pp->name, buf); 275} 276#endif 277 278 279void 280ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, 281 const struct sk_buff *skb, 282 int offset, 283 const char *msg) 284{ 285#ifdef CONFIG_IP_VS_IPV6 286 if (af == AF_INET6) 287 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); 288 else 289#endif 290 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); 291} 292 293/* 294 * per network name-space init 295 */ 296int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs) 297{ 298 int i, ret; 299 static struct ip_vs_protocol *protos[] = { 300#ifdef CONFIG_IP_VS_PROTO_TCP 301 &ip_vs_protocol_tcp, 302#endif 303#ifdef CONFIG_IP_VS_PROTO_UDP 304 &ip_vs_protocol_udp, 305#endif 306#ifdef CONFIG_IP_VS_PROTO_SCTP 307 &ip_vs_protocol_sctp, 308#endif 309#ifdef CONFIG_IP_VS_PROTO_AH 310 &ip_vs_protocol_ah, 311#endif 312#ifdef CONFIG_IP_VS_PROTO_ESP 313 &ip_vs_protocol_esp, 314#endif 315 }; 316 317 for (i = 0; i < ARRAY_SIZE(protos); i++) { 318 ret = register_ip_vs_proto_netns(ipvs, protos[i]); 319 if (ret < 0) 320 goto cleanup; 321 } 322 return 0; 323 324cleanup: 325 ip_vs_protocol_net_cleanup(ipvs); 326 return ret; 327} 328 329void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs) 330{ 331 struct ip_vs_proto_data *pd; 332 int i; 333 334 /* unregister all the ipvs proto data for this netns */ 335 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 336 while ((pd = ipvs->proto_data_table[i]) != NULL) 337 unregister_ip_vs_proto_netns(ipvs, pd); 338 } 339} 340 341int __init ip_vs_protocol_init(void) 342{ 343 char protocols[64]; 344#define REGISTER_PROTOCOL(p) \ 345 do { \ 346 register_ip_vs_protocol(p); \ 347 strcat(protocols, ", "); \ 348 strcat(protocols, (p)->name); \ 349 } while (0) 350 351 protocols[0] = '\0'; 352 protocols[2] = '\0'; 353#ifdef CONFIG_IP_VS_PROTO_TCP 354 REGISTER_PROTOCOL(&ip_vs_protocol_tcp); 355#endif 356#ifdef CONFIG_IP_VS_PROTO_UDP 357 REGISTER_PROTOCOL(&ip_vs_protocol_udp); 358#endif 359#ifdef CONFIG_IP_VS_PROTO_SCTP 360 REGISTER_PROTOCOL(&ip_vs_protocol_sctp); 361#endif 362#ifdef CONFIG_IP_VS_PROTO_AH 363 REGISTER_PROTOCOL(&ip_vs_protocol_ah); 364#endif 365#ifdef CONFIG_IP_VS_PROTO_ESP 366 REGISTER_PROTOCOL(&ip_vs_protocol_esp); 367#endif 368 pr_info("Registered protocols (%s)\n", &protocols[2]); 369 370 return 0; 371} 372 373 374void ip_vs_protocol_cleanup(void) 375{ 376 struct ip_vs_protocol *pp; 377 int i; 378 379 /* unregister all the ipvs protocols */ 380 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 381 while ((pp = ip_vs_proto_table[i]) != NULL) 382 unregister_ip_vs_protocol(pp); 383 } 384} 385