1/* SPDX-License-Identifier: GPL-2.0-only */ 2/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ 3 4#ifndef _IP_SET_HASH_GEN_H 5#define _IP_SET_HASH_GEN_H 6 7#include <linux/rcupdate.h> 8#include <linux/rcupdate_wait.h> 9#include <linux/jhash.h> 10#include <linux/types.h> 11#include <linux/netfilter/nfnetlink.h> 12#include <linux/netfilter/ipset/ip_set.h> 13 14#define __ipset_dereference(p) \ 15 rcu_dereference_protected(p, 1) 16#define ipset_dereference_nfnl(p) \ 17 rcu_dereference_protected(p, \ 18 lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) 19#define ipset_dereference_set(p, set) \ 20 rcu_dereference_protected(p, \ 21 lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ 22 lockdep_is_held(&(set)->lock)) 23#define ipset_dereference_bh_nfnl(p) \ 24 rcu_dereference_bh_check(p, \ 25 lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) 26 27/* Hashing which uses arrays to resolve clashing. The hash table is resized 28 * (doubled) when searching becomes too long. 29 * Internally jhash is used with the assumption that the size of the 30 * stored data is a multiple of sizeof(u32). 31 * 32 * Readers and resizing 33 * 34 * Resizing can be triggered by userspace command only, and those 35 * are serialized by the nfnl mutex. During resizing the set is 36 * read-locked, so the only possible concurrent operations are 37 * the kernel side readers. Those must be protected by proper RCU locking. 38 */ 39 40/* Number of elements to store in an initial array block */ 41#define AHASH_INIT_SIZE 2 42/* Max number of elements to store in an array block */ 43#define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE) 44/* Max muber of elements in the array block when tuned */ 45#define AHASH_MAX_TUNED 64 46#define AHASH_MAX(h) ((h)->bucketsize) 47 48/* A hash bucket */ 49struct hbucket { 50 struct rcu_head rcu; /* for call_rcu */ 51 /* Which positions are used in the array */ 52 DECLARE_BITMAP(used, AHASH_MAX_TUNED); 53 u8 size; /* size of the array */ 54 u8 pos; /* position of the first free entry */ 55 unsigned char value[] /* the array of the values */ 56 __aligned(__alignof__(u64)); 57}; 58 59/* Region size for locking == 2^HTABLE_REGION_BITS */ 60#define HTABLE_REGION_BITS 10 61#define ahash_numof_locks(htable_bits) \ 62 ((htable_bits) < HTABLE_REGION_BITS ? 1 \ 63 : jhash_size((htable_bits) - HTABLE_REGION_BITS)) 64#define ahash_sizeof_regions(htable_bits) \ 65 (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) 66#define ahash_region(n, htable_bits) \ 67 ((n) % ahash_numof_locks(htable_bits)) 68#define ahash_bucket_start(h, htable_bits) \ 69 ((htable_bits) < HTABLE_REGION_BITS ? 0 \ 70 : (h) * jhash_size(HTABLE_REGION_BITS)) 71#define ahash_bucket_end(h, htable_bits) \ 72 ((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits) \ 73 : ((h) + 1) * jhash_size(HTABLE_REGION_BITS)) 74 75struct htable_gc { 76 struct delayed_work dwork; 77 struct ip_set *set; /* Set the gc belongs to */ 78 u32 region; /* Last gc run position */ 79}; 80 81/* The hash table: the table size stored here in order to make resizing easy */ 82struct htable { 83 atomic_t ref; /* References for resizing */ 84 atomic_t uref; /* References for dumping and gc */ 85 u8 htable_bits; /* size of hash table == 2^htable_bits */ 86 u32 maxelem; /* Maxelem per region */ 87 struct ip_set_region *hregion; /* Region locks and ext sizes */ 88 struct hbucket __rcu *bucket[]; /* hashtable buckets */ 89}; 90 91#define hbucket(h, i) ((h)->bucket[i]) 92#define ext_size(n, dsize) \ 93 (sizeof(struct hbucket) + (n) * (dsize)) 94 95#ifndef IPSET_NET_COUNT 96#define IPSET_NET_COUNT 1 97#endif 98 99/* Book-keeping of the prefixes added to the set */ 100struct net_prefixes { 101 u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */ 102 u8 cidr[IPSET_NET_COUNT]; /* the cidr value */ 103}; 104 105/* Compute the hash table size */ 106static size_t 107htable_size(u8 hbits) 108{ 109 size_t hsize; 110 111 /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */ 112 if (hbits > 31) 113 return 0; 114 hsize = jhash_size(hbits); 115 if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *) 116 < hsize) 117 return 0; 118 119 return hsize * sizeof(struct hbucket *) + sizeof(struct htable); 120} 121 122#ifdef IP_SET_HASH_WITH_NETS 123#if IPSET_NET_COUNT > 1 124#define __CIDR(cidr, i) (cidr[i]) 125#else 126#define __CIDR(cidr, i) (cidr) 127#endif 128 129/* cidr + 1 is stored in net_prefixes to support /0 */ 130#define NCIDR_PUT(cidr) ((cidr) + 1) 131#define NCIDR_GET(cidr) ((cidr) - 1) 132 133#ifdef IP_SET_HASH_WITH_NETS_PACKED 134/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */ 135#define DCIDR_PUT(cidr) ((cidr) - 1) 136#define DCIDR_GET(cidr, i) (__CIDR(cidr, i) + 1) 137#else 138#define DCIDR_PUT(cidr) (cidr) 139#define DCIDR_GET(cidr, i) __CIDR(cidr, i) 140#endif 141 142#define INIT_CIDR(cidr, host_mask) \ 143 DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask)) 144 145#ifdef IP_SET_HASH_WITH_NET0 146/* cidr from 0 to HOST_MASK value and c = cidr + 1 */ 147#define NLEN (HOST_MASK + 1) 148#define CIDR_POS(c) ((c) - 1) 149#else 150/* cidr from 1 to HOST_MASK value and c = cidr + 1 */ 151#define NLEN HOST_MASK 152#define CIDR_POS(c) ((c) - 2) 153#endif 154 155#else 156#define NLEN 0 157#endif /* IP_SET_HASH_WITH_NETS */ 158 159#define SET_ELEM_EXPIRED(set, d) \ 160 (SET_WITH_TIMEOUT(set) && \ 161 ip_set_timeout_expired(ext_timeout(d, set))) 162 163#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) 164static const union nf_inet_addr onesmask = { 165 .all[0] = 0xffffffff, 166 .all[1] = 0xffffffff, 167 .all[2] = 0xffffffff, 168 .all[3] = 0xffffffff 169}; 170 171static const union nf_inet_addr zeromask = {}; 172#endif 173 174#endif /* _IP_SET_HASH_GEN_H */ 175 176#ifndef MTYPE 177#error "MTYPE is not defined!" 178#endif 179 180#ifndef HTYPE 181#error "HTYPE is not defined!" 182#endif 183 184#ifndef HOST_MASK 185#error "HOST_MASK is not defined!" 186#endif 187 188/* Family dependent templates */ 189 190#undef ahash_data 191#undef mtype_data_equal 192#undef mtype_do_data_match 193#undef mtype_data_set_flags 194#undef mtype_data_reset_elem 195#undef mtype_data_reset_flags 196#undef mtype_data_netmask 197#undef mtype_data_list 198#undef mtype_data_next 199#undef mtype_elem 200 201#undef mtype_ahash_destroy 202#undef mtype_ext_cleanup 203#undef mtype_add_cidr 204#undef mtype_del_cidr 205#undef mtype_ahash_memsize 206#undef mtype_flush 207#undef mtype_destroy 208#undef mtype_same_set 209#undef mtype_kadt 210#undef mtype_uadt 211 212#undef mtype_add 213#undef mtype_del 214#undef mtype_test_cidrs 215#undef mtype_test 216#undef mtype_uref 217#undef mtype_resize 218#undef mtype_ext_size 219#undef mtype_resize_ad 220#undef mtype_head 221#undef mtype_list 222#undef mtype_gc_do 223#undef mtype_gc 224#undef mtype_gc_init 225#undef mtype_cancel_gc 226#undef mtype_variant 227#undef mtype_data_match 228 229#undef htype 230#undef HKEY 231 232#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal) 233#ifdef IP_SET_HASH_WITH_NETS 234#define mtype_do_data_match IPSET_TOKEN(MTYPE, _do_data_match) 235#else 236#define mtype_do_data_match(d) 1 237#endif 238#define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags) 239#define mtype_data_reset_elem IPSET_TOKEN(MTYPE, _data_reset_elem) 240#define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags) 241#define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask) 242#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) 243#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) 244#define mtype_elem IPSET_TOKEN(MTYPE, _elem) 245 246#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy) 247#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) 248#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) 249#define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr) 250#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) 251#define mtype_flush IPSET_TOKEN(MTYPE, _flush) 252#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) 253#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) 254#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) 255#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) 256 257#define mtype_add IPSET_TOKEN(MTYPE, _add) 258#define mtype_del IPSET_TOKEN(MTYPE, _del) 259#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) 260#define mtype_test IPSET_TOKEN(MTYPE, _test) 261#define mtype_uref IPSET_TOKEN(MTYPE, _uref) 262#define mtype_resize IPSET_TOKEN(MTYPE, _resize) 263#define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size) 264#define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad) 265#define mtype_head IPSET_TOKEN(MTYPE, _head) 266#define mtype_list IPSET_TOKEN(MTYPE, _list) 267#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) 268#define mtype_gc IPSET_TOKEN(MTYPE, _gc) 269#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) 270#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) 271#define mtype_variant IPSET_TOKEN(MTYPE, _variant) 272#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) 273 274#ifndef HKEY_DATALEN 275#define HKEY_DATALEN sizeof(struct mtype_elem) 276#endif 277 278#define htype MTYPE 279 280#define HKEY(data, initval, htable_bits) \ 281({ \ 282 const u32 *__k = (const u32 *)data; \ 283 u32 __l = HKEY_DATALEN / sizeof(u32); \ 284 \ 285 BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0); \ 286 \ 287 jhash2(__k, __l, initval) & jhash_mask(htable_bits); \ 288}) 289 290/* The generic hash structure */ 291struct htype { 292 struct htable __rcu *table; /* the hash table */ 293 struct htable_gc gc; /* gc workqueue */ 294 u32 maxelem; /* max elements in the hash */ 295 u32 initval; /* random jhash init value */ 296#ifdef IP_SET_HASH_WITH_MARKMASK 297 u32 markmask; /* markmask value for mark mask to store */ 298#endif 299 u8 bucketsize; /* max elements in an array block */ 300#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) 301 u8 netmask; /* netmask value for subnets to store */ 302 union nf_inet_addr bitmask; /* stores bitmask */ 303#endif 304 struct list_head ad; /* Resize add|del backlist */ 305 struct mtype_elem next; /* temporary storage for uadd */ 306#ifdef IP_SET_HASH_WITH_NETS 307 struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */ 308#endif 309}; 310 311/* ADD|DEL entries saved during resize */ 312struct mtype_resize_ad { 313 struct list_head list; 314 enum ipset_adt ad; /* ADD|DEL element */ 315 struct mtype_elem d; /* Element value */ 316 struct ip_set_ext ext; /* Extensions for ADD */ 317 struct ip_set_ext mext; /* Target extensions for ADD */ 318 u32 flags; /* Flags for ADD */ 319}; 320 321#ifdef IP_SET_HASH_WITH_NETS 322/* Network cidr size book keeping when the hash stores different 323 * sized networks. cidr == real cidr + 1 to support /0. 324 */ 325static void 326mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) 327{ 328 int i, j; 329 330 spin_lock_bh(&set->lock); 331 /* Add in increasing prefix order, so larger cidr first */ 332 for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) { 333 if (j != -1) { 334 continue; 335 } else if (h->nets[i].cidr[n] < cidr) { 336 j = i; 337 } else if (h->nets[i].cidr[n] == cidr) { 338 h->nets[CIDR_POS(cidr)].nets[n]++; 339 goto unlock; 340 } 341 } 342 if (j != -1) { 343 for (; i > j; i--) 344 h->nets[i].cidr[n] = h->nets[i - 1].cidr[n]; 345 } 346 h->nets[i].cidr[n] = cidr; 347 h->nets[CIDR_POS(cidr)].nets[n] = 1; 348unlock: 349 spin_unlock_bh(&set->lock); 350} 351 352static void 353mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) 354{ 355 u8 i, j, net_end = NLEN - 1; 356 357 spin_lock_bh(&set->lock); 358 for (i = 0; i < NLEN; i++) { 359 if (h->nets[i].cidr[n] != cidr) 360 continue; 361 h->nets[CIDR_POS(cidr)].nets[n]--; 362 if (h->nets[CIDR_POS(cidr)].nets[n] > 0) 363 goto unlock; 364 for (j = i; j < net_end && h->nets[j].cidr[n]; j++) 365 h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; 366 h->nets[j].cidr[n] = 0; 367 goto unlock; 368 } 369unlock: 370 spin_unlock_bh(&set->lock); 371} 372#endif 373 374/* Calculate the actual memory size of the set data */ 375static size_t 376mtype_ahash_memsize(const struct htype *h, const struct htable *t) 377{ 378 return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits); 379} 380 381/* Get the ith element from the array block n */ 382#define ahash_data(n, i, dsize) \ 383 ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) 384 385static void 386mtype_ext_cleanup(struct ip_set *set, struct hbucket *n) 387{ 388 int i; 389 390 for (i = 0; i < n->pos; i++) 391 if (test_bit(i, n->used)) 392 ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); 393} 394 395/* Flush a hash type of set: destroy all elements */ 396static void 397mtype_flush(struct ip_set *set) 398{ 399 struct htype *h = set->data; 400 struct htable *t; 401 struct hbucket *n; 402 u32 r, i; 403 404 t = ipset_dereference_nfnl(h->table); 405 for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { 406 spin_lock_bh(&t->hregion[r].lock); 407 for (i = ahash_bucket_start(r, t->htable_bits); 408 i < ahash_bucket_end(r, t->htable_bits); i++) { 409 n = __ipset_dereference(hbucket(t, i)); 410 if (!n) 411 continue; 412 if (set->extensions & IPSET_EXT_DESTROY) 413 mtype_ext_cleanup(set, n); 414 /* FIXME: use slab cache */ 415 rcu_assign_pointer(hbucket(t, i), NULL); 416 kfree_rcu(n, rcu); 417 } 418 t->hregion[r].ext_size = 0; 419 t->hregion[r].elements = 0; 420 spin_unlock_bh(&t->hregion[r].lock); 421 } 422#ifdef IP_SET_HASH_WITH_NETS 423 memset(h->nets, 0, sizeof(h->nets)); 424#endif 425} 426 427/* Destroy the hashtable part of the set */ 428static void 429mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) 430{ 431 struct hbucket *n; 432 u32 i; 433 434 for (i = 0; i < jhash_size(t->htable_bits); i++) { 435 n = (__force struct hbucket *)hbucket(t, i); 436 if (!n) 437 continue; 438 if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) 439 mtype_ext_cleanup(set, n); 440 /* FIXME: use slab cache */ 441 kfree(n); 442 } 443 444 ip_set_free(t->hregion); 445 ip_set_free(t); 446} 447 448/* Destroy a hash type of set */ 449static void 450mtype_destroy(struct ip_set *set) 451{ 452 struct htype *h = set->data; 453 struct list_head *l, *lt; 454 455 mtype_ahash_destroy(set, (__force struct htable *)h->table, true); 456 list_for_each_safe(l, lt, &h->ad) { 457 list_del(l); 458 kfree(l); 459 } 460 kfree(h); 461 462 set->data = NULL; 463} 464 465static bool 466mtype_same_set(const struct ip_set *a, const struct ip_set *b) 467{ 468 const struct htype *x = a->data; 469 const struct htype *y = b->data; 470 471 /* Resizing changes htable_bits, so we ignore it */ 472 return x->maxelem == y->maxelem && 473 a->timeout == b->timeout && 474#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) 475 nf_inet_addr_cmp(&x->bitmask, &y->bitmask) && 476#endif 477#ifdef IP_SET_HASH_WITH_MARKMASK 478 x->markmask == y->markmask && 479#endif 480 a->extensions == b->extensions; 481} 482 483static void 484mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r) 485{ 486 struct hbucket *n, *tmp; 487 struct mtype_elem *data; 488 u32 i, j, d; 489 size_t dsize = set->dsize; 490#ifdef IP_SET_HASH_WITH_NETS 491 u8 k; 492#endif 493 u8 htable_bits = t->htable_bits; 494 495 spin_lock_bh(&t->hregion[r].lock); 496 for (i = ahash_bucket_start(r, htable_bits); 497 i < ahash_bucket_end(r, htable_bits); i++) { 498 n = __ipset_dereference(hbucket(t, i)); 499 if (!n) 500 continue; 501 for (j = 0, d = 0; j < n->pos; j++) { 502 if (!test_bit(j, n->used)) { 503 d++; 504 continue; 505 } 506 data = ahash_data(n, j, dsize); 507 if (!ip_set_timeout_expired(ext_timeout(data, set))) 508 continue; 509 pr_debug("expired %u/%u\n", i, j); 510 clear_bit(j, n->used); 511 smp_mb__after_atomic(); 512#ifdef IP_SET_HASH_WITH_NETS 513 for (k = 0; k < IPSET_NET_COUNT; k++) 514 mtype_del_cidr(set, h, 515 NCIDR_PUT(DCIDR_GET(data->cidr, k)), 516 k); 517#endif 518 t->hregion[r].elements--; 519 ip_set_ext_destroy(set, data); 520 d++; 521 } 522 if (d >= AHASH_INIT_SIZE) { 523 if (d >= n->size) { 524 t->hregion[r].ext_size -= 525 ext_size(n->size, dsize); 526 rcu_assign_pointer(hbucket(t, i), NULL); 527 kfree_rcu(n, rcu); 528 continue; 529 } 530 tmp = kzalloc(sizeof(*tmp) + 531 (n->size - AHASH_INIT_SIZE) * dsize, 532 GFP_ATOMIC); 533 if (!tmp) 534 /* Still try to delete expired elements. */ 535 continue; 536 tmp->size = n->size - AHASH_INIT_SIZE; 537 for (j = 0, d = 0; j < n->pos; j++) { 538 if (!test_bit(j, n->used)) 539 continue; 540 data = ahash_data(n, j, dsize); 541 memcpy(tmp->value + d * dsize, 542 data, dsize); 543 set_bit(d, tmp->used); 544 d++; 545 } 546 tmp->pos = d; 547 t->hregion[r].ext_size -= 548 ext_size(AHASH_INIT_SIZE, dsize); 549 rcu_assign_pointer(hbucket(t, i), tmp); 550 kfree_rcu(n, rcu); 551 } 552 } 553 spin_unlock_bh(&t->hregion[r].lock); 554} 555 556static void 557mtype_gc(struct work_struct *work) 558{ 559 struct htable_gc *gc; 560 struct ip_set *set; 561 struct htype *h; 562 struct htable *t; 563 u32 r, numof_locks; 564 unsigned int next_run; 565 566 gc = container_of(work, struct htable_gc, dwork.work); 567 set = gc->set; 568 h = set->data; 569 570 spin_lock_bh(&set->lock); 571 t = ipset_dereference_set(h->table, set); 572 atomic_inc(&t->uref); 573 numof_locks = ahash_numof_locks(t->htable_bits); 574 r = gc->region++; 575 if (r >= numof_locks) { 576 r = gc->region = 0; 577 } 578 next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks; 579 if (next_run < HZ/10) 580 next_run = HZ/10; 581 spin_unlock_bh(&set->lock); 582 583 mtype_gc_do(set, h, t, r); 584 585 if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { 586 pr_debug("Table destroy after resize by expire: %p\n", t); 587 mtype_ahash_destroy(set, t, false); 588 } 589 590 queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run); 591 592} 593 594static void 595mtype_gc_init(struct htable_gc *gc) 596{ 597 INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc); 598 queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); 599} 600 601static void 602mtype_cancel_gc(struct ip_set *set) 603{ 604 struct htype *h = set->data; 605 606 if (SET_WITH_TIMEOUT(set)) 607 cancel_delayed_work_sync(&h->gc.dwork); 608} 609 610static int 611mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, 612 struct ip_set_ext *mext, u32 flags); 613static int 614mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, 615 struct ip_set_ext *mext, u32 flags); 616 617/* Resize a hash: create a new hash table with doubling the hashsize 618 * and inserting the elements to it. Repeat until we succeed or 619 * fail due to memory pressures. 620 */ 621static int 622mtype_resize(struct ip_set *set, bool retried) 623{ 624 struct htype *h = set->data; 625 struct htable *t, *orig; 626 u8 htable_bits; 627 size_t hsize, dsize = set->dsize; 628#ifdef IP_SET_HASH_WITH_NETS 629 u8 flags; 630 struct mtype_elem *tmp; 631#endif 632 struct mtype_elem *data; 633 struct mtype_elem *d; 634 struct hbucket *n, *m; 635 struct list_head *l, *lt; 636 struct mtype_resize_ad *x; 637 u32 i, j, r, nr, key; 638 int ret; 639 640#ifdef IP_SET_HASH_WITH_NETS 641 tmp = kmalloc(dsize, GFP_KERNEL); 642 if (!tmp) 643 return -ENOMEM; 644#endif 645 orig = ipset_dereference_bh_nfnl(h->table); 646 htable_bits = orig->htable_bits; 647 648retry: 649 ret = 0; 650 htable_bits++; 651 if (!htable_bits) 652 goto hbwarn; 653 hsize = htable_size(htable_bits); 654 if (!hsize) 655 goto hbwarn; 656 t = ip_set_alloc(hsize); 657 if (!t) { 658 ret = -ENOMEM; 659 goto out; 660 } 661 t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); 662 if (!t->hregion) { 663 ip_set_free(t); 664 ret = -ENOMEM; 665 goto out; 666 } 667 t->htable_bits = htable_bits; 668 t->maxelem = h->maxelem / ahash_numof_locks(htable_bits); 669 for (i = 0; i < ahash_numof_locks(htable_bits); i++) 670 spin_lock_init(&t->hregion[i].lock); 671 672 /* There can't be another parallel resizing, 673 * but dumping, gc, kernel side add/del are possible 674 */ 675 orig = ipset_dereference_bh_nfnl(h->table); 676 atomic_set(&orig->ref, 1); 677 atomic_inc(&orig->uref); 678 pr_debug("attempt to resize set %s from %u to %u, t %p\n", 679 set->name, orig->htable_bits, htable_bits, orig); 680 for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) { 681 /* Expire may replace a hbucket with another one */ 682 rcu_read_lock_bh(); 683 for (i = ahash_bucket_start(r, orig->htable_bits); 684 i < ahash_bucket_end(r, orig->htable_bits); i++) { 685 n = __ipset_dereference(hbucket(orig, i)); 686 if (!n) 687 continue; 688 for (j = 0; j < n->pos; j++) { 689 if (!test_bit(j, n->used)) 690 continue; 691 data = ahash_data(n, j, dsize); 692 if (SET_ELEM_EXPIRED(set, data)) 693 continue; 694#ifdef IP_SET_HASH_WITH_NETS 695 /* We have readers running parallel with us, 696 * so the live data cannot be modified. 697 */ 698 flags = 0; 699 memcpy(tmp, data, dsize); 700 data = tmp; 701 mtype_data_reset_flags(data, &flags); 702#endif 703 key = HKEY(data, h->initval, htable_bits); 704 m = __ipset_dereference(hbucket(t, key)); 705 nr = ahash_region(key, htable_bits); 706 if (!m) { 707 m = kzalloc(sizeof(*m) + 708 AHASH_INIT_SIZE * dsize, 709 GFP_ATOMIC); 710 if (!m) { 711 ret = -ENOMEM; 712 goto cleanup; 713 } 714 m->size = AHASH_INIT_SIZE; 715 t->hregion[nr].ext_size += 716 ext_size(AHASH_INIT_SIZE, 717 dsize); 718 RCU_INIT_POINTER(hbucket(t, key), m); 719 } else if (m->pos >= m->size) { 720 struct hbucket *ht; 721 722 if (m->size >= AHASH_MAX(h)) { 723 ret = -EAGAIN; 724 } else { 725 ht = kzalloc(sizeof(*ht) + 726 (m->size + AHASH_INIT_SIZE) 727 * dsize, 728 GFP_ATOMIC); 729 if (!ht) 730 ret = -ENOMEM; 731 } 732 if (ret < 0) 733 goto cleanup; 734 memcpy(ht, m, sizeof(struct hbucket) + 735 m->size * dsize); 736 ht->size = m->size + AHASH_INIT_SIZE; 737 t->hregion[nr].ext_size += 738 ext_size(AHASH_INIT_SIZE, 739 dsize); 740 kfree(m); 741 m = ht; 742 RCU_INIT_POINTER(hbucket(t, key), ht); 743 } 744 d = ahash_data(m, m->pos, dsize); 745 memcpy(d, data, dsize); 746 set_bit(m->pos++, m->used); 747 t->hregion[nr].elements++; 748#ifdef IP_SET_HASH_WITH_NETS 749 mtype_data_reset_flags(d, &flags); 750#endif 751 } 752 } 753 rcu_read_unlock_bh(); 754 } 755 756 /* There can't be any other writer. */ 757 rcu_assign_pointer(h->table, t); 758 759 /* Give time to other readers of the set */ 760 synchronize_rcu(); 761 762 pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, 763 orig->htable_bits, orig, t->htable_bits, t); 764 /* Add/delete elements processed by the SET target during resize. 765 * Kernel-side add cannot trigger a resize and userspace actions 766 * are serialized by the mutex. 767 */ 768 list_for_each_safe(l, lt, &h->ad) { 769 x = list_entry(l, struct mtype_resize_ad, list); 770 if (x->ad == IPSET_ADD) { 771 mtype_add(set, &x->d, &x->ext, &x->mext, x->flags); 772 } else { 773 mtype_del(set, &x->d, NULL, NULL, 0); 774 } 775 list_del(l); 776 kfree(l); 777 } 778 /* If there's nobody else using the table, destroy it */ 779 if (atomic_dec_and_test(&orig->uref)) { 780 pr_debug("Table destroy by resize %p\n", orig); 781 mtype_ahash_destroy(set, orig, false); 782 } 783 784out: 785#ifdef IP_SET_HASH_WITH_NETS 786 kfree(tmp); 787#endif 788 return ret; 789 790cleanup: 791 rcu_read_unlock_bh(); 792 atomic_set(&orig->ref, 0); 793 atomic_dec(&orig->uref); 794 mtype_ahash_destroy(set, t, false); 795 if (ret == -EAGAIN) 796 goto retry; 797 goto out; 798 799hbwarn: 800 /* In case we have plenty of memory :-) */ 801 pr_warn("Cannot increase the hashsize of set %s further\n", set->name); 802 ret = -IPSET_ERR_HASH_FULL; 803 goto out; 804} 805 806/* Get the current number of elements and ext_size in the set */ 807static void 808mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size) 809{ 810 struct htype *h = set->data; 811 const struct htable *t; 812 u32 i, j, r; 813 struct hbucket *n; 814 struct mtype_elem *data; 815 816 t = rcu_dereference_bh(h->table); 817 for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { 818 for (i = ahash_bucket_start(r, t->htable_bits); 819 i < ahash_bucket_end(r, t->htable_bits); i++) { 820 n = rcu_dereference_bh(hbucket(t, i)); 821 if (!n) 822 continue; 823 for (j = 0; j < n->pos; j++) { 824 if (!test_bit(j, n->used)) 825 continue; 826 data = ahash_data(n, j, set->dsize); 827 if (!SET_ELEM_EXPIRED(set, data)) 828 (*elements)++; 829 } 830 } 831 *ext_size += t->hregion[r].ext_size; 832 } 833} 834 835/* Add an element to a hash and update the internal counters when succeeded, 836 * otherwise report the proper error code. 837 */ 838static int 839mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, 840 struct ip_set_ext *mext, u32 flags) 841{ 842 struct htype *h = set->data; 843 struct htable *t; 844 const struct mtype_elem *d = value; 845 struct mtype_elem *data; 846 struct hbucket *n, *old = ERR_PTR(-ENOENT); 847 int i, j = -1, ret; 848 bool flag_exist = flags & IPSET_FLAG_EXIST; 849 bool deleted = false, forceadd = false, reuse = false; 850 u32 r, key, multi = 0, elements, maxelem; 851 852 rcu_read_lock_bh(); 853 t = rcu_dereference_bh(h->table); 854 key = HKEY(value, h->initval, t->htable_bits); 855 r = ahash_region(key, t->htable_bits); 856 atomic_inc(&t->uref); 857 elements = t->hregion[r].elements; 858 maxelem = t->maxelem; 859 if (elements >= maxelem) { 860 u32 e; 861 if (SET_WITH_TIMEOUT(set)) { 862 rcu_read_unlock_bh(); 863 mtype_gc_do(set, h, t, r); 864 rcu_read_lock_bh(); 865 } 866 maxelem = h->maxelem; 867 elements = 0; 868 for (e = 0; e < ahash_numof_locks(t->htable_bits); e++) 869 elements += t->hregion[e].elements; 870 if (elements >= maxelem && SET_WITH_FORCEADD(set)) 871 forceadd = true; 872 } 873 rcu_read_unlock_bh(); 874 875 spin_lock_bh(&t->hregion[r].lock); 876 n = rcu_dereference_bh(hbucket(t, key)); 877 if (!n) { 878 if (forceadd || elements >= maxelem) 879 goto set_full; 880 old = NULL; 881 n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, 882 GFP_ATOMIC); 883 if (!n) { 884 ret = -ENOMEM; 885 goto unlock; 886 } 887 n->size = AHASH_INIT_SIZE; 888 t->hregion[r].ext_size += 889 ext_size(AHASH_INIT_SIZE, set->dsize); 890 goto copy_elem; 891 } 892 for (i = 0; i < n->pos; i++) { 893 if (!test_bit(i, n->used)) { 894 /* Reuse first deleted entry */ 895 if (j == -1) { 896 deleted = reuse = true; 897 j = i; 898 } 899 continue; 900 } 901 data = ahash_data(n, i, set->dsize); 902 if (mtype_data_equal(data, d, &multi)) { 903 if (flag_exist || SET_ELEM_EXPIRED(set, data)) { 904 /* Just the extensions could be overwritten */ 905 j = i; 906 goto overwrite_extensions; 907 } 908 ret = -IPSET_ERR_EXIST; 909 goto unlock; 910 } 911 /* Reuse first timed out entry */ 912 if (SET_ELEM_EXPIRED(set, data) && j == -1) { 913 j = i; 914 reuse = true; 915 } 916 } 917 if (reuse || forceadd) { 918 if (j == -1) 919 j = 0; 920 data = ahash_data(n, j, set->dsize); 921 if (!deleted) { 922#ifdef IP_SET_HASH_WITH_NETS 923 for (i = 0; i < IPSET_NET_COUNT; i++) 924 mtype_del_cidr(set, h, 925 NCIDR_PUT(DCIDR_GET(data->cidr, i)), 926 i); 927#endif 928 ip_set_ext_destroy(set, data); 929 t->hregion[r].elements--; 930 } 931 goto copy_data; 932 } 933 if (elements >= maxelem) 934 goto set_full; 935 /* Create a new slot */ 936 if (n->pos >= n->size) { 937#ifdef IP_SET_HASH_WITH_MULTI 938 if (h->bucketsize >= AHASH_MAX_TUNED) 939 goto set_full; 940 else if (h->bucketsize <= multi) 941 h->bucketsize += AHASH_INIT_SIZE; 942#endif 943 if (n->size >= AHASH_MAX(h)) { 944 /* Trigger rehashing */ 945 mtype_data_next(&h->next, d); 946 ret = -EAGAIN; 947 goto resize; 948 } 949 old = n; 950 n = kzalloc(sizeof(*n) + 951 (old->size + AHASH_INIT_SIZE) * set->dsize, 952 GFP_ATOMIC); 953 if (!n) { 954 ret = -ENOMEM; 955 goto unlock; 956 } 957 memcpy(n, old, sizeof(struct hbucket) + 958 old->size * set->dsize); 959 n->size = old->size + AHASH_INIT_SIZE; 960 t->hregion[r].ext_size += 961 ext_size(AHASH_INIT_SIZE, set->dsize); 962 } 963 964copy_elem: 965 j = n->pos++; 966 data = ahash_data(n, j, set->dsize); 967copy_data: 968 t->hregion[r].elements++; 969#ifdef IP_SET_HASH_WITH_NETS 970 for (i = 0; i < IPSET_NET_COUNT; i++) 971 mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); 972#endif 973 memcpy(data, d, sizeof(struct mtype_elem)); 974overwrite_extensions: 975#ifdef IP_SET_HASH_WITH_NETS 976 mtype_data_set_flags(data, flags); 977#endif 978 if (SET_WITH_COUNTER(set)) 979 ip_set_init_counter(ext_counter(data, set), ext); 980 if (SET_WITH_COMMENT(set)) 981 ip_set_init_comment(set, ext_comment(data, set), ext); 982 if (SET_WITH_SKBINFO(set)) 983 ip_set_init_skbinfo(ext_skbinfo(data, set), ext); 984 /* Must come last for the case when timed out entry is reused */ 985 if (SET_WITH_TIMEOUT(set)) 986 ip_set_timeout_set(ext_timeout(data, set), ext->timeout); 987 smp_mb__before_atomic(); 988 set_bit(j, n->used); 989 if (old != ERR_PTR(-ENOENT)) { 990 rcu_assign_pointer(hbucket(t, key), n); 991 if (old) 992 kfree_rcu(old, rcu); 993 } 994 ret = 0; 995resize: 996 spin_unlock_bh(&t->hregion[r].lock); 997 if (atomic_read(&t->ref) && ext->target) { 998 /* Resize is in process and kernel side add, save values */ 999 struct mtype_resize_ad *x; 1000 1001 x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC); 1002 if (!x) 1003 /* Don't bother */ 1004 goto out; 1005 x->ad = IPSET_ADD; 1006 memcpy(&x->d, value, sizeof(struct mtype_elem)); 1007 memcpy(&x->ext, ext, sizeof(struct ip_set_ext)); 1008 memcpy(&x->mext, mext, sizeof(struct ip_set_ext)); 1009 x->flags = flags; 1010 spin_lock_bh(&set->lock); 1011 list_add_tail(&x->list, &h->ad); 1012 spin_unlock_bh(&set->lock); 1013 } 1014 goto out; 1015 1016set_full: 1017 if (net_ratelimit()) 1018 pr_warn("Set %s is full, maxelem %u reached\n", 1019 set->name, maxelem); 1020 ret = -IPSET_ERR_HASH_FULL; 1021unlock: 1022 spin_unlock_bh(&t->hregion[r].lock); 1023out: 1024 if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { 1025 pr_debug("Table destroy after resize by add: %p\n", t); 1026 mtype_ahash_destroy(set, t, false); 1027 } 1028 return ret; 1029} 1030 1031/* Delete an element from the hash and free up space if possible. 1032 */ 1033static int 1034mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, 1035 struct ip_set_ext *mext, u32 flags) 1036{ 1037 struct htype *h = set->data; 1038 struct htable *t; 1039 const struct mtype_elem *d = value; 1040 struct mtype_elem *data; 1041 struct hbucket *n; 1042 struct mtype_resize_ad *x = NULL; 1043 int i, j, k, r, ret = -IPSET_ERR_EXIST; 1044 u32 key, multi = 0; 1045 size_t dsize = set->dsize; 1046 1047 /* Userspace add and resize is excluded by the mutex. 1048 * Kernespace add does not trigger resize. 1049 */ 1050 rcu_read_lock_bh(); 1051 t = rcu_dereference_bh(h->table); 1052 key = HKEY(value, h->initval, t->htable_bits); 1053 r = ahash_region(key, t->htable_bits); 1054 atomic_inc(&t->uref); 1055 rcu_read_unlock_bh(); 1056 1057 spin_lock_bh(&t->hregion[r].lock); 1058 n = rcu_dereference_bh(hbucket(t, key)); 1059 if (!n) 1060 goto out; 1061 for (i = 0, k = 0; i < n->pos; i++) { 1062 if (!test_bit(i, n->used)) { 1063 k++; 1064 continue; 1065 } 1066 data = ahash_data(n, i, dsize); 1067 if (!mtype_data_equal(data, d, &multi)) 1068 continue; 1069 if (SET_ELEM_EXPIRED(set, data)) 1070 goto out; 1071 1072 ret = 0; 1073 clear_bit(i, n->used); 1074 smp_mb__after_atomic(); 1075 if (i + 1 == n->pos) 1076 n->pos--; 1077 t->hregion[r].elements--; 1078#ifdef IP_SET_HASH_WITH_NETS 1079 for (j = 0; j < IPSET_NET_COUNT; j++) 1080 mtype_del_cidr(set, h, 1081 NCIDR_PUT(DCIDR_GET(d->cidr, j)), j); 1082#endif 1083 ip_set_ext_destroy(set, data); 1084 1085 if (atomic_read(&t->ref) && ext->target) { 1086 /* Resize is in process and kernel side del, 1087 * save values 1088 */ 1089 x = kzalloc(sizeof(struct mtype_resize_ad), 1090 GFP_ATOMIC); 1091 if (x) { 1092 x->ad = IPSET_DEL; 1093 memcpy(&x->d, value, 1094 sizeof(struct mtype_elem)); 1095 x->flags = flags; 1096 } 1097 } 1098 for (; i < n->pos; i++) { 1099 if (!test_bit(i, n->used)) 1100 k++; 1101 } 1102 if (n->pos == 0 && k == 0) { 1103 t->hregion[r].ext_size -= ext_size(n->size, dsize); 1104 rcu_assign_pointer(hbucket(t, key), NULL); 1105 kfree_rcu(n, rcu); 1106 } else if (k >= AHASH_INIT_SIZE) { 1107 struct hbucket *tmp = kzalloc(sizeof(*tmp) + 1108 (n->size - AHASH_INIT_SIZE) * dsize, 1109 GFP_ATOMIC); 1110 if (!tmp) 1111 goto out; 1112 tmp->size = n->size - AHASH_INIT_SIZE; 1113 for (j = 0, k = 0; j < n->pos; j++) { 1114 if (!test_bit(j, n->used)) 1115 continue; 1116 data = ahash_data(n, j, dsize); 1117 memcpy(tmp->value + k * dsize, data, dsize); 1118 set_bit(k, tmp->used); 1119 k++; 1120 } 1121 tmp->pos = k; 1122 t->hregion[r].ext_size -= 1123 ext_size(AHASH_INIT_SIZE, dsize); 1124 rcu_assign_pointer(hbucket(t, key), tmp); 1125 kfree_rcu(n, rcu); 1126 } 1127 goto out; 1128 } 1129 1130out: 1131 spin_unlock_bh(&t->hregion[r].lock); 1132 if (x) { 1133 spin_lock_bh(&set->lock); 1134 list_add(&x->list, &h->ad); 1135 spin_unlock_bh(&set->lock); 1136 } 1137 if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { 1138 pr_debug("Table destroy after resize by del: %p\n", t); 1139 mtype_ahash_destroy(set, t, false); 1140 } 1141 return ret; 1142} 1143 1144static int 1145mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, 1146 struct ip_set_ext *mext, struct ip_set *set, u32 flags) 1147{ 1148 if (!ip_set_match_extensions(set, ext, mext, flags, data)) 1149 return 0; 1150 /* nomatch entries return -ENOTEMPTY */ 1151 return mtype_do_data_match(data); 1152} 1153 1154#ifdef IP_SET_HASH_WITH_NETS 1155/* Special test function which takes into account the different network 1156 * sizes added to the set 1157 */ 1158static int 1159mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, 1160 const struct ip_set_ext *ext, 1161 struct ip_set_ext *mext, u32 flags) 1162{ 1163 struct htype *h = set->data; 1164 struct htable *t = rcu_dereference_bh(h->table); 1165 struct hbucket *n; 1166 struct mtype_elem *data; 1167#if IPSET_NET_COUNT == 2 1168 struct mtype_elem orig = *d; 1169 int ret, i, j = 0, k; 1170#else 1171 int ret, i, j = 0; 1172#endif 1173 u32 key, multi = 0; 1174 1175 pr_debug("test by nets\n"); 1176 for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) { 1177#if IPSET_NET_COUNT == 2 1178 mtype_data_reset_elem(d, &orig); 1179 mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false); 1180 for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi; 1181 k++) { 1182 mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]), 1183 true); 1184#else 1185 mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0])); 1186#endif 1187 key = HKEY(d, h->initval, t->htable_bits); 1188 n = rcu_dereference_bh(hbucket(t, key)); 1189 if (!n) 1190 continue; 1191 for (i = 0; i < n->pos; i++) { 1192 if (!test_bit(i, n->used)) 1193 continue; 1194 data = ahash_data(n, i, set->dsize); 1195 if (!mtype_data_equal(data, d, &multi)) 1196 continue; 1197 ret = mtype_data_match(data, ext, mext, set, flags); 1198 if (ret != 0) 1199 return ret; 1200#ifdef IP_SET_HASH_WITH_MULTI 1201 /* No match, reset multiple match flag */ 1202 multi = 0; 1203#endif 1204 } 1205#if IPSET_NET_COUNT == 2 1206 } 1207#endif 1208 } 1209 return 0; 1210} 1211#endif 1212 1213/* Test whether the element is added to the set */ 1214static int 1215mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, 1216 struct ip_set_ext *mext, u32 flags) 1217{ 1218 struct htype *h = set->data; 1219 struct htable *t; 1220 struct mtype_elem *d = value; 1221 struct hbucket *n; 1222 struct mtype_elem *data; 1223 int i, ret = 0; 1224 u32 key, multi = 0; 1225 1226 rcu_read_lock_bh(); 1227 t = rcu_dereference_bh(h->table); 1228#ifdef IP_SET_HASH_WITH_NETS 1229 /* If we test an IP address and not a network address, 1230 * try all possible network sizes 1231 */ 1232 for (i = 0; i < IPSET_NET_COUNT; i++) 1233 if (DCIDR_GET(d->cidr, i) != HOST_MASK) 1234 break; 1235 if (i == IPSET_NET_COUNT) { 1236 ret = mtype_test_cidrs(set, d, ext, mext, flags); 1237 goto out; 1238 } 1239#endif 1240 1241 key = HKEY(d, h->initval, t->htable_bits); 1242 n = rcu_dereference_bh(hbucket(t, key)); 1243 if (!n) { 1244 ret = 0; 1245 goto out; 1246 } 1247 for (i = 0; i < n->pos; i++) { 1248 if (!test_bit(i, n->used)) 1249 continue; 1250 data = ahash_data(n, i, set->dsize); 1251 if (!mtype_data_equal(data, d, &multi)) 1252 continue; 1253 ret = mtype_data_match(data, ext, mext, set, flags); 1254 if (ret != 0) 1255 goto out; 1256 } 1257out: 1258 rcu_read_unlock_bh(); 1259 return ret; 1260} 1261 1262/* Reply a HEADER request: fill out the header part of the set */ 1263static int 1264mtype_head(struct ip_set *set, struct sk_buff *skb) 1265{ 1266 struct htype *h = set->data; 1267 const struct htable *t; 1268 struct nlattr *nested; 1269 size_t memsize; 1270 u32 elements = 0; 1271 size_t ext_size = 0; 1272 u8 htable_bits; 1273 1274 rcu_read_lock_bh(); 1275 t = rcu_dereference_bh(h->table); 1276 mtype_ext_size(set, &elements, &ext_size); 1277 memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size; 1278 htable_bits = t->htable_bits; 1279 rcu_read_unlock_bh(); 1280 1281 nested = nla_nest_start(skb, IPSET_ATTR_DATA); 1282 if (!nested) 1283 goto nla_put_failure; 1284 if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, 1285 htonl(jhash_size(htable_bits))) || 1286 nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) 1287 goto nla_put_failure; 1288#ifdef IP_SET_HASH_WITH_BITMASK 1289 /* if netmask is set to anything other than HOST_MASK we know that the user supplied netmask 1290 * and not bitmask. These two are mutually exclusive. */ 1291 if (h->netmask == HOST_MASK && !nf_inet_addr_cmp(&onesmask, &h->bitmask)) { 1292 if (set->family == NFPROTO_IPV4) { 1293 if (nla_put_ipaddr4(skb, IPSET_ATTR_BITMASK, h->bitmask.ip)) 1294 goto nla_put_failure; 1295 } else if (set->family == NFPROTO_IPV6) { 1296 if (nla_put_ipaddr6(skb, IPSET_ATTR_BITMASK, &h->bitmask.in6)) 1297 goto nla_put_failure; 1298 } 1299 } 1300#endif 1301#ifdef IP_SET_HASH_WITH_NETMASK 1302 if (h->netmask != HOST_MASK && nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask)) 1303 goto nla_put_failure; 1304#endif 1305#ifdef IP_SET_HASH_WITH_MARKMASK 1306 if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) 1307 goto nla_put_failure; 1308#endif 1309 if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE) { 1310 if (nla_put_u8(skb, IPSET_ATTR_BUCKETSIZE, h->bucketsize) || 1311 nla_put_net32(skb, IPSET_ATTR_INITVAL, htonl(h->initval))) 1312 goto nla_put_failure; 1313 } 1314 if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || 1315 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || 1316 nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements))) 1317 goto nla_put_failure; 1318 if (unlikely(ip_set_put_flags(skb, set))) 1319 goto nla_put_failure; 1320 nla_nest_end(skb, nested); 1321 1322 return 0; 1323nla_put_failure: 1324 return -EMSGSIZE; 1325} 1326 1327/* Make possible to run dumping parallel with resizing */ 1328static void 1329mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start) 1330{ 1331 struct htype *h = set->data; 1332 struct htable *t; 1333 1334 if (start) { 1335 rcu_read_lock_bh(); 1336 t = ipset_dereference_bh_nfnl(h->table); 1337 atomic_inc(&t->uref); 1338 cb->args[IPSET_CB_PRIVATE] = (unsigned long)t; 1339 rcu_read_unlock_bh(); 1340 } else if (cb->args[IPSET_CB_PRIVATE]) { 1341 t = (struct htable *)cb->args[IPSET_CB_PRIVATE]; 1342 if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { 1343 pr_debug("Table destroy after resize " 1344 " by dump: %p\n", t); 1345 mtype_ahash_destroy(set, t, false); 1346 } 1347 cb->args[IPSET_CB_PRIVATE] = 0; 1348 } 1349} 1350 1351/* Reply a LIST/SAVE request: dump the elements of the specified set */ 1352static int 1353mtype_list(const struct ip_set *set, 1354 struct sk_buff *skb, struct netlink_callback *cb) 1355{ 1356 const struct htable *t; 1357 struct nlattr *atd, *nested; 1358 const struct hbucket *n; 1359 const struct mtype_elem *e; 1360 u32 first = cb->args[IPSET_CB_ARG0]; 1361 /* We assume that one hash bucket fills into one page */ 1362 void *incomplete; 1363 int i, ret = 0; 1364 1365 atd = nla_nest_start(skb, IPSET_ATTR_ADT); 1366 if (!atd) 1367 return -EMSGSIZE; 1368 1369 pr_debug("list hash set %s\n", set->name); 1370 t = (const struct htable *)cb->args[IPSET_CB_PRIVATE]; 1371 /* Expire may replace a hbucket with another one */ 1372 rcu_read_lock(); 1373 for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); 1374 cb->args[IPSET_CB_ARG0]++) { 1375 cond_resched_rcu(); 1376 incomplete = skb_tail_pointer(skb); 1377 n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0])); 1378 pr_debug("cb->arg bucket: %lu, t %p n %p\n", 1379 cb->args[IPSET_CB_ARG0], t, n); 1380 if (!n) 1381 continue; 1382 for (i = 0; i < n->pos; i++) { 1383 if (!test_bit(i, n->used)) 1384 continue; 1385 e = ahash_data(n, i, set->dsize); 1386 if (SET_ELEM_EXPIRED(set, e)) 1387 continue; 1388 pr_debug("list hash %lu hbucket %p i %u, data %p\n", 1389 cb->args[IPSET_CB_ARG0], n, i, e); 1390 nested = nla_nest_start(skb, IPSET_ATTR_DATA); 1391 if (!nested) { 1392 if (cb->args[IPSET_CB_ARG0] == first) { 1393 nla_nest_cancel(skb, atd); 1394 ret = -EMSGSIZE; 1395 goto out; 1396 } 1397 goto nla_put_failure; 1398 } 1399 if (mtype_data_list(skb, e)) 1400 goto nla_put_failure; 1401 if (ip_set_put_extensions(skb, set, e, true)) 1402 goto nla_put_failure; 1403 nla_nest_end(skb, nested); 1404 } 1405 } 1406 nla_nest_end(skb, atd); 1407 /* Set listing finished */ 1408 cb->args[IPSET_CB_ARG0] = 0; 1409 1410 goto out; 1411 1412nla_put_failure: 1413 nlmsg_trim(skb, incomplete); 1414 if (unlikely(first == cb->args[IPSET_CB_ARG0])) { 1415 pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n", 1416 set->name); 1417 cb->args[IPSET_CB_ARG0] = 0; 1418 ret = -EMSGSIZE; 1419 } else { 1420 nla_nest_end(skb, atd); 1421 } 1422out: 1423 rcu_read_unlock(); 1424 return ret; 1425} 1426 1427static int 1428IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, 1429 const struct xt_action_param *par, 1430 enum ipset_adt adt, struct ip_set_adt_opt *opt); 1431 1432static int 1433IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], 1434 enum ipset_adt adt, u32 *lineno, u32 flags, 1435 bool retried); 1436 1437static const struct ip_set_type_variant mtype_variant = { 1438 .kadt = mtype_kadt, 1439 .uadt = mtype_uadt, 1440 .adt = { 1441 [IPSET_ADD] = mtype_add, 1442 [IPSET_DEL] = mtype_del, 1443 [IPSET_TEST] = mtype_test, 1444 }, 1445 .destroy = mtype_destroy, 1446 .flush = mtype_flush, 1447 .head = mtype_head, 1448 .list = mtype_list, 1449 .uref = mtype_uref, 1450 .resize = mtype_resize, 1451 .same_set = mtype_same_set, 1452 .cancel_gc = mtype_cancel_gc, 1453 .region_lock = true, 1454}; 1455 1456#ifdef IP_SET_EMIT_CREATE 1457static int 1458IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, 1459 struct nlattr *tb[], u32 flags) 1460{ 1461 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; 1462#ifdef IP_SET_HASH_WITH_MARKMASK 1463 u32 markmask; 1464#endif 1465 u8 hbits; 1466#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) 1467 int ret __attribute__((unused)) = 0; 1468 u8 netmask = set->family == NFPROTO_IPV4 ? 32 : 128; 1469 union nf_inet_addr bitmask = onesmask; 1470#endif 1471 size_t hsize; 1472 struct htype *h; 1473 struct htable *t; 1474 u32 i; 1475 1476 pr_debug("Create set %s with family %s\n", 1477 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); 1478 1479#ifdef IP_SET_PROTO_UNDEF 1480 if (set->family != NFPROTO_UNSPEC) 1481 return -IPSET_ERR_INVALID_FAMILY; 1482#else 1483 if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) 1484 return -IPSET_ERR_INVALID_FAMILY; 1485#endif 1486 1487 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || 1488 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || 1489 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 1490 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) 1491 return -IPSET_ERR_PROTOCOL; 1492 1493#ifdef IP_SET_HASH_WITH_MARKMASK 1494 /* Separated condition in order to avoid directive in argument list */ 1495 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK))) 1496 return -IPSET_ERR_PROTOCOL; 1497 1498 markmask = 0xffffffff; 1499 if (tb[IPSET_ATTR_MARKMASK]) { 1500 markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK])); 1501 if (markmask == 0) 1502 return -IPSET_ERR_INVALID_MARKMASK; 1503 } 1504#endif 1505 1506#ifdef IP_SET_HASH_WITH_NETMASK 1507 if (tb[IPSET_ATTR_NETMASK]) { 1508 netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); 1509 1510 if ((set->family == NFPROTO_IPV4 && netmask > 32) || 1511 (set->family == NFPROTO_IPV6 && netmask > 128) || 1512 netmask == 0) 1513 return -IPSET_ERR_INVALID_NETMASK; 1514 1515 /* we convert netmask to bitmask and store it */ 1516 if (set->family == NFPROTO_IPV4) 1517 bitmask.ip = ip_set_netmask(netmask); 1518 else 1519 ip6_netmask(&bitmask, netmask); 1520 } 1521#endif 1522 1523#ifdef IP_SET_HASH_WITH_BITMASK 1524 if (tb[IPSET_ATTR_BITMASK]) { 1525 /* bitmask and netmask do the same thing, allow only one of these options */ 1526 if (tb[IPSET_ATTR_NETMASK]) 1527 return -IPSET_ERR_BITMASK_NETMASK_EXCL; 1528 1529 if (set->family == NFPROTO_IPV4) { 1530 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_BITMASK], &bitmask.ip); 1531 if (ret || !bitmask.ip) 1532 return -IPSET_ERR_INVALID_NETMASK; 1533 } else if (set->family == NFPROTO_IPV6) { 1534 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_BITMASK], &bitmask); 1535 if (ret || ipv6_addr_any(&bitmask.in6)) 1536 return -IPSET_ERR_INVALID_NETMASK; 1537 } 1538 1539 if (nf_inet_addr_cmp(&bitmask, &zeromask)) 1540 return -IPSET_ERR_INVALID_NETMASK; 1541 } 1542#endif 1543 1544 if (tb[IPSET_ATTR_HASHSIZE]) { 1545 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); 1546 if (hashsize < IPSET_MIMINAL_HASHSIZE) 1547 hashsize = IPSET_MIMINAL_HASHSIZE; 1548 } 1549 1550 if (tb[IPSET_ATTR_MAXELEM]) 1551 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); 1552 1553 hsize = sizeof(*h); 1554 h = kzalloc(hsize, GFP_KERNEL); 1555 if (!h) 1556 return -ENOMEM; 1557 1558 /* Compute htable_bits from the user input parameter hashsize. 1559 * Assume that hashsize == 2^htable_bits, 1560 * otherwise round up to the first 2^n value. 1561 */ 1562 hbits = fls(hashsize - 1); 1563 hsize = htable_size(hbits); 1564 if (hsize == 0) { 1565 kfree(h); 1566 return -ENOMEM; 1567 } 1568 t = ip_set_alloc(hsize); 1569 if (!t) { 1570 kfree(h); 1571 return -ENOMEM; 1572 } 1573 t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits)); 1574 if (!t->hregion) { 1575 ip_set_free(t); 1576 kfree(h); 1577 return -ENOMEM; 1578 } 1579 h->gc.set = set; 1580 for (i = 0; i < ahash_numof_locks(hbits); i++) 1581 spin_lock_init(&t->hregion[i].lock); 1582 h->maxelem = maxelem; 1583#if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) 1584 h->bitmask = bitmask; 1585 h->netmask = netmask; 1586#endif 1587#ifdef IP_SET_HASH_WITH_MARKMASK 1588 h->markmask = markmask; 1589#endif 1590 if (tb[IPSET_ATTR_INITVAL]) 1591 h->initval = ntohl(nla_get_be32(tb[IPSET_ATTR_INITVAL])); 1592 else 1593 get_random_bytes(&h->initval, sizeof(h->initval)); 1594 h->bucketsize = AHASH_MAX_SIZE; 1595 if (tb[IPSET_ATTR_BUCKETSIZE]) { 1596 h->bucketsize = nla_get_u8(tb[IPSET_ATTR_BUCKETSIZE]); 1597 if (h->bucketsize < AHASH_INIT_SIZE) 1598 h->bucketsize = AHASH_INIT_SIZE; 1599 else if (h->bucketsize > AHASH_MAX_SIZE) 1600 h->bucketsize = AHASH_MAX_SIZE; 1601 else if (h->bucketsize % 2) 1602 h->bucketsize += 1; 1603 } 1604 t->htable_bits = hbits; 1605 t->maxelem = h->maxelem / ahash_numof_locks(hbits); 1606 RCU_INIT_POINTER(h->table, t); 1607 1608 INIT_LIST_HEAD(&h->ad); 1609 set->data = h; 1610#ifndef IP_SET_PROTO_UNDEF 1611 if (set->family == NFPROTO_IPV4) { 1612#endif 1613 set->variant = &IPSET_TOKEN(HTYPE, 4_variant); 1614 set->dsize = ip_set_elem_len(set, tb, 1615 sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)), 1616 __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem))); 1617#ifndef IP_SET_PROTO_UNDEF 1618 } else { 1619 set->variant = &IPSET_TOKEN(HTYPE, 6_variant); 1620 set->dsize = ip_set_elem_len(set, tb, 1621 sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)), 1622 __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem))); 1623 } 1624#endif 1625 set->timeout = IPSET_NO_TIMEOUT; 1626 if (tb[IPSET_ATTR_TIMEOUT]) { 1627 set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); 1628#ifndef IP_SET_PROTO_UNDEF 1629 if (set->family == NFPROTO_IPV4) 1630#endif 1631 IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc); 1632#ifndef IP_SET_PROTO_UNDEF 1633 else 1634 IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc); 1635#endif 1636 } 1637 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", 1638 set->name, jhash_size(t->htable_bits), 1639 t->htable_bits, h->maxelem, set->data, t); 1640 1641 return 0; 1642} 1643#endif /* IP_SET_EMIT_CREATE */ 1644 1645#undef HKEY_DATALEN 1646