1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> 4 * 5 * Development of this code funded by Astaro AG (http://www.astaro.com/) 6 */ 7 8#include <linux/kernel.h> 9#include <linux/init.h> 10#include <linux/module.h> 11#include <linux/list.h> 12#include <linux/rbtree.h> 13#include <linux/netlink.h> 14#include <linux/netfilter.h> 15#include <linux/netfilter/nf_tables.h> 16#include <net/netfilter/nf_tables_core.h> 17 18struct nft_rbtree { 19 struct rb_root root; 20 rwlock_t lock; 21 seqcount_rwlock_t count; 22 unsigned long last_gc; 23}; 24 25struct nft_rbtree_elem { 26 struct nft_elem_priv priv; 27 struct rb_node node; 28 struct nft_set_ext ext; 29}; 30 31static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe) 32{ 33 return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && 34 (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END); 35} 36 37static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe) 38{ 39 return !nft_rbtree_interval_end(rbe); 40} 41 42static int nft_rbtree_cmp(const struct nft_set *set, 43 const struct nft_rbtree_elem *e1, 44 const struct nft_rbtree_elem *e2) 45{ 46 return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext), 47 set->klen); 48} 49 50static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) 51{ 52 return nft_set_elem_expired(&rbe->ext); 53} 54 55static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, 56 const u32 *key, const struct nft_set_ext **ext, 57 unsigned int seq) 58{ 59 struct nft_rbtree *priv = nft_set_priv(set); 60 const struct nft_rbtree_elem *rbe, *interval = NULL; 61 u8 genmask = nft_genmask_cur(net); 62 const struct rb_node *parent; 63 int d; 64 65 parent = rcu_dereference_raw(priv->root.rb_node); 66 while (parent != NULL) { 67 if (read_seqcount_retry(&priv->count, seq)) 68 return false; 69 70 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 71 72 d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); 73 if (d < 0) { 74 parent = rcu_dereference_raw(parent->rb_left); 75 if (interval && 76 !nft_rbtree_cmp(set, rbe, interval) && 77 nft_rbtree_interval_end(rbe) && 78 nft_rbtree_interval_start(interval)) 79 continue; 80 interval = rbe; 81 } else if (d > 0) 82 parent = rcu_dereference_raw(parent->rb_right); 83 else { 84 if (!nft_set_elem_active(&rbe->ext, genmask)) { 85 parent = rcu_dereference_raw(parent->rb_left); 86 continue; 87 } 88 89 if (nft_rbtree_elem_expired(rbe)) 90 return false; 91 92 if (nft_rbtree_interval_end(rbe)) { 93 if (nft_set_is_anonymous(set)) 94 return false; 95 parent = rcu_dereference_raw(parent->rb_left); 96 interval = NULL; 97 continue; 98 } 99 100 *ext = &rbe->ext; 101 return true; 102 } 103 } 104 105 if (set->flags & NFT_SET_INTERVAL && interval != NULL && 106 nft_set_elem_active(&interval->ext, genmask) && 107 !nft_rbtree_elem_expired(interval) && 108 nft_rbtree_interval_start(interval)) { 109 *ext = &interval->ext; 110 return true; 111 } 112 113 return false; 114} 115 116INDIRECT_CALLABLE_SCOPE 117bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, 118 const u32 *key, const struct nft_set_ext **ext) 119{ 120 struct nft_rbtree *priv = nft_set_priv(set); 121 unsigned int seq = read_seqcount_begin(&priv->count); 122 bool ret; 123 124 ret = __nft_rbtree_lookup(net, set, key, ext, seq); 125 if (ret || !read_seqcount_retry(&priv->count, seq)) 126 return ret; 127 128 read_lock_bh(&priv->lock); 129 seq = read_seqcount_begin(&priv->count); 130 ret = __nft_rbtree_lookup(net, set, key, ext, seq); 131 read_unlock_bh(&priv->lock); 132 133 return ret; 134} 135 136static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, 137 const u32 *key, struct nft_rbtree_elem **elem, 138 unsigned int seq, unsigned int flags, u8 genmask) 139{ 140 struct nft_rbtree_elem *rbe, *interval = NULL; 141 struct nft_rbtree *priv = nft_set_priv(set); 142 const struct rb_node *parent; 143 const void *this; 144 int d; 145 146 parent = rcu_dereference_raw(priv->root.rb_node); 147 while (parent != NULL) { 148 if (read_seqcount_retry(&priv->count, seq)) 149 return false; 150 151 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 152 153 this = nft_set_ext_key(&rbe->ext); 154 d = memcmp(this, key, set->klen); 155 if (d < 0) { 156 parent = rcu_dereference_raw(parent->rb_left); 157 if (!(flags & NFT_SET_ELEM_INTERVAL_END)) 158 interval = rbe; 159 } else if (d > 0) { 160 parent = rcu_dereference_raw(parent->rb_right); 161 if (flags & NFT_SET_ELEM_INTERVAL_END) 162 interval = rbe; 163 } else { 164 if (!nft_set_elem_active(&rbe->ext, genmask)) { 165 parent = rcu_dereference_raw(parent->rb_left); 166 continue; 167 } 168 169 if (nft_set_elem_expired(&rbe->ext)) 170 return false; 171 172 if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || 173 (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == 174 (flags & NFT_SET_ELEM_INTERVAL_END)) { 175 *elem = rbe; 176 return true; 177 } 178 179 if (nft_rbtree_interval_end(rbe)) 180 interval = NULL; 181 182 parent = rcu_dereference_raw(parent->rb_left); 183 } 184 } 185 186 if (set->flags & NFT_SET_INTERVAL && interval != NULL && 187 nft_set_elem_active(&interval->ext, genmask) && 188 !nft_set_elem_expired(&interval->ext) && 189 ((!nft_rbtree_interval_end(interval) && 190 !(flags & NFT_SET_ELEM_INTERVAL_END)) || 191 (nft_rbtree_interval_end(interval) && 192 (flags & NFT_SET_ELEM_INTERVAL_END)))) { 193 *elem = interval; 194 return true; 195 } 196 197 return false; 198} 199 200static struct nft_elem_priv * 201nft_rbtree_get(const struct net *net, const struct nft_set *set, 202 const struct nft_set_elem *elem, unsigned int flags) 203{ 204 struct nft_rbtree *priv = nft_set_priv(set); 205 unsigned int seq = read_seqcount_begin(&priv->count); 206 struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT); 207 const u32 *key = (const u32 *)&elem->key.val; 208 u8 genmask = nft_genmask_cur(net); 209 bool ret; 210 211 ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); 212 if (ret || !read_seqcount_retry(&priv->count, seq)) 213 return &rbe->priv; 214 215 read_lock_bh(&priv->lock); 216 seq = read_seqcount_begin(&priv->count); 217 ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); 218 read_unlock_bh(&priv->lock); 219 220 if (!ret) 221 return ERR_PTR(-ENOENT); 222 223 return &rbe->priv; 224} 225 226static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set, 227 struct nft_rbtree *priv, 228 struct nft_rbtree_elem *rbe) 229{ 230 lockdep_assert_held_write(&priv->lock); 231 nft_setelem_data_deactivate(net, set, &rbe->priv); 232 rb_erase(&rbe->node, &priv->root); 233} 234 235static const struct nft_rbtree_elem * 236nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, 237 struct nft_rbtree_elem *rbe) 238{ 239 struct nft_set *set = (struct nft_set *)__set; 240 struct rb_node *prev = rb_prev(&rbe->node); 241 struct net *net = read_pnet(&set->net); 242 struct nft_rbtree_elem *rbe_prev; 243 struct nft_trans_gc *gc; 244 245 gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); 246 if (!gc) 247 return ERR_PTR(-ENOMEM); 248 249 /* search for end interval coming before this element. 250 * end intervals don't carry a timeout extension, they 251 * are coupled with the interval start element. 252 */ 253 while (prev) { 254 rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); 255 if (nft_rbtree_interval_end(rbe_prev) && 256 nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) 257 break; 258 259 prev = rb_prev(prev); 260 } 261 262 rbe_prev = NULL; 263 if (prev) { 264 rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); 265 nft_rbtree_gc_elem_remove(net, set, priv, rbe_prev); 266 267 /* There is always room in this trans gc for this element, 268 * memory allocation never actually happens, hence, the warning 269 * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT, 270 * this is synchronous gc which never fails. 271 */ 272 gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); 273 if (WARN_ON_ONCE(!gc)) 274 return ERR_PTR(-ENOMEM); 275 276 nft_trans_gc_elem_add(gc, rbe_prev); 277 } 278 279 nft_rbtree_gc_elem_remove(net, set, priv, rbe); 280 gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); 281 if (WARN_ON_ONCE(!gc)) 282 return ERR_PTR(-ENOMEM); 283 284 nft_trans_gc_elem_add(gc, rbe); 285 286 nft_trans_gc_queue_sync_done(gc); 287 288 return rbe_prev; 289} 290 291static bool nft_rbtree_update_first(const struct nft_set *set, 292 struct nft_rbtree_elem *rbe, 293 struct rb_node *first) 294{ 295 struct nft_rbtree_elem *first_elem; 296 297 first_elem = rb_entry(first, struct nft_rbtree_elem, node); 298 /* this element is closest to where the new element is to be inserted: 299 * update the first element for the node list path. 300 */ 301 if (nft_rbtree_cmp(set, rbe, first_elem) < 0) 302 return true; 303 304 return false; 305} 306 307static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, 308 struct nft_rbtree_elem *new, 309 struct nft_elem_priv **elem_priv) 310{ 311 struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; 312 struct rb_node *node, *next, *parent, **p, *first = NULL; 313 struct nft_rbtree *priv = nft_set_priv(set); 314 u8 cur_genmask = nft_genmask_cur(net); 315 u8 genmask = nft_genmask_next(net); 316 u64 tstamp = nft_net_tstamp(net); 317 int d; 318 319 /* Descend the tree to search for an existing element greater than the 320 * key value to insert that is greater than the new element. This is the 321 * first element to walk the ordered elements to find possible overlap. 322 */ 323 parent = NULL; 324 p = &priv->root.rb_node; 325 while (*p != NULL) { 326 parent = *p; 327 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 328 d = nft_rbtree_cmp(set, rbe, new); 329 330 if (d < 0) { 331 p = &parent->rb_left; 332 } else if (d > 0) { 333 if (!first || 334 nft_rbtree_update_first(set, rbe, first)) 335 first = &rbe->node; 336 337 p = &parent->rb_right; 338 } else { 339 if (nft_rbtree_interval_end(rbe)) 340 p = &parent->rb_left; 341 else 342 p = &parent->rb_right; 343 } 344 } 345 346 if (!first) 347 first = rb_first(&priv->root); 348 349 /* Detect overlap by going through the list of valid tree nodes. 350 * Values stored in the tree are in reversed order, starting from 351 * highest to lowest value. 352 */ 353 for (node = first; node != NULL; node = next) { 354 next = rb_next(node); 355 356 rbe = rb_entry(node, struct nft_rbtree_elem, node); 357 358 if (!nft_set_elem_active(&rbe->ext, genmask)) 359 continue; 360 361 /* perform garbage collection to avoid bogus overlap reports 362 * but skip new elements in this transaction. 363 */ 364 if (__nft_set_elem_expired(&rbe->ext, tstamp) && 365 nft_set_elem_active(&rbe->ext, cur_genmask)) { 366 const struct nft_rbtree_elem *removed_end; 367 368 removed_end = nft_rbtree_gc_elem(set, priv, rbe); 369 if (IS_ERR(removed_end)) 370 return PTR_ERR(removed_end); 371 372 if (removed_end == rbe_le || removed_end == rbe_ge) 373 return -EAGAIN; 374 375 continue; 376 } 377 378 d = nft_rbtree_cmp(set, rbe, new); 379 if (d == 0) { 380 /* Matching end element: no need to look for an 381 * overlapping greater or equal element. 382 */ 383 if (nft_rbtree_interval_end(rbe)) { 384 rbe_le = rbe; 385 break; 386 } 387 388 /* first element that is greater or equal to key value. */ 389 if (!rbe_ge) { 390 rbe_ge = rbe; 391 continue; 392 } 393 394 /* this is a closer more or equal element, update it. */ 395 if (nft_rbtree_cmp(set, rbe_ge, new) != 0) { 396 rbe_ge = rbe; 397 continue; 398 } 399 400 /* element is equal to key value, make sure flags are 401 * the same, an existing more or equal start element 402 * must not be replaced by more or equal end element. 403 */ 404 if ((nft_rbtree_interval_start(new) && 405 nft_rbtree_interval_start(rbe_ge)) || 406 (nft_rbtree_interval_end(new) && 407 nft_rbtree_interval_end(rbe_ge))) { 408 rbe_ge = rbe; 409 continue; 410 } 411 } else if (d > 0) { 412 /* annotate element greater than the new element. */ 413 rbe_ge = rbe; 414 continue; 415 } else if (d < 0) { 416 /* annotate element less than the new element. */ 417 rbe_le = rbe; 418 break; 419 } 420 } 421 422 /* - new start element matching existing start element: full overlap 423 * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. 424 */ 425 if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && 426 nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { 427 *elem_priv = &rbe_ge->priv; 428 return -EEXIST; 429 } 430 431 /* - new end element matching existing end element: full overlap 432 * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. 433 */ 434 if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && 435 nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { 436 *elem_priv = &rbe_le->priv; 437 return -EEXIST; 438 } 439 440 /* - new start element with existing closest, less or equal key value 441 * being a start element: partial overlap, reported as -ENOTEMPTY. 442 * Anonymous sets allow for two consecutive start element since they 443 * are constant, skip them to avoid bogus overlap reports. 444 */ 445 if (!nft_set_is_anonymous(set) && rbe_le && 446 nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) 447 return -ENOTEMPTY; 448 449 /* - new end element with existing closest, less or equal key value 450 * being a end element: partial overlap, reported as -ENOTEMPTY. 451 */ 452 if (rbe_le && 453 nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new)) 454 return -ENOTEMPTY; 455 456 /* - new end element with existing closest, greater or equal key value 457 * being an end element: partial overlap, reported as -ENOTEMPTY 458 */ 459 if (rbe_ge && 460 nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) 461 return -ENOTEMPTY; 462 463 /* Accepted element: pick insertion point depending on key value */ 464 parent = NULL; 465 p = &priv->root.rb_node; 466 while (*p != NULL) { 467 parent = *p; 468 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 469 d = nft_rbtree_cmp(set, rbe, new); 470 471 if (d < 0) 472 p = &parent->rb_left; 473 else if (d > 0) 474 p = &parent->rb_right; 475 else if (nft_rbtree_interval_end(rbe)) 476 p = &parent->rb_left; 477 else 478 p = &parent->rb_right; 479 } 480 481 rb_link_node_rcu(&new->node, parent, p); 482 rb_insert_color(&new->node, &priv->root); 483 return 0; 484} 485 486static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, 487 const struct nft_set_elem *elem, 488 struct nft_elem_priv **elem_priv) 489{ 490 struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); 491 struct nft_rbtree *priv = nft_set_priv(set); 492 int err; 493 494 do { 495 if (fatal_signal_pending(current)) 496 return -EINTR; 497 498 cond_resched(); 499 500 write_lock_bh(&priv->lock); 501 write_seqcount_begin(&priv->count); 502 err = __nft_rbtree_insert(net, set, rbe, elem_priv); 503 write_seqcount_end(&priv->count); 504 write_unlock_bh(&priv->lock); 505 } while (err == -EAGAIN); 506 507 return err; 508} 509 510static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) 511{ 512 write_lock_bh(&priv->lock); 513 write_seqcount_begin(&priv->count); 514 rb_erase(&rbe->node, &priv->root); 515 write_seqcount_end(&priv->count); 516 write_unlock_bh(&priv->lock); 517} 518 519static void nft_rbtree_remove(const struct net *net, 520 const struct nft_set *set, 521 struct nft_elem_priv *elem_priv) 522{ 523 struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); 524 struct nft_rbtree *priv = nft_set_priv(set); 525 526 nft_rbtree_erase(priv, rbe); 527} 528 529static void nft_rbtree_activate(const struct net *net, 530 const struct nft_set *set, 531 struct nft_elem_priv *elem_priv) 532{ 533 struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); 534 535 nft_clear(net, &rbe->ext); 536} 537 538static void nft_rbtree_flush(const struct net *net, 539 const struct nft_set *set, 540 struct nft_elem_priv *elem_priv) 541{ 542 struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); 543 544 nft_set_elem_change_active(net, set, &rbe->ext); 545} 546 547static struct nft_elem_priv * 548nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, 549 const struct nft_set_elem *elem) 550{ 551 struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv); 552 const struct nft_rbtree *priv = nft_set_priv(set); 553 const struct rb_node *parent = priv->root.rb_node; 554 u8 genmask = nft_genmask_next(net); 555 u64 tstamp = nft_net_tstamp(net); 556 int d; 557 558 while (parent != NULL) { 559 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 560 561 d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val, 562 set->klen); 563 if (d < 0) 564 parent = parent->rb_left; 565 else if (d > 0) 566 parent = parent->rb_right; 567 else { 568 if (nft_rbtree_interval_end(rbe) && 569 nft_rbtree_interval_start(this)) { 570 parent = parent->rb_left; 571 continue; 572 } else if (nft_rbtree_interval_start(rbe) && 573 nft_rbtree_interval_end(this)) { 574 parent = parent->rb_right; 575 continue; 576 } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) { 577 break; 578 } else if (!nft_set_elem_active(&rbe->ext, genmask)) { 579 parent = parent->rb_left; 580 continue; 581 } 582 nft_rbtree_flush(net, set, &rbe->priv); 583 return &rbe->priv; 584 } 585 } 586 return NULL; 587} 588 589static void nft_rbtree_walk(const struct nft_ctx *ctx, 590 struct nft_set *set, 591 struct nft_set_iter *iter) 592{ 593 struct nft_rbtree *priv = nft_set_priv(set); 594 struct nft_rbtree_elem *rbe; 595 struct rb_node *node; 596 597 read_lock_bh(&priv->lock); 598 for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { 599 rbe = rb_entry(node, struct nft_rbtree_elem, node); 600 601 if (iter->count < iter->skip) 602 goto cont; 603 604 iter->err = iter->fn(ctx, set, iter, &rbe->priv); 605 if (iter->err < 0) { 606 read_unlock_bh(&priv->lock); 607 return; 608 } 609cont: 610 iter->count++; 611 } 612 read_unlock_bh(&priv->lock); 613} 614 615static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, 616 struct nft_rbtree *priv, 617 struct nft_rbtree_elem *rbe) 618{ 619 nft_setelem_data_deactivate(net, set, &rbe->priv); 620 nft_rbtree_erase(priv, rbe); 621} 622 623static void nft_rbtree_gc(struct nft_set *set) 624{ 625 struct nft_rbtree *priv = nft_set_priv(set); 626 struct nft_rbtree_elem *rbe, *rbe_end = NULL; 627 struct net *net = read_pnet(&set->net); 628 u64 tstamp = nft_net_tstamp(net); 629 struct rb_node *node, *next; 630 struct nft_trans_gc *gc; 631 632 set = nft_set_container_of(priv); 633 net = read_pnet(&set->net); 634 635 gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); 636 if (!gc) 637 return; 638 639 for (node = rb_first(&priv->root); node ; node = next) { 640 next = rb_next(node); 641 642 rbe = rb_entry(node, struct nft_rbtree_elem, node); 643 644 /* elements are reversed in the rbtree for historical reasons, 645 * from highest to lowest value, that is why end element is 646 * always visited before the start element. 647 */ 648 if (nft_rbtree_interval_end(rbe)) { 649 rbe_end = rbe; 650 continue; 651 } 652 if (!__nft_set_elem_expired(&rbe->ext, tstamp)) 653 continue; 654 655 gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); 656 if (!gc) 657 goto try_later; 658 659 /* end element needs to be removed first, it has 660 * no timeout extension. 661 */ 662 if (rbe_end) { 663 nft_rbtree_gc_remove(net, set, priv, rbe_end); 664 nft_trans_gc_elem_add(gc, rbe_end); 665 rbe_end = NULL; 666 } 667 668 gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); 669 if (!gc) 670 goto try_later; 671 672 nft_rbtree_gc_remove(net, set, priv, rbe); 673 nft_trans_gc_elem_add(gc, rbe); 674 } 675 676try_later: 677 678 if (gc) { 679 gc = nft_trans_gc_catchall_sync(gc); 680 nft_trans_gc_queue_sync_done(gc); 681 priv->last_gc = jiffies; 682 } 683} 684 685static u64 nft_rbtree_privsize(const struct nlattr * const nla[], 686 const struct nft_set_desc *desc) 687{ 688 return sizeof(struct nft_rbtree); 689} 690 691static int nft_rbtree_init(const struct nft_set *set, 692 const struct nft_set_desc *desc, 693 const struct nlattr * const nla[]) 694{ 695 struct nft_rbtree *priv = nft_set_priv(set); 696 697 BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0); 698 699 rwlock_init(&priv->lock); 700 seqcount_rwlock_init(&priv->count, &priv->lock); 701 priv->root = RB_ROOT; 702 703 return 0; 704} 705 706static void nft_rbtree_destroy(const struct nft_ctx *ctx, 707 const struct nft_set *set) 708{ 709 struct nft_rbtree *priv = nft_set_priv(set); 710 struct nft_rbtree_elem *rbe; 711 struct rb_node *node; 712 713 while ((node = priv->root.rb_node) != NULL) { 714 rb_erase(node, &priv->root); 715 rbe = rb_entry(node, struct nft_rbtree_elem, node); 716 nf_tables_set_elem_destroy(ctx, set, &rbe->priv); 717 } 718} 719 720static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, 721 struct nft_set_estimate *est) 722{ 723 if (desc->field_count > 1) 724 return false; 725 726 if (desc->size) 727 est->size = sizeof(struct nft_rbtree) + 728 desc->size * sizeof(struct nft_rbtree_elem); 729 else 730 est->size = ~0; 731 732 est->lookup = NFT_SET_CLASS_O_LOG_N; 733 est->space = NFT_SET_CLASS_O_N; 734 735 return true; 736} 737 738static void nft_rbtree_commit(struct nft_set *set) 739{ 740 struct nft_rbtree *priv = nft_set_priv(set); 741 742 if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) 743 nft_rbtree_gc(set); 744} 745 746static void nft_rbtree_gc_init(const struct nft_set *set) 747{ 748 struct nft_rbtree *priv = nft_set_priv(set); 749 750 priv->last_gc = jiffies; 751} 752 753const struct nft_set_type nft_set_rbtree_type = { 754 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, 755 .ops = { 756 .privsize = nft_rbtree_privsize, 757 .elemsize = offsetof(struct nft_rbtree_elem, ext), 758 .estimate = nft_rbtree_estimate, 759 .init = nft_rbtree_init, 760 .destroy = nft_rbtree_destroy, 761 .insert = nft_rbtree_insert, 762 .remove = nft_rbtree_remove, 763 .deactivate = nft_rbtree_deactivate, 764 .flush = nft_rbtree_flush, 765 .activate = nft_rbtree_activate, 766 .commit = nft_rbtree_commit, 767 .gc_init = nft_rbtree_gc_init, 768 .lookup = nft_rbtree_lookup, 769 .walk = nft_rbtree_walk, 770 .get = nft_rbtree_get, 771 }, 772}; 773