/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_SCHED_GENERIC_H
#define __NET_SCHED_GENERIC_H

#include <linux/netdevice.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/pkt_sched.h>
#include <linux/pkt_cls.h>
#include <linux/percpu.h>
#include <linux/dynamic_queue_limits.h>
#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include <linux/hashtable.h>
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
#include <net/flow_offload.h>
#include <linux/xarray.h>

struct Qdisc_ops;
struct qdisc_walker;
struct tcf_walker;
struct module;
struct bpf_flow_keys;

struct qdisc_rate_table {
	struct tc_ratespec rate;
	u32		data[256];
	struct qdisc_rate_table *next;
	int		refcnt;
};

enum qdisc_state_t {
	__QDISC_STATE_SCHED,
	__QDISC_STATE_DEACTIVATED,
	__QDISC_STATE_MISSED,
	__QDISC_STATE_DRAINING,
};

enum qdisc_state2_t {
	/* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
	 * Use qdisc_run_begin/end() or qdisc_is_running() instead.
	 */
	__QDISC_STATE2_RUNNING,
};

#define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
#define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)

#define QDISC_STATE_NON_EMPTY	(QDISC_STATE_MISSED | \
				 QDISC_STATE_DRAINING)

struct qdisc_size_table {
	struct rcu_head		rcu;
	struct list_head	list;
	struct tc_sizespec	szopts;
	int			refcnt;
	u16			data[];
};

/* similar to sk_buff_head, but skb->prev pointer is undefined. */
struct qdisc_skb_head {
	struct sk_buff	*head;
	struct sk_buff	*tail;
	__u32		qlen;
	spinlock_t	lock;
};

struct Qdisc {
	int			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
					   struct sk_buff **to_free);
	struct sk_buff *	(*dequeue)(struct Qdisc *sch);
	unsigned int		flags;
#define TCQ_F_BUILTIN		1
#define TCQ_F_INGRESS		2
#define TCQ_F_CAN_BYPASS	4
#define TCQ_F_MQROOT		8
#define TCQ_F_ONETXQUEUE	0x10 /* dequeue_skb() can assume all skbs are for
				      * q->dev_queue : It can test
				      * netif_xmit_frozen_or_stopped() before
				      * dequeueing next packet.
				      * It's true for MQ/MQPRIO slaves, or a
				      * non-multiqueue device.
				      */
#define TCQ_F_WARN_NONWC	(1 << 16)
#define TCQ_F_CPUSTATS		0x20 /* run using percpu statistics */
#define TCQ_F_NOPARENT		0x40 /* root of its hierarchy :
				      * qdisc_tree_reduce_backlog() should stop.
				      */
#define TCQ_F_INVISIBLE		0x80 /* invisible by default in dump */
#define TCQ_F_NOLOCK		0x100 /* qdisc does not require locking */
#define TCQ_F_OFFLOADED		0x200 /* qdisc is offloaded to HW */
	u32			limit;
	const struct Qdisc_ops	*ops;
	struct qdisc_size_table	__rcu *stab;
	struct hlist_node	hash;
	u32			handle;
	u32			parent;

	struct netdev_queue	*dev_queue;

	struct net_rate_estimator __rcu *rate_est;
	struct gnet_stats_basic_sync __percpu *cpu_bstats;
	struct gnet_stats_queue	__percpu *cpu_qstats;
	int			pad;
	refcount_t		refcnt;

	/*
	 * For performance's sake on SMP, we put highly modified fields at the end
	 */
	struct sk_buff_head	gso_skb ____cacheline_aligned_in_smp;
	struct qdisc_skb_head	q;
	struct gnet_stats_basic_sync bstats;
	struct gnet_stats_queue	qstats;
	int			owner;
	unsigned long		state;
	unsigned long		state2; /* must be written under qdisc spinlock */
	struct Qdisc		*next_sched;
	struct sk_buff_head	skb_bad_txq;

	spinlock_t		busylock ____cacheline_aligned_in_smp;
	spinlock_t		seqlock;

	struct rcu_head		rcu;
	netdevice_tracker	dev_tracker;
	/* private data */
	long			privdata[] ____cacheline_aligned;
};

static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN)
		return;
	refcount_inc(&qdisc->refcnt);
}

static inline bool qdisc_refcount_dec_if_one(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN)
		return true;
	return refcount_dec_if_one(&qdisc->refcnt);
}

/* Intended to be used by unlocked users, when concurrent qdisc release is
 * possible.
 */

static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN)
		return qdisc;
	if (refcount_inc_not_zero(&qdisc->refcnt))
		return qdisc;
	return NULL;
}

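/* Illustrative sketch, not part of this header: an unlocked reader is
 * expected to take an RCU read-side reference first, convert it to a real
 * refcount with qdisc_refcount_inc_nz(), and release it later with
 * qdisc_put_unlocked() (declared further below). The lookup helper is
 * hypothetical:
 *
 *	rcu_read_lock();
 *	q = qdisc_refcount_inc_nz(my_qdisc_lookup(dev));  // hypothetical lookup
 *	rcu_read_unlock();
 *	if (q) {
 *		... use q ...
 *		qdisc_put_unlocked(q);
 *	}
 */
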
/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
 * root_lock section, or provide their own memory barriers -- ordering
 * against qdisc_run_begin/end() atomic bit operations.
 */
static inline bool qdisc_is_running(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK)
		return spin_is_locked(&qdisc->seqlock);
	return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
}

static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
{
	return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY);
}

static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
{
	return q->flags & TCQ_F_CPUSTATS;
}

static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
{
	if (qdisc_is_percpu_stats(qdisc))
		return nolock_qdisc_is_empty(qdisc);
	return !READ_ONCE(qdisc->q.qlen);
}

/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
 * the qdisc root lock acquired.
 */
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK) {
		if (spin_trylock(&qdisc->seqlock))
			return true;

		/* No need to insist if the MISSED flag was already set.
		 * Note that test_and_set_bit() also gives us memory ordering
		 * guarantees wrt potential earlier enqueue() and below
		 * spin_trylock(), both of which are necessary to prevent races.
		 */
		if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state))
			return false;

		/* Try to take the lock again to make sure that we will either
		 * grab it or the CPU that still has it will see MISSED set
		 * when testing it in qdisc_run_end().
		 */
		return spin_trylock(&qdisc->seqlock);
	}
	return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
}

static inline void qdisc_run_end(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK) {
		spin_unlock(&qdisc->seqlock);

		/* spin_unlock() only has store-release semantics. The unlock
		 * and test_bit() ordering is a store-load ordering, so a full
		 * memory barrier is needed here.
		 */
		smp_mb();

		if (unlikely(test_bit(__QDISC_STATE_MISSED,
				      &qdisc->state)))
			__netif_schedule(qdisc);
	} else {
		__clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
	}
}

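/* Illustrative sketch, not part of this header: the transmit path pairs
 * these helpers around the dequeue loop, roughly as qdisc_run() in
 * net/pkt_sched.h does (for !TCQ_F_NOLOCK the root lock must already be
 * held, as noted above):
 *
 *	if (qdisc_run_begin(q)) {
 *		__qdisc_run(q);		// dequeue/transmit loop
 *		qdisc_run_end(q);
 *	}
 */
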
static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
{
	return qdisc->flags & TCQ_F_ONETXQUEUE;
}

static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
{
	return netdev_queue_dql_avail(txq);
}

struct Qdisc_class_ops {
	unsigned int		flags;
	/* Child qdisc manipulation */
	struct netdev_queue *	(*select_queue)(struct Qdisc *, struct tcmsg *);
	int			(*graft)(struct Qdisc *, unsigned long cl,
					 struct Qdisc *, struct Qdisc **,
					 struct netlink_ext_ack *extack);
	struct Qdisc *		(*leaf)(struct Qdisc *, unsigned long cl);
	void			(*qlen_notify)(struct Qdisc *, unsigned long);

	/* Class manipulation routines */
	unsigned long		(*find)(struct Qdisc *, u32 classid);
	int			(*change)(struct Qdisc *, u32, u32,
					  struct nlattr **, unsigned long *,
					  struct netlink_ext_ack *);
	int			(*delete)(struct Qdisc *, unsigned long,
					  struct netlink_ext_ack *);
	void			(*walk)(struct Qdisc *, struct qdisc_walker *arg);

	/* Filter manipulation */
	struct tcf_block *	(*tcf_block)(struct Qdisc *sch,
					     unsigned long arg,
					     struct netlink_ext_ack *extack);
	unsigned long		(*bind_tcf)(struct Qdisc *, unsigned long,
					    u32 classid);
	void			(*unbind_tcf)(struct Qdisc *, unsigned long);

	/* rtnetlink specific */
	int			(*dump)(struct Qdisc *, unsigned long,
					struct sk_buff *skb, struct tcmsg*);
	int			(*dump_stats)(struct Qdisc *, unsigned long,
					      struct gnet_dump *);
};

/* Qdisc_class_ops flag values */

/* Implements API that doesn't require rtnl lock */
enum qdisc_class_ops_flags {
	QDISC_CLASS_OPS_DOIT_UNLOCKED = 1,
};

struct Qdisc_ops {
	struct Qdisc_ops	*next;
	const struct Qdisc_class_ops	*cl_ops;
	char			id[IFNAMSIZ];
	int			priv_size;
	unsigned int		static_flags;

	int			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
					   struct sk_buff **to_free);
	struct sk_buff *	(*dequeue)(struct Qdisc *);
	struct sk_buff *	(*peek)(struct Qdisc *);

	int			(*init)(struct Qdisc *sch, struct nlattr *arg,
					struct netlink_ext_ack *extack);
	void			(*reset)(struct Qdisc *);
	void			(*destroy)(struct Qdisc *);
	int			(*change)(struct Qdisc *sch,
					  struct nlattr *arg,
					  struct netlink_ext_ack *extack);
	void			(*attach)(struct Qdisc *sch);
	int			(*change_tx_queue_len)(struct Qdisc *, unsigned int);
	void			(*change_real_num_tx)(struct Qdisc *sch,
						      unsigned int new_real_tx);

	int			(*dump)(struct Qdisc *, struct sk_buff *);
	int			(*dump_stats)(struct Qdisc *, struct gnet_dump *);

	void			(*ingress_block_set)(struct Qdisc *sch,
						     u32 block_index);
	void			(*egress_block_set)(struct Qdisc *sch,
						     u32 block_index);
	u32			(*ingress_block_get)(struct Qdisc *sch);
	u32			(*egress_block_get)(struct Qdisc *sch);

	struct module		*owner;
};

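/* Illustrative sketch, not part of this header: a minimal classless qdisc
 * fills in id, priv_size and the enqueue/dequeue/peek hooks, then calls
 * register_qdisc() from its module init. The ops below are hypothetical;
 * net/sched/sch_fifo.c is a real minimal user of this interface:
 *
 *	static struct Qdisc_ops my_qdisc_ops __read_mostly = {	// hypothetical
 *		.id		= "myfifo",
 *		.priv_size	= 0,
 *		.enqueue	= my_enqueue,
 *		.dequeue	= qdisc_dequeue_head,
 *		.peek		= qdisc_peek_head,
 *		.owner		= THIS_MODULE,
 *	};
 *	...
 *	return register_qdisc(&my_qdisc_ops);
 */
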
struct tcf_result {
	union {
		struct {
			unsigned long	class;
			u32		classid;
		};
		const struct tcf_proto *goto_tp;
	};
};

struct tcf_chain;

struct tcf_proto_ops {
	struct list_head	head;
	char			kind[IFNAMSIZ];

	int			(*classify)(struct sk_buff *,
					    const struct tcf_proto *,
					    struct tcf_result *);
	int			(*init)(struct tcf_proto*);
	void			(*destroy)(struct tcf_proto *tp, bool rtnl_held,
					   struct netlink_ext_ack *extack);

	void*			(*get)(struct tcf_proto*, u32 handle);
	void			(*put)(struct tcf_proto *tp, void *f);
	int			(*change)(struct net *net, struct sk_buff *,
					  struct tcf_proto*, unsigned long,
					  u32 handle, struct nlattr **,
					  void **, u32,
					  struct netlink_ext_ack *);
	int			(*delete)(struct tcf_proto *tp, void *arg,
					  bool *last, bool rtnl_held,
					  struct netlink_ext_ack *);
	bool			(*delete_empty)(struct tcf_proto *tp);
	void			(*walk)(struct tcf_proto *tp,
					struct tcf_walker *arg, bool rtnl_held);
	int			(*reoffload)(struct tcf_proto *tp, bool add,
					     flow_setup_cb_t *cb, void *cb_priv,
					     struct netlink_ext_ack *extack);
	void			(*hw_add)(struct tcf_proto *tp,
					  void *type_data);
	void			(*hw_del)(struct tcf_proto *tp,
					  void *type_data);
	void			(*bind_class)(void *, u32, unsigned long,
					      void *, unsigned long);
	void *			(*tmplt_create)(struct net *net,
						struct tcf_chain *chain,
						struct nlattr **tca,
						struct netlink_ext_ack *extack);
	void			(*tmplt_destroy)(void *tmplt_priv);
	void			(*tmplt_reoffload)(struct tcf_chain *chain,
						   bool add,
						   flow_setup_cb_t *cb,
						   void *cb_priv);
	struct tcf_exts *	(*get_exts)(const struct tcf_proto *tp,
					    u32 handle);

	/* rtnetlink specific */
	int			(*dump)(struct net*, struct tcf_proto*, void *,
					struct sk_buff *skb, struct tcmsg*,
					bool);
	int			(*terse_dump)(struct net *net,
					      struct tcf_proto *tp, void *fh,
					      struct sk_buff *skb,
					      struct tcmsg *t, bool rtnl_held);
	int			(*tmplt_dump)(struct sk_buff *skb,
					      struct net *net,
					      void *tmplt_priv);

	struct module		*owner;
	int			flags;
};

/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags
 * are expected to implement tcf_proto_ops->delete_empty(), otherwise race
 * conditions can occur when filters are inserted/deleted simultaneously.
 */
enum tcf_proto_ops_flags {
	TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
};

struct tcf_proto {
	/* Fast access part */
	struct tcf_proto __rcu	*next;
	void __rcu		*root;

	/* called under RCU BH lock */
	int			(*classify)(struct sk_buff *,
					    const struct tcf_proto *,
					    struct tcf_result *);
	__be16			protocol;

	/* All the rest */
	u32			prio;
	void			*data;
	const struct tcf_proto_ops	*ops;
	struct tcf_chain	*chain;
	/* Lock protects tcf_proto shared state and can be used by unlocked
	 * classifiers to protect their private data.
	 */
	spinlock_t		lock;
	bool			deleting;
	refcount_t		refcnt;
	struct rcu_head		rcu;
	struct hlist_node	destroy_ht_node;
};

struct qdisc_skb_cb {
	struct {
		unsigned int		pkt_len;
		u16			slave_dev_queue_mapping;
		u16			tc_classid;
	};
#define QDISC_CB_PRIV_LEN 20
	unsigned char		data[QDISC_CB_PRIV_LEN];
};

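/* Illustrative sketch, not part of this header: a qdisc needing per-skb
 * scratch space layers its own cb struct on top of qdisc_skb_cb and checks
 * the size with qdisc_cb_private_validate() (declared further below). The
 * struct here is hypothetical; netem uses this exact pattern:
 *
 *	struct my_skb_cb {			// hypothetical
 *		u64 time_to_send;
 *	};
 *
 *	static inline struct my_skb_cb *my_skb_cb(struct sk_buff *skb)
 *	{
 *		qdisc_cb_private_validate(skb, sizeof(struct my_skb_cb));
 *		return (struct my_skb_cb *)qdisc_skb_cb(skb)->data;
 *	}
 */
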
typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);

struct tcf_chain {
	/* Protects filter_chain. */
	struct mutex filter_chain_lock;
	struct tcf_proto __rcu *filter_chain;
	struct list_head list;
	struct tcf_block *block;
	u32 index; /* chain index */
	unsigned int refcnt;
	unsigned int action_refcnt;
	bool explicitly_created;
	bool flushing;
	const struct tcf_proto_ops *tmplt_ops;
	void *tmplt_priv;
	struct rcu_head rcu;
};

struct tcf_block {
	struct xarray ports; /* datapath accessible */
	/* Lock protects tcf_block and lifetime-management data of chains
	 * attached to the block (refcnt, action_refcnt, explicitly_created).
	 */
	struct mutex lock;
	struct list_head chain_list;
	u32 index; /* block index for shared blocks */
	u32 classid; /* which class this block belongs to */
	refcount_t refcnt;
	struct net *net;
	struct Qdisc *q;
	struct rw_semaphore cb_lock; /* protects cb_list and offload counters */
	struct flow_block flow_block;
	struct list_head owner_list;
	bool keep_dst;
	atomic_t offloadcnt; /* Number of offloaded filters */
	unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
	unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
	struct {
		struct tcf_chain *chain;
		struct list_head filter_chain_list;
	} chain0;
	struct rcu_head rcu;
	DECLARE_HASHTABLE(proto_destroy_ht, 7);
	struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};

struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index);

static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
{
	return lockdep_is_held(&chain->filter_chain_lock);
}

static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
{
	return lockdep_is_held(&tp->lock);
}

#define tcf_chain_dereference(p, chain)					\
	rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain))

#define tcf_proto_dereference(p, tp)					\
	rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp))

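/* Illustrative sketch, not part of this header: a writer walking the
 * filter chain takes filter_chain_lock and uses tcf_chain_dereference()
 * instead of plain rcu_dereference(), so lockdep can verify the
 * protection (simplified from the pattern in net/sched/cls_api.c):
 *
 *	mutex_lock(&chain->filter_chain_lock);
 *	tp = tcf_chain_dereference(chain->filter_chain, chain);
 *	... modify the chain ...
 *	mutex_unlock(&chain->filter_chain_lock);
 */
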
static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
{
	struct qdisc_skb_cb *qcb;

	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb));
	BUILD_BUG_ON(sizeof(qcb->data) < sz);
}

static inline int qdisc_qlen(const struct Qdisc *q)
{
	return q->q.qlen;
}

static inline int qdisc_qlen_sum(const struct Qdisc *q)
{
	__u32 qlen = q->qstats.qlen;
	int i;

	if (qdisc_is_percpu_stats(q)) {
		for_each_possible_cpu(i)
			qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
	} else {
		qlen += q->q.qlen;
	}

	return qlen;
}

static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
{
	return (struct qdisc_skb_cb *)skb->cb;
}

static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
{
	return &qdisc->q.lock;
}

static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
{
	struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc);

	return q;
}

static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc)
{
	return rcu_dereference_bh(qdisc->dev_queue->qdisc);
}

static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
{
	return rcu_dereference_rtnl(qdisc->dev_queue->qdisc_sleeping);
}

static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
{
	struct Qdisc *root = qdisc_root_sleeping(qdisc);

	ASSERT_RTNL();
	return qdisc_lock(root);
}

static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
{
	return qdisc->dev_queue->dev;
}

static inline void sch_tree_lock(struct Qdisc *q)
{
	if (q->flags & TCQ_F_MQROOT)
		spin_lock_bh(qdisc_lock(q));
	else
		spin_lock_bh(qdisc_root_sleeping_lock(q));
}

static inline void sch_tree_unlock(struct Qdisc *q)
{
	if (q->flags & TCQ_F_MQROOT)
		spin_unlock_bh(qdisc_lock(q));
	else
		spin_unlock_bh(qdisc_root_sleeping_lock(q));
}

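/* Illustrative sketch, not part of this header: a classful qdisc's
 * ->change() handler typically applies already-validated parameters under
 * sch_tree_lock() so the datapath never observes a half-updated
 * configuration:
 *
 *	sch_tree_lock(sch);
 *	q->limit = new_limit;		// hypothetical private field
 *	sch_tree_unlock(sch);
 */
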
extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
extern const u8 sch_default_prio2band[TC_PRIO_MAX + 1];
extern struct Qdisc_ops mq_qdisc_ops;
extern struct Qdisc_ops noqueue_qdisc_ops;
extern const struct Qdisc_ops *default_qdisc_ops;
static inline const struct Qdisc_ops *
get_default_qdisc_ops(const struct net_device *dev, int ntx)
{
	return ntx < dev->real_num_tx_queues ?
			default_qdisc_ops : &pfifo_fast_ops;
}

struct Qdisc_class_common {
	u32			classid;
	unsigned int		filter_cnt;
	struct hlist_node	hnode;
};

struct Qdisc_class_hash {
	struct hlist_head	*hash;
	unsigned int		hashsize;
	unsigned int		hashmask;
	unsigned int		hashelems;
};

static inline unsigned int qdisc_class_hash(u32 id, u32 mask)
{
	id ^= id >> 8;
	id ^= id >> 4;
	return id & mask;
}

static inline struct Qdisc_class_common *
qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
{
	struct Qdisc_class_common *cl;
	unsigned int h;

	if (!id)
		return NULL;

	h = qdisc_class_hash(id, hash->hashmask);
	hlist_for_each_entry(cl, &hash->hash[h], hnode) {
		if (cl->classid == id)
			return cl;
	}
	return NULL;
}

static inline bool qdisc_class_in_use(const struct Qdisc_class_common *cl)
{
	return cl->filter_cnt > 0;
}

static inline void qdisc_class_get(struct Qdisc_class_common *cl)
{
	unsigned int res;

	if (check_add_overflow(cl->filter_cnt, 1, &res))
		WARN(1, "Qdisc class overflow");

	cl->filter_cnt = res;
}

static inline void qdisc_class_put(struct Qdisc_class_common *cl)
{
	unsigned int res;

	if (check_sub_overflow(cl->filter_cnt, 1, &res))
		WARN(1, "Qdisc class underflow");

	cl->filter_cnt = res;
}

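/* Illustrative sketch, not part of this header: a classful qdisc embeds
 * Qdisc_class_common in its per-class struct and implements ->find() via
 * qdisc_class_find() plus container_of(); HTB works this way. Both
 * my_class and priv() below are hypothetical:
 *
 *	struct my_class {			// hypothetical
 *		struct Qdisc_class_common common;
 *		...
 *	};
 *
 *	static struct my_class *my_find(u32 handle, struct Qdisc *sch)
 *	{
 *		struct Qdisc_class_common *clc;
 *
 *		clc = qdisc_class_find(&priv(sch)->clhash, handle);
 *		return clc ? container_of(clc, struct my_class, common) : NULL;
 *	}
 */
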
static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid)
{
	u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY;

	return (hwtc < netdev_get_num_tc(dev)) ? hwtc : -EINVAL;
}

int qdisc_class_hash_init(struct Qdisc_class_hash *);
void qdisc_class_hash_insert(struct Qdisc_class_hash *,
			     struct Qdisc_class_common *);
void qdisc_class_hash_remove(struct Qdisc_class_hash *,
			     struct Qdisc_class_common *);
void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *);
void qdisc_class_hash_destroy(struct Qdisc_class_hash *);

int dev_qdisc_change_tx_queue_len(struct net_device *dev);
void dev_qdisc_change_real_num_tx(struct net_device *dev,
				  unsigned int new_real_tx);
void dev_init_scheduler(struct net_device *dev);
void dev_shutdown(struct net_device *dev);
void dev_activate(struct net_device *dev);
void dev_deactivate(struct net_device *dev);
void dev_deactivate_many(struct list_head *head);
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc);
void qdisc_reset(struct Qdisc *qdisc);
void qdisc_destroy(struct Qdisc *qdisc);
void qdisc_put(struct Qdisc *qdisc);
void qdisc_put_unlocked(struct Qdisc *qdisc);
void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len);
#ifdef CONFIG_NET_SCHED
int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
			      void *type_data);
void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack);
#else
static inline int
qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
			  void *type_data)
{
	q->flags &= ~TCQ_F_OFFLOADED;
	return 0;
}

static inline void
qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
			   struct Qdisc *new, struct Qdisc *old,
			   enum tc_setup_type type, void *type_data,
			   struct netlink_ext_ack *extack)
{
}
#endif
void qdisc_offload_query_caps(struct net_device *dev,
			      enum tc_setup_type type,
			      void *caps, size_t caps_len);
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  const struct Qdisc_ops *ops,
			  struct netlink_ext_ack *extack);
void qdisc_free(struct Qdisc *qdisc);
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
				const struct Qdisc_ops *ops, u32 parentid,
				struct netlink_ext_ack *extack);
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);

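/* Illustrative sketch, not part of this header: a classful qdisc commonly
 * creates its default child queues with qdisc_create_dflt() from ->init()
 * or when a new class is added, e.g. one pfifo per class (simplified from
 * several net/sched users; pfifo_qdisc_ops is declared in net/pkt_sched.h):
 *
 *	child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
 *				  TC_H_MAKE(sch->handle, 1), extack);
 *	if (!child)
 *		return -ENOMEM;
 */
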
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_XGRESS
	return skb->tc_at_ingress;
#else
	return false;
#endif
}

static inline bool skb_skip_tc_classify(struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_skip_classify) {
		skb->tc_skip_classify = 0;
		return true;
	}
#endif
	return false;
}

/* Reset all TX qdiscs greater than index of a device. */
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
{
	struct Qdisc *qdisc;

	for (; i < dev->num_tx_queues; i++) {
		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
		if (qdisc) {
			spin_lock_bh(qdisc_lock(qdisc));
			qdisc_reset(qdisc);
			spin_unlock_bh(qdisc_lock(qdisc));
		}
	}
}

/* Are all TX queues of the device empty? */
static inline bool qdisc_all_tx_empty(const struct net_device *dev)
{
	unsigned int i;

	rcu_read_lock();
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		const struct Qdisc *q = rcu_dereference(txq->qdisc);

		if (!qdisc_is_empty(q)) {
			rcu_read_unlock();
			return false;
		}
	}
	rcu_read_unlock();
	return true;
}

/* Are any of the TX qdiscs changing? */
static inline bool qdisc_tx_changing(const struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (rcu_access_pointer(txq->qdisc) !=
		    rcu_access_pointer(txq->qdisc_sleeping))
			return true;
	}
	return false;
}

/* Is the device using the noop qdisc on all queues? */
static inline bool qdisc_tx_is_noop(const struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		if (rcu_access_pointer(txq->qdisc) != &noop_qdisc)
			return false;
	}
	return true;
}

static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
{
	return qdisc_skb_cb(skb)->pkt_len;
}

/* additional qdisc xmit flags (NET_XMIT_MASK in linux/netdevice.h) */
enum net_xmit_qdisc_t {
	__NET_XMIT_STOLEN = 0x00010000,
	__NET_XMIT_BYPASS = 0x00020000,
};

#ifdef CONFIG_NET_CLS_ACT
#define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)
#else
#define net_xmit_drop_count(e)	(1)
#endif

static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
					   const struct Qdisc *sch)
{
#ifdef CONFIG_NET_SCHED
	struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);

	if (stab)
		__qdisc_calculate_pkt_len(skb, stab);
#endif
}

static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	qdisc_calculate_pkt_len(skb, sch);
	return sch->enqueue(skb, sch, to_free);
}

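/* Illustrative sketch, not part of this header: a parent qdisc enqueues
 * into a child with qdisc_enqueue() and uses net_xmit_drop_count() to
 * decide whether a failure counts as a drop of its own (the pattern used
 * by e.g. TBF, simplified; q->child is a hypothetical private field):
 *
 *	ret = qdisc_enqueue(skb, q->child, to_free);
 *	if (ret != NET_XMIT_SUCCESS) {
 *		if (net_xmit_drop_count(ret))
 *			qdisc_qstats_drop(sch);
 *		return ret;
 *	}
 *	sch->q.qlen++;
 */
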
static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
				  __u64 bytes, __u32 packets)
{
	u64_stats_update_begin(&bstats->syncp);
	u64_stats_add(&bstats->bytes, bytes);
	u64_stats_add(&bstats->packets, packets);
	u64_stats_update_end(&bstats->syncp);
}

static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
				 const struct sk_buff *skb)
{
	_bstats_update(bstats,
		       qdisc_pkt_len(skb),
		       skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
}

static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
					   const struct sk_buff *skb)
{
	bstats_update(this_cpu_ptr(sch->cpu_bstats), skb);
}

static inline void qdisc_bstats_update(struct Qdisc *sch,
				       const struct sk_buff *skb)
{
	bstats_update(&sch->bstats, skb);
}

static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch,
					    const struct sk_buff *skb)
{
	sch->qstats.backlog -= qdisc_pkt_len(skb);
}

static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch,
						const struct sk_buff *skb)
{
	this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
}

static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch,
					    const struct sk_buff *skb)
{
	sch->qstats.backlog += qdisc_pkt_len(skb);
}

static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
						const struct sk_buff *skb)
{
	this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
}

static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->qlen);
}

static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
{
	this_cpu_dec(sch->cpu_qstats->qlen);
}

static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->requeues);
}

static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
{
	sch->qstats.drops += count;
}

static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
{
	qstats->drops++;
}

static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats)
{
	qstats->overlimits++;
}

static inline void qdisc_qstats_drop(struct Qdisc *sch)
{
	qstats_drop_inc(&sch->qstats);
}

static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->drops);
}

static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
{
	sch->qstats.overlimits++;
}

static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch)
{
	__u32 qlen = qdisc_qlen_sum(sch);

	return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen);
}

static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen,
					     __u32 *backlog)
{
	struct gnet_stats_queue qstats = { 0 };

	gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats);
	*qlen = qstats.qlen + qdisc_qlen(sch);
	*backlog = qstats.backlog;
}

static inline void qdisc_tree_flush_backlog(struct Qdisc *sch)
{
	__u32 qlen, backlog;

	qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
	qdisc_tree_reduce_backlog(sch, qlen, backlog);
}

static inline void qdisc_purge_queue(struct Qdisc *sch)
{
	__u32 qlen, backlog;

	qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
	qdisc_reset(sch);
	qdisc_tree_reduce_backlog(sch, qlen, backlog);
}

static inline void __qdisc_enqueue_tail(struct sk_buff *skb,
					struct qdisc_skb_head *qh)
{
	struct sk_buff *last = qh->tail;

	if (last) {
		skb->next = NULL;
		last->next = skb;
		qh->tail = skb;
	} else {
		qh->tail = skb;
		qh->head = skb;
	}
	qh->qlen++;
}

static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch)
{
	__qdisc_enqueue_tail(skb, &sch->q);
	qdisc_qstats_backlog_inc(sch, skb);
	return NET_XMIT_SUCCESS;
}

static inline void __qdisc_enqueue_head(struct sk_buff *skb,
					struct qdisc_skb_head *qh)
{
	skb->next = qh->head;

	if (!qh->head)
		qh->tail = skb;
	qh->head = skb;
	qh->qlen++;
}

static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
{
	struct sk_buff *skb = qh->head;

	if (likely(skb != NULL)) {
		qh->head = skb->next;
		qh->qlen--;
		if (qh->head == NULL)
			qh->tail = NULL;
		skb->next = NULL;
	}

	return skb;
}

static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
{
	struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);

	if (likely(skb != NULL)) {
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_bstats_update(sch, skb);
	}

	return skb;
}

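/* Illustrative sketch, not part of this header: the helpers above suffice
 * for a bounded FIFO enqueue; net/sched/sch_fifo.c is the real thing. The
 * limit check plus drop fallback below is the usual pattern (qdisc_drop()
 * is defined further below; my_enqueue is hypothetical):
 *
 *	static int my_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *			      struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
 *			return qdisc_enqueue_tail(skb, sch);
 *
 *		return qdisc_drop(skb, sch, to_free);
 *	}
 */
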
struct tc_skb_cb {
	struct qdisc_skb_cb qdisc_cb;
	u32 drop_reason;

	u16 zone; /* Only valid if post_ct = true */
	u16 mru;
	u8 post_ct:1;
	u8 post_ct_snat:1;
	u8 post_ct_dnat:1;
};

static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
{
	struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;

	BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
	return cb;
}

static inline enum skb_drop_reason
tcf_get_drop_reason(const struct sk_buff *skb)
{
	return tc_skb_cb(skb)->drop_reason;
}

static inline void tcf_set_drop_reason(const struct sk_buff *skb,
				       enum skb_drop_reason reason)
{
	tc_skb_cb(skb)->drop_reason = reason;
}

/* Instead of calling kfree_skb() while root qdisc lock is held,
 * queue the skb for future freeing at end of __dev_xmit_skb()
 */
static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
{
	skb->next = *to_free;
	*to_free = skb;
}

static inline void __qdisc_drop_all(struct sk_buff *skb,
				    struct sk_buff **to_free)
{
	if (skb->prev)
		skb->prev->next = *to_free;
	else
		skb->next = *to_free;
	*to_free = skb;
}

static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
						   struct qdisc_skb_head *qh,
						   struct sk_buff **to_free)
{
	struct sk_buff *skb = __qdisc_dequeue_head(qh);

	if (likely(skb != NULL)) {
		unsigned int len = qdisc_pkt_len(skb);

		qdisc_qstats_backlog_dec(sch, skb);
		__qdisc_drop(skb, to_free);
		return len;
	}

	return 0;
}

static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
{
	const struct qdisc_skb_head *qh = &sch->q;

	return qh->head;
}

/* generic pseudo peek method for non-work-conserving qdisc */
static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
{
	struct sk_buff *skb = skb_peek(&sch->gso_skb);

	/* we can reuse ->gso_skb because peek isn't called for root qdiscs */
	if (!skb) {
		skb = sch->dequeue(sch);

		if (skb) {
			__skb_queue_head(&sch->gso_skb, skb);
			/* it's still part of the queue */
			qdisc_qstats_backlog_inc(sch, skb);
			sch->q.qlen++;
		}
	}

	return skb;
}

static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch,
						 struct sk_buff *skb)
{
	if (qdisc_is_percpu_stats(sch)) {
		qdisc_qstats_cpu_backlog_dec(sch, skb);
		qdisc_bstats_cpu_update(sch, skb);
		qdisc_qstats_cpu_qlen_dec(sch);
	} else {
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_bstats_update(sch, skb);
		sch->q.qlen--;
	}
}

static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch,
						 unsigned int pkt_len)
{
	if (qdisc_is_percpu_stats(sch)) {
		qdisc_qstats_cpu_qlen_inc(sch);
		this_cpu_add(sch->cpu_qstats->backlog, pkt_len);
	} else {
		sch->qstats.backlog += pkt_len;
		sch->q.qlen++;
	}
}

/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
{
	struct sk_buff *skb = skb_peek(&sch->gso_skb);

	if (skb) {
		skb = __skb_dequeue(&sch->gso_skb);
		if (qdisc_is_percpu_stats(sch)) {
			qdisc_qstats_cpu_backlog_dec(sch, skb);
			qdisc_qstats_cpu_qlen_dec(sch);
		} else {
			qdisc_qstats_backlog_dec(sch, skb);
			sch->q.qlen--;
		}
	} else {
		skb = sch->dequeue(sch);
	}

	return skb;
}

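/* Illustrative sketch, not part of this header: a shaping qdisc peeks at
 * its child, checks whether the packet may be sent yet, and only then
 * commits the dequeue with qdisc_dequeue_peeked() (TBF follows this
 * shape, much simplified; enough_tokens() is a hypothetical rate check):
 *
 *	skb = child->ops->peek(child);
 *	if (!skb || !enough_tokens(q, skb))
 *		return NULL;		// real code would arm a watchdog here
 *	return qdisc_dequeue_peeked(child);
 */
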
static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
{
	/*
	 * We do not know the backlog in bytes of this list, it
	 * is up to the caller to correct it
	 */
	ASSERT_RTNL();
	if (qh->qlen) {
		rtnl_kfree_skbs(qh->head, qh->tail);

		qh->head = NULL;
		qh->tail = NULL;
		qh->qlen = 0;
	}
}

static inline void qdisc_reset_queue(struct Qdisc *sch)
{
	__qdisc_reset_queue(&sch->q);
}

static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
					  struct Qdisc **pold)
{
	struct Qdisc *old;

	sch_tree_lock(sch);
	old = *pold;
	*pold = new;
	if (old != NULL)
		qdisc_purge_queue(old);
	sch_tree_unlock(sch);

	return old;
}

static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
{
	rtnl_kfree_skbs(skb, skb);
	qdisc_qstats_drop(sch);
}

static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch,
				 struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);
	qdisc_qstats_cpu_drop(sch);

	return NET_XMIT_DROP;
}

static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
			     struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);
	qdisc_qstats_drop(sch);

	return NET_XMIT_DROP;
}

static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
				 struct sk_buff **to_free)
{
	__qdisc_drop_all(skb, to_free);
	qdisc_qstats_drop(sch);

	return NET_XMIT_DROP;
}

struct psched_ratecfg {
	u64	rate_bytes_ps; /* bytes per second */
	u32	mult;
	u16	overhead;
	u16	mpu;
	u8	linklayer;
	u8	shift;
};

static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
				unsigned int len)
{
	len += r->overhead;

	if (len < r->mpu)
		len = r->mpu;

	if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
		return ((u64)(DIV_ROUND_UP(len, 48) * 53) * r->mult) >> r->shift;

	return ((u64)len * r->mult) >> r->shift;
}

void psched_ratecfg_precompute(struct psched_ratecfg *r,
			       const struct tc_ratespec *conf,
			       u64 rate64);

static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
					  const struct psched_ratecfg *r)
{
	memset(res, 0, sizeof(*res));

	/* legacy struct tc_ratespec has a 32bit @rate field
	 * Qdisc using 64bit rate should add new attributes
	 * in order to maintain compatibility.
	 */
	res->rate = min_t(u64, r->rate_bytes_ps, ~0U);

	res->overhead = r->overhead;
	res->mpu = r->mpu;
	res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
}

struct psched_pktrate {
	u64	rate_pkts_ps; /* packets per second */
	u32	mult;
	u8	shift;
};

static inline u64 psched_pkt2t_ns(const struct psched_pktrate *r,
				  unsigned int pkt_num)
{
	return ((u64)pkt_num * r->mult) >> r->shift;
}

void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);

/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
 * The fast path only needs to access filter list and to update stats
 */
struct mini_Qdisc {
	struct tcf_proto *filter_list;
	struct tcf_block *block;
	struct gnet_stats_basic_sync __percpu *cpu_bstats;
	struct gnet_stats_queue	__percpu *cpu_qstats;
	unsigned long rcu_state;
};

static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
						const struct sk_buff *skb)
{
	bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb);
}

static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
{
	this_cpu_inc(miniq->cpu_qstats->drops);
}

struct mini_Qdisc_pair {
	struct mini_Qdisc miniq1;
	struct mini_Qdisc miniq2;
	struct mini_Qdisc __rcu **p_miniq;
};

void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
			  struct tcf_proto *tp_head);
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
			  struct mini_Qdisc __rcu **p_miniq);
void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
				struct tcf_block *block);

void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx);

int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));

/* Make sure qdisc is no longer in SCHED state. */
static inline void qdisc_synchronize(const struct Qdisc *q)
{
	while (test_bit(__QDISC_STATE_SCHED, &q->state))
		msleep(1);
}

#endif