#ifndef __NET_PKT_SCHED_H
#define __NET_PKT_SCHED_H

/* Selectable internal clock sources for the packet scheduler; see the
 * long discussion further down for the trade-offs between them.
 * PSCHED_CLOCK_SOURCE picks the one actually compiled in. */
#define PSCHED_GETTIMEOFDAY	1
#define PSCHED_JIFFIES 		2
#define PSCHED_CPU 		3

#define PSCHED_CLOCK_SOURCE	PSCHED_JIFFIES

#include <linux/config.h>
#include <linux/types.h>
#include <linux/pkt_sched.h>
#include <net/pkt_cls.h>

#ifdef CONFIG_X86_TSC
#include <asm/msr.h>
#endif

struct rtattr;
struct Qdisc;

/* Iteration state handed to Qdisc_class_ops->walk(): fn() is invoked for
 * each class of the qdisc; skip/count let a partial dump resume, and a
 * non-zero return from fn() sets stop and terminates the walk. */
struct qdisc_walker
{
	int	stop;	/* set when fn() asks to abort the walk */
	int	skip;	/* number of leading classes to pass over */
	int	count;	/* classes visited so far */
	int	(*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
};

/* Operations a classful qdisc implements to manage its classes. */
struct Qdisc_class_ops
{
	/* Child qdisc manipulation */
	int			(*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **);
	struct Qdisc *		(*leaf)(struct Qdisc *, unsigned long cl);

	/* Class manipulation routines */
	unsigned long		(*get)(struct Qdisc *, u32 classid);
	void			(*put)(struct Qdisc *, unsigned long);
	int			(*change)(struct Qdisc *, u32, u32, struct rtattr **, unsigned long *);
	int			(*delete)(struct Qdisc *, unsigned long);
	void			(*walk)(struct Qdisc *, struct qdisc_walker * arg);

	/* Filter manipulation */
	struct tcf_proto **	(*tcf_chain)(struct Qdisc *, unsigned long);
	unsigned long		(*bind_tcf)(struct Qdisc *, unsigned long, u32 classid);
	void			(*unbind_tcf)(struct Qdisc *, unsigned long);

	/* rtnetlink specific */
	int			(*dump)(struct Qdisc *, unsigned long, struct sk_buff *skb, struct tcmsg*);
};

/* A queueing-discipline implementation, registered via register_qdisc(). */
struct Qdisc_ops
{
	struct Qdisc_ops	*next;		/* linked list of registered ops */
	struct Qdisc_class_ops	*cl_ops;	/* NULL for classless qdiscs */
	char			id[IFNAMSIZ];	/* name as used by tc, e.g. "pfifo" */
	int			priv_size;	/* per-instance private data size */

	int 			(*enqueue)(struct sk_buff *, struct Qdisc *);
	struct sk_buff *	(*dequeue)(struct Qdisc *);
	int 			(*requeue)(struct sk_buff *, struct Qdisc *);
	int			(*drop)(struct Qdisc *);

	int			(*init)(struct Qdisc *, struct rtattr *arg);
	void			(*reset)(struct Qdisc *);
	void			(*destroy)(struct Qdisc *);
	int			(*change)(struct Qdisc *, struct rtattr *arg);

	int			(*dump)(struct Qdisc *, struct sk_buff *);
};

/* Global lock protecting the qdisc tree topology; taken for writing by
 * the sch_tree_lock()/tcf_tree_lock() helpers below. */
extern rwlock_t qdisc_tree_lock;

/* An instance of a queueing discipline attached to a net_device. */
struct Qdisc
{
	int 			(*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
	struct sk_buff *	(*dequeue)(struct Qdisc *dev);
	unsigned		flags;
#define TCQ_F_BUILTIN	1	/* statically allocated (e.g. noop_qdisc) */
#define TCQ_F_THROTTLED	2	/* dequeue temporarily held off */
#define TCQ_F_INGRES	4	/* ingress qdisc (sic — historical spelling,
				 * kept because external code tests this name) */
	struct Qdisc_ops	*ops;
	struct Qdisc		*next;
	u32			handle;
	atomic_t		refcnt;
	struct sk_buff_head	q;
	struct net_device	*dev;

	struct tc_stats		stats;
	int			(*reshape_fail)(struct sk_buff *skb, struct Qdisc *q);

	/* This field is deprecated, but it is still used by CBQ
	 * and it will live until a better solution is invented.
	 */
	struct Qdisc		*__parent;

	/* Trailing private area; presumably ops->priv_size bytes — confirm
	 * against the allocation site. */
	char			data[0];
};

/* Refcounted rate table shared between qdisc instances; obtained and
 * released through qdisc_get_rtab()/qdisc_put_rtab(). */
struct qdisc_rate_table
{
	struct tc_ratespec rate;
	u32		data[256];
	struct qdisc_rate_table *next;
	int		refcnt;
};

/* Take the global tree lock plus the device queue lock; used while the
 * structure of q's tree is being modified. */
static inline void sch_tree_lock(struct Qdisc *q)
{
	write_lock(&qdisc_tree_lock);
	spin_lock_bh(&q->dev->queue_lock);
}

/* Release in the reverse order of sch_tree_lock(). */
static inline void sch_tree_unlock(struct Qdisc *q)
{
	spin_unlock_bh(&q->dev->queue_lock);
	write_unlock(&qdisc_tree_lock);
}

/* Same locking as sch_tree_lock(), reached through a filter's qdisc. */
static inline void tcf_tree_lock(struct tcf_proto *tp)
{
	write_lock(&qdisc_tree_lock);
	spin_lock_bh(&tp->q->dev->queue_lock);
}

static inline void tcf_tree_unlock(struct tcf_proto *tp)
{
	spin_unlock_bh(&tp->q->dev->queue_lock);
	write_unlock(&qdisc_tree_lock);
}


/* Atomically (under the tcf tree lock) replace *clp with cl and return
 * the previous value. */
static inline unsigned long
cls_set_class(struct tcf_proto *tp, unsigned long *clp, unsigned long cl)
{
	unsigned long old_cl;

	tcf_tree_lock(tp);
	old_cl = *clp;
	*clp = cl;
	tcf_tree_unlock(tp);
	return old_cl;
}

/* Lockless variant of cls_set_class(); caller must already hold the
 * tree lock. */
static inline unsigned long
__cls_set_class(unsigned long *clp, unsigned long cl)
{
	unsigned long old_cl;

	old_cl = *clp;
	*clp = cl;
	return old_cl;
}


/*
   Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth

   Normal IP packet size ~ 512byte, hence:

   0.5Kbyte/1Mbyte/sec = 0.5msec, so that we need 50usec timer for
   10Mbit ethernet.

   10msec resolution -> <50Kbit/sec.

   The result: [34]86 is not a good choice for a QoS router :-(

   The things are not so bad, because we may use an artificial
   clock evaluated by integration of network data flow
   in the most critical places.

   Note: we do not use fastgettimeofday.
   The reason is that, when it is not the same thing as
   gettimeofday, it returns an invalid timestamp, which is
   not updated, when net_bh is active.

   So, use PSCHED_CLOCK_SOURCE = PSCHED_CPU on alpha and pentiums
   with rdtsc. And PSCHED_JIFFIES on all other architectures, including [34]86
   and pentiums without rdtsc.
   You can use PSCHED_GETTIMEOFDAY on other architectures,
   which have a fast and precise clock source, but it is too expensive.
 */

/* General note about internal clock.

   Any clock source returns time intervals, measured in units
   close to 1usec. With source PSCHED_GETTIMEOFDAY it is precisely
   microseconds, otherwise something close but different chosen to minimize
   arithmetic cost. The ratio usec/internal units in the form
   numerator/denominator may be read from /proc/net/psched.
 */


#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY

/* Timestamps are plain microsecond-resolution timevals. */
typedef struct timeval	psched_time_t;
typedef long		psched_tdiff_t;

#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
/* Convert a delay in usec to jiffies, rounding up. */
#define PSCHED_US2JIFFIE(usecs) (((usecs)+(1000000/HZ-1))/(1000000/HZ))

#define PSCHED_EXPORTLIST EXPORT_SYMBOL(psched_tod_diff);

#else /* PSCHED_CLOCK_SOURCE != PSCHED_GETTIMEOFDAY */

#define PSCHED_EXPORTLIST PSCHED_EXPORTLIST_1 PSCHED_EXPORTLIST_2

/* Timestamps are a scalar count of internal ~1usec units. */
typedef u64	psched_time_t;
typedef long	psched_tdiff_t;

extern psched_time_t	psched_time_base;

#if PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES

/* Left shift applied to jiffies so that one internal unit lands close
 * to 1usec: HZ=100 -> 10000usec/8192 units; HZ=1024 -> 977usec/1024. */
#if HZ == 100
#define PSCHED_JSCALE 13
#elif HZ == 1024
#define PSCHED_JSCALE 10
#else
#define PSCHED_JSCALE 0
#endif

#define PSCHED_EXPORTLIST_2

#if BITS_PER_LONG <= 32

#define PSCHED_WATCHER unsigned long

extern PSCHED_WATCHER psched_time_mark;

/* On 32-bit, jiffies<<PSCHED_JSCALE would overflow, so the shifted
 * offset since psched_time_mark is added to a 64-bit base that is
 * presumably advanced elsewhere before the offset wraps — confirm
 * against net/sched/sch_api.c. */
#define PSCHED_GET_TIME(stamp) ((stamp) = psched_time_base + (((unsigned long)(jiffies-psched_time_mark))<<PSCHED_JSCALE))

#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \
                            EXPORT_SYMBOL(psched_time_mark);

#else

/* 64-bit longs: the shifted jiffies value cannot overflow. */
#define PSCHED_GET_TIME(stamp) ((stamp) = (jiffies<<PSCHED_JSCALE))

#define PSCHED_EXPORTLIST_1

#endif

/* Convert a delay in internal units to jiffies, rounding up. */
#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE)

#elif PSCHED_CLOCK_SOURCE == PSCHED_CPU

extern psched_tdiff_t psched_clock_per_hz;	/* internal units per jiffy */
extern int psched_clock_scale;			/* right shift applied to raw counter */

#define PSCHED_EXPORTLIST_2 EXPORT_SYMBOL(psched_clock_per_hz); \
                            EXPORT_SYMBOL(psched_clock_scale);

#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)

#ifdef CONFIG_X86_TSC

/* Full 64-bit TSC read; scaling brings one unit close to 1usec. */
#define PSCHED_GET_TIME(stamp) \
({ u64 __cur; \
   rdtscll(__cur); \
   (stamp) = __cur>>psched_clock_scale; \
})

#define PSCHED_EXPORTLIST_1

#elif defined(__alpha__)

#define PSCHED_WATCHER u32
269 270extern PSCHED_WATCHER psched_time_mark; 271 272#define PSCHED_GET_TIME(stamp) \ 273({ u32 __res; \ 274 __asm__ __volatile__ ("rpcc %0" : "r="(__res)); \ 275 if (__res <= psched_time_mark) psched_time_base += 0x100000000UL; \ 276 psched_time_mark = __res; \ 277 (stamp) = (psched_time_base + __res)>>psched_clock_scale; \ 278}) 279 280#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \ 281 EXPORT_SYMBOL(psched_time_mark); 282 283#else 284 285#error PSCHED_CLOCK_SOURCE=PSCHED_CPU is not supported on this arch. 286 287#endif /* ARCH */ 288 289#endif /* PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES */ 290 291#endif /* PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY */ 292 293#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY 294#define PSCHED_TDIFF(tv1, tv2) \ 295({ \ 296 int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ 297 int __delta = (tv1).tv_usec - (tv2).tv_usec; \ 298 if (__delta_sec) { \ 299 switch (__delta_sec) { \ 300 default: \ 301 __delta = 0; \ 302 case 2: \ 303 __delta += 1000000; \ 304 case 1: \ 305 __delta += 1000000; \ 306 } \ 307 } \ 308 __delta; \ 309}) 310 311extern int psched_tod_diff(int delta_sec, int bound); 312 313#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \ 314({ \ 315 int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ 316 int __delta = (tv1).tv_usec - (tv2).tv_usec; \ 317 switch (__delta_sec) { \ 318 default: \ 319 __delta = psched_tod_diff(__delta_sec, bound); guard; break; \ 320 case 2: \ 321 __delta += 1000000; \ 322 case 1: \ 323 __delta += 1000000; \ 324 case 0: ; \ 325 } \ 326 __delta; \ 327}) 328 329#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \ 330 (tv1).tv_sec <= (tv2).tv_sec) || \ 331 (tv1).tv_sec < (tv2).tv_sec) 332 333#define PSCHED_TADD2(tv, delta, tv_res) \ 334({ \ 335 int __delta = (tv).tv_usec + (delta); \ 336 (tv_res).tv_sec = (tv).tv_sec; \ 337 if (__delta > 1000000) { (tv_res).tv_sec++; __delta -= 1000000; } \ 338 (tv_res).tv_usec = __delta; \ 339}) 340 341#define PSCHED_TADD(tv, delta) \ 342({ \ 343 
(tv).tv_usec += (delta); \ 344 if ((tv).tv_usec > 1000000) { (tv).tv_sec++; \ 345 (tv).tv_usec -= 1000000; } \ 346}) 347 348/* Set/check that time is in the "past perfect"; 349 it depends on concrete representation of system time 350 */ 351 352#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0) 353#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0) 354 355#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; }) 356 357#else 358 359#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2)) 360#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \ 361({ \ 362 long long __delta = (tv1) - (tv2); \ 363 if ( __delta > (long long)(bound)) { __delta = (bound); guard; } \ 364 __delta; \ 365}) 366 367 368#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2)) 369#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta)) 370#define PSCHED_TADD(tv, delta) ((tv) += (delta)) 371#define PSCHED_SET_PASTPERFECT(t) ((t) = 0) 372#define PSCHED_IS_PASTPERFECT(t) ((t) == 0) 373#define PSCHED_AUDIT_TDIFF(t) 374 375#endif 376 377struct tcf_police 378{ 379 struct tcf_police *next; 380 int refcnt; 381 u32 index; 382 383 int action; 384 int result; 385 u32 ewma_rate; 386 u32 burst; 387 u32 mtu; 388 389 u32 toks; 390 u32 ptoks; 391 psched_time_t t_c; 392 spinlock_t lock; 393 struct qdisc_rate_table *R_tab; 394 struct qdisc_rate_table *P_tab; 395 396 struct tc_stats stats; 397}; 398 399extern int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st); 400extern void tcf_police_destroy(struct tcf_police *p); 401extern struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est); 402extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p); 403extern int tcf_police(struct sk_buff *skb, struct tcf_police *p); 404 405static inline void tcf_police_release(struct tcf_police *p) 406{ 407 if (p && --p->refcnt == 0) 408 tcf_police_destroy(p); 409} 410 411extern struct Qdisc noop_qdisc; 412extern struct Qdisc_ops noop_qdisc_ops; 413extern struct Qdisc_ops 
pfifo_qdisc_ops; 414extern struct Qdisc_ops bfifo_qdisc_ops; 415 416int register_qdisc(struct Qdisc_ops *qops); 417int unregister_qdisc(struct Qdisc_ops *qops); 418struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); 419struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); 420void dev_init_scheduler(struct net_device *dev); 421void dev_shutdown(struct net_device *dev); 422void dev_activate(struct net_device *dev); 423void dev_deactivate(struct net_device *dev); 424void qdisc_reset(struct Qdisc *qdisc); 425void qdisc_destroy(struct Qdisc *qdisc); 426struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops); 427int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt); 428void qdisc_kill_estimator(struct tc_stats *stats); 429struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab); 430void qdisc_put_rtab(struct qdisc_rate_table *tab); 431int teql_init(void); 432int tc_filter_init(void); 433int pktsched_init(void); 434 435extern int qdisc_restart(struct net_device *dev); 436 437static inline void qdisc_run(struct net_device *dev) 438{ 439 while (!netif_queue_stopped(dev) && 440 qdisc_restart(dev)<0) 441 /* NOTHING */; 442} 443 444/* Calculate maximal size of packet seen by hard_start_xmit 445 routine of this device. 446 */ 447static inline unsigned psched_mtu(struct net_device *dev) 448{ 449 unsigned mtu = dev->mtu; 450 return dev->hard_header ? mtu + dev->hard_header_len : mtu; 451} 452 453#endif 454