1#ifndef __NET_PKT_SCHED_H
2#define __NET_PKT_SCHED_H
3
/* Identifiers for the clock sources the packet scheduler can be built with. */
#define PSCHED_GETTIMEOFDAY	1
#define PSCHED_JIFFIES		2
#define PSCHED_CPU		3

/* Clock source selected for this build; tested by the #if chains below. */
#define PSCHED_CLOCK_SOURCE	PSCHED_JIFFIES
9
10#include <linux/config.h>
11#include <linux/types.h>
12#include <linux/pkt_sched.h>
13#include <net/pkt_cls.h>
14
15#ifdef CONFIG_X86_TSC
16#include <asm/msr.h>
17#endif
18
19struct rtattr;
20struct Qdisc;
21
/*
 * Argument bundle passed to Qdisc_class_ops->walk().
 * NOTE(review): the meanings given for stop/skip/count are inferred from
 * the field names only -- confirm against the walk() implementations.
 */
struct qdisc_walker {
	int	stop;	/* presumably set non-zero to end the walk early */
	int	skip;	/* presumably number of leading entries to pass over */
	int	count;	/* presumably running count of entries visited */

	/* Callback taking the qdisc, a class value and this walker. */
	int	(*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
};
29
30struct Qdisc_class_ops
31{
32	/* Child qdisc manipulation */
33	int			(*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **);
34	struct Qdisc *		(*leaf)(struct Qdisc *, unsigned long cl);
35
36	/* Class manipulation routines */
37	unsigned long		(*get)(struct Qdisc *, u32 classid);
38	void			(*put)(struct Qdisc *, unsigned long);
39	int			(*change)(struct Qdisc *, u32, u32, struct rtattr **, unsigned long *);
40	int			(*delete)(struct Qdisc *, unsigned long);
41	void			(*walk)(struct Qdisc *, struct qdisc_walker * arg);
42
43	/* Filter manipulation */
44	struct tcf_proto **	(*tcf_chain)(struct Qdisc *, unsigned long);
45	unsigned long		(*bind_tcf)(struct Qdisc *, unsigned long, u32 classid);
46	void			(*unbind_tcf)(struct Qdisc *, unsigned long);
47
48	/* rtnetlink specific */
49	int			(*dump)(struct Qdisc *, unsigned long, struct sk_buff *skb, struct tcmsg*);
50};
51
52struct Qdisc_ops
53{
54	struct Qdisc_ops	*next;
55	struct Qdisc_class_ops	*cl_ops;
56	char			id[IFNAMSIZ];
57	int			priv_size;
58
59	int 			(*enqueue)(struct sk_buff *, struct Qdisc *);
60	struct sk_buff *	(*dequeue)(struct Qdisc *);
61	int 			(*requeue)(struct sk_buff *, struct Qdisc *);
62	int			(*drop)(struct Qdisc *);
63
64	int			(*init)(struct Qdisc *, struct rtattr *arg);
65	void			(*reset)(struct Qdisc *);
66	void			(*destroy)(struct Qdisc *);
67	int			(*change)(struct Qdisc *, struct rtattr *arg);
68
69	int			(*dump)(struct Qdisc *, struct sk_buff *);
70};
71
72extern rwlock_t qdisc_tree_lock;
73
74struct Qdisc
75{
76	int 			(*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
77	struct sk_buff *	(*dequeue)(struct Qdisc *dev);
78	unsigned		flags;
79#define TCQ_F_BUILTIN	1
80#define TCQ_F_THROTTLED	2
81#define TCQ_F_INGRES	4
82	struct Qdisc_ops	*ops;
83	struct Qdisc		*next;
84	u32			handle;
85	atomic_t		refcnt;
86	struct sk_buff_head	q;
87	struct net_device	*dev;
88
89	struct tc_stats		stats;
90	int			(*reshape_fail)(struct sk_buff *skb, struct Qdisc *q);
91
92	/* This field is deprecated, but it is still used by CBQ
93	 * and it will live until better solution will be invented.
94	 */
95	struct Qdisc		*__parent;
96
97	char			data[0];
98};
99
100struct qdisc_rate_table
101{
102	struct tc_ratespec rate;
103	u32		data[256];
104	struct qdisc_rate_table *next;
105	int		refcnt;
106};
107
108static inline void sch_tree_lock(struct Qdisc *q)
109{
110	write_lock(&qdisc_tree_lock);
111	spin_lock_bh(&q->dev->queue_lock);
112}
113
114static inline void sch_tree_unlock(struct Qdisc *q)
115{
116	spin_unlock_bh(&q->dev->queue_lock);
117	write_unlock(&qdisc_tree_lock);
118}
119
120static inline void tcf_tree_lock(struct tcf_proto *tp)
121{
122	write_lock(&qdisc_tree_lock);
123	spin_lock_bh(&tp->q->dev->queue_lock);
124}
125
126static inline void tcf_tree_unlock(struct tcf_proto *tp)
127{
128	spin_unlock_bh(&tp->q->dev->queue_lock);
129	write_unlock(&qdisc_tree_lock);
130}
131
132
/*
 * Store 'cl' into *clp while holding the tree locks of 'tp'
 * (tcf_tree_lock/tcf_tree_unlock) and return the value it replaced.
 */
static inline unsigned long
cls_set_class(struct tcf_proto *tp, unsigned long *clp, unsigned long cl)
{
	unsigned long prev;

	tcf_tree_lock(tp);
	prev = *clp;
	*clp = cl;
	tcf_tree_unlock(tp);

	return prev;
}
144
/*
 * Lock-free variant of cls_set_class(): store 'cl' into *clp and return
 * the previous value.  No locking is done here.
 */
static inline unsigned long
__cls_set_class(unsigned long *clp, unsigned long cl)
{
	unsigned long prev = *clp;

	*clp = cl;
	return prev;
}
154
155
156/*
157   Timer resolution MUST BE < 10% of min_schedulable_packet_size/bandwidth
158
159   Normal IP packet size ~ 512byte, hence:
160
161   0.5Kbyte/1Mbyte/sec = 0.5msec, so that we need 50usec timer for
162   10Mbit ethernet.
163
164   10msec resolution -> <50Kbit/sec.
165
   The result: [34]86 is not a good choice for a QoS router :-(

   Things are not so bad, because we may use an artificial
   clock evaluated by integration of network data flow
   in the most critical places.
171
172   Note: we do not use fastgettimeofday.
173   The reason is that, when it is not the same thing as
174   gettimeofday, it returns invalid timestamp, which is
175   not updated, when net_bh is active.
176
   So, use PSCHED_CLOCK_SOURCE = PSCHED_CPU on alpha and pentiums
   with rdtsc. And PSCHED_JIFFIES on all other architectures, including [34]86
   and pentiums without rdtsc.
   You can use PSCHED_GETTIMEOFDAY on other architectures
   which have a fast and precise clock source, but it is too expensive.
182 */
183
184/* General note about internal clock.
185
   Any clock source returns time intervals, measured in units
   close to 1usec. With source PSCHED_GETTIMEOFDAY it is precisely
   microseconds, otherwise something close but different, chosen to minimize
   arithmetic cost. The ratio usec/internal units, in numerator/denominator
   form, may be read from /proc/net/psched.
191 */
192
193
194#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
195
/* With this clock source a time stamp is a plain struct timeval. */
typedef struct timeval	psched_time_t;
typedef long		psched_tdiff_t;

/* Fill 'stamp' through do_gettimeofday(). */
#define PSCHED_GET_TIME(stamp)	do_gettimeofday(&(stamp))

/* Convert microseconds to jiffies, rounding up to a whole jiffy. */
#define PSCHED_US2JIFFIE(usecs) \
	(((usecs) + (1000000 / HZ - 1)) / (1000000 / HZ))

/* Symbol export list for this clock source. */
#define PSCHED_EXPORTLIST	EXPORT_SYMBOL(psched_tod_diff);
203
204#else /* PSCHED_CLOCK_SOURCE != PSCHED_GETTIMEOFDAY */
205
206#define PSCHED_EXPORTLIST PSCHED_EXPORTLIST_1 PSCHED_EXPORTLIST_2
207
208typedef u64	psched_time_t;
209typedef long	psched_tdiff_t;
210
211extern psched_time_t	psched_time_base;
212
213#if PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
214
/*
 * Left-shift applied to jiffies to get internal clock units:
 * 1<<13 = 8192 units per jiffy at HZ == 100, 1<<10 = 1024 at HZ == 1024.
 * Any other HZ gets no scaling at all.
 */
#if HZ == 100
#  define PSCHED_JSCALE 13
#elif HZ == 1024
#  define PSCHED_JSCALE 10
#else
#  define PSCHED_JSCALE 0
#endif
222
/* The jiffies clock contributes nothing to the second export list. */
#define PSCHED_EXPORTLIST_2

#if BITS_PER_LONG <= 32

/*
 * Longs of at most 32 bits: the stamp is psched_time_base plus the
 * jiffies elapsed since psched_time_mark, shifted left by PSCHED_JSCALE.
 * NOTE(review): base and mark are presumably advanced together elsewhere
 * so the unsigned long delta stays small -- confirm in the scheduler core.
 */
#define PSCHED_WATCHER unsigned long

extern PSCHED_WATCHER psched_time_mark;

#define PSCHED_GET_TIME(stamp) \
	((stamp) = psched_time_base + \
		   (((unsigned long)(jiffies - psched_time_mark)) << PSCHED_JSCALE))

#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \
                            EXPORT_SYMBOL(psched_time_mark);

#else

/* Wider longs: the stamp is simply jiffies shifted left by PSCHED_JSCALE. */
#define PSCHED_GET_TIME(stamp) ((stamp) = (jiffies << PSCHED_JSCALE))

#define PSCHED_EXPORTLIST_1

#endif

/* Convert internal clock units to jiffies, rounding up. */
#define PSCHED_US2JIFFIE(delay) \
	(((delay) + (1 << PSCHED_JSCALE) - 1) >> PSCHED_JSCALE)
245
246#elif PSCHED_CLOCK_SOURCE == PSCHED_CPU
247
248extern psched_tdiff_t psched_clock_per_hz;
249extern int psched_clock_scale;
250
251#define PSCHED_EXPORTLIST_2 EXPORT_SYMBOL(psched_clock_per_hz); \
252                            EXPORT_SYMBOL(psched_clock_scale);
253
254#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
255
256#ifdef CONFIG_X86_TSC
257
/*
 * Read the 64-bit counter with rdtscll() and store it in 'stamp',
 * shifted right by psched_clock_scale bits.
 */
#define PSCHED_GET_TIME(stamp) \
({ \
	u64 __tsc; \
	rdtscll(__tsc); \
	(stamp) = __tsc >> psched_clock_scale; \
})

/* This variant adds nothing to the first export list. */
#define PSCHED_EXPORTLIST_1
265
266#elif defined(__alpha__)
267
268#define PSCHED_WATCHER u32
269
270extern PSCHED_WATCHER psched_time_mark;
271
272#define PSCHED_GET_TIME(stamp) \
273({ u32 __res; \
274   __asm__ __volatile__ ("rpcc %0" : "r="(__res)); \
275   if (__res <= psched_time_mark) psched_time_base += 0x100000000UL; \
276   psched_time_mark = __res; \
277   (stamp) = (psched_time_base + __res)>>psched_clock_scale; \
278})
279
280#define PSCHED_EXPORTLIST_1 EXPORT_SYMBOL(psched_time_base); \
281                            EXPORT_SYMBOL(psched_time_mark);
282
283#else
284
285#error PSCHED_CLOCK_SOURCE=PSCHED_CPU is not supported on this arch.
286
287#endif /* ARCH */
288
289#endif /* PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES */
290
291#endif /* PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY */
292
293#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
/*
 * Difference tv1 - tv2 in microseconds, as an int.
 * A seconds gap of 0, 1 or 2 gives usec difference plus that many
 * million; any other gap (negative ones included) gives exactly 2000000.
 * Each argument is evaluated twice.
 */
#define PSCHED_TDIFF(tv1, tv2) \
({ \
	int __d_sec = (tv1).tv_sec - (tv2).tv_sec; \
	int __d_usec = (tv1).tv_usec - (tv2).tv_usec; \
	__d_sec == 0 ? __d_usec : \
	__d_sec == 1 ? __d_usec + 1000000 : \
	__d_sec == 2 ? __d_usec + 2000000 : \
	2000000; \
})
310
extern int psched_tod_diff(int delta_sec, int bound);

/*
 * Like PSCHED_TDIFF, but a seconds gap other than 0, 1 or 2 is handed to
 * psched_tod_diff(gap, bound) and the caller-supplied 'guard' statement
 * is then executed.  'guard' is expanded inside the switch.
 * Each of tv1/tv2 is evaluated twice.
 */
#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \
({ \
	int __d_sec = (tv1).tv_sec - (tv2).tv_sec; \
	int __d_usec = (tv1).tv_usec - (tv2).tv_usec; \
	switch (__d_sec) { \
	case 0: \
		break; \
	case 2: \
		__d_usec += 1000000; \
		/* fall through */ \
	case 1: \
		__d_usec += 1000000; \
		break; \
	default: \
		__d_usec = psched_tod_diff(__d_sec, bound); \
		guard; \
		break; \
	} \
	__d_usec; \
})
328
/* True when tv1 is strictly earlier than tv2 (seconds first, then usec). */
#define PSCHED_TLESS(tv1, tv2) \
	((tv1).tv_sec < (tv2).tv_sec || \
	 ((tv1).tv_sec == (tv2).tv_sec && (tv1).tv_usec < (tv2).tv_usec))
332
/*
 * tv_res = tv + delta, where delta is in microseconds.
 * A single carry into tv_sec is performed, so the usec sum must stay
 * below two seconds.  The carry test is ">=" so that a sum of exactly
 * 1000000 usec is normalised to the next second instead of being left
 * as an out-of-range tv_usec.
 */
#define PSCHED_TADD2(tv, delta, tv_res) \
({ \
	   int __delta = (tv).tv_usec + (delta); \
	   (tv_res).tv_sec = (tv).tv_sec; \
	   if (__delta >= 1000000) { (tv_res).tv_sec++; __delta -= 1000000; } \
	   (tv_res).tv_usec = __delta; \
})
340
/*
 * tv += delta in place, where delta is in microseconds.
 * A single carry into tv_sec is performed, so the usec sum must stay
 * below two seconds.  The carry test is ">=" so that tv_usec never ends
 * up as exactly 1000000.
 */
#define PSCHED_TADD(tv, delta) \
({ \
	   (tv).tv_usec += (delta); \
	   if ((tv).tv_usec >= 1000000) { (tv).tv_sec++; \
		 (tv).tv_usec -= 1000000; } \
})
347
/*
 * Mark a stamp as "past perfect", or test for that mark.  How the mark is
 * stored depends on the concrete representation of system time; for a
 * struct timeval it is tv_sec == 0 (tv_usec is left alone).
 */
#define PSCHED_SET_PASTPERFECT(t)	((t).tv_sec = 0)
#define PSCHED_IS_PASTPERFECT(t)	((t).tv_sec == 0)

/* Clamp a time difference to at most 2000000 (two seconds in usec). */
#define PSCHED_AUDIT_TDIFF(t) \
({ \
	if ((t) > 2000000) \
		(t) = 2000000; \
})
356
357#else
358
/* Scalar time stamps: the difference is a plain subtraction cast to long. */
#define PSCHED_TDIFF(tv1, tv2)	((long)((tv1) - (tv2)))

/*
 * Difference tv1 - tv2 capped at 'bound'; when the cap is applied the
 * caller-supplied 'guard' statement is executed as well.
 */
#define PSCHED_TDIFF_SAFE(tv1, tv2, bound, guard) \
({ \
	long long __d = (tv1) - (tv2); \
	if (__d > (long long)(bound)) { \
		__d = (bound); \
		guard; \
	} \
	__d; \
})

#define PSCHED_TLESS(tv1, tv2)		((tv1) < (tv2))
#define PSCHED_TADD2(tv, delta, tv_res)	((tv_res) = (tv) + (delta))
#define PSCHED_TADD(tv, delta)		((tv) += (delta))

/* A stamp of 0 is the "past perfect" marker. */
#define PSCHED_SET_PASTPERFECT(t)	((t) = 0)
#define PSCHED_IS_PASTPERFECT(t)	((t) == 0)

/* Expands to nothing for scalar time stamps. */
#define	PSCHED_AUDIT_TDIFF(t)
374
375#endif
376
377struct tcf_police
378{
379	struct tcf_police *next;
380	int		refcnt;
381	u32		index;
382
383	int		action;
384	int		result;
385	u32		ewma_rate;
386	u32		burst;
387	u32		mtu;
388
389	u32		toks;
390	u32		ptoks;
391	psched_time_t	t_c;
392	spinlock_t	lock;
393	struct qdisc_rate_table *R_tab;
394	struct qdisc_rate_table *P_tab;
395
396	struct tc_stats	stats;
397};
398
/* Statistics helper; defined outside this header. */
extern int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st);

/* Policer entry points; defined outside this header. */
extern void tcf_police_destroy(struct tcf_police *p);
extern struct tcf_police *tcf_police_locate(struct rtattr *rta,
					    struct rtattr *est);
extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p);
extern int tcf_police(struct sk_buff *skb, struct tcf_police *p);
404
405static inline void tcf_police_release(struct tcf_police *p)
406{
407	if (p && --p->refcnt == 0)
408		tcf_police_destroy(p);
409}
410
411extern struct Qdisc noop_qdisc;
412extern struct Qdisc_ops noop_qdisc_ops;
413extern struct Qdisc_ops pfifo_qdisc_ops;
414extern struct Qdisc_ops bfifo_qdisc_ops;
415
416int register_qdisc(struct Qdisc_ops *qops);
417int unregister_qdisc(struct Qdisc_ops *qops);
418struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
419struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
420void dev_init_scheduler(struct net_device *dev);
421void dev_shutdown(struct net_device *dev);
422void dev_activate(struct net_device *dev);
423void dev_deactivate(struct net_device *dev);
424void qdisc_reset(struct Qdisc *qdisc);
425void qdisc_destroy(struct Qdisc *qdisc);
426struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops);
427int qdisc_new_estimator(struct tc_stats *stats, struct rtattr *opt);
428void qdisc_kill_estimator(struct tc_stats *stats);
429struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab);
430void qdisc_put_rtab(struct qdisc_rate_table *tab);
431int teql_init(void);
432int tc_filter_init(void);
433int pktsched_init(void);
434
435extern int qdisc_restart(struct net_device *dev);
436
/*
 * Keep calling qdisc_restart(dev) until either the device queue is
 * stopped (netif_queue_stopped) or qdisc_restart() returns a value
 * that is not negative.  The stopped check comes before every call.
 */
static inline void qdisc_run(struct net_device *dev)
{
	for (;;) {
		if (netif_queue_stopped(dev))
			break;
		if (qdisc_restart(dev) >= 0)
			break;
	}
}
443
444/* Calculate maximal size of packet seen by hard_start_xmit
445   routine of this device.
446 */
447static inline unsigned psched_mtu(struct net_device *dev)
448{
449	unsigned mtu = dev->mtu;
450	return dev->hard_header ? mtu + dev->hard_header_len : mtu;
451}
452
453#endif
454