/* ip_dummynet.h revision 239124 */
/*-
 * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa
 * Portions Copyright (c) 2000 Akamba Corp.
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/netinet/ip_dummynet.h 239124 2012-08-07 07:52:25Z luigi $
 */
29259698Sdim
30259698Sdim#ifndef _IP_DUMMYNET_H
31259698Sdim#define _IP_DUMMYNET_H
32259698Sdim
/*
 * Definition of the kernel-userland API for dummynet.
 *
 * Setsockopt() and getsockopt() pass a batch of objects, each
 * of them starting with a "struct dn_id" which should fully identify
 * the object and its relation with others in the sequence.
 * The first object in each request should have
 *	 type = DN_CMD_*, id = DN_API_VERSION.
 * For other objects, type and subtype specify the object, len indicates
 * the total length including the header, and 'id' identifies the specific
 * object.
 *
 * Most objects are numbered with an identifier in the range 1..65535.
 * DN_MAX_ID indicates the first value outside the range.
 */
48259698Sdim
/* API version; carried in the 'id' field of the first object of a request. */
#define	DN_API_VERSION	12500000
/* First value outside the valid object identifier range 1..65535. */
#define	DN_MAX_ID	0x10000
51259698Sdim
/*
 * Header found at the start of every object passed between userland
 * and the kernel. It identifies the object (type, subtype, id) and
 * gives its total length.
 */
struct dn_id {
	uint16_t	len;	/* total obj len including this header */
	uint8_t		type;	/* object or command type (DN_LINK, DN_CMD_*, ...) */
	uint8_t		subtype;	/* refines 'type', e.g. a DN_SCHED_* value */
	uint32_t	id;	/* generic id; DN_API_VERSION in the first object */
};
58259698Sdim
/*
 * These values are in the type field of struct dn_id.
 * To preserve the ABI, never rearrange the list or delete
 * entries with the exception of DN_LAST.
 *
 * Object types are numbered from 1; command types start at 0x80.
 * Entries without an explicit value take the previous value plus one.
 */
enum {
	DN_NONE = 0,
	DN_LINK = 1,
	DN_FS,
	DN_SCH,
	DN_SCH_I,
	DN_QUEUE,
	DN_DELAY_LINE,
	DN_PROFILE,
	DN_FLOW,		/* struct dn_flow */
	DN_TEXT,		/* opaque text is the object */

	DN_CMD_CONFIG = 0x80,	/* objects follow */
	DN_CMD_DELETE,		/* subtype + list of entries */
	DN_CMD_GET,		/* subtype + list of entries */
	DN_CMD_FLUSH,
	/* for compatibility with FreeBSD 7.2/8 */
	DN_COMPAT_PIPE,
	DN_COMPAT_QUEUE,
	DN_GET_COMPAT,

	/* special commands for emulation of sysctl variables */
	DN_SYSCTL_GET,
	DN_SYSCTL_SET,

	DN_LAST,		/* end marker; the only entry that may move */
};
91259698Sdim
/* Values for the subtype field: schedulers, flowsets and the like. */
enum {
	DN_SCHED_UNKNOWN = 0,
	DN_SCHED_FIFO = 1,
	DN_SCHED_WF2QP = 2,
	/* others are in individual modules */
};
98259698Sdim
/* User flags. Each constant is a distinct single bit. */
enum {
	DN_HAVE_MASK	= 0x0001,	/* fs or sched has a mask */
	DN_NOERROR	= 0x0002,	/* do not report errors */
	DN_QHT_HASH	= 0x0004,	/* qht is a hash table */
	DN_QSIZE_BYTES	= 0x0008,	/* queue size is in bytes */
	DN_HAS_PROFILE	= 0x0010,	/* a link has a profile */
	DN_IS_RED	= 0x0020,
	DN_IS_GENTLE_RED = 0x0040,
	DN_PIPE_CMD	= 0x1000,	/* pipe config... */
};
109259698Sdim
110259698Sdim/*
111259698Sdim * link template.
112259698Sdim */
113259698Sdimstruct dn_link {
114259698Sdim	struct dn_id oid;
115259698Sdim
116259698Sdim	/*
117259698Sdim	 * Userland sets bw and delay in bits/s and milliseconds.
118259698Sdim	 * The kernel converts this back and forth to bits/tick and ticks.
119259698Sdim	 * XXX what about burst ?
120259698Sdim	 */
121259698Sdim	int32_t		link_nr;
122259698Sdim	int		bandwidth;	/* bit/s or bits/tick.   */
123259698Sdim	int		delay;		/* ms and ticks */
124259698Sdim	uint64_t	burst;		/* scaled. bits*Hz  XXX */
125259698Sdim};
126259698Sdim
127259698Sdim/*
128259698Sdim * A flowset, which is a template for flows. Contains parameters
129259698Sdim * from the command line: id, target scheduler, queue sizes, plr,
130259698Sdim * flow masks, buckets for the flow hash, and possibly scheduler-
131259698Sdim * specific parameters (weight, quantum and so on).
132259698Sdim */
133259698Sdimstruct dn_fs {
134259698Sdim	struct dn_id oid;
135259698Sdim	uint32_t fs_nr;		/* the flowset number */
136259698Sdim	uint32_t flags;		/* userland flags */
137259698Sdim	int qsize;		/* queue size in slots or bytes */
138259698Sdim	int32_t plr;		/* PLR, pkt loss rate (2^31-1 means 100%) */
139259698Sdim	uint32_t buckets;	/* buckets used for the queue hash table */
140259698Sdim
141259698Sdim	struct ipfw_flow_id flow_mask;
142259698Sdim	uint32_t sched_nr;	/* the scheduler we attach to */
143259698Sdim	/* generic scheduler parameters. Leave them at -1 if unset.
144259698Sdim	 * Now we use 0: weight, 1: lmax, 2: priority
145259698Sdim	 */
146259698Sdim	int par[4];
147259698Sdim
148259698Sdim	/* RED/GRED parameters.
149259698Sdim	 * weight and probabilities are in the range 0..1 represented
150259698Sdim	 * in fixed point arithmetic with SCALE_RED decimal bits.
151259698Sdim	 */
152259698Sdim#define SCALE_RED	16
153259698Sdim#define SCALE(x)	( (x) << SCALE_RED )
154259698Sdim#define SCALE_VAL(x)	( (x) >> SCALE_RED )
155259698Sdim#define SCALE_MUL(x,y)	( ( (x) * (y) ) >> SCALE_RED )
156259698Sdim	int w_q ;		/* queue weight (scaled) */
157259698Sdim	int max_th ;		/* maximum threshold for queue (scaled) */
158259698Sdim	int min_th ;		/* minimum threshold for queue (scaled) */
159259698Sdim	int max_p ;		/* maximum value for p_b (scaled) */
160259698Sdim
161259698Sdim};
162259698Sdim
163259698Sdim/*
164259698Sdim * dn_flow collects flow_id and stats for queues and scheduler
165259698Sdim * instances, and is used to pass these info to userland.
166259698Sdim * oid.type/oid.subtype describe the object, oid.id is number
167259698Sdim * of the parent object.
168259698Sdim */
169259698Sdimstruct dn_flow {
170259698Sdim	struct dn_id	oid;
171259698Sdim	struct ipfw_flow_id fid;
172259698Sdim	uint64_t	tot_pkts; /* statistics counters  */
173259698Sdim	uint64_t	tot_bytes;
174259698Sdim	uint32_t	length; /* Queue length, in packets */
175	uint32_t	len_bytes; /* Queue length, in bytes */
176	uint32_t	drops;
177};
178
179
180/*
181 * Scheduler template, mostly indicating the name, number,
182 * sched_mask and buckets.
183 */
184struct dn_sch {
185	struct dn_id	oid;
186	uint32_t	sched_nr; /* N, scheduler number */
187	uint32_t	buckets; /* number of buckets for the instances */
188	uint32_t	flags;	/* have_mask, ... */
189
190	char name[16];	/* null terminated */
191	/* mask to select the appropriate scheduler instance */
192	struct ipfw_flow_id sched_mask; /* M */
193};
194
195
196/* A delay profile is attached to a link.
197 * Note that a profile, as any other object, cannot be longer than 2^16
198 */
199#define	ED_MAX_SAMPLES_NO	1024
200struct dn_profile {
201	struct dn_id	oid;
202	/* fields to simulate a delay profile */
203#define ED_MAX_NAME_LEN		32
204	char	name[ED_MAX_NAME_LEN];
205	int	link_nr;
206	int	loss_level;
207	int	bandwidth;			// XXX use link bandwidth?
208	int	samples_no;			/* actual len of samples[] */
209	int	samples[ED_MAX_SAMPLES_NO];	/* may be shorter */
210};
211
212
213
/*
 * Overall structure of dummynet

In dummynet, packets are selected with the firewall rules, and passed
to two different objects: PIPE or QUEUE (bad name).

A QUEUE defines a classifier, which groups packets into flows
according to a 'mask', puts them into independent queues (one
per flow) with configurable size and queue management policy,
and passes flows to a scheduler:

                 (flow_mask|sched_mask)  sched_mask
	 +---------+   weight Wx  +-------------+
         |         |->-[flow]-->--|             |-+
    -->--| QUEUE x |   ...        |             | |
         |         |->-[flow]-->--| SCHEDuler N | |
	 +---------+              |             | |
	     ...                  |             +--[LINK N]-->--
	 +---------+   weight Wy  |             | +--[LINK N]-->--
         |         |->-[flow]-->--|             | |
    -->--| QUEUE y |   ...        |             | |
         |         |->-[flow]-->--|             | |
	 +---------+              +-------------+ |
	                            +-------------+

Many QUEUE objects can connect to the same scheduler, each
QUEUE object can have its own set of parameters.

In turn, the SCHEDuler 'forks' multiple instances according
to a 'sched_mask', each instance manages its own set of queues
and transmits on a private instance of a configurable LINK.

A PIPE is a simplified version of the above, where there
is no flow_mask, and each scheduler instance handles a single queue.

The following data structures (visible from userland) describe
the objects used by dummynet:

 + dn_link, contains the main configuration parameters related
   to delay and bandwidth;
 + dn_profile describes a delay profile;
 + dn_flow describes the flow status (flow id, statistics)

 + dn_sch describes a scheduler
 + dn_fs describes a flowset (mask, weight, queue parameters)

 *
 */
262
263#endif /* _IP_DUMMYNET_H */
264