ip_dummynet.h revision 239124
1259698Sdim/*- 2259698Sdim * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa 3259698Sdim * Portions Copyright (c) 2000 Akamba Corp. 4259698Sdim * All rights reserved 5259698Sdim * 6259698Sdim * Redistribution and use in source and binary forms, with or without 7259698Sdim * modification, are permitted provided that the following conditions 8259698Sdim * are met: 9259698Sdim * 1. Redistributions of source code must retain the above copyright 10259698Sdim * notice, this list of conditions and the following disclaimer. 11259698Sdim * 2. Redistributions in binary form must reproduce the above copyright 12259698Sdim * notice, this list of conditions and the following disclaimer in the 13259698Sdim * documentation and/or other materials provided with the distribution. 14259698Sdim * 15259698Sdim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16259698Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17259698Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18259698Sdim * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19259698Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20259698Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21259698Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22259698Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23259698Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24259698Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25259698Sdim * SUCH DAMAGE. 26259698Sdim * 27259698Sdim * $FreeBSD: head/sys/netinet/ip_dummynet.h 239124 2012-08-07 07:52:25Z luigi $ 28259698Sdim */ 29259698Sdim 30259698Sdim#ifndef _IP_DUMMYNET_H 31259698Sdim#define _IP_DUMMYNET_H 32259698Sdim 33259698Sdim/* 34259698Sdim * Definition of the kernel-userland API for dummynet. 35259698Sdim * 36259698Sdim * Setsockopt() and getsockopt() pass a batch of objects, each 37259698Sdim * of them starting with a "struct dn_id" which should fully identify 38259698Sdim * the object and its relation with others in the sequence. 39259698Sdim * The first object in each request should have 40259698Sdim * type= DN_CMD_*, id = DN_API_VERSION. 41259698Sdim * For other objects, type and subtype specify the object, len indicates 42259698Sdim * the total length including the header, and 'id' identifies the specific 43259698Sdim * object. 44259698Sdim * 45259698Sdim * Most objects are numbered with an identifier in the range 1..65535. 46259698Sdim * DN_MAX_ID indicates the first value outside the range. 47259698Sdim */ 48259698Sdim 49259698Sdim#define DN_API_VERSION 12500000 50259698Sdim#define DN_MAX_ID 0x10000 51259698Sdim 52259698Sdimstruct dn_id { 53259698Sdim uint16_t len; /* total obj len including this header */ 54259698Sdim uint8_t type; 55259698Sdim uint8_t subtype; 56259698Sdim uint32_t id; /* generic id */ 57259698Sdim}; 58259698Sdim 59259698Sdim/* 60259698Sdim * These values are in the type field of struct dn_id. 61259698Sdim * To preserve the ABI, never rearrange the list or delete 62259698Sdim * entries with the exception of DN_LAST 63259698Sdim */ 64259698Sdimenum { 65259698Sdim DN_NONE = 0, 66259698Sdim DN_LINK = 1, 67259698Sdim DN_FS, 68259698Sdim DN_SCH, 69259698Sdim DN_SCH_I, 70259698Sdim DN_QUEUE, 71259698Sdim DN_DELAY_LINE, 72259698Sdim DN_PROFILE, 73259698Sdim DN_FLOW, /* struct dn_flow */ 74259698Sdim DN_TEXT, /* opaque text is the object */ 75259698Sdim 76259698Sdim DN_CMD_CONFIG = 0x80, /* objects follow */ 77259698Sdim DN_CMD_DELETE, /* subtype + list of entries */ 78259698Sdim DN_CMD_GET, /* subtype + list of entries */ 79259698Sdim DN_CMD_FLUSH, 80259698Sdim /* for compatibility with FreeBSD 7.2/8 */ 81259698Sdim DN_COMPAT_PIPE, 82259698Sdim DN_COMPAT_QUEUE, 83259698Sdim DN_GET_COMPAT, 84259698Sdim 85259698Sdim /* special commands for emulation of sysctl variables */ 86259698Sdim DN_SYSCTL_GET, 87259698Sdim DN_SYSCTL_SET, 88259698Sdim 89259698Sdim DN_LAST, 90259698Sdim}; 91259698Sdim 92259698Sdimenum { /* subtype for schedulers, flowset and the like */ 93259698Sdim DN_SCHED_UNKNOWN = 0, 94259698Sdim DN_SCHED_FIFO = 1, 95259698Sdim DN_SCHED_WF2QP = 2, 96259698Sdim /* others are in individual modules */ 97259698Sdim}; 98259698Sdim 99259698Sdimenum { /* user flags */ 100259698Sdim DN_HAVE_MASK = 0x0001, /* fs or sched has a mask */ 101259698Sdim DN_NOERROR = 0x0002, /* do not report errors */ 102259698Sdim DN_QHT_HASH = 0x0004, /* qht is a hash table */ 103259698Sdim DN_QSIZE_BYTES = 0x0008, /* queue size is in bytes */ 104259698Sdim DN_HAS_PROFILE = 0x0010, /* a link has a profile */ 105259698Sdim DN_IS_RED = 0x0020, 106259698Sdim DN_IS_GENTLE_RED= 0x0040, 107259698Sdim DN_PIPE_CMD = 0x1000, /* pipe config... */ 108259698Sdim}; 109259698Sdim 110259698Sdim/* 111259698Sdim * link template. 112259698Sdim */ 113259698Sdimstruct dn_link { 114259698Sdim struct dn_id oid; 115259698Sdim 116259698Sdim /* 117259698Sdim * Userland sets bw and delay in bits/s and milliseconds. 118259698Sdim * The kernel converts this back and forth to bits/tick and ticks. 119259698Sdim * XXX what about burst ? 120259698Sdim */ 121259698Sdim int32_t link_nr; 122259698Sdim int bandwidth; /* bit/s or bits/tick. */ 123259698Sdim int delay; /* ms and ticks */ 124259698Sdim uint64_t burst; /* scaled. bits*Hz XXX */ 125259698Sdim}; 126259698Sdim 127259698Sdim/* 128259698Sdim * A flowset, which is a template for flows. Contains parameters 129259698Sdim * from the command line: id, target scheduler, queue sizes, plr, 130259698Sdim * flow masks, buckets for the flow hash, and possibly scheduler- 131259698Sdim * specific parameters (weight, quantum and so on). 132259698Sdim */ 133259698Sdimstruct dn_fs { 134259698Sdim struct dn_id oid; 135259698Sdim uint32_t fs_nr; /* the flowset number */ 136259698Sdim uint32_t flags; /* userland flags */ 137259698Sdim int qsize; /* queue size in slots or bytes */ 138259698Sdim int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */ 139259698Sdim uint32_t buckets; /* buckets used for the queue hash table */ 140259698Sdim 141259698Sdim struct ipfw_flow_id flow_mask; 142259698Sdim uint32_t sched_nr; /* the scheduler we attach to */ 143259698Sdim /* generic scheduler parameters. Leave them at -1 if unset. 144259698Sdim * Now we use 0: weight, 1: lmax, 2: priority 145259698Sdim */ 146259698Sdim int par[4]; 147259698Sdim 148259698Sdim /* RED/GRED parameters. 149259698Sdim * weight and probabilities are in the range 0..1 represented 150259698Sdim * in fixed point arithmetic with SCALE_RED decimal bits. 151259698Sdim */ 152259698Sdim#define SCALE_RED 16 153259698Sdim#define SCALE(x) ( (x) << SCALE_RED ) 154259698Sdim#define SCALE_VAL(x) ( (x) >> SCALE_RED ) 155259698Sdim#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) 156259698Sdim int w_q ; /* queue weight (scaled) */ 157259698Sdim int max_th ; /* maximum threshold for queue (scaled) */ 158259698Sdim int min_th ; /* minimum threshold for queue (scaled) */ 159259698Sdim int max_p ; /* maximum value for p_b (scaled) */ 160259698Sdim 161259698Sdim}; 162259698Sdim 163259698Sdim/* 164259698Sdim * dn_flow collects flow_id and stats for queues and scheduler 165259698Sdim * instances, and is used to pass these info to userland. 166259698Sdim * oid.type/oid.subtype describe the object, oid.id is number 167259698Sdim * of the parent object. 168259698Sdim */ 169259698Sdimstruct dn_flow { 170259698Sdim struct dn_id oid; 171259698Sdim struct ipfw_flow_id fid; 172259698Sdim uint64_t tot_pkts; /* statistics counters */ 173259698Sdim uint64_t tot_bytes; 174259698Sdim uint32_t length; /* Queue length, in packets */ 175 uint32_t len_bytes; /* Queue length, in bytes */ 176 uint32_t drops; 177}; 178 179 180/* 181 * Scheduler template, mostly indicating the name, number, 182 * sched_mask and buckets. 183 */ 184struct dn_sch { 185 struct dn_id oid; 186 uint32_t sched_nr; /* N, scheduler number */ 187 uint32_t buckets; /* number of buckets for the instances */ 188 uint32_t flags; /* have_mask, ... */ 189 190 char name[16]; /* null terminated */ 191 /* mask to select the appropriate scheduler instance */ 192 struct ipfw_flow_id sched_mask; /* M */ 193}; 194 195 196/* A delay profile is attached to a link. 197 * Note that a profile, as any other object, cannot be longer than 2^16 198 */ 199#define ED_MAX_SAMPLES_NO 1024 200struct dn_profile { 201 struct dn_id oid; 202 /* fields to simulate a delay profile */ 203#define ED_MAX_NAME_LEN 32 204 char name[ED_MAX_NAME_LEN]; 205 int link_nr; 206 int loss_level; 207 int bandwidth; // XXX use link bandwidth? 208 int samples_no; /* actual len of samples[] */ 209 int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */ 210}; 211 212 213 214/* 215 * Overall structure of dummynet 216 217In dummynet, packets are selected with the firewall rules, and passed 218to two different objects: PIPE or QUEUE (bad name). 219 220A QUEUE defines a classifier, which groups packets into flows 221according to a 'mask', puts them into independent queues (one 222per flow) with configurable size and queue management policy, 223and passes flows to a scheduler: 224 225 (flow_mask|sched_mask) sched_mask 226 +---------+ weight Wx +-------------+ 227 | |->-[flow]-->--| |-+ 228 -->--| QUEUE x | ... | | | 229 | |->-[flow]-->--| SCHEDuler N | | 230 +---------+ | | | 231 ... | +--[LINK N]-->-- 232 +---------+ weight Wy | | +--[LINK N]-->-- 233 | |->-[flow]-->--| | | 234 -->--| QUEUE y | ... | | | 235 | |->-[flow]-->--| | | 236 +---------+ +-------------+ | 237 +-------------+ 238 239Many QUEUE objects can connect to the same scheduler, each 240QUEUE object can have its own set of parameters. 241 242In turn, the SCHEDuler 'forks' multiple instances according 243to a 'sched_mask', each instance manages its own set of queues 244and transmits on a private instance of a configurable LINK. 245 246A PIPE is a simplified version of the above, where there 247is no flow_mask, and each scheduler instance handles a single queue. 248 249The following data structures (visible from userland) describe 250the objects used by dummynet: 251 252 + dn_link, contains the main configuration parameters related 253 to delay and bandwidth; 254 + dn_profile describes a delay profile; 255 + dn_flow describes the flow status (flow id, statistics) 256 257 + dn_sch describes a scheduler 258 + dn_fs describes a flowset (msk, weight, queue parameters) 259 260 * 261 */ 262 263#endif /* _IP_DUMMYNET_H */ 264