ip_dummynet.h revision 71133
1/* 2 * Copyright (c) 1998-2000 Luigi Rizzo, Universita` di Pisa 3 * Portions Copyright (c) 2000 Akamba Corp. 4 * All rights reserved 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: head/sys/netinet/ip_dummynet.h 71133 2001-01-16 23:49:49Z luigi $ 28 */ 29 30#ifndef _IP_DUMMYNET_H 31#define _IP_DUMMYNET_H 32 33/* 34 * Definition of dummynet data structures. 35 * We first start with the heap which is used by the scheduler. 36 * 37 * Each list contains a set of parameters identifying the pipe, and 38 * a set of packets queued on the pipe itself. 39 * 40 * I could have used queue macros, but the management i have 41 * is pretty simple and this makes the code more portable. 42 */ 43 44/* 45 * The key for the heap is used for two different values 46 1. timer ticks- max 10K/second, so 32 bits are enough 47 2. virtual times. These increase in steps of len/x, where len is the 48 packet length, and x is either the weight of the flow, or the 49 sum of all weights. 50 If we limit to max 1000 flows and a max weight of 100, then 51 x needs 17 bits. The packet size is 16 bits, so we can easily 52 overflow if we do not allow errors. 53 54 */ 55typedef u_int64_t dn_key ; /* sorting key */ 56#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) 57#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) 58#define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0) 59#define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0) 60/* XXX check names of next two macros */ 61#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) 62#define MY_M 16 /* number of left shift to obtain a larger precision */ 63/* 64 * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the 65 * virtual time wraps every 15 days. 66 */ 67 68#define OFFSET_OF(type, field) ((int)&( ((type *)0)->field) ) 69 70struct dn_heap_entry { 71 dn_key key ; /* sorting key. Topmost element is smallest one */ 72 void *object ; /* object pointer */ 73} ; 74 75struct dn_heap { 76 int size ; 77 int elements ; 78 int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ 79 struct dn_heap_entry *p ; /* really an array of "size" entries */ 80} ; 81 82/* 83 * MT_DUMMYNET is a new (fake) mbuf type that is prepended to the 84 * packet when it comes out of a pipe. The definition 85 * ought to go in /sys/sys/mbuf.h but here it is less intrusive. 86 */ 87 88#define MT_DUMMYNET MT_CONTROL 89 90 91/* 92 * struct dn_pkt identifies a packet in the dummynet queue. The 93 * first part is really an m_hdr for implementation purposes, and some 94 * fields are saved there. When passing the packet back to the ip_input/ 95 * ip_output(), the struct is prepended to the mbuf chain with type 96 * MT_DUMMYNET, and contains the pointer to the matching rule. 97 */ 98struct dn_pkt { 99 struct m_hdr hdr ; 100#define dn_next hdr.mh_nextpkt /* next element in queue */ 101#define DN_NEXT(x) (struct dn_pkt *)(x)->dn_next 102#define dn_m hdr.mh_next /* packet to be forwarded */ 103#define dn_dir hdr.mh_flags /* action when pkt extracted from a queue */ 104#define DN_TO_IP_OUT 1 105#define DN_TO_IP_IN 2 106#define DN_TO_BDG_FWD 3 107 108 dn_key output_time; /* when the pkt is due for delivery */ 109 struct ifnet *ifp; /* interface, for ip_output */ 110 struct sockaddr_in *dn_dst ; 111 struct route ro; /* route, for ip_output. MUST COPY */ 112 int flags ; /* flags, for ip_output (IPv6 ?) */ 113}; 114 115/* 116 * Overall structure (with WFQ): 117 118We have 3 data structures definining a pipe and associated queues: 119 + dn_pipe, which contains the main configuration parameters related 120 to delay and bandwidth 121 + dn_flow_set which contains WFQ configuration, flow 122 masks, plr and RED configuration 123 + dn_flow_queue which is the per-flow queue. 124 Multiple dn_flow_set can be linked to the same pipe, and multiple 125 dn_flow_queue can be linked to the same dn_flow_set. 126 127 During configuration we set the dn_flow_set and dn_pipe parameters. 128 At runtime: packets are sent to the dn_flow_set (either WFQ ones, or 129 the one embedded in the dn_pipe for fixed-rate flows) which in turn 130 dispatches them to the appropriate dn_flow_queue (created dynamically 131 according to the masks). 132 The transmit clock for fixed rate flows (ready_event) selects the 133 dn_flow_queue to be used to transmit the next packet. For WF2Q, 134 wfq_ready_event() extract a pipe which in turn selects the right 135 flow using a number of heaps defined into the pipe. 136 137 * 138 */ 139 140/* 141 * We use per flow queues. Hashing is used to select the right slot, 142 * then we scan the list to match the flow-id. 143 */ 144struct dn_flow_queue { 145 struct dn_flow_queue *next ; 146 struct ipfw_flow_id id ; 147 struct dn_pkt *head, *tail ; /* queue of packets */ 148 u_int len ; 149 u_int len_bytes ; 150 long numbytes ; /* credit for transmission (dynamic queues) */ 151 152 u_int64_t tot_pkts ; /* statistics counters */ 153 u_int64_t tot_bytes ; 154 u_int32_t drops ; 155 int hash_slot ; /* debugging/diagnostic */ 156 157 /* RED parameters */ 158 int avg ; /* average queue length est. (scaled) */ 159 int count ; /* arrivals since last RED drop */ 160 int random ; /* random value (scaled) */ 161 u_int32_t q_time ; /* start of queue idle time */ 162 163 /* WF2Q+ support */ 164 struct dn_flow_set *fs ; /* parent flow set */ 165 int blh_pos ; /* position in backlogged_heap */ 166 dn_key sched_time ; /* current time when queue enters ready_heap */ 167 168 dn_key S,F ; /* start-time, finishing time */ 169 /* setting F < S means the timestamp is invalid. We only need 170 * to test this when the queue is empty. 171 */ 172} ; 173 174struct dn_flow_set { 175 struct dn_flow_set *next; /* next flow set in all_flow_sets list */ 176 177 u_short fs_nr ; /* flow_set number */ 178 u_short flags_fs; 179#define DN_HAVE_FLOW_MASK 0x0001 180#define DN_IS_PIPE 0x4000 181#define DN_IS_QUEUE 0x8000 182#define DN_IS_RED 0x0002 183#define DN_IS_GENTLE_RED 0x0004 184#define DN_QSIZE_IS_BYTES 0x0008 /* queue measured in bytes */ 185 186 struct dn_pipe *pipe ; /* pointer to parent pipe */ 187 u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ 188 189 int weight ; /* WFQ queue weight */ 190 int qsize ; /* queue size in slots or bytes */ 191 int plr ; /* pkt loss rate (2^31-1 means 100%) */ 192 193 struct ipfw_flow_id flow_mask ; 194 /* hash table of queues onto this flow_set */ 195 int rq_size ; /* number of slots */ 196 int rq_elements ; /* active elements */ 197 struct dn_flow_queue **rq; /* array of rq_size entries */ 198 u_int32_t last_expired ; /* do not expire too frequently */ 199 /* XXX some RED parameters as well ? */ 200 int backlogged ; /* #active queues for this flowset */ 201 202 /* RED parameters */ 203#define SCALE_RED 16 204#define SCALE(x) ( (x) << SCALE_RED ) 205#define SCALE_VAL(x) ( (x) >> SCALE_RED ) 206#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) 207 int w_q ; /* queue weight (scaled) */ 208 int max_th ; /* maximum threshold for queue (scaled) */ 209 int min_th ; /* minimum threshold for queue (scaled) */ 210 int max_p ; /* maximum value for p_b (scaled) */ 211 u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ 212 u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ 213 u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ 214 u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ 215 u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ 216 u_int lookup_depth ; /* depth of lookup table */ 217 int lookup_step ; /* granularity inside the lookup table */ 218 int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ 219 int avg_pkt_size ; /* medium packet size */ 220 int max_pkt_size ; /* max packet size */ 221} ; 222 223/* 224 * Pipe descriptor. Contains global parameters, delay-line queue. 225 * 226 * For WF2Q support it also has 3 heaps holding dn_flow_queue: 227 * not_eligible_heap, for queues whose start time is higher 228 * than the virtual time. Sorted by start time. 229 * scheduler_heap, for queues eligible for scheduling. Sorted by 230 * finish time. 231 * backlogged_heap, all flows in the two heaps above, sorted by 232 * start time. This is used to compute the virtual time. 233 * 234 */ 235struct dn_pipe { /* a pipe */ 236 struct dn_pipe *next ; 237 238 int pipe_nr ; /* number */ 239 int bandwidth; /* really, bytes/tick. */ 240 int delay ; /* really, ticks */ 241 242 struct dn_pkt *head, *tail ; /* packets in delay line */ 243 244 /* WF2Q+ */ 245 struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ 246 struct dn_heap not_eligible_heap; /* top extract- key Start time */ 247 struct dn_heap backlogged_heap ; /* random extract - key Start time */ 248 struct dn_heap idle_heap ; /* random extract - key Start=Finish time */ 249 250 dn_key V ; /* virtual time */ 251 int sum; /* sum of weights of all active sessions */ 252 int numbytes; /* bit i can transmit (more or less). */ 253 254 dn_key sched_time ; /* first time pipe is scheduled in ready_heap */ 255 256 /* the tx clock can come from an interface. In this case, the 257 * name is below, and the pointer is filled when the rule is 258 * configured. We identify this by setting the if_name to a 259 * non-empty string. 260 */ 261 char if_name[16]; 262 struct ifnet *ifp ; 263 int ready ; /* set if ifp != NULL and we got a signal from it */ 264 265 struct dn_flow_set fs ; /* used with fixed-rate flows */ 266}; 267 268#ifdef _KERNEL 269 270MALLOC_DECLARE(M_IPFW); 271 272typedef int ip_dn_ctl_t __P((struct sockopt *)) ; 273extern ip_dn_ctl_t *ip_dn_ctl_ptr; 274 275void dn_rule_delete(void *r); /* used in ip_fw.c */ 276int dummynet_io(int pipe, int dir, 277 struct mbuf *m, struct ifnet *ifp, struct route *ro, 278 struct sockaddr_in * dst, 279 struct ip_fw_chain *rule, int flags); 280#endif 281 282#endif /* _IP_DUMMYNET_H */ 283