ip_dummynet.h revision 61413
1263851Sjkim/* 2263851Sjkim * Copyright (c) 1998-2000 Luigi Rizzo, Universita` di Pisa 3263851Sjkim * Portions Copyright (c) 2000 Akamba Corp. 4263851Sjkim * All rights reserved 5263851Sjkim * 6263851Sjkim * Redistribution and use in source and binary forms, with or without 7263851Sjkim * modification, are permitted provided that the following conditions 8298714Sjkim * are met: 9263851Sjkim * 1. Redistributions of source code must retain the above copyright 10263851Sjkim * notice, this list of conditions and the following disclaimer. 11263851Sjkim * 2. Redistributions in binary form must reproduce the above copyright 12263851Sjkim * notice, this list of conditions and the following disclaimer in the 13263851Sjkim * documentation and/or other materials provided with the distribution. 14263851Sjkim * 15263851Sjkim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16263851Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17263851Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18263851Sjkim * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19263851Sjkim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20263851Sjkim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21263851Sjkim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22263851Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23263851Sjkim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24263851Sjkim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25263851Sjkim * SUCH DAMAGE. 26263851Sjkim * 27263851Sjkim * $FreeBSD: head/sys/netinet/ip_dummynet.h 61413 2000-06-08 09:45:23Z luigi $ 28263851Sjkim */ 29263851Sjkim 30263851Sjkim#ifndef _IP_DUMMYNET_H 31263851Sjkim#define _IP_DUMMYNET_H 32263851Sjkim 33263851Sjkim/* 34263851Sjkim * Definition of dummynet data structures. 35263851Sjkim * We first start with the heap which is used by the scheduler. 36263851Sjkim * 37263851Sjkim * Each list contains a set of parameters identifying the pipe, and 38263851Sjkim * a set of packets queued on the pipe itself. 39263851Sjkim * 40263851Sjkim * I could have used queue macros, but the management i have 41263851Sjkim * is pretty simple and this makes the code more portable. 42263851Sjkim */ 43263851Sjkim 44272444Sjkim/* 45272444Sjkim * The key for the heap is used for two different values 46272444Sjkim 1. timer ticks- max 10K/second, so 32 bits are enough 47272444Sjkim 2. virtual times. These increase in steps of len/x, where len is the 48263851Sjkim packet length, and x is either the weight of the flow, or the 49263851Sjkim sum of all weights. 50263851Sjkim If we limit to max 1000 flows and a max weight of 100, then 51263851Sjkim x needs 17 bits. The packet size is 16 bits, so we can easily 52263851Sjkim overflow if we do not allow errors. 53263851Sjkim 54263851Sjkim */ 55263851Sjkimtypedef u_int64_t dn_key ; /* sorting key */ 56263851Sjkim#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) 57263851Sjkim#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) 58263851Sjkim#define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0) 59263851Sjkim#define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0) 60263851Sjkim/* XXX check names of next two macros */ 61263851Sjkim#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) 62263851Sjkim#define MY_M 16 /* number of left shift to obtain a larger precision */ 63263851Sjkim/* 64263851Sjkim * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the 65263851Sjkim * virtual time wraps every 15 days. 66263851Sjkim */ 67263851Sjkim 68263851Sjkim#define OFFSET_OF(type, field) ((int)&( ((type *)0)->field) ) 69263851Sjkim 70263851Sjkimstruct dn_heap_entry { 71263851Sjkim dn_key key ; /* sorting key. Topmost element is smallest one */ 72263851Sjkim void *object ; /* object pointer */ 73263851Sjkim} ; 74263851Sjkim 75263851Sjkimstruct dn_heap { 76263851Sjkim int size ; 77263851Sjkim int elements ; 78263851Sjkim int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ 79263851Sjkim struct dn_heap_entry *p ; /* really an array of "size" entries */ 80284583Sjkim} ; 81263851Sjkim 82263851Sjkim/* 83263851Sjkim * MT_DUMMYNET is a new (fake) mbuf type that is prepended to the 84263851Sjkim * packet when it comes out of a pipe. The definition 85263851Sjkim * ought to go in /sys/sys/mbuf.h but here it is less intrusive. 86263851Sjkim */ 87263851Sjkim 88263851Sjkim#define MT_DUMMYNET MT_CONTROL 89263851Sjkim 90263851Sjkim 91263851Sjkim/* 92263851Sjkim * struct dn_pkt identifies a packet in the dummynet queue. The 93263851Sjkim * first part is really an m_hdr for implementation purposes, and some 94263851Sjkim * fields are saved there. When passing the packet back to the ip_input/ 95263851Sjkim * ip_output(), the struct is prepended to the mbuf chain with type 96263851Sjkim * MT_DUMMYNET, and contains the pointer to the matching rule. 97263851Sjkim */ 98263851Sjkimstruct dn_pkt { 99263851Sjkim struct m_hdr hdr ; 100263851Sjkim#define dn_next hdr.mh_nextpkt /* next element in queue */ 101263851Sjkim#define DN_NEXT(x) (struct dn_pkt *)(x)->dn_next 102263851Sjkim#define dn_m hdr.mh_next /* packet to be forwarded */ 103263851Sjkim#define dn_dir hdr.mh_flags /* action when pkt extracted from a queue */ 104263851Sjkim#define DN_TO_IP_OUT 1 105263851Sjkim#define DN_TO_IP_IN 2 106263851Sjkim#define DN_TO_BDG_FWD 3 107263851Sjkim 108263851Sjkim dn_key output_time; /* when the pkt is due for delivery */ 109263851Sjkim struct ifnet *ifp; /* interface, for ip_output */ 110263851Sjkim struct sockaddr_in *dn_dst ; 111263851Sjkim struct route ro; /* route, for ip_output. MUST COPY */ 112263851Sjkim int flags ; /* flags, for ip_output (IPv6 ?) */ 113263851Sjkim}; 114263851Sjkim 115263851Sjkim/* 116263851Sjkim * Overall structure (with WFQ): 117263851Sjkim 118263851SjkimWe have 3 data structures definining a pipe and associated queues: 119263851Sjkim + dn_pipe, which contains the main configuration parameters related 120263851Sjkim to delay and bandwidth 121263851Sjkim + dn_flow_set which contains WFQ configuration, flow 122263851Sjkim masks, plr and RED configuration 123263851Sjkim + dn_flow_queue which is the per-flow queue. 124281396Sjkim Multiple dn_flow_set can be linked to the same pipe, and multiple 125298714Sjkim dn_flow_queue can be linked to the same dn_flow_set. 126263851Sjkim 127263851Sjkim During configuration we set the dn_flow_set and dn_pipe parameters. 128263851Sjkim At runtime: packets are sent to the dn_flow_set (either WFQ ones, or 129263851Sjkim the one embedded in the dn_pipe for fixed-rate flows) which in turn 130263851Sjkim dispatches them to the appropriate dn_flow_queue (created dynamically 131263851Sjkim according to the masks). 132263851Sjkim The transmit clock for fixed rate flows (ready_event) selects the 133263851Sjkim dn_flow_queue to be used to transmit the next packet. For WF2Q, 134263851Sjkim wfq_ready_event() extract a pipe which in turn selects the right 135263851Sjkim flow using a number of heaps defined into the pipe. 136263851Sjkim 137263851Sjkim * 138263851Sjkim */ 139263851Sjkim 140263851Sjkim/* 141263851Sjkim * We use per flow queues. Hashing is used to select the right slot, 142263851Sjkim * then we scan the list to match the flow-id. 143263851Sjkim */ 144263851Sjkimstruct dn_flow_queue { 145263851Sjkim struct dn_flow_queue *next ; 146263851Sjkim struct ipfw_flow_id id ; 147263851Sjkim struct dn_pkt *head, *tail ; /* queue of packets */ 148263851Sjkim u_int len ; 149263851Sjkim u_int len_bytes ; 150263851Sjkim long numbytes ; /* credit for transmission (dynamic queues) */ 151263851Sjkim 152263851Sjkim u_int64_t tot_pkts ; /* statistics counters */ 153263851Sjkim u_int64_t tot_bytes ; 154263851Sjkim u_int32_t drops ; 155263851Sjkim int hash_slot ; /* debugging/diagnostic */ 156263851Sjkim 157263851Sjkim /* RED parameters */ 158263851Sjkim int avg ; /* average queue length est. (scaled) */ 159263851Sjkim int count ; /* arrivals since last RED drop */ 160263851Sjkim int random ; /* random value (scaled) */ 161263851Sjkim u_int32_t q_time ; /* start of queue idle time */ 162263851Sjkim 163263851Sjkim /* WF2Q+ support */ 164263851Sjkim struct dn_flow_set *fs ; /* parent flow set */ 165263851Sjkim int blh_pos ; /* position in backlogged_heap */ 166263851Sjkim dn_key sched_time ; /* current time when queue enters ready_heap */ 167263851Sjkim 168263851Sjkim dn_key S,F ; /* start-time, finishing time */ 169263851Sjkim} ; 170263851Sjkim 171263851Sjkimstruct dn_flow_set { 172263851Sjkim struct dn_flow_set *next; /* next flow set in all_flow_sets list */ 173263851Sjkim 174263851Sjkim u_short fs_nr ; /* flow_set number */ 175263851Sjkim u_short flags_fs; 176263851Sjkim#define DN_HAVE_FLOW_MASK 0x0001 177263851Sjkim#define DN_IS_PIPE 0x4000 178263851Sjkim#define DN_IS_QUEUE 0x8000 179263851Sjkim#define DN_IS_RED 0x0002 180263851Sjkim#define DN_IS_GENTLE_RED 0x0004 181263851Sjkim#define DN_QSIZE_IS_BYTES 0x0008 /* queue measured in bytes */ 182263851Sjkim 183263851Sjkim struct dn_pipe *pipe ; /* pointer to parent pipe */ 184263851Sjkim u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ 185263851Sjkim 186263851Sjkim int weight ; /* WFQ queue weight */ 187263851Sjkim int qsize ; /* queue size in slots or bytes */ 188263851Sjkim int plr ; /* pkt loss rate (2^31-1 means 100%) */ 189263851Sjkim 190263851Sjkim struct ipfw_flow_id flow_mask ; 191263851Sjkim /* hash table of queues onto this flow_set */ 192263851Sjkim int rq_size ; /* number of slots */ 193263851Sjkim int rq_elements ; /* active elements */ 194263851Sjkim struct dn_flow_queue **rq; /* array of rq_size entries */ 195263851Sjkim u_int32_t last_expired ; /* do not expire too frequently */ 196263851Sjkim /* XXX some RED parameters as well ? */ 197263851Sjkim int backlogged ; /* #active queues for this flowset */ 198263851Sjkim 199263851Sjkim /* RED parameters */ 200263851Sjkim#define SCALE_RED 16 201263851Sjkim#define SCALE(x) ( (x) << SCALE_RED ) 202263851Sjkim#define SCALE_VAL(x) ( (x) >> SCALE_RED ) 203263851Sjkim#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) 204263851Sjkim int w_q ; /* queue weight (scaled) */ 205263851Sjkim int max_th ; /* maximum threshold for queue (scaled) */ 206263851Sjkim int min_th ; /* minimum threshold for queue (scaled) */ 207263851Sjkim int max_p ; /* maximum value for p_b (scaled) */ 208263851Sjkim u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ 209263851Sjkim u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ 210263851Sjkim u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ 211263851Sjkim u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ 212263851Sjkim u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ 213263851Sjkim u_int lookup_depth ; /* depth of lookup table */ 214263851Sjkim int lookup_step ; /* granularity inside the lookup table */ 215263851Sjkim int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ 216263851Sjkim int avg_pkt_size ; /* medium packet size */ 217263851Sjkim int max_pkt_size ; /* max packet size */ 218263851Sjkim} ; 219263851Sjkim 220263851Sjkim/* 221263851Sjkim * Pipe descriptor. Contains global parameters, delay-line queue. 222263851Sjkim * 223263851Sjkim * For WF2Q support it also has 3 heaps holding dn_flow_queue: 224263851Sjkim * not_eligible_heap, for queues whose start time is higher 225263851Sjkim * than the virtual time. Sorted by start time. 226263851Sjkim * scheduler_heap, for queues eligible for scheduling. Sorted by 227263851Sjkim * finish time. 228263851Sjkim * backlogged_heap, all flows in the two heaps above, sorted by 229263851Sjkim * start time. This is used to compute the virtual time. 230263851Sjkim * 231281396Sjkim */ 232298714Sjkimstruct dn_pipe { /* a pipe */ 233263851Sjkim struct dn_pipe *next ; 234263851Sjkim 235263851Sjkim int pipe_nr ; /* number */ 236263851Sjkim int bandwidth; /* really, bytes/tick. */ 237263851Sjkim int delay ; /* really, ticks */ 238263851Sjkim 239263851Sjkim struct dn_pkt *head, *tail ; /* packets in delay line */ 240263851Sjkim 241263851Sjkim /* WF2Q+ */ 242263851Sjkim struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ 243263851Sjkim struct dn_heap not_eligible_heap; /* top extract- key Start time */ 244263851Sjkim struct dn_heap backlogged_heap ; /* random extract - key Start time */ 245263851Sjkim 246263851Sjkim dn_key V ; /* virtual time */ 247263851Sjkim int sum; /* sum of weights of all active sessions */ 248263851Sjkim int numbytes; /* bit i can transmit (more or less). */ 249263851Sjkim 250263851Sjkim dn_key sched_time ; /* first time pipe is scheduled in ready_heap */ 251263851Sjkim 252263851Sjkim /* the tx clock can come from an interface. In this case, the 253263851Sjkim * name is below, and the pointer is filled when the rule is 254263851Sjkim * configured. We identify this by setting the if_name to a 255263851Sjkim * non-empty string. 256263851Sjkim */ 257263851Sjkim char if_name[16]; 258263851Sjkim struct ifnet *ifp ; 259263851Sjkim int ready ; /* set if ifp != NULL and we got a signal from it */ 260263851Sjkim 261263851Sjkim struct dn_flow_set fs ; /* used with fixed-rate flows */ 262263851Sjkim}; 263263851Sjkim 264263851Sjkim#ifdef _KERNEL 265263851Sjkim 266263851SjkimMALLOC_DECLARE(M_IPFW); 267263851Sjkim 268263851Sjkimtypedef int ip_dn_ctl_t __P((struct sockopt *)) ; 269263851Sjkimextern ip_dn_ctl_t *ip_dn_ctl_ptr; 270263851Sjkim 271263851Sjkimvoid dn_rule_delete(void *r); /* used in ip_fw.c */ 272263851Sjkimint dummynet_io(int pipe, int dir, 273263851Sjkim struct mbuf *m, struct ifnet *ifp, struct route *ro, 274263851Sjkim struct sockaddr_in * dst, 275263851Sjkim struct ip_fw_chain *rule, int flags); 276263851Sjkim#endif 277263851Sjkim 278263851Sjkim#endif /* _IP_DUMMYNET_H */ 279263851Sjkim