/*
 * Configuration and internal object management for dummynet.
 */

#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/taskqueue.h>
#include <net/if.h>	/* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
#include <netinet/in.h>
#include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
#include <netinet/ip_fw.h>
#include <netinet/ipfw/ip_fw_private.h>
#include <netinet/ipfw/dn_heap.h>
#include <netinet/ip_dummynet.h>
#include <netinet/ipfw/ip_dn_private.h>
#include <netinet/ipfw/dn_sched.h>

/* which objects to copy */
#define DN_C_LINK	0x01
#define DN_C_SCH	0x02
#define DN_C_FLOW	0x04
#define DN_C_FS		0x08
#define DN_C_QUEUE	0x10

/* we use this argument in case of a schk_new */
struct schk_new_arg {
	struct dn_alg *fp;	/* scheduler algorithm descriptor */
	struct dn_sch *sch;	/* user-supplied scheduler config */
};

/*---- callout hooks. ----*/
static struct callout dn_timeout;
static struct task dn_task;
static struct taskqueue *dn_tq = NULL;

/*
 * Callout handler: just enqueue the dummynet task so the real
 * work runs from the taskqueue context rather than the callout.
 */
static void
dummynet(void * __unused unused)
{

	taskqueue_enqueue(dn_tq, &dn_task);
}

/* Re-arm the dummynet callout to fire on the next tick. */
void
dn_reschedule(void)
{
	callout_reset(&dn_timeout, 1, dummynet, NULL);
}
/*----- end of callout hooks -----*/

/* Return a scheduler descriptor given the type or name.
*/ 94static struct dn_alg * 95find_sched_type(int type, char *name) 96{ 97 struct dn_alg *d; 98 99 SLIST_FOREACH(d, &dn_cfg.schedlist, next) { 100 if (d->type == type || (name && !strcmp(d->name, name))) 101 return d; 102 } 103 return NULL; /* not found */ 104} 105 106int 107ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) 108{ 109 int oldv = *v; 110 const char *op = NULL; 111 if (oldv < lo) { 112 *v = dflt; 113 op = "Bump"; 114 } else if (oldv > hi) { 115 *v = hi; 116 op = "Clamp"; 117 } else 118 return *v; 119 if (op && msg) 120 printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); 121 return *v; 122} 123 124/*---- flow_id mask, hash and compare functions ---*/ 125/* 126 * The flow_id includes the 5-tuple, the queue/pipe number 127 * which we store in the extra area in host order, 128 * and for ipv6 also the flow_id6. 129 * XXX see if we want the tos byte (can store in 'flags') 130 */ 131static struct ipfw_flow_id * 132flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) 133{ 134 int is_v6 = IS_IP6_FLOW_ID(id); 135 136 id->dst_port &= mask->dst_port; 137 id->src_port &= mask->src_port; 138 id->proto &= mask->proto; 139 id->extra &= mask->extra; 140 if (is_v6) { 141 APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); 142 APPLY_MASK(&id->src_ip6, &mask->src_ip6); 143 id->flow_id6 &= mask->flow_id6; 144 } else { 145 id->dst_ip &= mask->dst_ip; 146 id->src_ip &= mask->src_ip; 147 } 148 return id; 149} 150 151/* computes an OR of two masks, result in dst and also returned */ 152static struct ipfw_flow_id * 153flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst) 154{ 155 int is_v6 = IS_IP6_FLOW_ID(dst); 156 157 dst->dst_port |= src->dst_port; 158 dst->src_port |= src->src_port; 159 dst->proto |= src->proto; 160 dst->extra |= src->extra; 161 if (is_v6) { 162#define OR_MASK(_d, _s) \ 163 (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \ 164 (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \ 165 
(_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \ 166 (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3]; 167 OR_MASK(&dst->dst_ip6, &src->dst_ip6); 168 OR_MASK(&dst->src_ip6, &src->src_ip6); 169#undef OR_MASK 170 dst->flow_id6 |= src->flow_id6; 171 } else { 172 dst->dst_ip |= src->dst_ip; 173 dst->src_ip |= src->src_ip; 174 } 175 return dst; 176} 177 178static int 179nonzero_mask(struct ipfw_flow_id *m) 180{ 181 if (m->dst_port || m->src_port || m->proto || m->extra) 182 return 1; 183 if (IS_IP6_FLOW_ID(m)) { 184 return 185 m->dst_ip6.__u6_addr.__u6_addr32[0] || 186 m->dst_ip6.__u6_addr.__u6_addr32[1] || 187 m->dst_ip6.__u6_addr.__u6_addr32[2] || 188 m->dst_ip6.__u6_addr.__u6_addr32[3] || 189 m->src_ip6.__u6_addr.__u6_addr32[0] || 190 m->src_ip6.__u6_addr.__u6_addr32[1] || 191 m->src_ip6.__u6_addr.__u6_addr32[2] || 192 m->src_ip6.__u6_addr.__u6_addr32[3] || 193 m->flow_id6; 194 } else { 195 return m->dst_ip || m->src_ip; 196 } 197} 198 199/* XXX we may want a better hash function */ 200static uint32_t 201flow_id_hash(struct ipfw_flow_id *id) 202{ 203 uint32_t i; 204 205 if (IS_IP6_FLOW_ID(id)) { 206 uint32_t *d = (uint32_t *)&id->dst_ip6; 207 uint32_t *s = (uint32_t *)&id->src_ip6; 208 i = (d[0] ) ^ (d[1]) ^ 209 (d[2] ) ^ (d[3]) ^ 210 (d[0] >> 15) ^ (d[1] >> 15) ^ 211 (d[2] >> 15) ^ (d[3] >> 15) ^ 212 (s[0] << 1) ^ (s[1] << 1) ^ 213 (s[2] << 1) ^ (s[3] << 1) ^ 214 (s[0] << 16) ^ (s[1] << 16) ^ 215 (s[2] << 16) ^ (s[3] << 16) ^ 216 (id->dst_port << 1) ^ (id->src_port) ^ 217 (id->extra) ^ 218 (id->proto ) ^ (id->flow_id6); 219 } else { 220 i = (id->dst_ip) ^ (id->dst_ip >> 15) ^ 221 (id->src_ip << 1) ^ (id->src_ip >> 16) ^ 222 (id->extra) ^ 223 (id->dst_port << 1) ^ (id->src_port) ^ (id->proto); 224 } 225 return i; 226} 227 228/* Like bcmp, returns 0 if ids match, 1 otherwise. 
*/ 229static int 230flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2) 231{ 232 int is_v6 = IS_IP6_FLOW_ID(id1); 233 234 if (!is_v6) { 235 if (IS_IP6_FLOW_ID(id2)) 236 return 1; /* different address families */ 237 238 return (id1->dst_ip == id2->dst_ip && 239 id1->src_ip == id2->src_ip && 240 id1->dst_port == id2->dst_port && 241 id1->src_port == id2->src_port && 242 id1->proto == id2->proto && 243 id1->extra == id2->extra) ? 0 : 1; 244 } 245 /* the ipv6 case */ 246 return ( 247 !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) && 248 !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) && 249 id1->dst_port == id2->dst_port && 250 id1->src_port == id2->src_port && 251 id1->proto == id2->proto && 252 id1->extra == id2->extra && 253 id1->flow_id6 == id2->flow_id6) ? 0 : 1; 254} 255/*--------- end of flow-id mask, hash and compare ---------*/ 256 257/*--- support functions for the qht hashtable ---- 258 * Entries are hashed by flow-id 259 */ 260static uint32_t 261q_hash(uintptr_t key, int flags, void *arg) 262{ 263 /* compute the hash slot from the flow id */ 264 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 265 &((struct dn_queue *)key)->ni.fid : 266 (struct ipfw_flow_id *)key; 267 268 return flow_id_hash(id); 269} 270 271static int 272q_match(void *obj, uintptr_t key, int flags, void *arg) 273{ 274 struct dn_queue *o = (struct dn_queue *)obj; 275 struct ipfw_flow_id *id2; 276 277 if (flags & DNHT_KEY_IS_OBJ) { 278 /* compare pointers */ 279 id2 = &((struct dn_queue *)key)->ni.fid; 280 } else { 281 id2 = (struct ipfw_flow_id *)key; 282 } 283 return (0 == flow_id_cmp(&o->ni.fid, id2)); 284} 285 286/* 287 * create a new queue instance for the given 'key'. 
 */
static void *
q_new(uintptr_t key, int flags, void *arg)
{
	/* arg is a template queue carrying the flowset and sched instance */
	struct dn_queue *q, *template = arg;
	struct dn_fsk *fs = template->fs;
	/* room for the queue plus the scheduler's per-queue private data */
	int size = sizeof(*q) + fs->sched->fp->q_datalen;

	q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO);
	if (q == NULL) {
		D("no memory for new queue");
		return NULL;
	}

	set_oid(&q->ni.oid, DN_QUEUE, size);
	if (fs->fs.flags & DN_QHT_HASH)
		q->ni.fid = *(struct ipfw_flow_id *)key;
	q->fs = fs;
	q->_si = template->_si;
	q->_si->q_count++;

	/* give the scheduler a chance to set up its private state */
	if (fs->sched->fp->new_queue)
		fs->sched->fp->new_queue(q);
	dn_cfg.queue_count++;
	return q;
}

/*
 * Notify schedulers that a queue is going away.
 * If (flags & DN_DESTROY), also free the packets.
 * The version for callbacks is called q_delete_cb().
 */
static void
dn_delete_queue(struct dn_queue *q, int flags)
{
	struct dn_fsk *fs = q->fs;

	// D("fs %p si %p\n", fs, q->_si);
	/* notify the parent scheduler that the queue is going away */
	if (fs && fs->sched->fp->free_queue)
		fs->sched->fp->free_queue(q);
	q->_si->q_count--;
	q->_si = NULL;
	if (flags & DN_DESTROY) {
		if (q->mq.head)
			dn_free_pkts(q->mq.head);	/* drain queued packets */
		bzero(q, sizeof(*q));	// safety
		free(q, M_DUMMYNET);
		dn_cfg.queue_count--;
	}
}

/*
 * Hash-table scan callback wrapper for dn_delete_queue();
 * asks the scanner to unlink the entry when destroying.
 */
static int
q_delete_cb(void *q, void *arg)
{
	int flags = (int)(uintptr_t)arg;
	dn_delete_queue(q, flags);
	return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0;
}

/*
 * calls dn_delete_queue/q_delete_cb on all queues,
 * which notifies the parent scheduler and possibly drains packets.
 * flags & DN_DESTROY: drains queues and destroy qht;
 */
static void
qht_delete(struct dn_fsk *fs, int flags)
{
	ND("fs %d start flags %d qht %p",
	    fs->fs.fs_nr, flags, fs->qht);
	if (!fs->qht)
		return;
	if (fs->fs.flags & DN_QHT_HASH) {
		/* hashed flowset: walk all queues, then free the table */
		dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags);
		if (flags & DN_DESTROY) {
			dn_ht_free(fs->qht, 0);
			fs->qht = NULL;
		}
	} else {
		/* single-queue flowset: qht is the queue itself */
		dn_delete_queue((struct dn_queue *)(fs->qht), flags);
		if (flags & DN_DESTROY)
			fs->qht = NULL;
	}
}

/*
 * Find and possibly create the queue for a MULTIQUEUE scheduler.
 * We never call it for !MULTIQUEUE (the queue is in the sch_inst).
 */
struct dn_queue *
ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si,
	struct ipfw_flow_id *id)
{
	struct dn_queue template;

	template._si = si;
	template.fs = fs;

	if (fs->fs.flags & DN_QHT_HASH) {
		struct ipfw_flow_id masked_id;
		if (fs->qht == NULL) {
			/* lazily create the queue hash table */
			fs->qht = dn_ht_init(NULL, fs->fs.buckets,
				offsetof(struct dn_queue, q_next),
				q_hash, q_match, q_new);
			if (fs->qht == NULL)
				return NULL;
		}
		/* mask on a copy so the caller's flow id is untouched */
		masked_id = *id;
		flow_id_mask(&fs->fsk_mask, &masked_id);
		return dn_ht_find(fs->qht, (uintptr_t)&masked_id,
			DNHT_INSERT, &template);
	} else {
		if (fs->qht == NULL)
			fs->qht = q_new(0, 0, &template);
		return (struct dn_queue *)fs->qht;
	}
}
/*--- end of queue hash table ---*/

/*--- support functions for the sch_inst hashtable ----
 *
 * These are hashed by flow-id
 */
static uint32_t
si_hash(uintptr_t key, int flags, void *arg)
{
	/* compute the hash slot from the flow id */
	struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
		&((struct dn_sch_inst *)key)->ni.fid :
		(struct ipfw_flow_id *)key;

	return flow_id_hash(id);
}

static int
si_match(void *obj, uintptr_t key, int flags, void *arg)
{
	struct dn_sch_inst *o = obj;
	struct ipfw_flow_id *id2;

	id2 = (flags & DNHT_KEY_IS_OBJ) ?
		&((struct dn_sch_inst *)key)->ni.fid :
		(struct ipfw_flow_id *)key;
	return flow_id_cmp(&o->ni.fid, id2) == 0;
}

/*
 * create a new instance for the given 'key'
 * Allocate memory for instance, delay line and scheduler private data.
 */
static void *
si_new(uintptr_t key, int flags, void *arg)
{
	struct dn_schk *s = arg;
	struct dn_sch_inst *si;
	int l = sizeof(*si) + s->fp->si_datalen;

	si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
	if (si == NULL)
		goto error;
	/* Set length only for the part passed up to userland. */
	set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow));
	set_oid(&(si->dline.oid), DN_DELAY_LINE,
		sizeof(struct delay_line));
	/* mark si and dline as outside the event queue */
	si->ni.oid.id = si->dline.oid.id = -1;

	si->sched = s;
	si->dline.si = si;

	/* let the scheduler initialize its private instance data */
	if (s->fp->new_sched && s->fp->new_sched(si)) {
		D("new_sched error");
		goto error;
	}
	if (s->sch.flags & DN_HAVE_MASK)
		si->ni.fid = *(struct ipfw_flow_id *)key;

	dn_cfg.si_count++;
	return si;

error:
	if (si) {
		bzero(si, sizeof(*si)); // safety
		free(si, M_DUMMYNET);
	}
	return NULL;
}

/*
 * Callback from siht to delete all scheduler instances. Remove
 * si and delay line from the system heap, destroy all queues.
 * We assume that all flowset have been notified and do not
 * point to us anymore.
 */
static int
si_destroy(void *_si, void *arg)
{
	struct dn_sch_inst *si = _si;
	struct dn_schk *s = si->sched;
	struct delay_line *dl = &si->dline;

	if (dl->oid.subtype)	/* remove delay line from event heap */
		heap_extract(&dn_cfg.evheap, dl);
	dn_free_pkts(dl->mq.head);	/* drain delay line */
	if (si->kflags & DN_ACTIVE)	/* remove si from event heap */
		heap_extract(&dn_cfg.evheap, si);
	/* let the scheduler release its private instance data */
	if (s->fp->free_sched)
		s->fp->free_sched(si);
	bzero(si, sizeof(*si));	/* safety */
	free(si, M_DUMMYNET);
	dn_cfg.si_count--;
	return DNHT_SCAN_DEL;
}

/*
 * Find the scheduler instance for this packet. If we need to apply
 * a mask, do on a local copy of the flow_id to preserve the original.
 * Assume siht is always initialized if we have a mask.
 */
struct dn_sch_inst *
ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id)
{

	if (s->sch.flags & DN_HAVE_MASK) {
		struct ipfw_flow_id id_t = *id;
		flow_id_mask(&s->sch.sched_mask, &id_t);
		return dn_ht_find(s->siht, (uintptr_t)&id_t,
			DNHT_INSERT, s);
	}
	/* no mask: a single instance, lazily created, lives in siht */
	if (!s->siht)
		s->siht = si_new(0, 0, s);
	return (struct dn_sch_inst *)s->siht;
}

/* callback to flush credit for the scheduler instance */
static int
si_reset_credit(void *_si, void *arg)
{
	struct dn_sch_inst *si = _si;
	struct dn_link *p = &si->sched->link;

	/* io_fast mode pre-credits one tick worth of bandwidth */
	si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0);
	return 0;
}

/* Reset the credit of all instances of scheduler s. */
static void
schk_reset_credit(struct dn_schk *s)
{
	if (s->sch.flags & DN_HAVE_MASK)
		dn_ht_scan(s->siht, si_reset_credit, NULL);
	else if (s->siht)
		si_reset_credit(s->siht, NULL);
}
/*---- end of sch_inst hashtable ---------------------*/

/*-------------------------------------------------------
 * flowset hash (fshash) support. Entries are hashed by fs_nr.
545 * New allocations are put in the fsunlinked list, from which 546 * they are removed when they point to a specific scheduler. 547 */ 548static uint32_t 549fsk_hash(uintptr_t key, int flags, void *arg) 550{ 551 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 552 ((struct dn_fsk *)key)->fs.fs_nr; 553 554 return ( (i>>8)^(i>>4)^i ); 555} 556 557static int 558fsk_match(void *obj, uintptr_t key, int flags, void *arg) 559{ 560 struct dn_fsk *fs = obj; 561 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 562 ((struct dn_fsk *)key)->fs.fs_nr; 563 564 return (fs->fs.fs_nr == i); 565} 566 567static void * 568fsk_new(uintptr_t key, int flags, void *arg) 569{ 570 struct dn_fsk *fs; 571 572 fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO); 573 if (fs) { 574 set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs)); 575 dn_cfg.fsk_count++; 576 fs->drain_bucket = 0; 577 SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); 578 } 579 return fs; 580} 581 582/* 583 * detach flowset from its current scheduler. Flags as follows: 584 * DN_DETACH removes from the fsk_list 585 * DN_DESTROY deletes individual queues 586 * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked). 587 */ 588static void 589fsk_detach(struct dn_fsk *fs, int flags) 590{ 591 if (flags & DN_DELETE_FS) 592 flags |= DN_DESTROY; 593 ND("fs %d from sched %d flags %s %s %s", 594 fs->fs.fs_nr, fs->fs.sched_nr, 595 (flags & DN_DELETE_FS) ? "DEL_FS":"", 596 (flags & DN_DESTROY) ? "DEL":"", 597 (flags & DN_DETACH) ? "DET":""); 598 if (flags & DN_DETACH) { /* detach from the list */ 599 struct dn_fsk_head *h; 600 h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu; 601 SLIST_REMOVE(h, fs, dn_fsk, sch_chain); 602 } 603 /* Free the RED parameters, they will be recomputed on 604 * subsequent attach if needed. 
605 */ 606 if (fs->w_q_lookup) 607 free(fs->w_q_lookup, M_DUMMYNET); 608 fs->w_q_lookup = NULL; 609 qht_delete(fs, flags); 610 if (fs->sched && fs->sched->fp->free_fsk) 611 fs->sched->fp->free_fsk(fs); 612 fs->sched = NULL; 613 if (flags & DN_DELETE_FS) { 614 bzero(fs, sizeof(fs)); /* safety */ 615 free(fs, M_DUMMYNET); 616 dn_cfg.fsk_count--; 617 } else { 618 SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); 619 } 620} 621 622/* 623 * Detach or destroy all flowsets in a list. 624 * flags specifies what to do: 625 * DN_DESTROY: flush all queues 626 * DN_DELETE_FS: DN_DESTROY + destroy flowset 627 * DN_DELETE_FS implies DN_DESTROY 628 */ 629static void 630fsk_detach_list(struct dn_fsk_head *h, int flags) 631{ 632 struct dn_fsk *fs; 633 int n = 0; /* only for stats */ 634 635 ND("head %p flags %x", h, flags); 636 while ((fs = SLIST_FIRST(h))) { 637 SLIST_REMOVE_HEAD(h, sch_chain); 638 n++; 639 fsk_detach(fs, flags); 640 } 641 ND("done %d flowsets", n); 642} 643 644/* 645 * called on 'queue X delete' -- removes the flowset from fshash, 646 * deletes all queues for the flowset, and removes the flowset. 647 */ 648static int 649delete_fs(int i, int locked) 650{ 651 struct dn_fsk *fs; 652 int err = 0; 653 654 if (!locked) 655 DN_BH_WLOCK(); 656 fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL); 657 ND("fs %d found %p", i, fs); 658 if (fs) { 659 fsk_detach(fs, DN_DETACH | DN_DELETE_FS); 660 err = 0; 661 } else 662 err = EINVAL; 663 if (!locked) 664 DN_BH_WUNLOCK(); 665 return err; 666} 667 668/*----- end of flowset hashtable support -------------*/ 669 670/*------------------------------------------------------------ 671 * Scheduler hash. When searching by index we pass sched_nr, 672 * otherwise we pass struct dn_sch * which is the first field in 673 * struct dn_schk so we can cast between the two. We use this trick 674 * because in the create phase (but it should be fixed). 
675 */ 676static uint32_t 677schk_hash(uintptr_t key, int flags, void *_arg) 678{ 679 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 680 ((struct dn_schk *)key)->sch.sched_nr; 681 return ( (i>>8)^(i>>4)^i ); 682} 683 684static int 685schk_match(void *obj, uintptr_t key, int flags, void *_arg) 686{ 687 struct dn_schk *s = (struct dn_schk *)obj; 688 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 689 ((struct dn_schk *)key)->sch.sched_nr; 690 return (s->sch.sched_nr == i); 691} 692 693/* 694 * Create the entry and intialize with the sched hash if needed. 695 * Leave s->fp unset so we can tell whether a dn_ht_find() returns 696 * a new object or a previously existing one. 697 */ 698static void * 699schk_new(uintptr_t key, int flags, void *arg) 700{ 701 struct schk_new_arg *a = arg; 702 struct dn_schk *s; 703 int l = sizeof(*s) +a->fp->schk_datalen; 704 705 s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); 706 if (s == NULL) 707 return NULL; 708 set_oid(&s->link.oid, DN_LINK, sizeof(s->link)); 709 s->sch = *a->sch; // copy initial values 710 s->link.link_nr = s->sch.sched_nr; 711 SLIST_INIT(&s->fsk_list); 712 /* initialize the hash table or create the single instance */ 713 s->fp = a->fp; /* si_new needs this */ 714 s->drain_bucket = 0; 715 if (s->sch.flags & DN_HAVE_MASK) { 716 s->siht = dn_ht_init(NULL, s->sch.buckets, 717 offsetof(struct dn_sch_inst, si_next), 718 si_hash, si_match, si_new); 719 if (s->siht == NULL) { 720 free(s, M_DUMMYNET); 721 return NULL; 722 } 723 } 724 s->fp = NULL; /* mark as a new scheduler */ 725 dn_cfg.schk_count++; 726 return s; 727} 728 729/* 730 * Callback for sched delete. Notify all attached flowsets to 731 * detach from the scheduler, destroy the internal flowset, and 732 * all instances. The scheduler goes away too. 
 * arg is 0 (only detach flowsets and destroy instances)
 * DN_DESTROY (detach & delete queues, delete schk)
 * or DN_DELETE_FS (delete queues and flowsets, delete schk)
 */
static int
schk_delete_cb(void *obj, void *arg)
{
	struct dn_schk *s = obj;
#if 0
	int a = (int)arg;
	ND("sched %d arg %s%s",
		s->sch.sched_nr,
		a&DN_DESTROY ? "DEL ":"",
		a&DN_DELETE_FS ? "DEL_FS":"");
#endif
	fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0);
	/* no more flowset pointing to us now */
	if (s->sch.flags & DN_HAVE_MASK)
		dn_ht_scan(s->siht, si_destroy, NULL);
	else if (s->siht)
		si_destroy(s->siht, NULL);
	/* drop the link profile, if any */
	if (s->profile) {
		free(s->profile, M_DUMMYNET);
		s->profile = NULL;
	}
	s->siht = NULL;
	/* let the scheduler release its private state */
	if (s->fp->destroy)
		s->fp->destroy(s);
	bzero(s, sizeof(*s));	// safety
	free(obj, M_DUMMYNET);
	dn_cfg.schk_count--;
	return DNHT_SCAN_DEL;
}

/*
 * called on a 'sched X delete' command. Deletes a single scheduler.
 * This is done by removing from the schedhash, unlinking all
 * flowsets and deleting their traffic.
 */
static int
delete_schk(int i)
{
	struct dn_schk *s;

	s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
	ND("%d %p", i, s);
	if (!s)
		return EINVAL;
	delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */
	/* then detach flowsets, delete traffic */
	schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY);
	return 0;
}
/*--- end of schk hashtable support ---*/

/*
 * Copy one object (header at _o, o->len bytes) into the userland
 * buffer window [*start, end), advancing *start. Returns 1 on
 * overflow or malformed object, 0 on success.
 */
static int
copy_obj(char **start, char *end, void *_o, const char *msg, int i)
{
	struct dn_id *o = _o;
	int have = end - *start;

	if (have < o->len || o->len == 0 || o->type == 0) {
		D("(WARN) type %d %s %d have %d need %d",
			o->type, msg, i, have, o->len);
		return 1;
	}
	ND("type %d %s %d len %d", o->type, msg, i, o->len);
	bcopy(_o, *start, o->len);
	if (o->type == DN_LINK) {
		/* Adjust burst parameter for link */
		struct dn_link *l = (struct dn_link *)*start;
		l->burst = div64(l->burst, 8 * hz);	/* undo config_link() scaling */
	} else if (o->type == DN_SCH) {
		/* Set id->id to the number of instances */
		struct dn_schk *s = _o;
		struct dn_id *id = (struct dn_id *)(*start);
		id->id = (s->sch.flags & DN_HAVE_MASK) ?
			dn_ht_entries(s->siht) : (s->siht ? 1 : 0);
	}
	*start += o->len;
	return 0;
}

/* Specific function to copy a queue.
 * Copies only the user-visible part of a queue (which is in
 * a struct dn_flow), and sets len accordingly.
819 */ 820static int 821copy_obj_q(char **start, char *end, void *_o, const char *msg, int i) 822{ 823 struct dn_id *o = _o; 824 int have = end - *start; 825 int len = sizeof(struct dn_flow); /* see above comment */ 826 827 if (have < len || o->len == 0 || o->type != DN_QUEUE) { 828 D("ERROR type %d %s %d have %d need %d", 829 o->type, msg, i, have, len); 830 return 1; 831 } 832 ND("type %d %s %d len %d", o->type, msg, i, len); 833 bcopy(_o, *start, len); 834 ((struct dn_id*)(*start))->len = len; 835 *start += len; 836 return 0; 837} 838 839static int 840copy_q_cb(void *obj, void *arg) 841{ 842 struct dn_queue *q = obj; 843 struct copy_args *a = arg; 844 struct dn_flow *ni = (struct dn_flow *)(*a->start); 845 if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1)) 846 return DNHT_SCAN_END; 847 ni->oid.type = DN_FLOW; /* override the DN_QUEUE */ 848 ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL); 849 return 0; 850} 851 852static int 853copy_q(struct copy_args *a, struct dn_fsk *fs, int flags) 854{ 855 if (!fs->qht) 856 return 0; 857 if (fs->fs.flags & DN_QHT_HASH) 858 dn_ht_scan(fs->qht, copy_q_cb, a); 859 else 860 copy_q_cb(fs->qht, a); 861 return 0; 862} 863 864/* 865 * This routine only copies the initial part of a profile ? 
XXX 866 */ 867static int 868copy_profile(struct copy_args *a, struct dn_profile *p) 869{ 870 int have = a->end - *a->start; 871 /* XXX here we check for max length */ 872 int profile_len = sizeof(struct dn_profile) - 873 ED_MAX_SAMPLES_NO*sizeof(int); 874 875 if (p == NULL) 876 return 0; 877 if (have < profile_len) { 878 D("error have %d need %d", have, profile_len); 879 return 1; 880 } 881 bcopy(p, *a->start, profile_len); 882 ((struct dn_id *)(*a->start))->len = profile_len; 883 *a->start += profile_len; 884 return 0; 885} 886 887static int 888copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags) 889{ 890 struct dn_fs *ufs = (struct dn_fs *)(*a->start); 891 if (!fs) 892 return 0; 893 ND("flowset %d", fs->fs.fs_nr); 894 if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr)) 895 return DNHT_SCAN_END; 896 ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ? 897 dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0); 898 if (flags) { /* copy queues */ 899 copy_q(a, fs, 0); 900 } 901 return 0; 902} 903 904static int 905copy_si_cb(void *obj, void *arg) 906{ 907 struct dn_sch_inst *si = obj; 908 struct copy_args *a = arg; 909 struct dn_flow *ni = (struct dn_flow *)(*a->start); 910 if (copy_obj(a->start, a->end, &si->ni, "inst", 911 si->sched->sch.sched_nr)) 912 return DNHT_SCAN_END; 913 ni->oid.type = DN_FLOW; /* override the DN_SCH_I */ 914 ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL); 915 return 0; 916} 917 918static int 919copy_si(struct copy_args *a, struct dn_schk *s, int flags) 920{ 921 if (s->sch.flags & DN_HAVE_MASK) 922 dn_ht_scan(s->siht, copy_si_cb, a); 923 else if (s->siht) 924 copy_si_cb(s->siht, a); 925 return 0; 926} 927 928/* 929 * compute a list of children of a scheduler and copy up 930 */ 931static int 932copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags) 933{ 934 struct dn_fsk *fs; 935 struct dn_id *o; 936 uint32_t *p; 937 938 int n = 0, space = sizeof(*o); 939 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 940 if 
(fs->fs.fs_nr < DN_MAX_ID) 941 n++; 942 } 943 space += n * sizeof(uint32_t); 944 DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n); 945 if (a->end - *(a->start) < space) 946 return DNHT_SCAN_END; 947 o = (struct dn_id *)(*(a->start)); 948 o->len = space; 949 *a->start += o->len; 950 o->type = DN_TEXT; 951 p = (uint32_t *)(o+1); 952 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) 953 if (fs->fs.fs_nr < DN_MAX_ID) 954 *p++ = fs->fs.fs_nr; 955 return 0; 956} 957 958static int 959copy_data_helper(void *_o, void *_arg) 960{ 961 struct copy_args *a = _arg; 962 uint32_t *r = a->extra->r; /* start of first range */ 963 uint32_t *lim; /* first invalid pointer */ 964 int n; 965 966 lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len); 967 968 if (a->type == DN_LINK || a->type == DN_SCH) { 969 /* pipe|sched show, we receive a dn_schk */ 970 struct dn_schk *s = _o; 971 972 n = s->sch.sched_nr; 973 if (a->type == DN_SCH && n >= DN_MAX_ID) 974 return 0; /* not a scheduler */ 975 if (a->type == DN_LINK && n <= DN_MAX_ID) 976 return 0; /* not a pipe */ 977 978 /* see if the object is within one of our ranges */ 979 for (;r < lim; r += 2) { 980 if (n < r[0] || n > r[1]) 981 continue; 982 /* Found a valid entry, copy and we are done */ 983 if (a->flags & DN_C_LINK) { 984 if (copy_obj(a->start, a->end, 985 &s->link, "link", n)) 986 return DNHT_SCAN_END; 987 if (copy_profile(a, s->profile)) 988 return DNHT_SCAN_END; 989 if (copy_flowset(a, s->fs, 0)) 990 return DNHT_SCAN_END; 991 } 992 if (a->flags & DN_C_SCH) { 993 if (copy_obj(a->start, a->end, 994 &s->sch, "sched", n)) 995 return DNHT_SCAN_END; 996 /* list all attached flowsets */ 997 if (copy_fsk_list(a, s, 0)) 998 return DNHT_SCAN_END; 999 } 1000 if (a->flags & DN_C_FLOW) 1001 copy_si(a, s, 0); 1002 break; 1003 } 1004 } else if (a->type == DN_FS) { 1005 /* queue show, skip internal flowsets */ 1006 struct dn_fsk *fs = _o; 1007 1008 n = fs->fs.fs_nr; 1009 if (n >= DN_MAX_ID) 1010 return 0; 1011 /* see if the object is within 
one of our ranges */ 1012 for (;r < lim; r += 2) { 1013 if (n < r[0] || n > r[1]) 1014 continue; 1015 if (copy_flowset(a, fs, 0)) 1016 return DNHT_SCAN_END; 1017 copy_q(a, fs, 0); 1018 break; /* we are done */ 1019 } 1020 } 1021 return 0; 1022} 1023 1024static inline struct dn_schk * 1025locate_scheduler(int i) 1026{ 1027 return dn_ht_find(dn_cfg.schedhash, i, 0, NULL); 1028} 1029 1030/* 1031 * red parameters are in fixed point arithmetic. 1032 */ 1033static int 1034config_red(struct dn_fsk *fs) 1035{ 1036 int64_t s, idle, weight, w0; 1037 int t, i; 1038 1039 fs->w_q = fs->fs.w_q; 1040 fs->max_p = fs->fs.max_p; 1041 D("called"); 1042 /* Doing stuff that was in userland */ 1043 i = fs->sched->link.bandwidth; 1044 s = (i <= 0) ? 0 : 1045 hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i; 1046 1047 idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */ 1048 fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth); 1049 /* fs->lookup_step not scaled, */ 1050 if (!fs->lookup_step) 1051 fs->lookup_step = 1; 1052 w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled 1053 1054 for (t = fs->lookup_step; t > 1; --t) 1055 weight = SCALE_MUL(weight, w0); 1056 fs->lookup_weight = (int)(weight); // scaled 1057 1058 /* Now doing stuff that was in kerneland */ 1059 fs->min_th = SCALE(fs->fs.min_th); 1060 fs->max_th = SCALE(fs->fs.max_th); 1061 1062 fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th); 1063 fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th)); 1064 1065 if (fs->fs.flags & DN_IS_GENTLE_RED) { 1066 fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th; 1067 fs->c_4 = SCALE(1) - 2 * fs->max_p; 1068 } 1069 1070 /* If the lookup table already exist, free and create it again. 
*/ 1071 if (fs->w_q_lookup) { 1072 free(fs->w_q_lookup, M_DUMMYNET); 1073 fs->w_q_lookup = NULL; 1074 } 1075 if (dn_cfg.red_lookup_depth == 0) { 1076 printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth" 1077 "must be > 0\n"); 1078 fs->fs.flags &= ~DN_IS_RED; 1079 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1080 return (EINVAL); 1081 } 1082 fs->lookup_depth = dn_cfg.red_lookup_depth; 1083 fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int), 1084 M_DUMMYNET, M_NOWAIT); 1085 if (fs->w_q_lookup == NULL) { 1086 printf("dummynet: sorry, cannot allocate red lookup table\n"); 1087 fs->fs.flags &= ~DN_IS_RED; 1088 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1089 return(ENOSPC); 1090 } 1091 1092 /* Fill the lookup table with (1 - w_q)^x */ 1093 fs->w_q_lookup[0] = SCALE(1) - fs->w_q; 1094 1095 for (i = 1; i < fs->lookup_depth; i++) 1096 fs->w_q_lookup[i] = 1097 SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight); 1098 1099 if (dn_cfg.red_avg_pkt_size < 1) 1100 dn_cfg.red_avg_pkt_size = 512; 1101 fs->avg_pkt_size = dn_cfg.red_avg_pkt_size; 1102 if (dn_cfg.red_max_pkt_size < 1) 1103 dn_cfg.red_max_pkt_size = 1500; 1104 fs->max_pkt_size = dn_cfg.red_max_pkt_size; 1105 D("exit"); 1106 return 0; 1107} 1108 1109/* Scan all flowset attached to this scheduler and update red */ 1110static void 1111update_red(struct dn_schk *s) 1112{ 1113 struct dn_fsk *fs; 1114 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 1115 if (fs && (fs->fs.flags & DN_IS_RED)) 1116 config_red(fs); 1117 } 1118} 1119 1120/* attach flowset to scheduler s, possibly requeue */ 1121static void 1122fsk_attach(struct dn_fsk *fs, struct dn_schk *s) 1123{ 1124 ND("remove fs %d from fsunlinked, link to sched %d", 1125 fs->fs.fs_nr, s->sch.sched_nr); 1126 SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain); 1127 fs->sched = s; 1128 SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain); 1129 if (s->fp->new_fsk) 1130 s->fp->new_fsk(fs); 1131 /* XXX compute fsk_mask */ 1132 fs->fsk_mask = fs->fs.flow_mask; 1133 if (fs->sched->sch.flags & 
DN_HAVE_MASK) 1134 flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask); 1135 if (fs->qht) { 1136 /* 1137 * we must drain qht according to the old 1138 * type, and reinsert according to the new one. 1139 * The requeue is complex -- in general we need to 1140 * reclassify every single packet. 1141 * For the time being, let's hope qht is never set 1142 * when we reach this point. 1143 */ 1144 D("XXX TODO requeue from fs %d to sch %d", 1145 fs->fs.fs_nr, s->sch.sched_nr); 1146 fs->qht = NULL; 1147 } 1148 /* set the new type for qht */ 1149 if (nonzero_mask(&fs->fsk_mask)) 1150 fs->fs.flags |= DN_QHT_HASH; 1151 else 1152 fs->fs.flags &= ~DN_QHT_HASH; 1153 1154 /* XXX config_red() can fail... */ 1155 if (fs->fs.flags & DN_IS_RED) 1156 config_red(fs); 1157} 1158 1159/* update all flowsets which may refer to this scheduler */ 1160static void 1161update_fs(struct dn_schk *s) 1162{ 1163 struct dn_fsk *fs, *tmp; 1164 1165 SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) { 1166 if (s->sch.sched_nr != fs->fs.sched_nr) { 1167 D("fs %d for sch %d not %d still unlinked", 1168 fs->fs.fs_nr, fs->fs.sched_nr, 1169 s->sch.sched_nr); 1170 continue; 1171 } 1172 fsk_attach(fs, s); 1173 } 1174} 1175 1176/* 1177 * Configuration -- to preserve backward compatibility we use 1178 * the following scheme (N is 65536) 1179 * NUMBER SCHED LINK FLOWSET 1180 * 1 .. N-1 (1)WFQ (2)WFQ (3)queue 1181 * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1 1182 * 2N+1 .. 
 *	3N-1		--	--	(7)FIFO for sched N+1..2N-1
 *
 * "pipe i config" configures #1, #2 and #3
 * "sched i config" configures #1 and possibly #6
 * "queue i config" configures #3
 * #1 is configured with 'pipe i config' or 'sched i config'
 * #2 is configured with 'pipe i config', and created if not
 *	existing with 'sched i config'
 * #3 is configured with 'queue i config'
 * #4 is automatically configured after #1, can only be FIFO
 * #5 is automatically configured after #2
 * #6 is automatically created when #1 is !MULTIQUEUE,
 *	and can be updated.
 * #7 is automatically configured after #2
 */

/*
 * configure a link (and its FIFO instance)
 */
static int
config_link(struct dn_link *p, struct dn_id *arg)
{
	int i;

	if (p->oid.len != sizeof(*p)) {
		D("invalid pipe len %d", p->oid.len);
		return EINVAL;
	}
	i = p->link_nr;
	if (i <= 0 || i >= DN_MAX_ID)
		return EINVAL;
	/*
	 * The config program passes parameters as follows:
	 * bw = bits/second (0 means no limits),
	 * delay = ms, must be translated into ticks.
	 * qsize = slots/bytes
	 * burst ???
	 */
	p->delay = (p->delay * hz) / 1000;
	/* Scale burst size: bytes -> bits * hz */
	p->burst *= 8 * hz;

	DN_BH_WLOCK();
	/* do it twice, base link and FIFO link */
	for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
		struct dn_schk *s = locate_scheduler(i);
		if (s == NULL) {
			/* drop the lock before reporting the error */
			DN_BH_WUNLOCK();
			D("sched %d not found", i);
			return EINVAL;
		}
		/* remove profile if exists */
		if (s->profile) {
			free(s->profile, M_DUMMYNET);
			s->profile = NULL;
		}
		/* copy all parameters */
		s->link.oid = p->oid;
		s->link.link_nr = i;
		s->link.delay = p->delay;
		if (s->link.bandwidth != p->bandwidth) {
			/* XXX bandwidth changes, need to update red params */
			s->link.bandwidth = p->bandwidth;
			update_red(s);
		}
		s->link.burst = p->burst;
		schk_reset_credit(s);
	}
	dn_cfg.id++;	/* bump config generation number */
	DN_BH_WUNLOCK();
	return 0;
}

/*
 * configure a flowset. Can be called from inside with locked=1,
 */
static struct dn_fsk *
config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked)
{
	int i;
	struct dn_fsk *fs;

	if (nfs->oid.len != sizeof(*nfs)) {
		D("invalid flowset len %d", nfs->oid.len);
		return NULL;
	}
	i = nfs->fs_nr;
	if (i <= 0 || i >= 3*DN_MAX_ID)
		return NULL;
	ND("flowset %d", i);
	/* XXX other sanity checks */
	if (nfs->flags & DN_QSIZE_BYTES) {
		ipdn_bound_var(&nfs->qsize, 16384,
		    1500, dn_cfg.byte_limit, NULL); // "queue byte size");
	} else {
		ipdn_bound_var(&nfs->qsize, 50,
		    1, dn_cfg.slot_limit, NULL); // "queue slot size");
	}
	if (nfs->flags & DN_HAVE_MASK) {
		/* make sure we have some buckets */
		ipdn_bound_var(&nfs->buckets, dn_cfg.hash_size,
			1, dn_cfg.max_hash_size, "flowset buckets");
	} else {
		nfs->buckets = 1;	/* we only need 1 */
	}
	if (!locked)
		DN_BH_WLOCK();
	do { /* exit with break when done */
		struct dn_schk *s;
		/* only create the flowset if a scheduler nr was given */
		int flags = nfs->sched_nr ? DNHT_INSERT : 0;
		int j;
		int oldc = dn_cfg.fsk_count;
		fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL);
		if (fs == NULL) {
			D("missing sched for flowset %d", i);
			break;
		}
		/* grab some defaults from the existing one */
		if (nfs->sched_nr == 0) /* reuse */
			nfs->sched_nr = fs->fs.sched_nr;
		for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) {
			if (nfs->par[j] == -1) /* reuse */
				nfs->par[j] = fs->fs.par[j];
		}
		if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) {
			ND("flowset %d unchanged", i);
			break; /* no change, nothing to do */
		}
		if (oldc != dn_cfg.fsk_count)	/* new item */
			dn_cfg.id++;
		s = locate_scheduler(nfs->sched_nr);
		/* detach from old scheduler if needed, preserving
		 * queues if we need to reattach. Then update the
		 * configuration, and possibly attach to the new sched.
		 */
		DX(2, "fs %d changed sched %d@%p to %d@%p",
			fs->fs.fs_nr,
			fs->fs.sched_nr, fs->sched, nfs->sched_nr, s);
		if (fs->sched) {
			int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY);
			flags |= DN_DESTROY; /* XXX temporary */
			fsk_detach(fs, flags);
		}
		fs->fs = *nfs; /* copy configuration */
		if (s != NULL)
			fsk_attach(fs, s);
	} while (0);
	if (!locked)
		DN_BH_WUNLOCK();
	return fs;
}

/*
 * config/reconfig a scheduler and its FIFO variant.
 * For !MULTIQUEUE schedulers, also set up the flowset.
 *
 * On reconfigurations (detected because s->fp is set),
 * detach existing flowsets preserving traffic, preserve link,
 * and delete the old scheduler creating a new one.
 */
static int
config_sched(struct dn_sch *_nsch, struct dn_id *arg)
{
    struct dn_schk *s;
    struct schk_new_arg a; /* argument for schk_new */
    int i;
    struct dn_link p;  /* copy of oldlink */
    struct dn_profile *pf = NULL;  /* copy of old link profile */
    /* Used to preserv mask parameter */
    struct ipfw_flow_id new_mask;
    int new_buckets = 0;
    int new_flags = 0;
    int pipe_cmd;
    int err = ENOMEM;

    a.sch = _nsch;
    if (a.sch->oid.len != sizeof(*a.sch)) {
        D("bad sched len %d", a.sch->oid.len);
        return EINVAL;
    }
    i = a.sch->sched_nr;
    if (i <= 0 || i >= DN_MAX_ID)
        return EINVAL;
    /* make sure we have some buckets */
    if (a.sch->flags & DN_HAVE_MASK)
        ipdn_bound_var(&a.sch->buckets, dn_cfg.hash_size,
            1, dn_cfg.max_hash_size, "sched buckets");
    /* XXX other sanity checks */
    bzero(&p, sizeof(p));

    pipe_cmd = a.sch->flags & DN_PIPE_CMD;
    a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set?
    if (pipe_cmd) {
        /* Copy mask parameter; it is restored when we loop around
         * to configure the FIFO instance below. */
        new_mask = a.sch->sched_mask;
        new_buckets = a.sch->buckets;
        new_flags = a.sch->flags;
    }
    DN_BH_WLOCK();
again: /* run twice, for wfq and fifo */
    /*
     * lookup the type. If not supplied, use the previous one
     * or default to WF2Q+. Otherwise, return an error.
     */
    dn_cfg.id++;
    a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name);
    if (a.fp != NULL) {
        /* found. Lookup or create entry */
        s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a);
    } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) {
        /* No type. search existing s* or retry with WF2Q+ */
        s = dn_ht_find(dn_cfg.schedhash, i, 0, &a);
        if (s != NULL) {
            a.fp = s->fp;
            /* Scheduler exists, skip to FIFO scheduler
             * if command was pipe config...
             */
            if (pipe_cmd)
                goto next;
        } else {
            /* New scheduler, create a wf2q+ with no mask
             * if command was pipe config...
             */
            if (pipe_cmd) {
                /* clear mask parameter */
                bzero(&a.sch->sched_mask, sizeof(new_mask));
                a.sch->buckets = 0;
                a.sch->flags &= ~DN_HAVE_MASK;
            }
            a.sch->oid.subtype = DN_SCHED_WF2QP;
            goto again;
        }
    } else {
        D("invalid scheduler type %d %s",
            a.sch->oid.subtype, a.sch->name);
        err = EINVAL;
        goto error;
    }
    /* normalize name and subtype */
    a.sch->oid.subtype = a.fp->type;
    bzero(a.sch->name, sizeof(a.sch->name));
    strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name));
    if (s == NULL) {
        D("cannot allocate scheduler %d", i);
        goto error;
    }
    /* restore existing link if any (saved in p when the old
     * scheduler instance was torn down below) */
    if (p.link_nr) {
        s->link = p;
        if (!pf || pf->link_nr != p.link_nr) { /* no saved value */
            s->profile = NULL; /* XXX maybe not needed */
        } else {
            s->profile = malloc(sizeof(struct dn_profile),
                M_DUMMYNET, M_NOWAIT | M_ZERO);
            if (s->profile == NULL) {
                D("cannot allocate profile");
                goto error; //XXX
            }
            bcopy(pf, s->profile, sizeof(*pf));
        }
    }
    p.link_nr = 0;
    if (s->fp == NULL) {
        /* s->fp == NULL marks a freshly created entry (schk_new) */
        DX(2, "sched %d new type %s", i, a.fp->name);
    } else if (s->fp != a.fp ||
        bcmp(a.sch, &s->sch, sizeof(*a.sch))) {
        /* already existing. */
        DX(2, "sched %d type changed from %s to %s",
            i, s->fp->name, a.fp->name);
        DX(4, " type/sub %d/%d -> %d/%d",
            s->sch.oid.type, s->sch.oid.subtype,
            a.sch->oid.type, a.sch->oid.subtype);
        if (s->link.link_nr == 0)
            D("XXX WARNING link 0 for sched %d", i);
        p = s->link; /* preserve link */
        if (s->profile) {/* preserve profile */
            if (!pf)
                pf = malloc(sizeof(*pf),
                    M_DUMMYNET, M_NOWAIT | M_ZERO);
            if (pf) /* XXX should issue a warning otherwise */
                bcopy(s->profile, pf, sizeof(*pf));
        }
        /* remove from the hash */
        dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
        /* Detach flowsets, preserve queues. */
        // schk_delete_cb(s, NULL);
        // XXX temporarily, kill queues
        schk_delete_cb(s, (void *)DN_DESTROY);
        goto again;
    } else {
        DX(4, "sched %d unchanged type %s", i, a.fp->name);
    }
    /* complete initialization */
    s->sch = *a.sch;
    s->fp = a.fp;
    s->cfg = arg;
    // XXX schk_reset_credit(s);
    /* create the internal flowset if needed,
     * trying to reuse existing ones if available
     */
    if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) {
        s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL);
        if (!s->fs) {
            struct dn_fs fs;
            bzero(&fs, sizeof(fs));
            set_oid(&fs.oid, DN_FS, sizeof(fs));
            fs.fs_nr = i + DN_MAX_ID;
            fs.sched_nr = i;
            s->fs = config_fs(&fs, NULL, 1 /* locked */);
        }
        if (!s->fs) {
            schk_delete_cb(s, (void *)DN_DESTROY);
            D("error creating internal fs for %d", i);
            goto error;
        }
    }
    /* call init function after the flowset is created */
    if (s->fp->config)
        s->fp->config(s);
    update_fs(s);
next:
    if (i < DN_MAX_ID) { /* now configure the FIFO instance */
        i += DN_MAX_ID;
        if (pipe_cmd) {
            /* Restore mask parameter for FIFO */
            a.sch->sched_mask = new_mask;
            a.sch->buckets = new_buckets;
            a.sch->flags = new_flags;
        } else {
            /* sched config shouldn't modify the FIFO scheduler */
            if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) {
                /* FIFO already exist, don't touch it */
                err = 0; /* and this is not an error */
                goto error;
            }
        }
        a.sch->sched_nr = i;
        a.sch->oid.subtype = DN_SCHED_FIFO;
        bzero(a.sch->name, sizeof(a.sch->name));
        goto again;
    }
    err = 0;
error:
    DN_BH_WUNLOCK();
    if (pf)
        free(pf, M_DUMMYNET);
    return err;
}

/*
 * attach a profile to a link
 */
static int
config_profile(struct dn_profile *pf, struct dn_id *arg)
{
    struct dn_schk *s;
    int i, olen, err = 0;

    if (pf->oid.len < sizeof(*pf)) {
        D("short profile len %d", pf->oid.len);
        return EINVAL;
    }
    i = pf->link_nr;
    if (i <= 0 || i >= DN_MAX_ID)
        return EINVAL;
    /* XXX other sanity checks */
    DN_BH_WLOCK();
    for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
/*
 * Configuration and internal object management for dummynet.
 */

#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/taskqueue.h>
#include <net/if.h>	/* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
#include <netinet/in.h>
#include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
#include <netinet/ip_fw.h>
#include <netinet/ipfw/ip_fw_private.h>
#include <netinet/ipfw/dn_heap.h>
#include <netinet/ip_dummynet.h>
#include <netinet/ipfw/ip_dn_private.h>
#include <netinet/ipfw/dn_sched.h>

/* which objects to copy (bitmask used by the copy/show machinery) */
#define DN_C_LINK 	0x01
#define DN_C_SCH	0x02
#define DN_C_FLOW	0x04
#define DN_C_FS		0x08
#define DN_C_QUEUE	0x10

/* we use this argument in case of a schk_new */
struct schk_new_arg {
    struct dn_alg *fp;	/* scheduler algorithm descriptor */
    struct dn_sch *sch;	/* userland-supplied configuration */
};

/*---- callout hooks. ----*/
static struct callout dn_timeout;
static struct task dn_task;
static struct taskqueue *dn_tq = NULL;

/*
 * Callout handler: defer the actual tick work to the taskqueue so it
 * runs in a sleepable context rather than in the callout.
 */
static void
dummynet(void * __unused unused)
{

    taskqueue_enqueue(dn_tq, &dn_task);
}

/*
 * Re-arm the dummynet tick: fire the callout one tick from now.
 */
void
dn_reschedule(void)
{
    callout_reset(&dn_timeout, 1, dummynet, NULL);
}
/*----- end of callout hooks -----*/

/* Return a scheduler descriptor given the type or name.
*/ 94static struct dn_alg * 95find_sched_type(int type, char *name) 96{ 97 struct dn_alg *d; 98 99 SLIST_FOREACH(d, &dn_cfg.schedlist, next) { 100 if (d->type == type || (name && !strcmp(d->name, name))) 101 return d; 102 } 103 return NULL; /* not found */ 104} 105 106int 107ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg) 108{ 109 int oldv = *v; 110 const char *op = NULL; 111 if (oldv < lo) { 112 *v = dflt; 113 op = "Bump"; 114 } else if (oldv > hi) { 115 *v = hi; 116 op = "Clamp"; 117 } else 118 return *v; 119 if (op && msg) 120 printf("%s %s to %d (was %d)\n", op, msg, *v, oldv); 121 return *v; 122} 123 124/*---- flow_id mask, hash and compare functions ---*/ 125/* 126 * The flow_id includes the 5-tuple, the queue/pipe number 127 * which we store in the extra area in host order, 128 * and for ipv6 also the flow_id6. 129 * XXX see if we want the tos byte (can store in 'flags') 130 */ 131static struct ipfw_flow_id * 132flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id) 133{ 134 int is_v6 = IS_IP6_FLOW_ID(id); 135 136 id->dst_port &= mask->dst_port; 137 id->src_port &= mask->src_port; 138 id->proto &= mask->proto; 139 id->extra &= mask->extra; 140 if (is_v6) { 141 APPLY_MASK(&id->dst_ip6, &mask->dst_ip6); 142 APPLY_MASK(&id->src_ip6, &mask->src_ip6); 143 id->flow_id6 &= mask->flow_id6; 144 } else { 145 id->dst_ip &= mask->dst_ip; 146 id->src_ip &= mask->src_ip; 147 } 148 return id; 149} 150 151/* computes an OR of two masks, result in dst and also returned */ 152static struct ipfw_flow_id * 153flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst) 154{ 155 int is_v6 = IS_IP6_FLOW_ID(dst); 156 157 dst->dst_port |= src->dst_port; 158 dst->src_port |= src->src_port; 159 dst->proto |= src->proto; 160 dst->extra |= src->extra; 161 if (is_v6) { 162#define OR_MASK(_d, _s) \ 163 (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \ 164 (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \ 165 
(_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \ 166 (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3]; 167 OR_MASK(&dst->dst_ip6, &src->dst_ip6); 168 OR_MASK(&dst->src_ip6, &src->src_ip6); 169#undef OR_MASK 170 dst->flow_id6 |= src->flow_id6; 171 } else { 172 dst->dst_ip |= src->dst_ip; 173 dst->src_ip |= src->src_ip; 174 } 175 return dst; 176} 177 178static int 179nonzero_mask(struct ipfw_flow_id *m) 180{ 181 if (m->dst_port || m->src_port || m->proto || m->extra) 182 return 1; 183 if (IS_IP6_FLOW_ID(m)) { 184 return 185 m->dst_ip6.__u6_addr.__u6_addr32[0] || 186 m->dst_ip6.__u6_addr.__u6_addr32[1] || 187 m->dst_ip6.__u6_addr.__u6_addr32[2] || 188 m->dst_ip6.__u6_addr.__u6_addr32[3] || 189 m->src_ip6.__u6_addr.__u6_addr32[0] || 190 m->src_ip6.__u6_addr.__u6_addr32[1] || 191 m->src_ip6.__u6_addr.__u6_addr32[2] || 192 m->src_ip6.__u6_addr.__u6_addr32[3] || 193 m->flow_id6; 194 } else { 195 return m->dst_ip || m->src_ip; 196 } 197} 198 199/* XXX we may want a better hash function */ 200static uint32_t 201flow_id_hash(struct ipfw_flow_id *id) 202{ 203 uint32_t i; 204 205 if (IS_IP6_FLOW_ID(id)) { 206 uint32_t *d = (uint32_t *)&id->dst_ip6; 207 uint32_t *s = (uint32_t *)&id->src_ip6; 208 i = (d[0] ) ^ (d[1]) ^ 209 (d[2] ) ^ (d[3]) ^ 210 (d[0] >> 15) ^ (d[1] >> 15) ^ 211 (d[2] >> 15) ^ (d[3] >> 15) ^ 212 (s[0] << 1) ^ (s[1] << 1) ^ 213 (s[2] << 1) ^ (s[3] << 1) ^ 214 (s[0] << 16) ^ (s[1] << 16) ^ 215 (s[2] << 16) ^ (s[3] << 16) ^ 216 (id->dst_port << 1) ^ (id->src_port) ^ 217 (id->extra) ^ 218 (id->proto ) ^ (id->flow_id6); 219 } else { 220 i = (id->dst_ip) ^ (id->dst_ip >> 15) ^ 221 (id->src_ip << 1) ^ (id->src_ip >> 16) ^ 222 (id->extra) ^ 223 (id->dst_port << 1) ^ (id->src_port) ^ (id->proto); 224 } 225 return i; 226} 227 228/* Like bcmp, returns 0 if ids match, 1 otherwise. 
*/ 229static int 230flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2) 231{ 232 int is_v6 = IS_IP6_FLOW_ID(id1); 233 234 if (!is_v6) { 235 if (IS_IP6_FLOW_ID(id2)) 236 return 1; /* different address families */ 237 238 return (id1->dst_ip == id2->dst_ip && 239 id1->src_ip == id2->src_ip && 240 id1->dst_port == id2->dst_port && 241 id1->src_port == id2->src_port && 242 id1->proto == id2->proto && 243 id1->extra == id2->extra) ? 0 : 1; 244 } 245 /* the ipv6 case */ 246 return ( 247 !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) && 248 !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) && 249 id1->dst_port == id2->dst_port && 250 id1->src_port == id2->src_port && 251 id1->proto == id2->proto && 252 id1->extra == id2->extra && 253 id1->flow_id6 == id2->flow_id6) ? 0 : 1; 254} 255/*--------- end of flow-id mask, hash and compare ---------*/ 256 257/*--- support functions for the qht hashtable ---- 258 * Entries are hashed by flow-id 259 */ 260static uint32_t 261q_hash(uintptr_t key, int flags, void *arg) 262{ 263 /* compute the hash slot from the flow id */ 264 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 265 &((struct dn_queue *)key)->ni.fid : 266 (struct ipfw_flow_id *)key; 267 268 return flow_id_hash(id); 269} 270 271static int 272q_match(void *obj, uintptr_t key, int flags, void *arg) 273{ 274 struct dn_queue *o = (struct dn_queue *)obj; 275 struct ipfw_flow_id *id2; 276 277 if (flags & DNHT_KEY_IS_OBJ) { 278 /* compare pointers */ 279 id2 = &((struct dn_queue *)key)->ni.fid; 280 } else { 281 id2 = (struct ipfw_flow_id *)key; 282 } 283 return (0 == flow_id_cmp(&o->ni.fid, id2)); 284} 285 286/* 287 * create a new queue instance for the given 'key'. 
288 */ 289static void * 290q_new(uintptr_t key, int flags, void *arg) 291{ 292 struct dn_queue *q, *template = arg; 293 struct dn_fsk *fs = template->fs; 294 int size = sizeof(*q) + fs->sched->fp->q_datalen; 295 296 q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO); 297 if (q == NULL) { 298 D("no memory for new queue"); 299 return NULL; 300 } 301 302 set_oid(&q->ni.oid, DN_QUEUE, size); 303 if (fs->fs.flags & DN_QHT_HASH) 304 q->ni.fid = *(struct ipfw_flow_id *)key; 305 q->fs = fs; 306 q->_si = template->_si; 307 q->_si->q_count++; 308 309 if (fs->sched->fp->new_queue) 310 fs->sched->fp->new_queue(q); 311 dn_cfg.queue_count++; 312 return q; 313} 314 315/* 316 * Notify schedulers that a queue is going away. 317 * If (flags & DN_DESTROY), also free the packets. 318 * The version for callbacks is called q_delete_cb(). 319 */ 320static void 321dn_delete_queue(struct dn_queue *q, int flags) 322{ 323 struct dn_fsk *fs = q->fs; 324 325 // D("fs %p si %p\n", fs, q->_si); 326 /* notify the parent scheduler that the queue is going away */ 327 if (fs && fs->sched->fp->free_queue) 328 fs->sched->fp->free_queue(q); 329 q->_si->q_count--; 330 q->_si = NULL; 331 if (flags & DN_DESTROY) { 332 if (q->mq.head) 333 dn_free_pkts(q->mq.head); 334 bzero(q, sizeof(*q)); // safety 335 free(q, M_DUMMYNET); 336 dn_cfg.queue_count--; 337 } 338} 339 340static int 341q_delete_cb(void *q, void *arg) 342{ 343 int flags = (int)(uintptr_t)arg; 344 dn_delete_queue(q, flags); 345 return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0; 346} 347 348/* 349 * calls dn_delete_queue/q_delete_cb on all queues, 350 * which notifies the parent scheduler and possibly drains packets. 
351 * flags & DN_DESTROY: drains queues and destroy qht; 352 */ 353static void 354qht_delete(struct dn_fsk *fs, int flags) 355{ 356 ND("fs %d start flags %d qht %p", 357 fs->fs.fs_nr, flags, fs->qht); 358 if (!fs->qht) 359 return; 360 if (fs->fs.flags & DN_QHT_HASH) { 361 dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags); 362 if (flags & DN_DESTROY) { 363 dn_ht_free(fs->qht, 0); 364 fs->qht = NULL; 365 } 366 } else { 367 dn_delete_queue((struct dn_queue *)(fs->qht), flags); 368 if (flags & DN_DESTROY) 369 fs->qht = NULL; 370 } 371} 372 373/* 374 * Find and possibly create the queue for a MULTIQUEUE scheduler. 375 * We never call it for !MULTIQUEUE (the queue is in the sch_inst). 376 */ 377struct dn_queue * 378ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si, 379 struct ipfw_flow_id *id) 380{ 381 struct dn_queue template; 382 383 template._si = si; 384 template.fs = fs; 385 386 if (fs->fs.flags & DN_QHT_HASH) { 387 struct ipfw_flow_id masked_id; 388 if (fs->qht == NULL) { 389 fs->qht = dn_ht_init(NULL, fs->fs.buckets, 390 offsetof(struct dn_queue, q_next), 391 q_hash, q_match, q_new); 392 if (fs->qht == NULL) 393 return NULL; 394 } 395 masked_id = *id; 396 flow_id_mask(&fs->fsk_mask, &masked_id); 397 return dn_ht_find(fs->qht, (uintptr_t)&masked_id, 398 DNHT_INSERT, &template); 399 } else { 400 if (fs->qht == NULL) 401 fs->qht = q_new(0, 0, &template); 402 return (struct dn_queue *)fs->qht; 403 } 404} 405/*--- end of queue hash table ---*/ 406 407/*--- support functions for the sch_inst hashtable ---- 408 * 409 * These are hashed by flow-id 410 */ 411static uint32_t 412si_hash(uintptr_t key, int flags, void *arg) 413{ 414 /* compute the hash slot from the flow id */ 415 struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ? 
416 &((struct dn_sch_inst *)key)->ni.fid : 417 (struct ipfw_flow_id *)key; 418 419 return flow_id_hash(id); 420} 421 422static int 423si_match(void *obj, uintptr_t key, int flags, void *arg) 424{ 425 struct dn_sch_inst *o = obj; 426 struct ipfw_flow_id *id2; 427 428 id2 = (flags & DNHT_KEY_IS_OBJ) ? 429 &((struct dn_sch_inst *)key)->ni.fid : 430 (struct ipfw_flow_id *)key; 431 return flow_id_cmp(&o->ni.fid, id2) == 0; 432} 433 434/* 435 * create a new instance for the given 'key' 436 * Allocate memory for instance, delay line and scheduler private data. 437 */ 438static void * 439si_new(uintptr_t key, int flags, void *arg) 440{ 441 struct dn_schk *s = arg; 442 struct dn_sch_inst *si; 443 int l = sizeof(*si) + s->fp->si_datalen; 444 445 si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); 446 if (si == NULL) 447 goto error; 448 /* Set length only for the part passed up to userland. */ 449 set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow)); 450 set_oid(&(si->dline.oid), DN_DELAY_LINE, 451 sizeof(struct delay_line)); 452 /* mark si and dline as outside the event queue */ 453 si->ni.oid.id = si->dline.oid.id = -1; 454 455 si->sched = s; 456 si->dline.si = si; 457 458 if (s->fp->new_sched && s->fp->new_sched(si)) { 459 D("new_sched error"); 460 goto error; 461 } 462 if (s->sch.flags & DN_HAVE_MASK) 463 si->ni.fid = *(struct ipfw_flow_id *)key; 464 465 dn_cfg.si_count++; 466 return si; 467 468error: 469 if (si) { 470 bzero(si, sizeof(*si)); // safety 471 free(si, M_DUMMYNET); 472 } 473 return NULL; 474} 475 476/* 477 * Callback from siht to delete all scheduler instances. Remove 478 * si and delay line from the system heap, destroy all queues. 479 * We assume that all flowset have been notified and do not 480 * point to us anymore. 
481 */ 482static int 483si_destroy(void *_si, void *arg) 484{ 485 struct dn_sch_inst *si = _si; 486 struct dn_schk *s = si->sched; 487 struct delay_line *dl = &si->dline; 488 489 if (dl->oid.subtype) /* remove delay line from event heap */ 490 heap_extract(&dn_cfg.evheap, dl); 491 dn_free_pkts(dl->mq.head); /* drain delay line */ 492 if (si->kflags & DN_ACTIVE) /* remove si from event heap */ 493 heap_extract(&dn_cfg.evheap, si); 494 if (s->fp->free_sched) 495 s->fp->free_sched(si); 496 bzero(si, sizeof(*si)); /* safety */ 497 free(si, M_DUMMYNET); 498 dn_cfg.si_count--; 499 return DNHT_SCAN_DEL; 500} 501 502/* 503 * Find the scheduler instance for this packet. If we need to apply 504 * a mask, do on a local copy of the flow_id to preserve the original. 505 * Assume siht is always initialized if we have a mask. 506 */ 507struct dn_sch_inst * 508ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id) 509{ 510 511 if (s->sch.flags & DN_HAVE_MASK) { 512 struct ipfw_flow_id id_t = *id; 513 flow_id_mask(&s->sch.sched_mask, &id_t); 514 return dn_ht_find(s->siht, (uintptr_t)&id_t, 515 DNHT_INSERT, s); 516 } 517 if (!s->siht) 518 s->siht = si_new(0, 0, s); 519 return (struct dn_sch_inst *)s->siht; 520} 521 522/* callback to flush credit for the scheduler instance */ 523static int 524si_reset_credit(void *_si, void *arg) 525{ 526 struct dn_sch_inst *si = _si; 527 struct dn_link *p = &si->sched->link; 528 529 si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0); 530 return 0; 531} 532 533static void 534schk_reset_credit(struct dn_schk *s) 535{ 536 if (s->sch.flags & DN_HAVE_MASK) 537 dn_ht_scan(s->siht, si_reset_credit, NULL); 538 else if (s->siht) 539 si_reset_credit(s->siht, NULL); 540} 541/*---- end of sch_inst hashtable ---------------------*/ 542 543/*------------------------------------------------------- 544 * flowset hash (fshash) support. Entries are hashed by fs_nr. 
545 * New allocations are put in the fsunlinked list, from which 546 * they are removed when they point to a specific scheduler. 547 */ 548static uint32_t 549fsk_hash(uintptr_t key, int flags, void *arg) 550{ 551 uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : 552 ((struct dn_fsk *)key)->fs.fs_nr; 553 554 return ( (i>>8)^(i>>4)^i ); 555} 556 557static int 558fsk_match(void *obj, uintptr_t key, int flags, void *arg) 559{ 560 struct dn_fsk *fs = obj; 561 int i = !(flags & DNHT_KEY_IS_OBJ) ? key : 562 ((struct dn_fsk *)key)->fs.fs_nr; 563 564 return (fs->fs.fs_nr == i); 565} 566 567static void * 568fsk_new(uintptr_t key, int flags, void *arg) 569{ 570 struct dn_fsk *fs; 571 572 fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO); 573 if (fs) { 574 set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs)); 575 dn_cfg.fsk_count++; 576 fs->drain_bucket = 0; 577 SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); 578 } 579 return fs; 580} 581 582/* 583 * detach flowset from its current scheduler. Flags as follows: 584 * DN_DETACH removes from the fsk_list 585 * DN_DESTROY deletes individual queues 586 * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked). 587 */ 588static void 589fsk_detach(struct dn_fsk *fs, int flags) 590{ 591 if (flags & DN_DELETE_FS) 592 flags |= DN_DESTROY; 593 ND("fs %d from sched %d flags %s %s %s", 594 fs->fs.fs_nr, fs->fs.sched_nr, 595 (flags & DN_DELETE_FS) ? "DEL_FS":"", 596 (flags & DN_DESTROY) ? "DEL":"", 597 (flags & DN_DETACH) ? "DET":""); 598 if (flags & DN_DETACH) { /* detach from the list */ 599 struct dn_fsk_head *h; 600 h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu; 601 SLIST_REMOVE(h, fs, dn_fsk, sch_chain); 602 } 603 /* Free the RED parameters, they will be recomputed on 604 * subsequent attach if needed. 
605 */ 606 if (fs->w_q_lookup) 607 free(fs->w_q_lookup, M_DUMMYNET); 608 fs->w_q_lookup = NULL; 609 qht_delete(fs, flags); 610 if (fs->sched && fs->sched->fp->free_fsk) 611 fs->sched->fp->free_fsk(fs); 612 fs->sched = NULL; 613 if (flags & DN_DELETE_FS) { 614 bzero(fs, sizeof(fs)); /* safety */ 615 free(fs, M_DUMMYNET); 616 dn_cfg.fsk_count--; 617 } else { 618 SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); 619 } 620} 621 622/* 623 * Detach or destroy all flowsets in a list. 624 * flags specifies what to do: 625 * DN_DESTROY: flush all queues 626 * DN_DELETE_FS: DN_DESTROY + destroy flowset 627 * DN_DELETE_FS implies DN_DESTROY 628 */ 629static void 630fsk_detach_list(struct dn_fsk_head *h, int flags) 631{ 632 struct dn_fsk *fs; 633 int n = 0; /* only for stats */ 634 635 ND("head %p flags %x", h, flags); 636 while ((fs = SLIST_FIRST(h))) { 637 SLIST_REMOVE_HEAD(h, sch_chain); 638 n++; 639 fsk_detach(fs, flags); 640 } 641 ND("done %d flowsets", n); 642} 643 644/* 645 * called on 'queue X delete' -- removes the flowset from fshash, 646 * deletes all queues for the flowset, and removes the flowset. 647 */ 648static int 649delete_fs(int i, int locked) 650{ 651 struct dn_fsk *fs; 652 int err = 0; 653 654 if (!locked) 655 DN_BH_WLOCK(); 656 fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL); 657 ND("fs %d found %p", i, fs); 658 if (fs) { 659 fsk_detach(fs, DN_DETACH | DN_DELETE_FS); 660 err = 0; 661 } else 662 err = EINVAL; 663 if (!locked) 664 DN_BH_WUNLOCK(); 665 return err; 666} 667 668/*----- end of flowset hashtable support -------------*/ 669 670/*------------------------------------------------------------ 671 * Scheduler hash. When searching by index we pass sched_nr, 672 * otherwise we pass struct dn_sch * which is the first field in 673 * struct dn_schk so we can cast between the two. We use this trick 674 * because in the create phase (but it should be fixed). 
 */
static uint32_t
schk_hash(uintptr_t key, int flags, void *_arg)
{
    /* key is either a scheduler number or the schk object itself */
    uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
        ((struct dn_schk *)key)->sch.sched_nr;
    return ( (i>>8)^(i>>4)^i );
}

static int
schk_match(void *obj, uintptr_t key, int flags, void *_arg)
{
    struct dn_schk *s = (struct dn_schk *)obj;
    int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
        ((struct dn_schk *)key)->sch.sched_nr;
    return (s->sch.sched_nr == i);
}

/*
 * Create the entry and intialize with the sched hash if needed.
 * Leave s->fp unset so we can tell whether a dn_ht_find() returns
 * a new object or a previously existing one.
 */
static void *
schk_new(uintptr_t key, int flags, void *arg)
{
    struct schk_new_arg *a = arg;
    struct dn_schk *s;
    int l = sizeof(*s) + a->fp->schk_datalen;

    s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
    if (s == NULL)
        return NULL;
    set_oid(&s->link.oid, DN_LINK, sizeof(s->link));
    s->sch = *a->sch; // copy initial values
    s->link.link_nr = s->sch.sched_nr;
    SLIST_INIT(&s->fsk_list);
    /* initialize the hash table or create the single instance */
    s->fp = a->fp; /* si_new needs this */
    s->drain_bucket = 0;
    if (s->sch.flags & DN_HAVE_MASK) {
        s->siht = dn_ht_init(NULL, s->sch.buckets,
            offsetof(struct dn_sch_inst, si_next),
            si_hash, si_match, si_new);
        if (s->siht == NULL) {
            free(s, M_DUMMYNET);
            return NULL;
        }
    }
    /* cleared on purpose: config_sched() uses fp==NULL to detect
     * a freshly created entry */
    s->fp = NULL; /* mark as a new scheduler */
    dn_cfg.schk_count++;
    return s;
}

/*
 * Callback for sched delete. Notify all attached flowsets to
 * detach from the scheduler, destroy the internal flowset, and
 * all instances. The scheduler goes away too.
 * arg is 0 (only detach flowsets and destroy instances)
 * DN_DESTROY (detach & delete queues, delete schk)
 * or DN_DELETE_FS (delete queues and flowsets, delete schk)
 */
static int
schk_delete_cb(void *obj, void *arg)
{
    struct dn_schk *s = obj;
#if 0
    int a = (int)arg;
    ND("sched %d arg %s%s",
        s->sch.sched_nr,
        a&DN_DESTROY ? "DEL ":"",
        a&DN_DELETE_FS ? "DEL_FS":"");
#endif
    fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0);
    /* no more flowset pointing to us now */
    if (s->sch.flags & DN_HAVE_MASK)
        dn_ht_scan(s->siht, si_destroy, NULL);
    else if (s->siht)
        si_destroy(s->siht, NULL);
    if (s->profile) {
        free(s->profile, M_DUMMYNET);
        s->profile = NULL;
    }
    s->siht = NULL;
    /* NOTE(review): assumes s->fp is non-NULL here, i.e. the
     * scheduler completed configuration — verify for the
     * schk_new-then-error path. */
    if (s->fp->destroy)
        s->fp->destroy(s);
    bzero(s, sizeof(*s)); // safety
    free(obj, M_DUMMYNET);
    dn_cfg.schk_count--;
    return DNHT_SCAN_DEL;
}

/*
 * called on a 'sched X delete' command. Deletes a single scheduler.
 * This is done by removing from the schedhash, unlinking all
 * flowsets and deleting their traffic.
771 */ 772static int 773delete_schk(int i) 774{ 775 struct dn_schk *s; 776 777 s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL); 778 ND("%d %p", i, s); 779 if (!s) 780 return EINVAL; 781 delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */ 782 /* then detach flowsets, delete traffic */ 783 schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY); 784 return 0; 785} 786/*--- end of schk hashtable support ---*/ 787 788static int 789copy_obj(char **start, char *end, void *_o, const char *msg, int i) 790{ 791 struct dn_id *o = _o; 792 int have = end - *start; 793 794 if (have < o->len || o->len == 0 || o->type == 0) { 795 D("(WARN) type %d %s %d have %d need %d", 796 o->type, msg, i, have, o->len); 797 return 1; 798 } 799 ND("type %d %s %d len %d", o->type, msg, i, o->len); 800 bcopy(_o, *start, o->len); 801 if (o->type == DN_LINK) { 802 /* Adjust burst parameter for link */ 803 struct dn_link *l = (struct dn_link *)*start; 804 l->burst = div64(l->burst, 8 * hz); 805 } else if (o->type == DN_SCH) { 806 /* Set id->id to the number of instances */ 807 struct dn_schk *s = _o; 808 struct dn_id *id = (struct dn_id *)(*start); 809 id->id = (s->sch.flags & DN_HAVE_MASK) ? 810 dn_ht_entries(s->siht) : (s->siht ? 1 : 0); 811 } 812 *start += o->len; 813 return 0; 814} 815 816/* Specific function to copy a queue. 817 * Copies only the user-visible part of a queue (which is in 818 * a struct dn_flow), and sets len accordingly. 
819 */ 820static int 821copy_obj_q(char **start, char *end, void *_o, const char *msg, int i) 822{ 823 struct dn_id *o = _o; 824 int have = end - *start; 825 int len = sizeof(struct dn_flow); /* see above comment */ 826 827 if (have < len || o->len == 0 || o->type != DN_QUEUE) { 828 D("ERROR type %d %s %d have %d need %d", 829 o->type, msg, i, have, len); 830 return 1; 831 } 832 ND("type %d %s %d len %d", o->type, msg, i, len); 833 bcopy(_o, *start, len); 834 ((struct dn_id*)(*start))->len = len; 835 *start += len; 836 return 0; 837} 838 839static int 840copy_q_cb(void *obj, void *arg) 841{ 842 struct dn_queue *q = obj; 843 struct copy_args *a = arg; 844 struct dn_flow *ni = (struct dn_flow *)(*a->start); 845 if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1)) 846 return DNHT_SCAN_END; 847 ni->oid.type = DN_FLOW; /* override the DN_QUEUE */ 848 ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL); 849 return 0; 850} 851 852static int 853copy_q(struct copy_args *a, struct dn_fsk *fs, int flags) 854{ 855 if (!fs->qht) 856 return 0; 857 if (fs->fs.flags & DN_QHT_HASH) 858 dn_ht_scan(fs->qht, copy_q_cb, a); 859 else 860 copy_q_cb(fs->qht, a); 861 return 0; 862} 863 864/* 865 * This routine only copies the initial part of a profile ? 
 * XXX
 */
static int
copy_profile(struct copy_args *a, struct dn_profile *p)
{
    int have = a->end - *a->start;
    /* XXX here we check for max length */
    int profile_len = sizeof(struct dn_profile) -
        ED_MAX_SAMPLES_NO*sizeof(int);

    if (p == NULL)
        return 0;
    if (have < profile_len) {
        D("error have %d need %d", have, profile_len);
        return 1;
    }
    bcopy(p, *a->start, profile_len);
    /* truncated copy: fix the advertised length */
    ((struct dn_id *)(*a->start))->len = profile_len;
    *a->start += profile_len;
    return 0;
}

/* Copy a flowset (and optionally its queues) to the userland buffer. */
static int
copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags)
{
    struct dn_fs *ufs = (struct dn_fs *)(*a->start);
    if (!fs)
        return 0;
    ND("flowset %d", fs->fs.fs_nr);
    if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr))
        return DNHT_SCAN_END;
    /* report the queue count in oid.id */
    ufs->oid.id = (fs->fs.flags & DN_QHT_HASH) ?
        dn_ht_entries(fs->qht) : (fs->qht ? 1 : 0);
    if (flags) { /* copy queues */
        copy_q(a, fs, 0);
    }
    return 0;
}

/* hash-scan callback: export one scheduler instance */
static int
copy_si_cb(void *obj, void *arg)
{
    struct dn_sch_inst *si = obj;
    struct copy_args *a = arg;
    struct dn_flow *ni = (struct dn_flow *)(*a->start);
    if (copy_obj(a->start, a->end, &si->ni, "inst",
        si->sched->sch.sched_nr))
        return DNHT_SCAN_END;
    ni->oid.type = DN_FLOW; /* override the DN_SCH_I */
    ni->oid.id = si_hash((uintptr_t)si, DNHT_KEY_IS_OBJ, NULL);
    return 0;
}

/* export all instances of a scheduler (single or full hash) */
static int
copy_si(struct copy_args *a, struct dn_schk *s, int flags)
{
    if (s->sch.flags & DN_HAVE_MASK)
        dn_ht_scan(s->siht, copy_si_cb, a);
    else if (s->siht)
        copy_si_cb(s->siht, a);
    return 0;
}

/*
 * compute a list of children of a scheduler and copy up
 */
static int
copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags)
{
    struct dn_fsk *fs;
    struct dn_id *o;
    uint32_t *p;

    int n = 0, space = sizeof(*o);
    /* first pass: count user-visible flowsets to size the record */
    SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
        if (fs->fs.fs_nr < DN_MAX_ID)
            n++;
    }
    space += n * sizeof(uint32_t);
    DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n);
    if (a->end - *(a->start) < space)
        return DNHT_SCAN_END;
    o = (struct dn_id *)(*(a->start));
    o->len = space;
    *a->start += o->len;
    o->type = DN_TEXT;
    p = (uint32_t *)(o+1);
    /* second pass: emit the flowset numbers after the header */
    SLIST_FOREACH(fs, &s->fsk_list, sch_chain)
        if (fs->fs.fs_nr < DN_MAX_ID)
            *p++ = fs->fs.fs_nr;
    return 0;
}

/*
 * Generic hash-scan callback for pipe/sched/queue show: export the
 * object if its number falls within one of the requested ranges
 * (pairs of uint32_t after a->extra).
 */
static int
copy_data_helper(void *_o, void *_arg)
{
    struct copy_args *a = _arg;
    uint32_t *r = a->extra->r; /* start of first range */
    uint32_t *lim; /* first invalid pointer */
    int n;

    lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len);

    if (a->type == DN_LINK || a->type == DN_SCH) {
        /* pipe|sched show, we receive a dn_schk */
        struct dn_schk *s = _o;

        n = s->sch.sched_nr;
        if (a->type == DN_SCH && n >= DN_MAX_ID)
            return 0; /* not a scheduler */
        if (a->type == DN_LINK && n <= DN_MAX_ID)
            return 0; /* not a pipe */

        /* see if the object is within one of our ranges */
        for (;r < lim; r += 2) {
            if (n < r[0] || n > r[1])
                continue;
            /* Found a valid entry, copy and we are done */
            if (a->flags & DN_C_LINK) {
                if (copy_obj(a->start, a->end,
                    &s->link, "link", n))
                    return DNHT_SCAN_END;
                if (copy_profile(a, s->profile))
                    return DNHT_SCAN_END;
                if (copy_flowset(a, s->fs, 0))
                    return DNHT_SCAN_END;
            }
            if (a->flags & DN_C_SCH) {
                if (copy_obj(a->start, a->end,
                    &s->sch, "sched", n))
                    return DNHT_SCAN_END;
                /* list all attached flowsets */
                if (copy_fsk_list(a, s, 0))
                    return DNHT_SCAN_END;
            }
            if (a->flags & DN_C_FLOW)
                copy_si(a, s, 0);
            break;
        }
    } else if (a->type == DN_FS) {
        /* queue show, skip internal flowsets */
        struct dn_fsk *fs = _o;

        n = fs->fs.fs_nr;
        if (n >= DN_MAX_ID)
            return 0;
        /* see if the object is within one of our ranges */
        for (;r < lim; r += 2) {
            if (n < r[0] || n > r[1])
                continue;
            if (copy_flowset(a, fs, 0))
                return DNHT_SCAN_END;
            copy_q(a, fs, 0);
            break; /* we are done */
        }
    }
    return 0;
}

/* look up a scheduler by number in the scheduler hash */
static inline struct dn_schk *
locate_scheduler(int i)
{
    return dn_ht_find(dn_cfg.schedhash, i, 0, NULL);
}

/*
 * red parameters are in fixed point arithmetic.
 */
static int
config_red(struct dn_fsk *fs)
{
    int64_t s, idle, weight, w0;
    int t, i;

    fs->w_q = fs->fs.w_q;
    fs->max_p = fs->fs.max_p;
    D("called");
    /* Doing stuff that was in userland */
    i = fs->sched->link.bandwidth;
    s = (i <= 0) ? 0 :
        hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i;

    idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */
    fs->lookup_step = div64(idle , dn_cfg.red_lookup_depth);
    /* fs->lookup_step not scaled, */
    if (!fs->lookup_step)
        fs->lookup_step = 1;
    w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled

    for (t = fs->lookup_step; t > 1; --t)
        weight = SCALE_MUL(weight, w0);
    fs->lookup_weight = (int)(weight); // scaled

    /* Now doing stuff that was in kerneland */
    fs->min_th = SCALE(fs->fs.min_th);
    fs->max_th = SCALE(fs->fs.max_th);

    fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
    fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));

    if (fs->fs.flags & DN_IS_GENTLE_RED) {
        fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th;
        fs->c_4 = SCALE(1) - 2 * fs->max_p;
    }

    /* If the lookup table already exist, free and create it again.
*/ 1071 if (fs->w_q_lookup) { 1072 free(fs->w_q_lookup, M_DUMMYNET); 1073 fs->w_q_lookup = NULL; 1074 } 1075 if (dn_cfg.red_lookup_depth == 0) { 1076 printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth" 1077 "must be > 0\n"); 1078 fs->fs.flags &= ~DN_IS_RED; 1079 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1080 return (EINVAL); 1081 } 1082 fs->lookup_depth = dn_cfg.red_lookup_depth; 1083 fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int), 1084 M_DUMMYNET, M_NOWAIT); 1085 if (fs->w_q_lookup == NULL) { 1086 printf("dummynet: sorry, cannot allocate red lookup table\n"); 1087 fs->fs.flags &= ~DN_IS_RED; 1088 fs->fs.flags &= ~DN_IS_GENTLE_RED; 1089 return(ENOSPC); 1090 } 1091 1092 /* Fill the lookup table with (1 - w_q)^x */ 1093 fs->w_q_lookup[0] = SCALE(1) - fs->w_q; 1094 1095 for (i = 1; i < fs->lookup_depth; i++) 1096 fs->w_q_lookup[i] = 1097 SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight); 1098 1099 if (dn_cfg.red_avg_pkt_size < 1) 1100 dn_cfg.red_avg_pkt_size = 512; 1101 fs->avg_pkt_size = dn_cfg.red_avg_pkt_size; 1102 if (dn_cfg.red_max_pkt_size < 1) 1103 dn_cfg.red_max_pkt_size = 1500; 1104 fs->max_pkt_size = dn_cfg.red_max_pkt_size; 1105 D("exit"); 1106 return 0; 1107} 1108 1109/* Scan all flowset attached to this scheduler and update red */ 1110static void 1111update_red(struct dn_schk *s) 1112{ 1113 struct dn_fsk *fs; 1114 SLIST_FOREACH(fs, &s->fsk_list, sch_chain) { 1115 if (fs && (fs->fs.flags & DN_IS_RED)) 1116 config_red(fs); 1117 } 1118} 1119 1120/* attach flowset to scheduler s, possibly requeue */ 1121static void 1122fsk_attach(struct dn_fsk *fs, struct dn_schk *s) 1123{ 1124 ND("remove fs %d from fsunlinked, link to sched %d", 1125 fs->fs.fs_nr, s->sch.sched_nr); 1126 SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain); 1127 fs->sched = s; 1128 SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain); 1129 if (s->fp->new_fsk) 1130 s->fp->new_fsk(fs); 1131 /* XXX compute fsk_mask */ 1132 fs->fsk_mask = fs->fs.flow_mask; 1133 if (fs->sched->sch.flags & 
DN_HAVE_MASK) 1134 flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask); 1135 if (fs->qht) { 1136 /* 1137 * we must drain qht according to the old 1138 * type, and reinsert according to the new one. 1139 * The requeue is complex -- in general we need to 1140 * reclassify every single packet. 1141 * For the time being, let's hope qht is never set 1142 * when we reach this point. 1143 */ 1144 D("XXX TODO requeue from fs %d to sch %d", 1145 fs->fs.fs_nr, s->sch.sched_nr); 1146 fs->qht = NULL; 1147 } 1148 /* set the new type for qht */ 1149 if (nonzero_mask(&fs->fsk_mask)) 1150 fs->fs.flags |= DN_QHT_HASH; 1151 else 1152 fs->fs.flags &= ~DN_QHT_HASH; 1153 1154 /* XXX config_red() can fail... */ 1155 if (fs->fs.flags & DN_IS_RED) 1156 config_red(fs); 1157} 1158 1159/* update all flowsets which may refer to this scheduler */ 1160static void 1161update_fs(struct dn_schk *s) 1162{ 1163 struct dn_fsk *fs, *tmp; 1164 1165 SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) { 1166 if (s->sch.sched_nr != fs->fs.sched_nr) { 1167 D("fs %d for sch %d not %d still unlinked", 1168 fs->fs.fs_nr, fs->fs.sched_nr, 1169 s->sch.sched_nr); 1170 continue; 1171 } 1172 fsk_attach(fs, s); 1173 } 1174} 1175 1176/* 1177 * Configuration -- to preserve backward compatibility we use 1178 * the following scheme (N is 65536) 1179 * NUMBER SCHED LINK FLOWSET 1180 * 1 .. N-1 (1)WFQ (2)WFQ (3)queue 1181 * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1 1182 * 2N+1 .. 
3N-1 -- -- (7)FIFO for sched N+1..2N-1 1183 * 1184 * "pipe i config" configures #1, #2 and #3 1185 * "sched i config" configures #1 and possibly #6 1186 * "queue i config" configures #3 1187 * #1 is configured with 'pipe i config' or 'sched i config' 1188 * #2 is configured with 'pipe i config', and created if not 1189 * existing with 'sched i config' 1190 * #3 is configured with 'queue i config' 1191 * #4 is automatically configured after #1, can only be FIFO 1192 * #5 is automatically configured after #2 1193 * #6 is automatically created when #1 is !MULTIQUEUE, 1194 * and can be updated. 1195 * #7 is automatically configured after #2 1196 */ 1197 1198/* 1199 * configure a link (and its FIFO instance) 1200 */ 1201static int 1202config_link(struct dn_link *p, struct dn_id *arg) 1203{ 1204 int i; 1205 1206 if (p->oid.len != sizeof(*p)) { 1207 D("invalid pipe len %d", p->oid.len); 1208 return EINVAL; 1209 } 1210 i = p->link_nr; 1211 if (i <= 0 || i >= DN_MAX_ID) 1212 return EINVAL; 1213 /* 1214 * The config program passes parameters as follows: 1215 * bw = bits/second (0 means no limits), 1216 * delay = ms, must be translated into ticks. 1217 * qsize = slots/bytes 1218 * burst ??? 
1219 */ 1220 p->delay = (p->delay * hz) / 1000; 1221 /* Scale burst size: bytes -> bits * hz */ 1222 p->burst *= 8 * hz; 1223 1224 DN_BH_WLOCK(); 1225 /* do it twice, base link and FIFO link */ 1226 for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) { 1227 struct dn_schk *s = locate_scheduler(i); 1228 if (s == NULL) { 1229 DN_BH_WUNLOCK(); 1230 D("sched %d not found", i); 1231 return EINVAL; 1232 } 1233 /* remove profile if exists */ 1234 if (s->profile) { 1235 free(s->profile, M_DUMMYNET); 1236 s->profile = NULL; 1237 } 1238 /* copy all parameters */ 1239 s->link.oid = p->oid; 1240 s->link.link_nr = i; 1241 s->link.delay = p->delay; 1242 if (s->link.bandwidth != p->bandwidth) { 1243 /* XXX bandwidth changes, need to update red params */ 1244 s->link.bandwidth = p->bandwidth; 1245 update_red(s); 1246 } 1247 s->link.burst = p->burst; 1248 schk_reset_credit(s); 1249 } 1250 dn_cfg.id++; 1251 DN_BH_WUNLOCK(); 1252 return 0; 1253} 1254 1255/* 1256 * configure a flowset. Can be called from inside with locked=1, 1257 */ 1258static struct dn_fsk * 1259config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked) 1260{ 1261 int i; 1262 struct dn_fsk *fs; 1263 1264 if (nfs->oid.len != sizeof(*nfs)) { 1265 D("invalid flowset len %d", nfs->oid.len); 1266 return NULL; 1267 } 1268 i = nfs->fs_nr; 1269 if (i <= 0 || i >= 3*DN_MAX_ID) 1270 return NULL; 1271 ND("flowset %d", i); 1272 /* XXX other sanity checks */ 1273 if (nfs->flags & DN_QSIZE_BYTES) { 1274 ipdn_bound_var(&nfs->qsize, 16384, 1275 1500, dn_cfg.byte_limit, NULL); // "queue byte size"); 1276 } else { 1277 ipdn_bound_var(&nfs->qsize, 50, 1278 1, dn_cfg.slot_limit, NULL); // "queue slot size"); 1279 } 1280 if (nfs->flags & DN_HAVE_MASK) { 1281 /* make sure we have some buckets */ 1282 ipdn_bound_var(&nfs->buckets, dn_cfg.hash_size, 1283 1, dn_cfg.max_hash_size, "flowset buckets"); 1284 } else { 1285 nfs->buckets = 1; /* we only need 1 */ 1286 } 1287 if (!locked) 1288 DN_BH_WLOCK(); 1289 do { /* exit with break when done */ 1290 
struct dn_schk *s; 1291 int flags = nfs->sched_nr ? DNHT_INSERT : 0; 1292 int j; 1293 int oldc = dn_cfg.fsk_count; 1294 fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL); 1295 if (fs == NULL) { 1296 D("missing sched for flowset %d", i); 1297 break; 1298 } 1299 /* grab some defaults from the existing one */ 1300 if (nfs->sched_nr == 0) /* reuse */ 1301 nfs->sched_nr = fs->fs.sched_nr; 1302 for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) { 1303 if (nfs->par[j] == -1) /* reuse */ 1304 nfs->par[j] = fs->fs.par[j]; 1305 } 1306 if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) { 1307 ND("flowset %d unchanged", i); 1308 break; /* no change, nothing to do */ 1309 } 1310 if (oldc != dn_cfg.fsk_count) /* new item */ 1311 dn_cfg.id++; 1312 s = locate_scheduler(nfs->sched_nr); 1313 /* detach from old scheduler if needed, preserving 1314 * queues if we need to reattach. Then update the 1315 * configuration, and possibly attach to the new sched. 1316 */ 1317 DX(2, "fs %d changed sched %d@%p to %d@%p", 1318 fs->fs.fs_nr, 1319 fs->fs.sched_nr, fs->sched, nfs->sched_nr, s); 1320 if (fs->sched) { 1321 int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY); 1322 flags |= DN_DESTROY; /* XXX temporary */ 1323 fsk_detach(fs, flags); 1324 } 1325 fs->fs = *nfs; /* copy configuration */ 1326 if (s != NULL) 1327 fsk_attach(fs, s); 1328 } while (0); 1329 if (!locked) 1330 DN_BH_WUNLOCK(); 1331 return fs; 1332} 1333 1334/* 1335 * config/reconfig a scheduler and its FIFO variant. 1336 * For !MULTIQUEUE schedulers, also set up the flowset. 1337 * 1338 * On reconfigurations (detected because s->fp is set), 1339 * detach existing flowsets preserving traffic, preserve link, 1340 * and delete the old scheduler creating a new one. 
1341 */ 1342static int 1343config_sched(struct dn_sch *_nsch, struct dn_id *arg) 1344{ 1345 struct dn_schk *s; 1346 struct schk_new_arg a; /* argument for schk_new */ 1347 int i; 1348 struct dn_link p; /* copy of oldlink */ 1349 struct dn_profile *pf = NULL; /* copy of old link profile */ 1350 /* Used to preserv mask parameter */ 1351 struct ipfw_flow_id new_mask; 1352 int new_buckets = 0; 1353 int new_flags = 0; 1354 int pipe_cmd; 1355 int err = ENOMEM; 1356 1357 a.sch = _nsch; 1358 if (a.sch->oid.len != sizeof(*a.sch)) { 1359 D("bad sched len %d", a.sch->oid.len); 1360 return EINVAL; 1361 } 1362 i = a.sch->sched_nr; 1363 if (i <= 0 || i >= DN_MAX_ID) 1364 return EINVAL; 1365 /* make sure we have some buckets */ 1366 if (a.sch->flags & DN_HAVE_MASK) 1367 ipdn_bound_var(&a.sch->buckets, dn_cfg.hash_size, 1368 1, dn_cfg.max_hash_size, "sched buckets"); 1369 /* XXX other sanity checks */ 1370 bzero(&p, sizeof(p)); 1371 1372 pipe_cmd = a.sch->flags & DN_PIPE_CMD; 1373 a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set? 1374 if (pipe_cmd) { 1375 /* Copy mask parameter */ 1376 new_mask = a.sch->sched_mask; 1377 new_buckets = a.sch->buckets; 1378 new_flags = a.sch->flags; 1379 } 1380 DN_BH_WLOCK(); 1381again: /* run twice, for wfq and fifo */ 1382 /* 1383 * lookup the type. If not supplied, use the previous one 1384 * or default to WF2Q+. Otherwise, return an error. 1385 */ 1386 dn_cfg.id++; 1387 a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name); 1388 if (a.fp != NULL) { 1389 /* found. Lookup or create entry */ 1390 s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a); 1391 } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) { 1392 /* No type. search existing s* or retry with WF2Q+ */ 1393 s = dn_ht_find(dn_cfg.schedhash, i, 0, &a); 1394 if (s != NULL) { 1395 a.fp = s->fp; 1396 /* Scheduler exists, skip to FIFO scheduler 1397 * if command was pipe config... 
1398 */ 1399 if (pipe_cmd) 1400 goto next; 1401 } else { 1402 /* New scheduler, create a wf2q+ with no mask 1403 * if command was pipe config... 1404 */ 1405 if (pipe_cmd) { 1406 /* clear mask parameter */ 1407 bzero(&a.sch->sched_mask, sizeof(new_mask)); 1408 a.sch->buckets = 0; 1409 a.sch->flags &= ~DN_HAVE_MASK; 1410 } 1411 a.sch->oid.subtype = DN_SCHED_WF2QP; 1412 goto again; 1413 } 1414 } else { 1415 D("invalid scheduler type %d %s", 1416 a.sch->oid.subtype, a.sch->name); 1417 err = EINVAL; 1418 goto error; 1419 } 1420 /* normalize name and subtype */ 1421 a.sch->oid.subtype = a.fp->type; 1422 bzero(a.sch->name, sizeof(a.sch->name)); 1423 strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name)); 1424 if (s == NULL) { 1425 D("cannot allocate scheduler %d", i); 1426 goto error; 1427 } 1428 /* restore existing link if any */ 1429 if (p.link_nr) { 1430 s->link = p; 1431 if (!pf || pf->link_nr != p.link_nr) { /* no saved value */ 1432 s->profile = NULL; /* XXX maybe not needed */ 1433 } else { 1434 s->profile = malloc(sizeof(struct dn_profile), 1435 M_DUMMYNET, M_NOWAIT | M_ZERO); 1436 if (s->profile == NULL) { 1437 D("cannot allocate profile"); 1438 goto error; //XXX 1439 } 1440 bcopy(pf, s->profile, sizeof(*pf)); 1441 } 1442 } 1443 p.link_nr = 0; 1444 if (s->fp == NULL) { 1445 DX(2, "sched %d new type %s", i, a.fp->name); 1446 } else if (s->fp != a.fp || 1447 bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) { 1448 /* already existing. 
*/ 1449 DX(2, "sched %d type changed from %s to %s", 1450 i, s->fp->name, a.fp->name); 1451 DX(4, " type/sub %d/%d -> %d/%d", 1452 s->sch.oid.type, s->sch.oid.subtype, 1453 a.sch->oid.type, a.sch->oid.subtype); 1454 if (s->link.link_nr == 0) 1455 D("XXX WARNING link 0 for sched %d", i); 1456 p = s->link; /* preserve link */ 1457 if (s->profile) {/* preserve profile */ 1458 if (!pf) 1459 pf = malloc(sizeof(*pf), 1460 M_DUMMYNET, M_NOWAIT | M_ZERO); 1461 if (pf) /* XXX should issue a warning otherwise */ 1462 bcopy(s->profile, pf, sizeof(*pf)); 1463 } 1464 /* remove from the hash */ 1465 dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL); 1466 /* Detach flowsets, preserve queues. */ 1467 // schk_delete_cb(s, NULL); 1468 // XXX temporarily, kill queues 1469 schk_delete_cb(s, (void *)DN_DESTROY); 1470 goto again; 1471 } else { 1472 DX(4, "sched %d unchanged type %s", i, a.fp->name); 1473 } 1474 /* complete initialization */ 1475 s->sch = *a.sch; 1476 s->fp = a.fp; 1477 s->cfg = arg; 1478 // XXX schk_reset_credit(s); 1479 /* create the internal flowset if needed, 1480 * trying to reuse existing ones if available 1481 */ 1482 if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) { 1483 s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL); 1484 if (!s->fs) { 1485 struct dn_fs fs; 1486 bzero(&fs, sizeof(fs)); 1487 set_oid(&fs.oid, DN_FS, sizeof(fs)); 1488 fs.fs_nr = i + DN_MAX_ID; 1489 fs.sched_nr = i; 1490 s->fs = config_fs(&fs, NULL, 1 /* locked */); 1491 } 1492 if (!s->fs) { 1493 schk_delete_cb(s, (void *)DN_DESTROY); 1494 D("error creating internal fs for %d", i); 1495 goto error; 1496 } 1497 } 1498 /* call init function after the flowset is created */ 1499 if (s->fp->config) 1500 s->fp->config(s); 1501 update_fs(s); 1502next: 1503 if (i < DN_MAX_ID) { /* now configure the FIFO instance */ 1504 i += DN_MAX_ID; 1505 if (pipe_cmd) { 1506 /* Restore mask parameter for FIFO */ 1507 a.sch->sched_mask = new_mask; 1508 a.sch->buckets = new_buckets; 1509 a.sch->flags = new_flags; 1510 } 
else { 1511 /* sched config shouldn't modify the FIFO scheduler */ 1512 if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) { 1513 /* FIFO already exist, don't touch it */ 1514 err = 0; /* and this is not an error */ 1515 goto error; 1516 } 1517 } 1518 a.sch->sched_nr = i; 1519 a.sch->oid.subtype = DN_SCHED_FIFO; 1520 bzero(a.sch->name, sizeof(a.sch->name)); 1521 goto again; 1522 } 1523 err = 0; 1524error: 1525 DN_BH_WUNLOCK(); 1526 if (pf) 1527 free(pf, M_DUMMYNET); 1528 return err; 1529} 1530 1531/* 1532 * attach a profile to a link 1533 */ 1534static int 1535config_profile(struct dn_profile *pf, struct dn_id *arg) 1536{ 1537 struct dn_schk *s; 1538 int i, olen, err = 0; 1539 1540 if (pf->oid.len < sizeof(*pf)) { 1541 D("short profile len %d", pf->oid.len); 1542 return EINVAL; 1543 } 1544 i = pf->link_nr; 1545 if (i <= 0 || i >= DN_MAX_ID) 1546 return EINVAL; 1547 /* XXX other sanity checks */ 1548 DN_BH_WLOCK(); 1549 for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
|
1654 case DN_CMD_CONFIG: /* simply a header */ 1655 break; 1656 1657 case DN_CMD_DELETE: 1658 /* the argument is in the first uintptr_t after o */ 1659 a = (uintptr_t *)(o+1); 1660 if (o->len < sizeof(*o) + sizeof(*a)) { 1661 err = EINVAL; 1662 break; 1663 } 1664 switch (o->subtype) { 1665 case DN_LINK: 1666 /* delete base and derived schedulers */ 1667 DN_BH_WLOCK(); 1668 err = delete_schk(*a); 1669 err2 = delete_schk(*a + DN_MAX_ID); 1670 DN_BH_WUNLOCK(); 1671 if (!err) 1672 err = err2; 1673 break; 1674 1675 default: 1676 D("invalid delete type %d", 1677 o->subtype); 1678 err = EINVAL; 1679 break; 1680 1681 case DN_FS: 1682 err = (*a <1 || *a >= DN_MAX_ID) ? 1683 EINVAL : delete_fs(*a, 0) ; 1684 break; 1685 } 1686 break; 1687 1688 case DN_CMD_FLUSH: 1689 DN_BH_WLOCK(); 1690 dummynet_flush(); 1691 DN_BH_WUNLOCK(); 1692 break; 1693 case DN_TEXT: /* store argument the next block */ 1694 prev = NULL; 1695 arg = o; 1696 break; 1697 case DN_LINK: 1698 err = config_link((struct dn_link *)o, arg); 1699 break; 1700 case DN_PROFILE: 1701 err = config_profile((struct dn_profile *)o, arg); 1702 break; 1703 case DN_SCH: 1704 err = config_sched((struct dn_sch *)o, arg); 1705 break; 1706 case DN_FS: 1707 err = (NULL==config_fs((struct dn_fs *)o, arg, 0)); 1708 break; 1709 } 1710 if (prev) 1711 arg = NULL; 1712 if (err != 0) 1713 break; 1714 } 1715 return err; 1716} 1717 1718static int 1719compute_space(struct dn_id *cmd, struct copy_args *a) 1720{ 1721 int x = 0, need = 0; 1722 int profile_size = sizeof(struct dn_profile) - 1723 ED_MAX_SAMPLES_NO*sizeof(int); 1724 1725 /* NOTE about compute space: 1726 * NP = dn_cfg.schk_count 1727 * NSI = dn_cfg.si_count 1728 * NF = dn_cfg.fsk_count 1729 * NQ = dn_cfg.queue_count 1730 * - ipfw pipe show 1731 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 1732 * link, scheduler template, flowset 1733 * integrated in scheduler and header 1734 * for flowset list 1735 * (NSI)*(dn_flow) all scheduler instance (includes 1736 * the 
queue instance) 1737 * - ipfw sched show 1738 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 1739 * link, scheduler template, flowset 1740 * integrated in scheduler and header 1741 * for flowset list 1742 * (NSI * dn_flow) all scheduler instances 1743 * (NF * sizeof(uint_32)) space for flowset list linked to scheduler 1744 * (NQ * dn_queue) all queue [XXXfor now not listed] 1745 * - ipfw queue show 1746 * (NF * dn_fs) all flowset 1747 * (NQ * dn_queue) all queues 1748 */ 1749 switch (cmd->subtype) { 1750 default: 1751 return -1; 1752 /* XXX where do LINK and SCH differ ? */ 1753 /* 'ipfw sched show' could list all queues associated to 1754 * a scheduler. This feature for now is disabled 1755 */ 1756 case DN_LINK: /* pipe show */ 1757 x = DN_C_LINK | DN_C_SCH | DN_C_FLOW; 1758 need += dn_cfg.schk_count * 1759 (sizeof(struct dn_fs) + profile_size) / 2; 1760 need += dn_cfg.fsk_count * sizeof(uint32_t); 1761 break; 1762 case DN_SCH: /* sched show */ 1763 need += dn_cfg.schk_count * 1764 (sizeof(struct dn_fs) + profile_size) / 2; 1765 need += dn_cfg.fsk_count * sizeof(uint32_t); 1766 x = DN_C_SCH | DN_C_LINK | DN_C_FLOW; 1767 break; 1768 case DN_FS: /* queue show */ 1769 x = DN_C_FS | DN_C_QUEUE; 1770 break; 1771 case DN_GET_COMPAT: /* compatibility mode */ 1772 need = dn_compat_calc_size(); 1773 break; 1774 } 1775 a->flags = x; 1776 if (x & DN_C_SCH) { 1777 need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2; 1778 /* NOT also, each fs might be attached to a sched */ 1779 need += dn_cfg.schk_count * sizeof(struct dn_id) / 2; 1780 } 1781 if (x & DN_C_FS) 1782 need += dn_cfg.fsk_count * sizeof(struct dn_fs); 1783 if (x & DN_C_LINK) { 1784 need += dn_cfg.schk_count * sizeof(struct dn_link) / 2; 1785 } 1786 /* 1787 * When exporting a queue to userland, only pass up the 1788 * struct dn_flow, which is the only visible part. 
1789 */ 1790 1791 if (x & DN_C_QUEUE) 1792 need += dn_cfg.queue_count * sizeof(struct dn_flow); 1793 if (x & DN_C_FLOW) 1794 need += dn_cfg.si_count * (sizeof(struct dn_flow)); 1795 return need; 1796} 1797 1798/* 1799 * If compat != NULL dummynet_get is called in compatibility mode. 1800 * *compat will be the pointer to the buffer to pass to ipfw 1801 */ 1802int 1803dummynet_get(struct sockopt *sopt, void **compat) 1804{ 1805 int have, i, need, error; 1806 char *start = NULL, *buf; 1807 size_t sopt_valsize; 1808 struct dn_id *cmd; 1809 struct copy_args a; 1810 struct copy_range r; 1811 int l = sizeof(struct dn_id); 1812 1813 bzero(&a, sizeof(a)); 1814 bzero(&r, sizeof(r)); 1815 1816 /* save and restore original sopt_valsize around copyin */ 1817 sopt_valsize = sopt->sopt_valsize; 1818 1819 cmd = &r.o; 1820 1821 if (!compat) { 1822 /* copy at least an oid, and possibly a full object */ 1823 error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); 1824 sopt->sopt_valsize = sopt_valsize; 1825 if (error) 1826 goto done; 1827 l = cmd->len; 1828#ifdef EMULATE_SYSCTL 1829 /* sysctl emulation. 
*/ 1830 if (cmd->type == DN_SYSCTL_GET) 1831 return kesysctl_emu_get(sopt); 1832#endif 1833 if (l > sizeof(r)) { 1834 /* request larger than default, allocate buffer */ 1835 cmd = malloc(l, M_DUMMYNET, M_WAIT); 1836 if (cmd == NULL) 1837 return ENOMEM; //XXX 1838 error = sooptcopyin(sopt, cmd, l, l); 1839 sopt->sopt_valsize = sopt_valsize; 1840 if (error) 1841 goto done; 1842 } 1843 } else { /* compatibility */ 1844 error = 0; 1845 cmd->type = DN_CMD_GET; 1846 cmd->len = sizeof(struct dn_id); 1847 cmd->subtype = DN_GET_COMPAT; 1848 // cmd->id = sopt_valsize; 1849 D("compatibility mode"); 1850 } 1851 a.extra = (struct copy_range *)cmd; 1852 if (cmd->len == sizeof(*cmd)) { /* no range, create a default */ 1853 uint32_t *rp = (uint32_t *)(cmd + 1); 1854 cmd->len += 2* sizeof(uint32_t); 1855 rp[0] = 1; 1856 rp[1] = DN_MAX_ID - 1; 1857 if (cmd->subtype == DN_LINK) { 1858 rp[0] += DN_MAX_ID; 1859 rp[1] += DN_MAX_ID; 1860 } 1861 } 1862 /* Count space (under lock) and allocate (outside lock). 1863 * Exit with lock held if we manage to get enough buffer. 1864 * Try a few times then give up. 
1865 */ 1866 for (have = 0, i = 0; i < 10; i++) { 1867 DN_BH_WLOCK(); 1868 need = compute_space(cmd, &a); 1869 1870 /* if there is a range, ignore value from compute_space() */ 1871 if (l > sizeof(*cmd)) 1872 need = sopt_valsize - sizeof(*cmd); 1873 1874 if (need < 0) { 1875 DN_BH_WUNLOCK(); 1876 error = EINVAL; 1877 goto done; 1878 } 1879 need += sizeof(*cmd); 1880 cmd->id = need; 1881 if (have >= need) 1882 break; 1883 1884 DN_BH_WUNLOCK(); 1885 if (start) 1886 free(start, M_DUMMYNET); 1887 start = NULL; 1888 if (need > sopt_valsize) 1889 break; 1890 1891 have = need; 1892 start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO); 1893 if (start == NULL) { 1894 error = ENOMEM; 1895 goto done; 1896 } 1897 } 1898 1899 if (start == NULL) { 1900 if (compat) { 1901 *compat = NULL; 1902 error = 1; // XXX 1903 } else { 1904 error = sooptcopyout(sopt, cmd, sizeof(*cmd)); 1905 } 1906 goto done; 1907 } 1908 ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, " 1909 "%d:%d si %d, %d:%d queues %d", 1910 dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH, 1911 dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK, 1912 dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS, 1913 dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I, 1914 dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE); 1915 sopt->sopt_valsize = sopt_valsize; 1916 a.type = cmd->subtype; 1917 1918 if (compat == NULL) { 1919 bcopy(cmd, start, sizeof(*cmd)); 1920 ((struct dn_id*)(start))->len = sizeof(struct dn_id); 1921 buf = start + sizeof(*cmd); 1922 } else 1923 buf = start; 1924 a.start = &buf; 1925 a.end = start + have; 1926 /* start copying other objects */ 1927 if (compat) { 1928 a.type = DN_COMPAT_PIPE; 1929 dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a); 1930 a.type = DN_COMPAT_QUEUE; 1931 dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a); 1932 } else if (a.type == DN_FS) { 1933 dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a); 1934 } else { 1935 dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a); 
1936 } 1937 DN_BH_WUNLOCK(); 1938 1939 if (compat) { 1940 *compat = start; 1941 sopt->sopt_valsize = buf - start; 1942 /* free() is done by ip_dummynet_compat() */ 1943 start = NULL; //XXX hack 1944 } else { 1945 error = sooptcopyout(sopt, start, buf - start); 1946 } 1947done: 1948 if (cmd && cmd != &r.o) 1949 free(cmd, M_DUMMYNET); 1950 if (start) 1951 free(start, M_DUMMYNET); 1952 return error; 1953} 1954 1955/* Callback called on scheduler instance to delete it if idle */ 1956static int 1957drain_scheduler_cb(void *_si, void *arg) 1958{ 1959 struct dn_sch_inst *si = _si; 1960 1961 if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL) 1962 return 0; 1963 1964 if (si->sched->fp->flags & DN_MULTIQUEUE) { 1965 if (si->q_count == 0) 1966 return si_destroy(si, NULL); 1967 else 1968 return 0; 1969 } else { /* !DN_MULTIQUEUE */ 1970 if ((si+1)->ni.length == 0) 1971 return si_destroy(si, NULL); 1972 else 1973 return 0; 1974 } 1975 return 0; /* unreachable */ 1976} 1977 1978/* Callback called on scheduler to check if it has instances */ 1979static int 1980drain_scheduler_sch_cb(void *_s, void *arg) 1981{ 1982 struct dn_schk *s = _s; 1983 1984 if (s->sch.flags & DN_HAVE_MASK) { 1985 dn_ht_scan_bucket(s->siht, &s->drain_bucket, 1986 drain_scheduler_cb, NULL); 1987 s->drain_bucket++; 1988 } else { 1989 if (s->siht) { 1990 if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL) 1991 s->siht = NULL; 1992 } 1993 } 1994 return 0; 1995} 1996 1997/* Called every tick, try to delete a 'bucket' of scheduler */ 1998void 1999dn_drain_scheduler(void) 2000{ 2001 dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch, 2002 drain_scheduler_sch_cb, NULL); 2003 dn_cfg.drain_sch++; 2004} 2005 2006/* Callback called on queue to delete if it is idle */ 2007static int 2008drain_queue_cb(void *_q, void *arg) 2009{ 2010 struct dn_queue *q = _q; 2011 2012 if (q->ni.length == 0) { 2013 dn_delete_queue(q, DN_DESTROY); 2014 return DNHT_SCAN_DEL; /* queue is deleted */ 2015 } 2016 2017 return 0; 
/* queue isn't deleted */ 2018} 2019 2020/* Callback called on flowset used to check if it has queues */ 2021static int 2022drain_queue_fs_cb(void *_fs, void *arg) 2023{ 2024 struct dn_fsk *fs = _fs; 2025 2026 if (fs->fs.flags & DN_QHT_HASH) { 2027 /* Flowset has a hash table for queues */ 2028 dn_ht_scan_bucket(fs->qht, &fs->drain_bucket, 2029 drain_queue_cb, NULL); 2030 fs->drain_bucket++; 2031 } else { 2032 /* No hash table for this flowset, null the pointer 2033 * if the queue is deleted 2034 */ 2035 if (fs->qht) { 2036 if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL) 2037 fs->qht = NULL; 2038 } 2039 } 2040 return 0; 2041} 2042 2043/* Called every tick, try to delete a 'bucket' of queue */ 2044void 2045dn_drain_queue(void) 2046{ 2047 /* scan a bucket of flowset */ 2048 dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs, 2049 drain_queue_fs_cb, NULL); 2050 dn_cfg.drain_fs++; 2051} 2052 2053/* 2054 * Handler for the various dummynet socket options 2055 */ 2056static int 2057ip_dn_ctl(struct sockopt *sopt) 2058{ 2059 void *p = NULL; 2060 int error, l; 2061 2062 error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET); 2063 if (error) 2064 return (error); 2065 2066 /* Disallow sets in really-really secure mode. 
*/ 2067 if (sopt->sopt_dir == SOPT_SET) { 2068 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); 2069 if (error) 2070 return (error); 2071 } 2072 2073 switch (sopt->sopt_name) { 2074 default : 2075 D("dummynet: unknown option %d", sopt->sopt_name); 2076 error = EINVAL; 2077 break; 2078 2079 case IP_DUMMYNET_FLUSH: 2080 case IP_DUMMYNET_CONFIGURE: 2081 case IP_DUMMYNET_DEL: /* remove a pipe or queue */ 2082 case IP_DUMMYNET_GET: 2083 D("dummynet: compat option %d", sopt->sopt_name); 2084 error = ip_dummynet_compat(sopt); 2085 break; 2086 2087 case IP_DUMMYNET3 : 2088 if (sopt->sopt_dir == SOPT_GET) { 2089 error = dummynet_get(sopt, NULL); 2090 break; 2091 } 2092 l = sopt->sopt_valsize; 2093 if (l < sizeof(struct dn_id) || l > 12000) { 2094 D("argument len %d invalid", l); 2095 break; 2096 } 2097 p = malloc(l, M_TEMP, M_WAITOK); // XXX can it fail ? 2098 error = sooptcopyin(sopt, p, l, l); 2099 if (error) 2100 break ; 2101 error = do_config(p, l); 2102 break; 2103 } 2104 2105 if (p != NULL) 2106 free(p, M_TEMP); 2107 2108 return error ; 2109} 2110 2111 2112static void 2113ip_dn_init(void) 2114{ 2115 if (dn_cfg.init_done) 2116 return; 2117 printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet); 2118 dn_cfg.init_done = 1; 2119 /* Set defaults here. MSVC does not accept initializers, 2120 * and this is also useful for vimages 2121 */ 2122 /* queue limits */ 2123 dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */ 2124 dn_cfg.byte_limit = 1024 * 1024; 2125 dn_cfg.expire = 1; 2126 2127 /* RED parameters */ 2128 dn_cfg.red_lookup_depth = 256; /* default lookup table depth */ 2129 dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */ 2130 dn_cfg.red_max_pkt_size = 1500; /* default max packet size */ 2131 2132 /* hash tables */ 2133 dn_cfg.max_hash_size = 1024; /* max in the hash tables */ 2134 dn_cfg.hash_size = 64; /* default hash size */ 2135 2136 /* create hash tables for schedulers and flowsets. 
2137 * In both we search by key and by pointer. 2138 */
/* NOTE(review): everything from here to the "end of file" marker below appears a second time later in this chunk (after the '|' separator) — looks like a duplicated extraction of the same file tail; reconcile against one authoritative copy of sys/netinet/ipfw/ip_dummynet.c. */
 2139 dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size, 2140 offsetof(struct dn_schk, schk_next), 2141 schk_hash, schk_match, schk_new); 2142 dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size, 2143 offsetof(struct dn_fsk, fsk_next), 2144 fsk_hash, fsk_match, fsk_new); 2145 2146 /* bucket index to drain object */ 2147 dn_cfg.drain_fs = 0; 2148 dn_cfg.drain_sch = 0; 2149 2150 heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id)); 2151 SLIST_INIT(&dn_cfg.fsu); 2152 SLIST_INIT(&dn_cfg.schedlist); 2153 2154 DN_LOCK_INIT(); 2155 2156 TASK_INIT(&dn_task, 0, dummynet_task, curvnet); 2157 dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT, 2158 taskqueue_thread_enqueue, &dn_tq); 2159 taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); 2160 2161 callout_init(&dn_timeout, CALLOUT_MPSAFE); 2162 callout_reset(&dn_timeout, 1, dummynet, NULL); 2163 2164 /* Initialize curr_time adjustment mechanics. */ 2165 getmicrouptime(&dn_cfg.prev_t); 2166} 2167
/* ip_dn_destroy(): undo ip_dn_init() — stop the tick callout, flush all objects under the lock, drain and free the taskqueue, free both hash tables and the event heap, destroy the lock. When 'last' is set, also clear the ipfw ctl/io hook pointers. Compiled only for KLD builds. */
 2168#ifdef KLD_MODULE 2169static void 2170ip_dn_destroy(int last) 2171{ 2172 callout_drain(&dn_timeout); 2173 2174 DN_BH_WLOCK(); 2175 if (last) { 2176 printf("%s removing last instance\n", __FUNCTION__); 2177 ip_dn_ctl_ptr = NULL; 2178 ip_dn_io_ptr = NULL; 2179 } 2180 2181 dummynet_flush(); 2182 DN_BH_WUNLOCK(); 2183 taskqueue_drain(dn_tq, &dn_task); 2184 taskqueue_free(dn_tq); 2185 2186 dn_ht_free(dn_cfg.schedhash, 0); 2187 dn_ht_free(dn_cfg.fshash, 0); 2188 heap_free(&dn_cfg.evheap); 2189 2190 DN_LOCK_DESTROY(); 2191} 2192#endif /* KLD_MODULE */ 2193
/* dummynet_modevent(): module event handler. MOD_LOAD initializes state and installs the ipfw ctl/io hooks (EEXIST if already present); MOD_UNLOAD tears down only in KLD builds. */
 2194static int 2195dummynet_modevent(module_t mod, int type, void *data) 2196{ 2197 2198 if (type == MOD_LOAD) { 2199 if (ip_dn_io_ptr) { 2200 printf("DUMMYNET already loaded\n"); 2201 return EEXIST ; 2202 } 2203 ip_dn_init(); 2204 ip_dn_ctl_ptr = ip_dn_ctl; 2205 ip_dn_io_ptr = dummynet_io; 2206 return 0; 2207 } else if (type == MOD_UNLOAD) { 2208#if !defined(KLD_MODULE) 2209 printf("dummynet statically compiled, cannot 
unload\n"); 2210 return EINVAL ; 2211#else 2212 ip_dn_destroy(1 /* last */); 2213 return 0; 2214#endif 2215 } else 2216 return EOPNOTSUPP; 2217} 2218 2219/* modevent helpers for the modules */
/* load_dn_sched(): register a scheduler algorithm on dn_cfg.schedlist; returns 1 (nonzero = failure for modevent) on NULL arg, missing enqueue/dequeue, or duplicate name, 0 on success. */
 2220static int 2221load_dn_sched(struct dn_alg *d) 2222{ 2223 struct dn_alg *s; 2224 2225 if (d == NULL) 2226 return 1; /* error */ 2227 ip_dn_init(); /* just in case, we need the lock */ 2228 2229 /* Check that mandatory funcs exists */ 2230 if (d->enqueue == NULL || d->dequeue == NULL) { 2231 D("missing enqueue or dequeue for %s", d->name); 2232 return 1; 2233 } 2234 2235 /* Search if scheduler already exists */ 2236 DN_BH_WLOCK(); 2237 SLIST_FOREACH(s, &dn_cfg.schedlist, next) { 2238 if (strcmp(s->name, d->name) == 0) { 2239 D("%s already loaded", d->name); 2240 break; /* scheduler already exists */ 2241 } 2242 } 2243 if (s == NULL) 2244 SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next); 2245 DN_BH_WUNLOCK(); 2246 D("dn_sched %s %sloaded", d->name, s ? "not ":""); 2247 return s ? 1 : 0; 2248} 2249
/* unload_dn_sched(): remove a scheduler algorithm; EINVAL if not on the list, EBUSY if still referenced (ref_count != 0), 0 on successful removal. */
 2250static int 2251unload_dn_sched(struct dn_alg *s) 2252{ 2253 struct dn_alg *tmp, *r; 2254 int err = EINVAL; 2255 2256 D("called for %s", s->name); 2257 2258 DN_BH_WLOCK(); 2259 SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) { 2260 if (strcmp(s->name, r->name) != 0) 2261 continue; 2262 D("ref_count = %d", r->ref_count); 2263 err = (r->ref_count != 0) ? EBUSY : 0; 2264 if (err == 0) 2265 SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next); 2266 break; 2267 } 2268 DN_BH_WUNLOCK(); 2269 D("dn_sched %s %sunloaded", s->name, err ? 
"not ":""); 2270 return err; 2271} 2272 2273int 2274dn_sched_modevent(module_t mod, int cmd, void *arg) 2275{ 2276 struct dn_alg *sch = arg; 2277 2278 if (cmd == MOD_LOAD) 2279 return load_dn_sched(sch); 2280 else if (cmd == MOD_UNLOAD) 2281 return unload_dn_sched(sch); 2282 else 2283 return EINVAL; 2284} 2285 2286static moduledata_t dummynet_mod = { 2287 "dummynet", dummynet_modevent, NULL 2288}; 2289 2290#define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN 2291#define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ 2292DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); 2293MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); 2294MODULE_VERSION(dummynet, 1); 2295 2296/* 2297 * Starting up. Done in order after dummynet_modevent() has been called. 2298 * VNET_SYSINIT is also called for each existing vnet and each new vnet. 2299 */ 2300//VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL); 2301 2302/* 2303 * Shutdown handlers up shop. These are done in REVERSE ORDER, but still 2304 * after dummynet_modevent() has been called. Not called on reboot. 2305 * VNET_SYSUNINIT is also called for each exiting vnet as it exits. 2306 * or when the module is unloaded. 2307 */ 2308//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL); 2309 2310/* end of file */
| 1656 case DN_CMD_CONFIG: /* simply a header */ 1657 break; 1658 1659 case DN_CMD_DELETE: 1660 /* the argument is in the first uintptr_t after o */ 1661 a = (uintptr_t *)(o+1); 1662 if (o->len < sizeof(*o) + sizeof(*a)) { 1663 err = EINVAL; 1664 break; 1665 } 1666 switch (o->subtype) { 1667 case DN_LINK: 1668 /* delete base and derived schedulers */ 1669 DN_BH_WLOCK(); 1670 err = delete_schk(*a); 1671 err2 = delete_schk(*a + DN_MAX_ID); 1672 DN_BH_WUNLOCK(); 1673 if (!err) 1674 err = err2; 1675 break; 1676 1677 default: 1678 D("invalid delete type %d", 1679 o->subtype); 1680 err = EINVAL; 1681 break; 1682 
/* NOTE(review): this 'default' arm precedes 'case DN_FS:'; legal C (every arm breaks, so DN_FS is still reachable) but conventionally default goes last — reorder when convenient. */
 1683 case DN_FS: 1684 err = (*a <1 || *a >= DN_MAX_ID) ? 1685 EINVAL : delete_fs(*a, 0) ; 1686 break; 1687 } 1688 break; 1689 1690 case DN_CMD_FLUSH: 1691 DN_BH_WLOCK(); 1692 dummynet_flush(); 1693 DN_BH_WUNLOCK(); 1694 break; 1695 case DN_TEXT: /* store argument the next block */ 1696 prev = NULL; 1697 arg = o; 1698 break; 1699 case DN_LINK: 1700 err = config_link((struct dn_link *)o, arg); 1701 break; 1702 case DN_PROFILE: 1703 err = config_profile((struct dn_profile *)o, arg); 1704 break; 1705 case DN_SCH: 1706 err = config_sched((struct dn_sch *)o, arg); 1707 break; 1708 case DN_FS: 1709 err = (NULL==config_fs((struct dn_fs *)o, arg, 0)); 1710 break; 1711 } 1712 if (prev) 1713 arg = NULL; 1714 if (err != 0) 1715 break; 1716 } 1717 return err; 1718} 1719 
/* compute_space(): estimate (in bytes) the buffer needed to export objects for a GET request, and set a->flags to the DN_C_* classes of objects to copy. Returns -1 for an unknown subtype. */
 1720static int 1721compute_space(struct dn_id *cmd, struct copy_args *a) 1722{ 1723 int x = 0, need = 0; 1724 int profile_size = sizeof(struct dn_profile) - 1725 ED_MAX_SAMPLES_NO*sizeof(int); 1726 1727 /* NOTE about compute space: 1728 * NP = dn_cfg.schk_count 1729 * NSI = dn_cfg.si_count 1730 * NF = dn_cfg.fsk_count 1731 * NQ = dn_cfg.queue_count 1732 * - ipfw pipe show 1733 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 1734 * link, scheduler template, flowset 1735 * integrated in scheduler and header 1736 * for flowset list 1737 * (NSI)*(dn_flow) all scheduler instance (includes 1738 * the 
queue instance) 1739 * - ipfw sched show 1740 * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler 1741 * link, scheduler template, flowset 1742 * integrated in scheduler and header 1743 * for flowset list 1744 * (NSI * dn_flow) all scheduler instances 1745 * (NF * sizeof(uint_32)) space for flowset list linked to scheduler 1746 * (NQ * dn_queue) all queue [XXXfor now not listed] 1747 * - ipfw queue show 1748 * (NF * dn_fs) all flowset 1749 * (NQ * dn_queue) all queues 1750 */ 1751 switch (cmd->subtype) { 1752 default: 1753 return -1; 1754 /* XXX where do LINK and SCH differ ? */ 1755 /* 'ipfw sched show' could list all queues associated to 1756 * a scheduler. This feature for now is disabled 1757 */ 1758 case DN_LINK: /* pipe show */ 1759 x = DN_C_LINK | DN_C_SCH | DN_C_FLOW; 1760 need += dn_cfg.schk_count * 1761 (sizeof(struct dn_fs) + profile_size) / 2; 1762 need += dn_cfg.fsk_count * sizeof(uint32_t); 1763 break; 1764 case DN_SCH: /* sched show */ 1765 need += dn_cfg.schk_count * 1766 (sizeof(struct dn_fs) + profile_size) / 2; 1767 need += dn_cfg.fsk_count * sizeof(uint32_t); 1768 x = DN_C_SCH | DN_C_LINK | DN_C_FLOW; 1769 break; 1770 case DN_FS: /* queue show */ 1771 x = DN_C_FS | DN_C_QUEUE; 1772 break; 1773 case DN_GET_COMPAT: /* compatibility mode */ 1774 need = dn_compat_calc_size(); 1775 break; 1776 } 1777 a->flags = x; 1778 if (x & DN_C_SCH) { 1779 need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2; 1780 /* NOT also, each fs might be attached to a sched */ 1781 need += dn_cfg.schk_count * sizeof(struct dn_id) / 2; 1782 } 1783 if (x & DN_C_FS) 1784 need += dn_cfg.fsk_count * sizeof(struct dn_fs); 1785 if (x & DN_C_LINK) { 1786 need += dn_cfg.schk_count * sizeof(struct dn_link) / 2; 1787 } 1788 /* 1789 * When exporting a queue to userland, only pass up the 1790 * struct dn_flow, which is the only visible part. 
1791 */ 1792 1793 if (x & DN_C_QUEUE) 1794 need += dn_cfg.queue_count * sizeof(struct dn_flow); 1795 if (x & DN_C_FLOW) 1796 need += dn_cfg.si_count * (sizeof(struct dn_flow)); 1797 return need; 1798} 1799 1800/* 1801 * If compat != NULL dummynet_get is called in compatibility mode. 1802 * *compat will be the pointer to the buffer to pass to ipfw 1803 */ 1804int 1805dummynet_get(struct sockopt *sopt, void **compat) 1806{ 1807 int have, i, need, error; 1808 char *start = NULL, *buf; 1809 size_t sopt_valsize; 1810 struct dn_id *cmd; 1811 struct copy_args a; 1812 struct copy_range r; 1813 int l = sizeof(struct dn_id); 1814 1815 bzero(&a, sizeof(a)); 1816 bzero(&r, sizeof(r)); 1817 1818 /* save and restore original sopt_valsize around copyin */ 1819 sopt_valsize = sopt->sopt_valsize; 1820 1821 cmd = &r.o; 1822 1823 if (!compat) { 1824 /* copy at least an oid, and possibly a full object */ 1825 error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); 1826 sopt->sopt_valsize = sopt_valsize; 1827 if (error) 1828 goto done; 1829 l = cmd->len; 1830#ifdef EMULATE_SYSCTL 1831 /* sysctl emulation. 
*/ 1832 if (cmd->type == DN_SYSCTL_GET) 1833 return kesysctl_emu_get(sopt); 1834#endif 1835 if (l > sizeof(r)) { 1836 /* request larger than default, allocate buffer */ 1837 cmd = malloc(l, M_DUMMYNET, M_WAIT); 
/* NOTE(review): malloc(9) expects M_WAITOK/M_NOWAIT; M_WAIT is the legacy mbuf wait flag. Upstream FreeBSD uses M_WAITOK here, which never returns NULL — making the check just below dead code. Verify and fix. */
 1838 if (cmd == NULL) 1839 return ENOMEM; //XXX 1840 error = sooptcopyin(sopt, cmd, l, l); 1841 sopt->sopt_valsize = sopt_valsize; 1842 if (error) 1843 goto done; 1844 } 1845 } else { /* compatibility */ 1846 error = 0; 1847 cmd->type = DN_CMD_GET; 1848 cmd->len = sizeof(struct dn_id); 1849 cmd->subtype = DN_GET_COMPAT; 1850 // cmd->id = sopt_valsize; 1851 D("compatibility mode"); 1852 } 1853 a.extra = (struct copy_range *)cmd; 1854 if (cmd->len == sizeof(*cmd)) { /* no range, create a default */ 1855 uint32_t *rp = (uint32_t *)(cmd + 1); 1856 cmd->len += 2* sizeof(uint32_t); 1857 rp[0] = 1; 1858 rp[1] = DN_MAX_ID - 1; 1859 if (cmd->subtype == DN_LINK) { 1860 rp[0] += DN_MAX_ID; 1861 rp[1] += DN_MAX_ID; 1862 } 1863 } 1864 /* Count space (under lock) and allocate (outside lock). 1865 * Exit with lock held if we manage to get enough buffer. 1866 * Try a few times then give up. 
1867 */ 1868 for (have = 0, i = 0; i < 10; i++) { 1869 DN_BH_WLOCK(); 1870 need = compute_space(cmd, &a); 1871 1872 /* if there is a range, ignore value from compute_space() */ 1873 if (l > sizeof(*cmd)) 1874 need = sopt_valsize - sizeof(*cmd); 1875 1876 if (need < 0) { 1877 DN_BH_WUNLOCK(); 1878 error = EINVAL; 1879 goto done; 1880 } 1881 need += sizeof(*cmd); 1882 cmd->id = need; 1883 if (have >= need) 1884 break; 1885 1886 DN_BH_WUNLOCK(); 1887 if (start) 1888 free(start, M_DUMMYNET); 1889 start = NULL; 1890 if (need > sopt_valsize) 1891 break; 1892 1893 have = need; 1894 start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO); 
/* NOTE(review): with M_WAITOK this allocation cannot return NULL, so the check below should be unreachable — harmless but dead. */
 1895 if (start == NULL) { 1896 error = ENOMEM; 1897 goto done; 1898 } 1899 } 1900 1901 if (start == NULL) { 1902 if (compat) { 1903 *compat = NULL; 1904 error = 1; // XXX 1905 } else { 1906 error = sooptcopyout(sopt, cmd, sizeof(*cmd)); 1907 } 1908 goto done; 1909 } 1910 ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, " 1911 "%d:%d si %d, %d:%d queues %d", 1912 dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH, 1913 dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK, 1914 dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS, 1915 dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I, 1916 dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE); 1917 sopt->sopt_valsize = sopt_valsize; 1918 a.type = cmd->subtype; 1919 1920 if (compat == NULL) { 1921 bcopy(cmd, start, sizeof(*cmd)); 1922 ((struct dn_id*)(start))->len = sizeof(struct dn_id); 1923 buf = start + sizeof(*cmd); 1924 } else 1925 buf = start; 1926 a.start = &buf; 1927 a.end = start + have; 1928 /* start copying other objects */ 1929 if (compat) { 1930 a.type = DN_COMPAT_PIPE; 1931 dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a); 1932 a.type = DN_COMPAT_QUEUE; 1933 dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a); 1934 } else if (a.type == DN_FS) { 1935 dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a); 1936 } else { 1937 dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a); 
1938 } 1939 DN_BH_WUNLOCK(); 1940 1941 if (compat) { 1942 *compat = start; 1943 sopt->sopt_valsize = buf - start; 1944 /* free() is done by ip_dummynet_compat() */ 1945 start = NULL; //XXX hack 1946 } else { 1947 error = sooptcopyout(sopt, start, buf - start); 1948 } 1949done: 1950 if (cmd && cmd != &r.o) 1951 free(cmd, M_DUMMYNET); 1952 if (start) 1953 free(start, M_DUMMYNET); 1954 return error; 1955} 1956 1957/* Callback called on scheduler instance to delete it if idle */ 1958static int 1959drain_scheduler_cb(void *_si, void *arg) 1960{ 1961 struct dn_sch_inst *si = _si; 1962 1963 if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL) 1964 return 0; 1965 1966 if (si->sched->fp->flags & DN_MULTIQUEUE) { 1967 if (si->q_count == 0) 1968 return si_destroy(si, NULL); 1969 else 1970 return 0; 1971 } else { /* !DN_MULTIQUEUE */ 1972 if ((si+1)->ni.length == 0) 1973 return si_destroy(si, NULL); 1974 else 1975 return 0; 1976 } 1977 return 0; /* unreachable */ 1978} 1979 1980/* Callback called on scheduler to check if it has instances */ 1981static int 1982drain_scheduler_sch_cb(void *_s, void *arg) 1983{ 1984 struct dn_schk *s = _s; 1985 1986 if (s->sch.flags & DN_HAVE_MASK) { 1987 dn_ht_scan_bucket(s->siht, &s->drain_bucket, 1988 drain_scheduler_cb, NULL); 1989 s->drain_bucket++; 1990 } else { 1991 if (s->siht) { 1992 if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL) 1993 s->siht = NULL; 1994 } 1995 } 1996 return 0; 1997} 1998 1999/* Called every tick, try to delete a 'bucket' of scheduler */ 2000void 2001dn_drain_scheduler(void) 2002{ 2003 dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch, 2004 drain_scheduler_sch_cb, NULL); 2005 dn_cfg.drain_sch++; 2006} 2007 2008/* Callback called on queue to delete if it is idle */ 2009static int 2010drain_queue_cb(void *_q, void *arg) 2011{ 2012 struct dn_queue *q = _q; 2013 2014 if (q->ni.length == 0) { 2015 dn_delete_queue(q, DN_DESTROY); 2016 return DNHT_SCAN_DEL; /* queue is deleted */ 2017 } 2018 2019 return 0; 
/* queue isn't deleted */ 2020} 2021 2022/* Callback called on flowset used to check if it has queues */ 2023static int 2024drain_queue_fs_cb(void *_fs, void *arg) 2025{ 2026 struct dn_fsk *fs = _fs; 2027 2028 if (fs->fs.flags & DN_QHT_HASH) { 2029 /* Flowset has a hash table for queues */ 2030 dn_ht_scan_bucket(fs->qht, &fs->drain_bucket, 2031 drain_queue_cb, NULL); 2032 fs->drain_bucket++; 2033 } else { 2034 /* No hash table for this flowset, null the pointer 2035 * if the queue is deleted 2036 */ 2037 if (fs->qht) { 2038 if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL) 2039 fs->qht = NULL; 2040 } 2041 } 2042 return 0; 2043} 2044 2045/* Called every tick, try to delete a 'bucket' of queue */ 2046void 2047dn_drain_queue(void) 2048{ 2049 /* scan a bucket of flowset */ 2050 dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs, 2051 drain_queue_fs_cb, NULL); 2052 dn_cfg.drain_fs++; 2053} 2054 2055/* 2056 * Handler for the various dummynet socket options 2057 */ 2058static int 2059ip_dn_ctl(struct sockopt *sopt) 2060{ 2061 void *p = NULL; 2062 int error, l; 2063 2064 error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET); 2065 if (error) 2066 return (error); 2067 2068 /* Disallow sets in really-really secure mode. 
*/ 2069 if (sopt->sopt_dir == SOPT_SET) { 2070 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); 2071 if (error) 2072 return (error); 2073 } 2074 2075 switch (sopt->sopt_name) { 2076 default : 2077 D("dummynet: unknown option %d", sopt->sopt_name); 2078 error = EINVAL; 2079 break; 2080 2081 case IP_DUMMYNET_FLUSH: 2082 case IP_DUMMYNET_CONFIGURE: 2083 case IP_DUMMYNET_DEL: /* remove a pipe or queue */ 2084 case IP_DUMMYNET_GET: 2085 D("dummynet: compat option %d", sopt->sopt_name); 2086 error = ip_dummynet_compat(sopt); 2087 break; 2088 2089 case IP_DUMMYNET3 : 2090 if (sopt->sopt_dir == SOPT_GET) { 2091 error = dummynet_get(sopt, NULL); 2092 break; 2093 } 2094 l = sopt->sopt_valsize; 2095 if (l < sizeof(struct dn_id) || l > 12000) { 2096 D("argument len %d invalid", l); 2097 break; 
/* NOTE(review): 'error' is still 0 on this path, so an invalid argument length is reported to userland as success; later FreeBSD versions set EINVAL here — confirm and fix. */
 2098 } 2099 p = malloc(l, M_TEMP, M_WAITOK); // XXX can it fail ? 2100 error = sooptcopyin(sopt, p, l, l); 2101 if (error) 2102 break ; 2103 error = do_config(p, l); 2104 break; 2105 } 2106 2107 if (p != NULL) 2108 free(p, M_TEMP); 2109 2110 return error ; 2111} 2112 2113 
/* ip_dn_init(): one-time initialization — defaults for queue limits, RED parameters and hash sizes, both object hash tables, the event heap, the lock, the taskqueue and the tick callout. Idempotent via dn_cfg.init_done. */
 2114static void 2115ip_dn_init(void) 2116{ 2117 if (dn_cfg.init_done) 2118 return; 2119 printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet); 2120 dn_cfg.init_done = 1; 2121 /* Set defaults here. MSVC does not accept initializers, 2122 * and this is also useful for vimages 2123 */ 2124 /* queue limits */ 2125 dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */ 2126 dn_cfg.byte_limit = 1024 * 1024; 2127 dn_cfg.expire = 1; 2128 2129 /* RED parameters */ 2130 dn_cfg.red_lookup_depth = 256; /* default lookup table depth */ 2131 dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */ 2132 dn_cfg.red_max_pkt_size = 1500; /* default max packet size */ 2133 2134 /* hash tables */ 2135 dn_cfg.max_hash_size = 1024; /* max in the hash tables */ 2136 dn_cfg.hash_size = 64; /* default hash size */ 2137 2138 /* create hash tables for schedulers and flowsets. 
2139 * In both we search by key and by pointer. 2140 */
/* NOTE(review): from here to "end of file" this text repeats the block already seen near the top of this chunk (before the '|' separator) — duplicated extraction of the same file tail; keep only one authoritative copy. */
 2141 dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size, 2142 offsetof(struct dn_schk, schk_next), 2143 schk_hash, schk_match, schk_new); 2144 dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size, 2145 offsetof(struct dn_fsk, fsk_next), 2146 fsk_hash, fsk_match, fsk_new); 2147 2148 /* bucket index to drain object */ 2149 dn_cfg.drain_fs = 0; 2150 dn_cfg.drain_sch = 0; 2151 2152 heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id)); 2153 SLIST_INIT(&dn_cfg.fsu); 2154 SLIST_INIT(&dn_cfg.schedlist); 2155 2156 DN_LOCK_INIT(); 2157 2158 TASK_INIT(&dn_task, 0, dummynet_task, curvnet); 2159 dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT, 2160 taskqueue_thread_enqueue, &dn_tq); 2161 taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); 2162 2163 callout_init(&dn_timeout, CALLOUT_MPSAFE); 2164 callout_reset(&dn_timeout, 1, dummynet, NULL); 2165 2166 /* Initialize curr_time adjustment mechanics. */ 2167 getmicrouptime(&dn_cfg.prev_t); 2168} 2169 2170#ifdef KLD_MODULE 2171static void 2172ip_dn_destroy(int last) 2173{ 2174 callout_drain(&dn_timeout); 2175 2176 DN_BH_WLOCK(); 2177 if (last) { 2178 printf("%s removing last instance\n", __FUNCTION__); 2179 ip_dn_ctl_ptr = NULL; 2180 ip_dn_io_ptr = NULL; 2181 } 2182 2183 dummynet_flush(); 2184 DN_BH_WUNLOCK(); 2185 taskqueue_drain(dn_tq, &dn_task); 2186 taskqueue_free(dn_tq); 2187 2188 dn_ht_free(dn_cfg.schedhash, 0); 2189 dn_ht_free(dn_cfg.fshash, 0); 2190 heap_free(&dn_cfg.evheap); 2191 2192 DN_LOCK_DESTROY(); 2193} 2194#endif /* KLD_MODULE */ 2195 2196static int 2197dummynet_modevent(module_t mod, int type, void *data) 2198{ 2199 2200 if (type == MOD_LOAD) { 2201 if (ip_dn_io_ptr) { 2202 printf("DUMMYNET already loaded\n"); 2203 return EEXIST ; 2204 } 2205 ip_dn_init(); 2206 ip_dn_ctl_ptr = ip_dn_ctl; 2207 ip_dn_io_ptr = dummynet_io; 2208 return 0; 2209 } else if (type == MOD_UNLOAD) { 2210#if !defined(KLD_MODULE) 2211 printf("dummynet statically compiled, cannot 
unload\n"); 2212 return EINVAL ; 2213#else 2214 ip_dn_destroy(1 /* last */); 2215 return 0; 2216#endif 2217 } else 2218 return EOPNOTSUPP; 2219} 2220 2221/* modevent helpers for the modules */ 2222static int 2223load_dn_sched(struct dn_alg *d) 2224{ 2225 struct dn_alg *s; 2226 2227 if (d == NULL) 2228 return 1; /* error */ 2229 ip_dn_init(); /* just in case, we need the lock */ 2230 2231 /* Check that mandatory funcs exists */ 2232 if (d->enqueue == NULL || d->dequeue == NULL) { 2233 D("missing enqueue or dequeue for %s", d->name); 2234 return 1; 2235 } 2236 2237 /* Search if scheduler already exists */ 2238 DN_BH_WLOCK(); 2239 SLIST_FOREACH(s, &dn_cfg.schedlist, next) { 2240 if (strcmp(s->name, d->name) == 0) { 2241 D("%s already loaded", d->name); 2242 break; /* scheduler already exists */ 2243 } 2244 } 2245 if (s == NULL) 2246 SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next); 2247 DN_BH_WUNLOCK(); 2248 D("dn_sched %s %sloaded", d->name, s ? "not ":""); 2249 return s ? 1 : 0; 2250} 2251 2252static int 2253unload_dn_sched(struct dn_alg *s) 2254{ 2255 struct dn_alg *tmp, *r; 2256 int err = EINVAL; 2257 2258 D("called for %s", s->name); 2259 2260 DN_BH_WLOCK(); 2261 SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) { 2262 if (strcmp(s->name, r->name) != 0) 2263 continue; 2264 D("ref_count = %d", r->ref_count); 2265 err = (r->ref_count != 0) ? EBUSY : 0; 2266 if (err == 0) 2267 SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next); 2268 break; 2269 } 2270 DN_BH_WUNLOCK(); 2271 D("dn_sched %s %sunloaded", s->name, err ? 
"not ":""); 2272 return err; 2273} 2274 2275int 2276dn_sched_modevent(module_t mod, int cmd, void *arg) 2277{ 2278 struct dn_alg *sch = arg; 2279 2280 if (cmd == MOD_LOAD) 2281 return load_dn_sched(sch); 2282 else if (cmd == MOD_UNLOAD) 2283 return unload_dn_sched(sch); 2284 else 2285 return EINVAL; 2286} 2287 2288static moduledata_t dummynet_mod = { 2289 "dummynet", dummynet_modevent, NULL 2290}; 2291 2292#define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN 2293#define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ 2294DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); 2295MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); 2296MODULE_VERSION(dummynet, 1); 2297 2298/* 2299 * Starting up. Done in order after dummynet_modevent() has been called. 2300 * VNET_SYSINIT is also called for each existing vnet and each new vnet. 2301 */ 2302//VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL); 2303 2304/* 2305 * Shutdown handlers up shop. These are done in REVERSE ORDER, but still 2306 * after dummynet_modevent() has been called. Not called on reboot. 2307 * VNET_SYSUNINIT is also called for each exiting vnet as it exits. 2308 * or when the module is unloaded. 2309 */ 2310//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL); 2311 2312/* end of file */
|