ng_pipe.c (184205 vs 196019)
1/*
2 * Copyright (c) 2004-2008 University of Zagreb
3 * Copyright (c) 2007-2008 FreeBSD Foundation
4 *
5 * This software was developed by the University of Zagreb and the
6 * FreeBSD Foundation under sponsorship by the Stichting NLnet and the
7 * FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * $FreeBSD: head/sys/netgraph/ng_pipe.c 184205 2008-10-23 15:53:51Z des $
30 * $FreeBSD: head/sys/netgraph/ng_pipe.c 196019 2009-08-01 19:26:27Z rwatson $
31 */
32
33/*
34 * This node permits simple traffic shaping by emulating bandwidth
35 * and delay, as well as random packet losses.
36 * The node has two hooks, upper and lower. Traffic flowing from upper to
37 * lower hook is referred to as downstream, and vice versa. Parameters for
38 * both directions can be set separately, except for delay.
39 */
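/*
 * Example (sketch): configuring such a node from user space with the
 * standard libnetgraph calls. The node path "pipe0:" and the numeric
 * values below are purely illustrative; only the message cookie, the
 * command and the structure layout come from ng_pipe.h.
 *
 *	#include <string.h>
 *	#include <netgraph.h>
 *	#include <netgraph/ng_pipe.h>
 *
 *	int
 *	pipe_setcfg_example(void)
 *	{
 *		struct ng_pipe_cfg cfg;
 *		int cs;
 *
 *		if (NgMkSockNode(NULL, &cs, NULL) < 0)
 *			return (-1);
 *		memset(&cfg, 0, sizeof(cfg));	(fields left at 0 are not changed)
 *		cfg.delay = 20000;		(20 ms propagation delay, in us)
 *		cfg.bandwidth = 1000000;	(1 Mbit/s in both directions)
 *		return (NgSendMsg(cs, "pipe0:", NGM_PIPE_COOKIE,
 *		    NGM_PIPE_SET_CFG, &cfg, sizeof(cfg)));
 *	}
 */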
40
41
42#include <sys/param.h>
43#include <sys/errno.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/malloc.h>
47#include <sys/mbuf.h>
48#include <sys/time.h>
49#include <sys/vimage.h>
50
51#include <vm/uma.h>
52
49
50#include <vm/uma.h>
51
52#include <net/vnet.h>
53
53#include <netinet/in.h>
54#include <netinet/in_systm.h>
55#include <netinet/ip.h>
56
57#include <netgraph/ng_message.h>
58#include <netgraph/netgraph.h>
59#include <netgraph/ng_parse.h>
60#include <netgraph/ng_pipe.h>
61
62static MALLOC_DEFINE(M_NG_PIPE, "ng_pipe", "ng_pipe");
63
64struct mtx ng_pipe_giant;
65
66/* Packet header struct */
67struct ngp_hdr {
68 TAILQ_ENTRY(ngp_hdr) ngp_link; /* next pkt in queue */
69 struct timeval when; /* this packet's due time */
70 struct mbuf *m; /* ptr to the packet data */
71};
72TAILQ_HEAD(p_head, ngp_hdr);
73
74/* FIFO queue struct */
75struct ngp_fifo {
76 TAILQ_ENTRY(ngp_fifo) fifo_le; /* list of active queues only */
77 struct p_head packet_head; /* FIFO queue head */
78 u_int32_t hash; /* flow signature */
79 struct timeval vtime; /* virtual time, for WFQ */
80 u_int32_t rr_deficit; /* for DRR */
81 u_int32_t packets; /* # of packets in this queue */
82};
83
84/* Per hook info */
85struct hookinfo {
86 hook_p hook;
87 int noqueue; /* bypass any processing */
88 TAILQ_HEAD(, ngp_fifo) fifo_head; /* FIFO queues */
89 TAILQ_HEAD(, ngp_hdr) qout_head; /* delay queue head */
90 LIST_ENTRY(hookinfo) active_le; /* active hooks */
91 struct timeval qin_utime;
92 struct ng_pipe_hookcfg cfg;
93 struct ng_pipe_hookrun run;
94 struct ng_pipe_hookstat stats;
95 uint64_t *ber_p; /* loss_p(BER,psize) map */
96};
97
98/* Per node info */
99struct node_priv {
100 u_int64_t delay;
101 u_int32_t overhead;
102 u_int32_t header_offset;
103 struct hookinfo lower;
104 struct hookinfo upper;
105};
106typedef struct node_priv *priv_p;
107
108/* Macro for calculating the virtual time for packet dequeueing in WFQ */
109#define FIFO_VTIME_SORT(plen) \
110 if (hinfo->cfg.wfq && hinfo->cfg.bandwidth) { \
111 ngp_f->vtime.tv_usec = now->tv_usec + ((uint64_t) (plen) \
112 + priv->overhead ) * hinfo->run.fifo_queues * \
113 8000000 / hinfo->cfg.bandwidth; \
114 ngp_f->vtime.tv_sec = now->tv_sec + \
115 ngp_f->vtime.tv_usec / 1000000; \
116 ngp_f->vtime.tv_usec = ngp_f->vtime.tv_usec % 1000000; \
117 TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le) \
118 if (ngp_f1->vtime.tv_sec > ngp_f->vtime.tv_sec || \
119 (ngp_f1->vtime.tv_sec == ngp_f->vtime.tv_sec && \
120 ngp_f1->vtime.tv_usec > ngp_f->vtime.tv_usec)) \
121 break; \
122 if (ngp_f1 == NULL) \
123 TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le); \
124 else \
125 TAILQ_INSERT_BEFORE(ngp_f1, ngp_f, fifo_le); \
126 } else \
127 TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le); \
128
129
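/*
 * Worked example for the virtual time computed above (illustrative
 * numbers only): with cfg.bandwidth = 1000000 bit/s, overhead = 0,
 * run.fifo_queues = 2 and a 1500-byte packet, the queue's virtual
 * finish time is set
 *
 *	(1500 + 0) * 2 * 8000000 / 1000000 = 24000 us = 24 ms
 *
 * past "now", and the queue is inserted into fifo_head in ascending
 * vtime order, so the flow whose head packet would finish transmission
 * first is dequeued first.
 */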
130static void parse_cfg(struct ng_pipe_hookcfg *, struct ng_pipe_hookcfg *,
131 struct hookinfo *, priv_p);
132static void pipe_dequeue(struct hookinfo *, struct timeval *);
133static void pipe_scheduler(void *);
134static void pipe_poll(void);
135static int ngp_modevent(module_t, int, void *);
136
137/* linked list of active "pipe" hooks */
138static LIST_HEAD(, hookinfo) active_head;
139static int active_gen_id = 0;
140
141/* timeout handle for pipe_scheduler */
142static struct callout polling_timer;
143
144/* zone for storing ngp_hdr-s */
145static uma_zone_t ngp_zone;
146
147/* Netgraph methods */
148static ng_constructor_t ngp_constructor;
149static ng_rcvmsg_t ngp_rcvmsg;
150static ng_shutdown_t ngp_shutdown;
151static ng_newhook_t ngp_newhook;
152static ng_rcvdata_t ngp_rcvdata;
153static ng_disconnect_t ngp_disconnect;
154
155/* Parse type for struct ng_pipe_hookstat */
156static const struct ng_parse_struct_field
157 ng_pipe_hookstat_type_fields[] = NG_PIPE_HOOKSTAT_INFO;
158static const struct ng_parse_type ng_pipe_hookstat_type = {
159 &ng_parse_struct_type,
160 &ng_pipe_hookstat_type_fields
161};
162
163/* Parse type for struct ng_pipe_stats */
164static const struct ng_parse_struct_field ng_pipe_stats_type_fields[] =
165 NG_PIPE_STATS_INFO(&ng_pipe_hookstat_type);
166static const struct ng_parse_type ng_pipe_stats_type = {
167 &ng_parse_struct_type,
168 &ng_pipe_stats_type_fields
169};
170
171/* Parse type for struct ng_pipe_hookrun */
172static const struct ng_parse_struct_field
173 ng_pipe_hookrun_type_fields[] = NG_PIPE_HOOKRUN_INFO;
174static const struct ng_parse_type ng_pipe_hookrun_type = {
175 &ng_parse_struct_type,
176 &ng_pipe_hookrun_type_fields
177};
178
179/* Parse type for struct ng_pipe_run */
180static const struct ng_parse_struct_field
181 ng_pipe_run_type_fields[] = NG_PIPE_RUN_INFO(&ng_pipe_hookrun_type);
182static const struct ng_parse_type ng_pipe_run_type = {
183 &ng_parse_struct_type,
184 &ng_pipe_run_type_fields
185};
186
187/* Parse type for struct ng_pipe_hookcfg */
188static const struct ng_parse_struct_field
189 ng_pipe_hookcfg_type_fields[] = NG_PIPE_HOOKCFG_INFO;
190static const struct ng_parse_type ng_pipe_hookcfg_type = {
191 &ng_parse_struct_type,
192 &ng_pipe_hookcfg_type_fields
193};
194
195/* Parse type for struct ng_pipe_cfg */
196static const struct ng_parse_struct_field
197 ng_pipe_cfg_type_fields[] = NG_PIPE_CFG_INFO(&ng_pipe_hookcfg_type);
198static const struct ng_parse_type ng_pipe_cfg_type = {
199 &ng_parse_struct_type,
200 &ng_pipe_cfg_type_fields
201};
202
203/* List of commands and how to convert arguments to/from ASCII */
204static const struct ng_cmdlist ngp_cmds[] = {
205 {
206 .cookie = NGM_PIPE_COOKIE,
207 .cmd = NGM_PIPE_GET_STATS,
208 .name = "getstats",
209 .respType = &ng_pipe_stats_type
210 },
211 {
212 .cookie = NGM_PIPE_COOKIE,
213 .cmd = NGM_PIPE_CLR_STATS,
214 .name = "clrstats"
215 },
216 {
217 .cookie = NGM_PIPE_COOKIE,
218 .cmd = NGM_PIPE_GETCLR_STATS,
219 .name = "getclrstats",
220 .respType = &ng_pipe_stats_type
221 },
222 {
223 .cookie = NGM_PIPE_COOKIE,
224 .cmd = NGM_PIPE_GET_RUN,
225 .name = "getrun",
226 .respType = &ng_pipe_run_type
227 },
228 {
229 .cookie = NGM_PIPE_COOKIE,
230 .cmd = NGM_PIPE_GET_CFG,
231 .name = "getcfg",
232 .respType = &ng_pipe_cfg_type
233 },
234 {
235 .cookie = NGM_PIPE_COOKIE,
236 .cmd = NGM_PIPE_SET_CFG,
237 .name = "setcfg",
238 .mesgType = &ng_pipe_cfg_type,
239 },
240 { 0 }
241};
242
243/* Netgraph type descriptor */
244static struct ng_type ng_pipe_typestruct = {
245 .version = NG_ABI_VERSION,
246 .name = NG_PIPE_NODE_TYPE,
247 .mod_event = ngp_modevent,
248 .constructor = ngp_constructor,
249 .shutdown = ngp_shutdown,
250 .rcvmsg = ngp_rcvmsg,
251 .newhook = ngp_newhook,
252 .rcvdata = ngp_rcvdata,
253 .disconnect = ngp_disconnect,
254 .cmdlist = ngp_cmds
255};
256NETGRAPH_INIT(pipe, &ng_pipe_typestruct);
257
258/* Node constructor */
259static int
260ngp_constructor(node_p node)
261{
262 priv_p priv;
263
264 priv = malloc(sizeof(*priv), M_NG_PIPE, M_ZERO | M_NOWAIT);
265 if (priv == NULL)
266 return (ENOMEM);
267 NG_NODE_SET_PRIVATE(node, priv);
268
269 return (0);
270}
271
272/* Add a hook */
273static int
274ngp_newhook(node_p node, hook_p hook, const char *name)
275{
276 const priv_p priv = NG_NODE_PRIVATE(node);
277 struct hookinfo *hinfo;
278
279 if (strcmp(name, NG_PIPE_HOOK_UPPER) == 0) {
280 bzero(&priv->upper, sizeof(priv->upper));
281 priv->upper.hook = hook;
282 NG_HOOK_SET_PRIVATE(hook, &priv->upper);
283 } else if (strcmp(name, NG_PIPE_HOOK_LOWER) == 0) {
284 bzero(&priv->lower, sizeof(priv->lower));
285 priv->lower.hook = hook;
286 NG_HOOK_SET_PRIVATE(hook, &priv->lower);
287 } else
288 return (EINVAL);
289
290 /* Load non-zero initial cfg values */
291 hinfo = NG_HOOK_PRIVATE(hook);
292 hinfo->cfg.qin_size_limit = 50;
293 hinfo->cfg.fifo = 1;
294 hinfo->cfg.droptail = 1;
295 TAILQ_INIT(&hinfo->fifo_head);
296 TAILQ_INIT(&hinfo->qout_head);
297 return (0);
298}
299
300/* Receive a control message */
301static int
302ngp_rcvmsg(node_p node, item_p item, hook_p lasthook)
303{
304 const priv_p priv = NG_NODE_PRIVATE(node);
305 struct ng_mesg *resp = NULL;
306 struct ng_mesg *msg;
307 struct ng_pipe_stats *stats;
308 struct ng_pipe_run *run;
309 struct ng_pipe_cfg *cfg;
310 int error = 0;
311
312 mtx_lock(&ng_pipe_giant);
313
314 NGI_GET_MSG(item, msg);
315 switch (msg->header.typecookie) {
316 case NGM_PIPE_COOKIE:
317 switch (msg->header.cmd) {
318 case NGM_PIPE_GET_STATS:
319 case NGM_PIPE_CLR_STATS:
320 case NGM_PIPE_GETCLR_STATS:
321 if (msg->header.cmd != NGM_PIPE_CLR_STATS) {
322 NG_MKRESPONSE(resp, msg,
323 sizeof(*stats), M_NOWAIT);
324 if (resp == NULL) {
325 error = ENOMEM;
326 break;
327 }
328 stats = (struct ng_pipe_stats *)resp->data;
329 bcopy(&priv->upper.stats, &stats->downstream,
330 sizeof(stats->downstream));
331 bcopy(&priv->lower.stats, &stats->upstream,
332 sizeof(stats->upstream));
333 }
334 if (msg->header.cmd != NGM_PIPE_GET_STATS) {
335 bzero(&priv->upper.stats,
336 sizeof(priv->upper.stats));
337 bzero(&priv->lower.stats,
338 sizeof(priv->lower.stats));
339 }
340 break;
341 case NGM_PIPE_GET_RUN:
342 NG_MKRESPONSE(resp, msg, sizeof(*run), M_NOWAIT);
343 if (resp == NULL) {
344 error = ENOMEM;
345 break;
346 }
347 run = (struct ng_pipe_run *)resp->data;
348 bcopy(&priv->upper.run, &run->downstream,
349 sizeof(run->downstream));
350 bcopy(&priv->lower.run, &run->upstream,
351 sizeof(run->upstream));
352 break;
353 case NGM_PIPE_GET_CFG:
354 NG_MKRESPONSE(resp, msg, sizeof(*cfg), M_NOWAIT);
355 if (resp == NULL) {
356 error = ENOMEM;
357 break;
358 }
359 cfg = (struct ng_pipe_cfg *)resp->data;
360 bcopy(&priv->upper.cfg, &cfg->downstream,
361 sizeof(cfg->downstream));
362 bcopy(&priv->lower.cfg, &cfg->upstream,
363 sizeof(cfg->upstream));
364 cfg->delay = priv->delay;
365 cfg->overhead = priv->overhead;
366 cfg->header_offset = priv->header_offset;
367 if (cfg->upstream.bandwidth ==
368 cfg->downstream.bandwidth) {
369 cfg->bandwidth = cfg->upstream.bandwidth;
370 cfg->upstream.bandwidth = 0;
371 cfg->downstream.bandwidth = 0;
372 } else
373 cfg->bandwidth = 0;
374 break;
375 case NGM_PIPE_SET_CFG:
376 cfg = (struct ng_pipe_cfg *)msg->data;
377 if (msg->header.arglen != sizeof(*cfg)) {
378 error = EINVAL;
379 break;
380 }
381
382 if (cfg->delay == -1)
383 priv->delay = 0;
384 else if (cfg->delay > 0 && cfg->delay < 10000000)
385 priv->delay = cfg->delay;
386
387 if (cfg->bandwidth == -1) {
388 priv->upper.cfg.bandwidth = 0;
389 priv->lower.cfg.bandwidth = 0;
390 priv->overhead = 0;
391 } else if (cfg->bandwidth >= 100 &&
392 cfg->bandwidth <= 1000000000) {
393 priv->upper.cfg.bandwidth = cfg->bandwidth;
394 priv->lower.cfg.bandwidth = cfg->bandwidth;
395 if (cfg->bandwidth >= 10000000)
396 priv->overhead = 8+4+12; /* Ethernet */
397 else
398 priv->overhead = 10; /* HDLC */
399 }
400
401 if (cfg->overhead == -1)
402 priv->overhead = 0;
403 else if (cfg->overhead > 0 && cfg->overhead < 256)
404 priv->overhead = cfg->overhead;
405
406 if (cfg->header_offset == -1)
407 priv->header_offset = 0;
408 else if (cfg->header_offset > 0 &&
409 cfg->header_offset < 64)
410 priv->header_offset = cfg->header_offset;
411
412 parse_cfg(&priv->upper.cfg, &cfg->downstream,
413 &priv->upper, priv);
414 parse_cfg(&priv->lower.cfg, &cfg->upstream,
415 &priv->lower, priv);
416 break;
417 default:
418 error = EINVAL;
419 break;
420 }
421 break;
422 default:
423 error = EINVAL;
424 break;
425 }
426 NG_RESPOND_MSG(error, node, item, resp);
427 NG_FREE_MSG(msg);
428
429 mtx_unlock(&ng_pipe_giant);
430
431 return (error);
432}
433
434static void
435parse_cfg(struct ng_pipe_hookcfg *current, struct ng_pipe_hookcfg *new,
436 struct hookinfo *hinfo, priv_p priv)
437{
438
439 if (new->ber == -1) {
440 current->ber = 0;
441 if (hinfo->ber_p) {
442 free(hinfo->ber_p, M_NG_PIPE);
443 hinfo->ber_p = NULL;
444 }
445 } else if (new->ber >= 1 && new->ber <= 1000000000000) {
446 static const uint64_t one = 0x1000000000000; /* = 2^48 */
447 uint64_t p0, p;
448 uint32_t fsize, i;
449
450 if (hinfo->ber_p == NULL)
451 hinfo->ber_p = malloc(\
452 (MAX_FSIZE + MAX_OHSIZE)*sizeof(uint64_t), \
453 M_NG_PIPE, M_NOWAIT);
454 current->ber = new->ber;
455
456 /*
457 * For given BER and each frame size N (in bytes) calculate
458 * the probability P_OK that the frame is clean:
459 *
460 * P_OK(BER,N) = (1 - 1/BER)^(N*8)
461 *
462 * We use a 64-bit fixed-point format with the binary point
463 * positioned between bits 47 and 48.
464 */
465 p0 = one - one / new->ber;
466 p = one;
467 for (fsize = 0; fsize < MAX_FSIZE + MAX_OHSIZE; fsize++) {
468 hinfo->ber_p[fsize] = p;
469 for (i=0; i<8; i++)
470 p = (p*(p0&0xffff)>>48) + \
471 (p*((p0>>16)&0xffff)>>32) + \
472 (p*(p0>>32)>>16);
473 }
474 }
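		/*
		 * Worked example for the table built above (illustrative
		 * numbers): with BER = 1000000, i.e. a bit error probability
		 * of 1/1000000, a 1500-byte frame undergoes N*8 = 12000
		 * independent bit trials, so
		 *
		 *	P_OK = (1 - 1/1000000)^12000 ~= 0.9881
		 *
		 * and ber_p[1500] ends up holding roughly 0.9881 * 2^48.
		 * The inner loop multiplies p by p0 once per bit, split into
		 * three 16-bit partial products so the fixed-point multiply
		 * with 48 fractional bits never overflows 64 bits.
		 */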
475
476 if (new->qin_size_limit == -1)
477 current->qin_size_limit = 0;
478 else if (new->qin_size_limit >= 5)
479 current->qin_size_limit = new->qin_size_limit;
480
481 if (new->qout_size_limit == -1)
482 current->qout_size_limit = 0;
483 else if (new->qout_size_limit >= 5)
484 current->qout_size_limit = new->qout_size_limit;
485
486 if (new->duplicate == -1)
487 current->duplicate = 0;
488 else if (new->duplicate > 0 && new->duplicate <= 50)
489 current->duplicate = new->duplicate;
490
491 if (new->fifo) {
492 current->fifo = 1;
493 current->wfq = 0;
494 current->drr = 0;
495 }
496
497 if (new->wfq) {
498 current->fifo = 0;
499 current->wfq = 1;
500 current->drr = 0;
501 }
502
503 if (new->drr) {
504 current->fifo = 0;
505 current->wfq = 0;
506 /* DRR quantum */
507 if (new->drr >= 32)
508 current->drr = new->drr;
509 else
510 current->drr = 2048; /* default quantum */
511 }
512
513 if (new->droptail) {
514 current->droptail = 1;
515 current->drophead = 0;
516 }
517
518 if (new->drophead) {
519 current->droptail = 0;
520 current->drophead = 1;
521 }
522
523 if (new->bandwidth == -1) {
524 current->bandwidth = 0;
525 current->fifo = 1;
526 current->wfq = 0;
527 current->drr = 0;
528 } else if (new->bandwidth >= 100 && new->bandwidth <= 1000000000)
529 current->bandwidth = new->bandwidth;
530
531 if (current->bandwidth | priv->delay |
532 current->duplicate | current->ber)
533 hinfo->noqueue = 0;
534 else
535 hinfo->noqueue = 1;
536}
537
538/*
539 * Compute a hash signature for a packet. This function suffers from the
540 * NIH syndrome, so it would probably be wise to look around at what other
541 * folks have found to be good and efficient IP hash functions...
542 */
543static int
544ip_hash(struct mbuf *m, int offset)
545{
546 u_int64_t i;
547 struct ip *ip = (struct ip *)(mtod(m, u_char *) + offset);
548
549 if (m->m_len < sizeof(struct ip) + offset ||
550 ip->ip_v != 4 || ip->ip_hl << 2 != sizeof(struct ip))
551 return 0;
552
553 i = ((u_int64_t) ip->ip_src.s_addr ^
554 ((u_int64_t) ip->ip_src.s_addr << 13) ^
555 ((u_int64_t) ip->ip_dst.s_addr << 7) ^
556 ((u_int64_t) ip->ip_dst.s_addr << 19));
557 return (i ^ (i >> 32));
558}
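/*
 * For illustration only, one common alternative to the ad-hoc shifting
 * above is a multiplicative hash over the address pair, e.g.
 *
 *	return ((ip->ip_src.s_addr ^ ip->ip_dst.s_addr) * 2654435761U) >> 16;
 *
 * (2654435761 is the 32-bit golden-ratio hashing constant); any
 * replacement only needs to spread flows evenly across the FIFO queues.
 */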
559
560/*
561 * Receive data on a hook - both in upstream and downstream direction.
562 * We put the frame on the inbound queue, and try to initiate dequeuing
563 * sequence immediately. If inbound queue is full, discard one frame
564 * depending on dropping policy (from the head or from the tail of the
565 * queue).
566 */
567static int
568ngp_rcvdata(hook_p hook, item_p item)
569{
570 struct hookinfo *const hinfo = NG_HOOK_PRIVATE(hook);
571 const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook));
572 struct timeval uuptime;
573 struct timeval *now = &uuptime;
574 struct ngp_fifo *ngp_f = NULL, *ngp_f1;
575 struct ngp_hdr *ngp_h = NULL;
576 struct mbuf *m;
577 int hash;
578 int error = 0;
579
580 if (hinfo->noqueue) {
581 struct hookinfo *dest;
582 if (hinfo == &priv->lower)
583 dest = &priv->upper;
584 else
585 dest = &priv->lower;
586 NG_FWD_ITEM_HOOK(error, item, dest->hook);
587 return error;
588 }
589
590 mtx_lock(&ng_pipe_giant);
591 microuptime(now);
592
593 /*
594 * Attach us to the list of active ng_pipes if this was an empty
595 * one before, and also update the queue service deadline time.
596 */
597 if (hinfo->run.qin_frames == 0) {
598 struct timeval *when = &hinfo->qin_utime;
599 if (when->tv_sec < now->tv_sec || (when->tv_sec == now->tv_sec
600 && when->tv_usec < now->tv_usec)) {
601 when->tv_sec = now->tv_sec;
602 when->tv_usec = now->tv_usec;
603 }
604 if (hinfo->run.qout_frames == 0)
605 LIST_INSERT_HEAD(&active_head, hinfo, active_le);
606 }
607
608 /* Populate the packet header */
609 ngp_h = uma_zalloc(ngp_zone, M_NOWAIT);
610 KASSERT((ngp_h != NULL), ("ngp_h zalloc failed (1)"));
611 NGI_GET_M(item, m);
612 KASSERT(m != NULL, ("NGI_GET_M failed"));
613 ngp_h->m = m;
614 NG_FREE_ITEM(item);
615
616 if (hinfo->cfg.fifo)
617 hash = 0; /* all packets go into a single FIFO queue */
618 else
619 hash = ip_hash(m, priv->header_offset);
620
621 /* Find the appropriate FIFO queue for the packet and enqueue it*/
622 TAILQ_FOREACH(ngp_f, &hinfo->fifo_head, fifo_le)
623 if (hash == ngp_f->hash)
624 break;
625 if (ngp_f == NULL) {
626 ngp_f = uma_zalloc(ngp_zone, M_NOWAIT);
627 KASSERT(ngp_f != NULL, ("ngp_f zalloc failed (2)"));
628 TAILQ_INIT(&ngp_f->packet_head);
629 ngp_f->hash = hash;
630 ngp_f->packets = 1;
631 ngp_f->rr_deficit = hinfo->cfg.drr; /* DRR quantum */
632 hinfo->run.fifo_queues++;
633 TAILQ_INSERT_TAIL(&ngp_f->packet_head, ngp_h, ngp_link);
634 FIFO_VTIME_SORT(m->m_pkthdr.len);
635 } else {
636 TAILQ_INSERT_TAIL(&ngp_f->packet_head, ngp_h, ngp_link);
637 ngp_f->packets++;
638 }
639 hinfo->run.qin_frames++;
640 hinfo->run.qin_octets += m->m_pkthdr.len;
641
642 /* Discard a frame if inbound queue limit has been reached */
643 if (hinfo->run.qin_frames > hinfo->cfg.qin_size_limit) {
644 struct mbuf *m1;
645 int longest = 0;
646
647 /* Find the longest queue */
648 TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le)
649 if (ngp_f1->packets > longest) {
650 longest = ngp_f1->packets;
651 ngp_f = ngp_f1;
652 }
653
654 /* Drop a frame from the queue head/tail, depending on cfg */
655 if (hinfo->cfg.drophead)
656 ngp_h = TAILQ_FIRST(&ngp_f->packet_head);
657 else
658 ngp_h = TAILQ_LAST(&ngp_f->packet_head, p_head);
659 TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
660 m1 = ngp_h->m;
661 uma_zfree(ngp_zone, ngp_h);
662 hinfo->run.qin_octets -= m1->m_pkthdr.len;
663 hinfo->stats.in_disc_octets += m1->m_pkthdr.len;
664 m_freem(m1);
665 if (--(ngp_f->packets) == 0) {
666 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
667 uma_zfree(ngp_zone, ngp_f);
668 hinfo->run.fifo_queues--;
669 }
670 hinfo->run.qin_frames--;
671 hinfo->stats.in_disc_frames++;
672 }
702
703 /*
704 * Try to start the dequeuing process immediately. We must
705 * hold the ng_pipe_giant lock here and pipe_dequeue() will
706 * release it
707 */
708 pipe_dequeue(hinfo, now);
709
710 return (0);
711}
712
713
714/*
715 * Dequeueing sequence - we basically do the following:
716 * 1) Try to extract the frame from the inbound (bandwidth) queue;
717 * 2) In accordance with the specified BER, discard the frame randomly;
718 * 3) If the frame survives BER, prepend it with delay info and move it
719 * to outbound (delay) queue;
720 * 4) Loop to 2) until bandwidth quota for this timeslice is reached, or
721 * inbound queue is flushed completely;
722 * 5) Extract the first frame from the outbound queue, if its time has
723 * come. Queue the frame for transmission on the outbound hook;
724 * 6) Loop to 5) until outbound queue is flushed completely, or the next
725 * frame in the queue is not scheduled to be dequeued yet;
726 * 7) Transmit all frames queued in 5)
727 *
728 * Note: the caller must hold the ng_pipe_giant lock; this function
729 * returns with the lock released.
730 */
731static void
732pipe_dequeue(struct hookinfo *hinfo, struct timeval *now) {
733 static uint64_t rand, oldrand;
734 const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hinfo->hook));
735 struct hookinfo *dest;
736 struct ngp_fifo *ngp_f, *ngp_f1;
737 struct ngp_hdr *ngp_h;
738 struct timeval *when;
739 struct mbuf *q_head = NULL;
740 struct mbuf *q_tail = NULL;
741 struct mbuf *m;
742 int error = 0;
743
744 /* Which one is the destination hook? */
745 if (hinfo == &priv->lower)
746 dest = &priv->upper;
747 else
748 dest = &priv->lower;
749
750 /* Bandwidth queue processing */
751 while ((ngp_f = TAILQ_FIRST(&hinfo->fifo_head))) {
752 when = &hinfo->qin_utime;
753 if (when->tv_sec > now->tv_sec || (when->tv_sec == now->tv_sec
754 && when->tv_usec > now->tv_usec))
755 break;
756
757 ngp_h = TAILQ_FIRST(&ngp_f->packet_head);
758 m = ngp_h->m;
759
760 /* Deficit Round Robin (DRR) processing */
761 if (hinfo->cfg.drr) {
762 if (ngp_f->rr_deficit >= m->m_pkthdr.len) {
763 ngp_f->rr_deficit -= m->m_pkthdr.len;
764 } else {
765 ngp_f->rr_deficit += hinfo->cfg.drr;
766 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
767 TAILQ_INSERT_TAIL(&hinfo->fifo_head,
768 ngp_f, fifo_le);
769 continue;
770 }
771 }
772
773 /*
774 * Either create a duplicate and pass it on, or dequeue
775 * the original packet...
776 */
777 if (hinfo->cfg.duplicate &&
778 random() % 100 <= hinfo->cfg.duplicate) {
779 ngp_h = uma_zalloc(ngp_zone, M_NOWAIT);
780 KASSERT(ngp_h != NULL, ("ngp_h zalloc failed (3)"));
781 ngp_h->m = m_dup(m, M_NOWAIT);
782 KASSERT(ngp_h->m != NULL, ("m_dup failed"));
783 } else {
784 TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
785 hinfo->run.qin_frames--;
786 hinfo->run.qin_octets -= m->m_pkthdr.len;
787 ngp_f->packets--;
788 }
789
790 /* Calculate the serialization delay */
791 if (hinfo->cfg.bandwidth) {
792 hinfo->qin_utime.tv_usec += ((uint64_t) m->m_pkthdr.len
793 + priv->overhead ) *
794 8000000 / hinfo->cfg.bandwidth;
795 hinfo->qin_utime.tv_sec +=
796 hinfo->qin_utime.tv_usec / 1000000;
797 hinfo->qin_utime.tv_usec =
798 hinfo->qin_utime.tv_usec % 1000000;
799 }
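		/*
		 * Illustrative numbers for the computation above: a
		 * 1500-byte frame with the 24-byte overhead used for
		 * Ethernet-speed links, on a 10 Mbit/s pipe, advances the
		 * service deadline by
		 *
		 *	(1500 + 24) * 8000000 / 10000000 = 1219 us
		 *
		 * i.e. roughly the time the frame would occupy the
		 * emulated wire.
		 */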
800 when = &ngp_h->when;
801 when->tv_sec = hinfo->qin_utime.tv_sec;
802 when->tv_usec = hinfo->qin_utime.tv_usec;
803
804 /* Sort / rearrange inbound queues */
805 if (ngp_f->packets) {
806 if (hinfo->cfg.wfq) {
807 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
808 FIFO_VTIME_SORT(TAILQ_FIRST(
809 &ngp_f->packet_head)->m->m_pkthdr.len)
810 }
811 } else {
812 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
813 uma_zfree(ngp_zone, ngp_f);
814 hinfo->run.fifo_queues--;
815 }
816
817 /* Randomly discard the frame, according to BER setting */
818 if (hinfo->cfg.ber &&
819 ((oldrand = rand) ^ (rand = random())<<17) >=
820 hinfo->ber_p[priv->overhead + m->m_pkthdr.len] ) {
821 hinfo->stats.out_disc_frames++;
822 hinfo->stats.out_disc_octets += m->m_pkthdr.len;
823 uma_zfree(ngp_zone, ngp_h);
824 m_freem(m);
825 continue;
826 }
827
828 /* Discard frame if outbound queue size limit exceeded */
829 if (hinfo->cfg.qout_size_limit &&
830 hinfo->run.qout_frames>=hinfo->cfg.qout_size_limit) {
831 hinfo->stats.out_disc_frames++;
832 hinfo->stats.out_disc_octets += m->m_pkthdr.len;
833 uma_zfree(ngp_zone, ngp_h);
834 m_freem(m);
835 continue;
836 }
837
838 /* Calculate the propagation delay */
839 when->tv_usec += priv->delay;
840 when->tv_sec += when->tv_usec / 1000000;
841 when->tv_usec = when->tv_usec % 1000000;
842
843 /* Put the frame into the delay queue */
844 TAILQ_INSERT_TAIL(&hinfo->qout_head, ngp_h, ngp_link);
845 hinfo->run.qout_frames++;
846 hinfo->run.qout_octets += m->m_pkthdr.len;
847 }
848
849 /* Delay queue processing */
850 while ((ngp_h = TAILQ_FIRST(&hinfo->qout_head))) {
851 struct mbuf *m = ngp_h->m;
852
853 when = &ngp_h->when;
854 if (when->tv_sec > now->tv_sec ||
855 (when->tv_sec == now->tv_sec &&
856 when->tv_usec > now->tv_usec))
857 break;
858
859 /* Update outbound queue stats */
860 hinfo->stats.fwd_frames++;
861 hinfo->stats.fwd_octets += m->m_pkthdr.len;
862 hinfo->run.qout_frames--;
863 hinfo->run.qout_octets -= m->m_pkthdr.len;
864
865 /* Dequeue the packet from qout */
866 TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link);
867 uma_zfree(ngp_zone, ngp_h);
868
869 /* Enqueue locally for sending downstream */
870 if (q_head == NULL)
871 q_head = m;
872 if (q_tail)
873 q_tail->m_nextpkt = m;
874 q_tail = m;
875 m->m_nextpkt = NULL;
876 }
877
878 /* If both queues are empty detach us from the list of active queues */
879 if (hinfo->run.qin_frames + hinfo->run.qout_frames == 0) {
880 LIST_REMOVE(hinfo, active_le);
881 active_gen_id++;
882 }
883
884 mtx_unlock(&ng_pipe_giant);
885
886 while ((m = q_head) != NULL) {
887 q_head = m->m_nextpkt;
888 m->m_nextpkt = NULL;
889 NG_SEND_DATA(error, dest->hook, m, meta);
890 }
891}
892
893
894/*
895 * This routine is called on every clock tick. We poll all nodes/hooks
896 * for queued frames by calling pipe_dequeue().
897 */
898static void
899pipe_scheduler(void *arg)
900{
901 pipe_poll();
902
903 /* Reschedule */
904 callout_reset(&polling_timer, 1, &pipe_scheduler, NULL);
905}
906
907
908/*
909 * Traverse the list of all active hooks and attempt to dequeue
910 * some packets. Hooks with empty queues are not traversed since
911 * they are not linked into this list.
912 */
913static void
914pipe_poll(void)
915{
916 struct hookinfo *hinfo;
917 struct timeval now;
918 int old_gen_id = active_gen_id;
919
920 mtx_lock(&ng_pipe_giant);
921 microuptime(&now);
922 LIST_FOREACH(hinfo, &active_head, active_le) {
923 CURVNET_SET(NG_HOOK_NODE(hinfo->hook)->nd_vnet);
924 pipe_dequeue(hinfo, &now);
925 CURVNET_RESTORE();
926 mtx_lock(&ng_pipe_giant);
927 if (old_gen_id != active_gen_id) {
928 /* the list was updated; restart traversing */
929 hinfo = LIST_FIRST(&active_head);
930 if (hinfo == NULL)
931 break;
932 old_gen_id = active_gen_id;
933 continue;
934 }
935 }
936 mtx_unlock(&ng_pipe_giant);
937}
938
939
940/*
941 * Shutdown processing
942 *
943 * This is tricky. If we have both a lower and upper hook, then we
944 * probably want to extricate ourselves and leave the two peers
945 * still linked to each other. Otherwise we should just shut down as
946 * a normal node would.
947 */
948static int
949ngp_shutdown(node_p node)
950{
951 const priv_p priv = NG_NODE_PRIVATE(node);
952
953 if (priv->lower.hook && priv->upper.hook)
954 ng_bypass(priv->lower.hook, priv->upper.hook);
955 else {
956 if (priv->upper.hook != NULL)
957 ng_rmhook_self(priv->upper.hook);
958 if (priv->lower.hook != NULL)
959 ng_rmhook_self(priv->lower.hook);
960 }
961 NG_NODE_UNREF(node);
962 free(priv, M_NG_PIPE);
963 return (0);
964}
965
966
967/*
968 * Hook disconnection
969 */
970static int
971ngp_disconnect(hook_p hook)
972{
973 struct hookinfo *const hinfo = NG_HOOK_PRIVATE(hook);
974 struct ngp_fifo *ngp_f;
975 struct ngp_hdr *ngp_h;
976 int removed = 0;
977
978 mtx_lock(&ng_pipe_giant);
979
980 KASSERT(hinfo != NULL, ("%s: null info", __FUNCTION__));
981 hinfo->hook = NULL;
982
983 /* Flush all fifo queues associated with the hook */
984 while ((ngp_f = TAILQ_FIRST(&hinfo->fifo_head))) {
985 while ((ngp_h = TAILQ_FIRST(&ngp_f->packet_head))) {
986 TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
987 m_freem(ngp_h->m);
988 uma_zfree(ngp_zone, ngp_h);
989 removed++;
990 }
991 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
992 uma_zfree(ngp_zone, ngp_f);
993 }
994
995 /* Flush the delay queue */
996 while ((ngp_h = TAILQ_FIRST(&hinfo->qout_head))) {
997 TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link);
998 m_freem(ngp_h->m);
999 uma_zfree(ngp_zone, ngp_h);
1000 removed++;
1001 }
1002
1003 /*
1004 * Both queues should be empty by now, so detach us from
1005 * the list of active queues
1006 */
1007 if (removed) {
1008 LIST_REMOVE(hinfo, active_le);
1009 active_gen_id++;
1010 }
1011 if (hinfo->run.qin_frames + hinfo->run.qout_frames != removed)
1012 printf("Mismatch: queued=%d but removed=%d !?!",
1013 hinfo->run.qin_frames + hinfo->run.qout_frames, removed);
1014
1015 /* Release the packet loss probability table (BER) */
1016 if (hinfo->ber_p)
1017 free(hinfo->ber_p, M_NG_PIPE);
1018
1019 mtx_unlock(&ng_pipe_giant);
1020
1021 return (0);
1022}
1023
1024static int
1025ngp_modevent(module_t mod, int type, void *unused)
1026{
1027 int error = 0;
1028
1029 switch (type) {
1030 case MOD_LOAD:
1031 ngp_zone = uma_zcreate("ng_pipe", max(sizeof(struct ngp_hdr),
1032 sizeof (struct ngp_fifo)), NULL, NULL, NULL, NULL,
1033 UMA_ALIGN_PTR, 0);
1034 if (ngp_zone == NULL)
1035 panic("ng_pipe: couldn't allocate descriptor zone");
1036
1037 mtx_init(&ng_pipe_giant, "ng_pipe_giant", NULL, MTX_DEF);
1038 LIST_INIT(&active_head);
1039 callout_init(&polling_timer, CALLOUT_MPSAFE);
1040 callout_reset(&polling_timer, 1, &pipe_scheduler, NULL);
1041 break;
1042 case MOD_UNLOAD:
1043 callout_drain(&polling_timer);
1044 uma_zdestroy(ngp_zone);
1045 mtx_destroy(&ng_pipe_giant);
1046 break;
1047 default:
1048 error = EOPNOTSUPP;
1049 break;
1050 }
1051
1052 return (error);
1053}