Deleted Added
full compact
ip_fw_table.c (306025) ip_fw_table.c (307970)
1/*-
2 * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
3 * Copyright (c) 2014 Yandex LLC
4 * Copyright (c) 2014 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
3 * Copyright (c) 2014 Yandex LLC
4 * Copyright (c) 2014 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/11/sys/netpfil/ipfw/ip_fw_table.c 306025 2016-09-20 13:23:08Z ae $");
29__FBSDID("$FreeBSD: stable/11/sys/netpfil/ipfw/ip_fw_table.c 307970 2016-10-26 17:34:33Z ae $");
30
31/*
32 * Lookup table support for ipfw.
33 *
34 * This file contains handlers for all generic tables' operations:
35 * add/del/flush entries, list/dump tables etc..
36 *
37 * Table data modification is protected by both UH and runtime lock
38 * while reading configuration/data is protected by UH lock.
39 *
40 * Lookup algorithms for all table types are located in ip_fw_table_algo.c
41 */
42
43#include "opt_ipfw.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/malloc.h>
48#include <sys/kernel.h>
49#include <sys/lock.h>
50#include <sys/rwlock.h>
51#include <sys/rmlock.h>
52#include <sys/socket.h>
53#include <sys/socketvar.h>
54#include <sys/queue.h>
55#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
56
57#include <netinet/in.h>
58#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
59#include <netinet/ip_fw.h>
60
61#include <netpfil/ipfw/ip_fw_private.h>
62#include <netpfil/ipfw/ip_fw_table.h>
63
64 /*
65 * Table has the following `type` concepts:
66 *
67 * `no.type` represents lookup key type (addr, ifp, uid, etc..)
68 * vmask represents bitmask of table values which are present at the moment.
69 * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old
70 * single-value-for-all approach.
71 */
72struct table_config {
73 struct named_object no;
74 uint8_t tflags; /* type flags */
75 uint8_t locked; /* 1 if locked from changes */
76 uint8_t linked; /* 1 if already linked */
77 uint8_t ochanged; /* used by set swapping */
78 uint8_t vshared; /* 1 if using shared value array */
79 uint8_t spare[3];
80 uint32_t count; /* Number of records */
81 uint32_t limit; /* Max number of records */
82 uint32_t vmask; /* bitmask with supported values */
83 uint32_t ocount; /* used by set swapping */
84 uint64_t gencnt; /* generation count */
85 char tablename[64]; /* table name */
86 struct table_algo *ta; /* Callbacks for given algo */
87 void *astate; /* algorithm state */
88 struct table_info ti_copy; /* data to put to table_info */
89 struct namedobj_instance *vi;
90};
91
92static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
93 struct table_config **tc);
94static struct table_config *find_table(struct namedobj_instance *ni,
95 struct tid_info *ti);
96static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
97 struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
98static void free_table_config(struct namedobj_instance *ni,
99 struct table_config *tc);
100static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
101 char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref);
102static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
103static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
104static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
105 struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
106#define OP_ADD 1
107#define OP_DEL 0
108static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
109 struct sockopt_data *sd);
110static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
111 ipfw_xtable_info *i);
112static int dump_table_tentry(void *e, void *arg);
113static int dump_table_xentry(void *e, void *arg);
114
115static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
116 struct tid_info *b);
117
118static int check_table_name(const char *name);
119static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
120 struct table_config *tc, struct table_info *ti, uint32_t count);
121static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);
122
123static struct table_algo *find_table_algo(struct tables_config *tableconf,
124 struct tid_info *ti, char *name);
125
126static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
127static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
128
129#define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash)
130#define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k]))
131
132#define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */
133
134void
135rollback_toperation_state(struct ip_fw_chain *ch, void *object)
136{
137 struct tables_config *tcfg;
138 struct op_state *os;
139
140 tcfg = CHAIN_TO_TCFG(ch);
141 TAILQ_FOREACH(os, &tcfg->state_list, next)
142 os->func(object, os);
143}
144
145void
146add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
147{
148 struct tables_config *tcfg;
149
150 tcfg = CHAIN_TO_TCFG(ch);
151 TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next);
152}
153
154void
155del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
156{
157 struct tables_config *tcfg;
158
159 tcfg = CHAIN_TO_TCFG(ch);
160 TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next);
161}
162
163void
164tc_ref(struct table_config *tc)
165{
166
167 tc->no.refcnt++;
168}
169
170void
171tc_unref(struct table_config *tc)
172{
173
174 tc->no.refcnt--;
175}
176
177static struct table_value *
178get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
179{
180 struct table_value *pval;
181
182 pval = (struct table_value *)ch->valuestate;
183
184 return (&pval[kidx]);
185}
186
187
188/*
189 * Checks if we're able to insert/update entry @tei into table
190 * w.r.t @tc limits.
191 * May alter @tei to indicate insertion error / insert
192 * options.
193 *
194 * Returns 0 if operation can be performed/
195 */
196static int
197check_table_limit(struct table_config *tc, struct tentry_info *tei)
198{
199
200 if (tc->limit == 0 || tc->count < tc->limit)
201 return (0);
202
203 if ((tei->flags & TEI_FLAGS_UPDATE) == 0) {
204 /* Notify userland on error cause */
205 tei->flags |= TEI_FLAGS_LIMIT;
206 return (EFBIG);
207 }
208
209 /*
210 * We have UPDATE flag set.
211 * Permit updating record (if found),
212 * but restrict adding new one since we've
213 * already hit the limit.
214 */
215 tei->flags |= TEI_FLAGS_DONTADD;
216
217 return (0);
218}
219
220/*
221 * Convert algorithm callback return code into
222 * one of pre-defined states known by userland.
223 */
224static void
225store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
226{
227 int flag;
228
229 flag = 0;
230
231 switch (error) {
232 case 0:
233 if (op == OP_ADD && num != 0)
234 flag = TEI_FLAGS_ADDED;
235 if (op == OP_DEL)
236 flag = TEI_FLAGS_DELETED;
237 break;
238 case ENOENT:
239 flag = TEI_FLAGS_NOTFOUND;
240 break;
241 case EEXIST:
242 flag = TEI_FLAGS_EXISTS;
243 break;
244 default:
245 flag = TEI_FLAGS_ERROR;
246 }
247
248 tei->flags |= flag;
249}
250
251/*
252 * Creates and references table with default parameters.
253 * Saves table config, algo and allocated kidx info @ptc, @pta and
254 * @pkidx if non-zero.
255 * Used for table auto-creation to support old binaries.
256 *
257 * Returns 0 on success.
258 */
259static int
260create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
261 uint16_t *pkidx)
262{
263 ipfw_xtable_info xi;
264 int error;
265
266 memset(&xi, 0, sizeof(xi));
267 /* Set default value mask for legacy clients */
268 xi.vmask = IPFW_VTYPE_LEGACY;
269
270 error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1);
271 if (error != 0)
272 return (error);
273
274 return (0);
275}
276
277/*
278 * Find and reference existing table optionally
279 * creating new one.
280 *
281 * Saves found table config into @ptc.
282 * Note function may drop/acquire UH_WLOCK.
283 * Returns 0 if table was found/created and referenced
284 * or non-zero return code.
285 */
286static int
287find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
288 struct tentry_info *tei, uint32_t count, int op,
289 struct table_config **ptc)
290{
291 struct namedobj_instance *ni;
292 struct table_config *tc;
293 uint16_t kidx;
294 int error;
295
296 IPFW_UH_WLOCK_ASSERT(ch);
297
298 ni = CHAIN_TO_NI(ch);
299 tc = NULL;
300 if ((tc = find_table(ni, ti)) != NULL) {
301 /* check table type */
302 if (tc->no.subtype != ti->type)
303 return (EINVAL);
304
305 if (tc->locked != 0)
306 return (EACCES);
307
308 /* Try to exit early on limit hit */
309 if (op == OP_ADD && count == 1 &&
310 check_table_limit(tc, tei) != 0)
311 return (EFBIG);
312
313 /* Reference and return */
314 tc->no.refcnt++;
315 *ptc = tc;
316 return (0);
317 }
318
319 if (op == OP_DEL)
320 return (ESRCH);
321
322 /* Compatibility mode: create new table for old clients */
323 if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
324 return (ESRCH);
325
326 IPFW_UH_WUNLOCK(ch);
327 error = create_table_compat(ch, ti, &kidx);
328 IPFW_UH_WLOCK(ch);
329
330 if (error != 0)
331 return (error);
332
333 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
334 KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx));
335
336 /* OK, now we've got referenced table. */
337 *ptc = tc;
338 return (0);
339}
340
341/*
342 * Rolls back already @added to @tc entries using state array @ta_buf_m.
343 * Assume the following layout:
344 * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases
345 * 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1])
346 * for storing deleted state
347 */
348static void
349rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
350 struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
351 uint32_t count, uint32_t added)
352{
353 struct table_algo *ta;
354 struct tentry_info *ptei;
355 caddr_t v, vv;
356 size_t ta_buf_sz;
357 int error, i;
358 uint32_t num;
359
360 IPFW_UH_WLOCK_ASSERT(ch);
361
362 ta = tc->ta;
363 ta_buf_sz = ta->ta_buf_size;
364 v = ta_buf_m;
365 vv = v + count * ta_buf_sz;
366 for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
367 ptei = &tei[i];
368 if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {
369
370 /*
371 * We have old value stored by previous
372 * call in @ptei->value. Do add once again
373 * to restore it.
374 */
375 error = ta->add(tc->astate, tinfo, ptei, v, &num);
376 KASSERT(error == 0, ("rollback UPDATE fail"));
377 KASSERT(num == 0, ("rollback UPDATE fail2"));
378 continue;
379 }
380
381 error = ta->prepare_del(ch, ptei, vv);
382 KASSERT(error == 0, ("pre-rollback INSERT failed"));
383 error = ta->del(tc->astate, tinfo, ptei, vv, &num);
384 KASSERT(error == 0, ("rollback INSERT failed"));
385 tc->count -= num;
386 }
387}
388
389/*
390 * Prepares add/del state for all @count entries in @tei.
391 * Uses either stack buffer (@ta_buf) or allocates a new one.
392 * Stores pointer to allocated buffer back to @ta_buf.
393 *
394 * Returns 0 on success.
395 */
396static int
397prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
398 struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
399{
400 caddr_t ta_buf_m, v;
401 size_t ta_buf_sz, sz;
402 struct tentry_info *ptei;
403 int error, i;
404
405 error = 0;
406 ta_buf_sz = ta->ta_buf_size;
407 if (count == 1) {
408 /* Sigle add/delete, use on-stack buffer */
409 memset(*ta_buf, 0, TA_BUF_SZ);
410 ta_buf_m = *ta_buf;
411 } else {
412
413 /*
414 * Multiple adds/deletes, allocate larger buffer
415 *
416 * Note we need 2xcount buffer for add case:
417 * we have hold both ADD state
418 * and DELETE state (this may be needed
419 * if we need to rollback all changes)
420 */
421 sz = count * ta_buf_sz;
422 ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
423 M_WAITOK | M_ZERO);
424 }
425
426 v = ta_buf_m;
427 for (i = 0; i < count; i++, v += ta_buf_sz) {
428 ptei = &tei[i];
429 error = (op == OP_ADD) ?
430 ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);
431
432 /*
433 * Some syntax error (incorrect mask, or address, or
434 * anything). Return error regardless of atomicity
435 * settings.
436 */
437 if (error != 0)
438 break;
439 }
440
441 *ta_buf = ta_buf_m;
442 return (error);
443}
444
445/*
446 * Flushes allocated state for each @count entries in @tei.
447 * Frees @ta_buf_m if differs from stack buffer @ta_buf.
448 */
449static void
450flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
451 struct tentry_info *tei, uint32_t count, int rollback,
452 caddr_t ta_buf_m, caddr_t ta_buf)
453{
454 caddr_t v;
455 struct tentry_info *ptei;
456 size_t ta_buf_sz;
457 int i;
458
459 ta_buf_sz = ta->ta_buf_size;
460
461 /* Run cleaning callback anyway */
462 v = ta_buf_m;
463 for (i = 0; i < count; i++, v += ta_buf_sz) {
464 ptei = &tei[i];
465 ta->flush_entry(ch, ptei, v);
466 if (ptei->ptv != NULL) {
467 free(ptei->ptv, M_IPFW);
468 ptei->ptv = NULL;
469 }
470 }
471
472 /* Clean up "deleted" state in case of rollback */
473 if (rollback != 0) {
474 v = ta_buf_m + count * ta_buf_sz;
475 for (i = 0; i < count; i++, v += ta_buf_sz)
476 ta->flush_entry(ch, &tei[i], v);
477 }
478
479 if (ta_buf_m != ta_buf)
480 free(ta_buf_m, M_TEMP);
481}
482
483
484static void
485rollback_add_entry(void *object, struct op_state *_state)
486{
487 struct ip_fw_chain *ch;
488 struct tableop_state *ts;
489
490 ts = (struct tableop_state *)_state;
491
492 if (ts->tc != object && ts->ch != object)
493 return;
494
495 ch = ts->ch;
496
497 IPFW_UH_WLOCK_ASSERT(ch);
498
499 /* Call specifid unlockers */
500 rollback_table_values(ts);
501
502 /* Indicate we've called */
503 ts->modified = 1;
504}
505
506/*
507 * Adds/updates one or more entries in table @ti.
508 *
509 * Function may drop/reacquire UH wlock multiple times due to
510 * items alloc, algorithm callbacks (check_space), value linkage
511 * (new values, value storage realloc), etc..
512 * Other processes like other adds (which may involve storage resize),
513 * table swaps (which changes table data and may change algo type),
514 * table modify (which may change value mask) may be executed
515 * simultaneously so we need to deal with it.
516 *
517 * The following approach was implemented:
518 * we have per-chain linked list, protected with UH lock.
519 * add_table_entry prepares special on-stack structure wthich is passed
520 * to its descendants. Users add this structure to this list before unlock.
521 * After performing needed operations and acquiring UH lock back, each user
522 * checks if structure has changed. If true, it rolls local state back and
523 * returns without error to the caller.
524 * add_table_entry() on its own checks if structure has changed and restarts
525 * its operation from the beginning (goto restart).
526 *
527 * Functions which are modifying fields of interest (currently
528 * resize_shared_value_storage() and swap_tables() )
529 * traverses given list while holding UH lock immediately before
530 * performing their operations calling function provided be list entry
531 * ( currently rollback_add_entry ) which performs rollback for all necessary
532 * state and sets appropriate values in structure indicating rollback
533 * has happened.
534 *
535 * Algo interaction:
536 * Function references @ti first to ensure table won't
537 * disappear or change its type.
538 * After that, prepare_add callback is called for each @tei entry.
539 * Next, we try to add each entry under UH+WHLOCK
540 * using add() callback.
541 * Finally, we free all state by calling flush_entry callback
542 * for each @tei.
543 *
544 * Returns 0 on success.
545 */
546int
547add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
548 struct tentry_info *tei, uint8_t flags, uint32_t count)
549{
550 struct table_config *tc;
551 struct table_algo *ta;
552 uint16_t kidx;
553 int error, first_error, i, rollback;
554 uint32_t num, numadd;
555 struct tentry_info *ptei;
556 struct tableop_state ts;
557 char ta_buf[TA_BUF_SZ];
558 caddr_t ta_buf_m, v;
559
560 memset(&ts, 0, sizeof(ts));
561 ta = NULL;
562 IPFW_UH_WLOCK(ch);
563
564 /*
565 * Find and reference existing table.
566 */
567restart:
568 if (ts.modified != 0) {
569 IPFW_UH_WUNLOCK(ch);
570 flush_batch_buffer(ch, ta, tei, count, rollback,
571 ta_buf_m, ta_buf);
572 memset(&ts, 0, sizeof(ts));
573 ta = NULL;
574 IPFW_UH_WLOCK(ch);
575 }
576
577 error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
578 if (error != 0) {
579 IPFW_UH_WUNLOCK(ch);
580 return (error);
581 }
582 ta = tc->ta;
583
584 /* Fill in tablestate */
585 ts.ch = ch;
586 ts.opstate.func = rollback_add_entry;
587 ts.tc = tc;
588 ts.vshared = tc->vshared;
589 ts.vmask = tc->vmask;
590 ts.ta = ta;
591 ts.tei = tei;
592 ts.count = count;
593 rollback = 0;
594 add_toperation_state(ch, &ts);
595 IPFW_UH_WUNLOCK(ch);
596
597 /* Allocate memory and prepare record(s) */
598 /* Pass stack buffer by default */
599 ta_buf_m = ta_buf;
600 error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);
601
602 IPFW_UH_WLOCK(ch);
603 del_toperation_state(ch, &ts);
604 /* Drop reference we've used in first search */
605 tc->no.refcnt--;
606
607 /* Check prepare_batch_buffer() error */
608 if (error != 0)
609 goto cleanup;
610
611 /*
612 * Check if table swap has happened.
613 * (so table algo might be changed).
614 * Restart operation to achieve consistent behavior.
615 */
616 if (ts.modified != 0)
617 goto restart;
618
619 /*
620 * Link all values values to shared/per-table value array.
621 *
622 * May release/reacquire UH_WLOCK.
623 */
624 error = ipfw_link_table_values(ch, &ts);
625 if (error != 0)
626 goto cleanup;
627 if (ts.modified != 0)
628 goto restart;
629
630 /*
631 * Ensure we are able to add all entries without additional
632 * memory allocations. May release/reacquire UH_WLOCK.
633 */
634 kidx = tc->no.kidx;
635 error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count);
636 if (error != 0)
637 goto cleanup;
638 if (ts.modified != 0)
639 goto restart;
640
641 /* We've got valid table in @tc. Let's try to add data */
642 kidx = tc->no.kidx;
643 ta = tc->ta;
644 numadd = 0;
645 first_error = 0;
646
647 IPFW_WLOCK(ch);
648
649 v = ta_buf_m;
650 for (i = 0; i < count; i++, v += ta->ta_buf_size) {
651 ptei = &tei[i];
652 num = 0;
653 /* check limit before adding */
654 if ((error = check_table_limit(tc, ptei)) == 0) {
655 error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
656 ptei, v, &num);
657 /* Set status flag to inform userland */
658 store_tei_result(ptei, OP_ADD, error, num);
659 }
660 if (error == 0) {
661 /* Update number of records to ease limit checking */
662 tc->count += num;
663 numadd += num;
664 continue;
665 }
666
667 if (first_error == 0)
668 first_error = error;
669
670 /*
671 * Some error have happened. Check our atomicity
672 * settings: continue if atomicity is not required,
673 * rollback changes otherwise.
674 */
675 if ((flags & IPFW_CTF_ATOMIC) == 0)
676 continue;
677
678 rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
679 tei, ta_buf_m, count, i);
680
681 rollback = 1;
682 break;
683 }
684
685 IPFW_WUNLOCK(ch);
686
687 ipfw_garbage_table_values(ch, tc, tei, count, rollback);
688
689 /* Permit post-add algorithm grow/rehash. */
690 if (numadd != 0)
691 check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
692
693 /* Return first error to user, if any */
694 error = first_error;
695
696cleanup:
697 IPFW_UH_WUNLOCK(ch);
698
699 flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);
700
701 return (error);
702}
703
704/*
705 * Deletes one or more entries in table @ti.
706 *
707 * Returns 0 on success.
708 */
709int
710del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
711 struct tentry_info *tei, uint8_t flags, uint32_t count)
712{
713 struct table_config *tc;
714 struct table_algo *ta;
715 struct tentry_info *ptei;
716 uint16_t kidx;
717 int error, first_error, i;
718 uint32_t num, numdel;
719 char ta_buf[TA_BUF_SZ];
720 caddr_t ta_buf_m, v;
721
722 /*
723 * Find and reference existing table.
724 */
725 IPFW_UH_WLOCK(ch);
726 error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
727 if (error != 0) {
728 IPFW_UH_WUNLOCK(ch);
729 return (error);
730 }
731 ta = tc->ta;
732 IPFW_UH_WUNLOCK(ch);
733
734 /* Allocate memory and prepare record(s) */
735 /* Pass stack buffer by default */
736 ta_buf_m = ta_buf;
737 error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
738 if (error != 0)
739 goto cleanup;
740
741 IPFW_UH_WLOCK(ch);
742
743 /* Drop reference we've used in first search */
744 tc->no.refcnt--;
745
746 /*
747 * Check if table algo is still the same.
748 * (changed ta may be the result of table swap).
749 */
750 if (ta != tc->ta) {
751 IPFW_UH_WUNLOCK(ch);
752 error = EINVAL;
753 goto cleanup;
754 }
755
756 kidx = tc->no.kidx;
757 numdel = 0;
758 first_error = 0;
759
760 IPFW_WLOCK(ch);
761 v = ta_buf_m;
762 for (i = 0; i < count; i++, v += ta->ta_buf_size) {
763 ptei = &tei[i];
764 num = 0;
765 error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
766 &num);
767 /* Save state for userland */
768 store_tei_result(ptei, OP_DEL, error, num);
769 if (error != 0 && first_error == 0)
770 first_error = error;
771 tc->count -= num;
772 numdel += num;
773 }
774 IPFW_WUNLOCK(ch);
775
776 /* Unlink non-used values */
777 ipfw_garbage_table_values(ch, tc, tei, count, 0);
778
779 if (numdel != 0) {
780 /* Run post-del hook to permit shrinking */
781 check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
782 }
783
784 IPFW_UH_WUNLOCK(ch);
785
786 /* Return first error to user, if any */
787 error = first_error;
788
789cleanup:
790 flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);
791
792 return (error);
793}
794
795/*
796 * Ensure that table @tc has enough space to add @count entries without
797 * need for reallocation.
798 *
799 * Callbacks order:
800 * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize.
801 *
802 * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags.
803 * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage
804 * 3) modify (UH_WLOCK + WLOCK) - switch pointers
805 * 4) flush_modify (UH_WLOCK) - free state, if needed
806 *
807 * Returns 0 on success.
808 */
809static int
810check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
811 struct table_config *tc, struct table_info *ti, uint32_t count)
812{
813 struct table_algo *ta;
814 uint64_t pflags;
815 char ta_buf[TA_BUF_SZ];
816 int error;
817
818 IPFW_UH_WLOCK_ASSERT(ch);
819
820 error = 0;
821 ta = tc->ta;
822 if (ta->need_modify == NULL)
823 return (0);
824
825 /* Acquire reference not to loose @tc between locks/unlocks */
826 tc->no.refcnt++;
827
828 /*
829 * TODO: think about avoiding race between large add/large delete
830 * operation on algorithm which implements shrinking along with
831 * growing.
832 */
833 while (true) {
834 pflags = 0;
835 if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
836 error = 0;
837 break;
838 }
839
840 /* We have to shrink/grow table */
841 if (ts != NULL)
842 add_toperation_state(ch, ts);
843 IPFW_UH_WUNLOCK(ch);
844
845 memset(&ta_buf, 0, sizeof(ta_buf));
846 error = ta->prepare_mod(ta_buf, &pflags);
847
848 IPFW_UH_WLOCK(ch);
849 if (ts != NULL)
850 del_toperation_state(ch, ts);
851
852 if (error != 0)
853 break;
854
855 if (ts != NULL && ts->modified != 0) {
856
857 /*
858 * Swap operation has happened
859 * so we're currently operating on other
860 * table data. Stop doing this.
861 */
862 ta->flush_mod(ta_buf);
863 break;
864 }
865
866 /* Check if we still need to alter table */
867 ti = KIDX_TO_TI(ch, tc->no.kidx);
868 if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
869 IPFW_UH_WUNLOCK(ch);
870
871 /*
872 * Other thread has already performed resize.
873 * Flush our state and return.
874 */
875 ta->flush_mod(ta_buf);
876 break;
877 }
878
879 error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
880 if (error == 0) {
881 /* Do actual modification */
882 IPFW_WLOCK(ch);
883 ta->modify(tc->astate, ti, ta_buf, pflags);
884 IPFW_WUNLOCK(ch);
885 }
886
887 /* Anyway, flush data and retry */
888 ta->flush_mod(ta_buf);
889 }
890
891 tc->no.refcnt--;
892 return (error);
893}
894
895/*
896 * Adds or deletes record in table.
897 * Data layout (v0):
898 * Request: [ ip_fw3_opheader ipfw_table_xentry ]
899 *
900 * Returns 0 on success
901 */
902static int
903manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
904 struct sockopt_data *sd)
905{
906 ipfw_table_xentry *xent;
907 struct tentry_info tei;
908 struct tid_info ti;
909 struct table_value v;
910 int error, hdrlen, read;
911
912 hdrlen = offsetof(ipfw_table_xentry, k);
913
914 /* Check minimum header size */
915 if (sd->valsize < (sizeof(*op3) + hdrlen))
916 return (EINVAL);
917
918 read = sizeof(ip_fw3_opheader);
919
920 /* Check if xentry len field is valid */
921 xent = (ipfw_table_xentry *)(op3 + 1);
922 if (xent->len < hdrlen || xent->len + read > sd->valsize)
923 return (EINVAL);
924
925 memset(&tei, 0, sizeof(tei));
926 tei.paddr = &xent->k;
927 tei.masklen = xent->masklen;
928 ipfw_import_table_value_legacy(xent->value, &v);
929 tei.pvalue = &v;
930 /* Old requests compatibility */
931 tei.flags = TEI_FLAGS_COMPAT;
932 if (xent->type == IPFW_TABLE_ADDR) {
933 if (xent->len - hdrlen == sizeof(in_addr_t))
934 tei.subtype = AF_INET;
935 else
936 tei.subtype = AF_INET6;
937 }
938
939 memset(&ti, 0, sizeof(ti));
940 ti.uidx = xent->tbl;
941 ti.type = xent->type;
942
943 error = (op3->opcode == IP_FW_TABLE_XADD) ?
944 add_table_entry(ch, &ti, &tei, 0, 1) :
945 del_table_entry(ch, &ti, &tei, 0, 1);
946
947 return (error);
948}
949
950/*
951 * Adds or deletes record in table.
952 * Data layout (v1)(current):
953 * Request: [ ipfw_obj_header
954 * ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
955 * ]
956 *
957 * Returns 0 on success
958 */
959static int
960manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
961 struct sockopt_data *sd)
962{
963 ipfw_obj_tentry *tent, *ptent;
964 ipfw_obj_ctlv *ctlv;
965 ipfw_obj_header *oh;
966 struct tentry_info *ptei, tei, *tei_buf;
967 struct tid_info ti;
968 int error, i, kidx, read;
969
970 /* Check minimum header size */
971 if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
972 return (EINVAL);
973
974 /* Check if passed data is too long */
975 if (sd->valsize != sd->kavail)
976 return (EINVAL);
977
978 oh = (ipfw_obj_header *)sd->kbuf;
979
980 /* Basic length checks for TLVs */
981 if (oh->ntlv.head.length != sizeof(oh->ntlv))
982 return (EINVAL);
983
984 read = sizeof(*oh);
985
986 ctlv = (ipfw_obj_ctlv *)(oh + 1);
987 if (ctlv->head.length + read != sd->valsize)
988 return (EINVAL);
989
990 read += sizeof(*ctlv);
991 tent = (ipfw_obj_tentry *)(ctlv + 1);
992 if (ctlv->count * sizeof(*tent) + read != sd->valsize)
993 return (EINVAL);
994
995 if (ctlv->count == 0)
996 return (0);
997
998 /*
999 * Mark entire buffer as "read".
1000 * This instructs sopt api write it back
1001 * after function return.
1002 */
1003 ipfw_get_sopt_header(sd, sd->valsize);
1004
1005 /* Perform basic checks for each entry */
1006 ptent = tent;
1007 kidx = tent->idx;
1008 for (i = 0; i < ctlv->count; i++, ptent++) {
1009 if (ptent->head.length != sizeof(*ptent))
1010 return (EINVAL);
1011 if (ptent->idx != kidx)
1012 return (ENOTSUP);
1013 }
1014
1015 /* Convert data into kernel request objects */
1016 objheader_to_ti(oh, &ti);
1017 ti.type = oh->ntlv.type;
1018 ti.uidx = kidx;
1019
1020 /* Use on-stack buffer for single add/del */
1021 if (ctlv->count == 1) {
1022 memset(&tei, 0, sizeof(tei));
1023 tei_buf = &tei;
1024 } else
1025 tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
1026 M_WAITOK | M_ZERO);
1027
1028 ptei = tei_buf;
1029 ptent = tent;
1030 for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
1031 ptei->paddr = &ptent->k;
1032 ptei->subtype = ptent->subtype;
1033 ptei->masklen = ptent->masklen;
1034 if (ptent->head.flags & IPFW_TF_UPDATE)
1035 ptei->flags |= TEI_FLAGS_UPDATE;
1036
1037 ipfw_import_table_value_v1(&ptent->v.value);
1038 ptei->pvalue = (struct table_value *)&ptent->v.value;
1039 }
1040
1041 error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
1042 add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
1043 del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);
1044
1045 /* Translate result back to userland */
1046 ptei = tei_buf;
1047 ptent = tent;
1048 for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
1049 if (ptei->flags & TEI_FLAGS_ADDED)
1050 ptent->result = IPFW_TR_ADDED;
1051 else if (ptei->flags & TEI_FLAGS_DELETED)
1052 ptent->result = IPFW_TR_DELETED;
1053 else if (ptei->flags & TEI_FLAGS_UPDATED)
1054 ptent->result = IPFW_TR_UPDATED;
1055 else if (ptei->flags & TEI_FLAGS_LIMIT)
1056 ptent->result = IPFW_TR_LIMIT;
1057 else if (ptei->flags & TEI_FLAGS_ERROR)
1058 ptent->result = IPFW_TR_ERROR;
1059 else if (ptei->flags & TEI_FLAGS_NOTFOUND)
1060 ptent->result = IPFW_TR_NOTFOUND;
1061 else if (ptei->flags & TEI_FLAGS_EXISTS)
1062 ptent->result = IPFW_TR_EXISTS;
1063 ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
1064 }
1065
1066 if (tei_buf != &tei)
1067 free(tei_buf, M_TEMP);
1068
1069 return (error);
1070}
1071
1072/*
1073 * Looks up an entry in given table.
1074 * Data layout (v0)(current):
1075 * Request: [ ipfw_obj_header ipfw_obj_tentry ]
1076 * Reply: [ ipfw_obj_header ipfw_obj_tentry ]
1077 *
1078 * Returns 0 on success
1079 */
1080static int
1081find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1082 struct sockopt_data *sd)
1083{
1084 ipfw_obj_tentry *tent;
1085 ipfw_obj_header *oh;
1086 struct tid_info ti;
1087 struct table_config *tc;
1088 struct table_algo *ta;
1089 struct table_info *kti;
30
31/*
32 * Lookup table support for ipfw.
33 *
34 * This file contains handlers for all generic tables' operations:
35 * add/del/flush entries, list/dump tables etc..
36 *
37 * Table data modification is protected by both UH and runtime lock
38 * while reading configuration/data is protected by UH lock.
39 *
40 * Lookup algorithms for all table types are located in ip_fw_table_algo.c
41 */
42
43#include "opt_ipfw.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/malloc.h>
48#include <sys/kernel.h>
49#include <sys/lock.h>
50#include <sys/rwlock.h>
51#include <sys/rmlock.h>
52#include <sys/socket.h>
53#include <sys/socketvar.h>
54#include <sys/queue.h>
55#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */
56
57#include <netinet/in.h>
58#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
59#include <netinet/ip_fw.h>
60
61#include <netpfil/ipfw/ip_fw_private.h>
62#include <netpfil/ipfw/ip_fw_table.h>
63
64 /*
65 * Table has the following `type` concepts:
66 *
67 * `no.type` represents lookup key type (addr, ifp, uid, etc..)
68 * vmask represents bitmask of table values which are present at the moment.
69 * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old
70 * single-value-for-all approach.
71 */
72struct table_config {
73 struct named_object no;
74 uint8_t tflags; /* type flags */
75 uint8_t locked; /* 1 if locked from changes */
76 uint8_t linked; /* 1 if already linked */
77 uint8_t ochanged; /* used by set swapping */
78 uint8_t vshared; /* 1 if using shared value array */
79 uint8_t spare[3];
80 uint32_t count; /* Number of records */
81 uint32_t limit; /* Max number of records */
82 uint32_t vmask; /* bitmask with supported values */
83 uint32_t ocount; /* used by set swapping */
84 uint64_t gencnt; /* generation count */
85 char tablename[64]; /* table name */
86 struct table_algo *ta; /* Callbacks for given algo */
87 void *astate; /* algorithm state */
88 struct table_info ti_copy; /* data to put to table_info */
89 struct namedobj_instance *vi;
90};
91
92static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
93 struct table_config **tc);
94static struct table_config *find_table(struct namedobj_instance *ni,
95 struct tid_info *ti);
96static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
97 struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
98static void free_table_config(struct namedobj_instance *ni,
99 struct table_config *tc);
100static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
101 char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref);
102static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
103static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
104static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
105 struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
106#define OP_ADD 1
107#define OP_DEL 0
108static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
109 struct sockopt_data *sd);
110static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
111 ipfw_xtable_info *i);
112static int dump_table_tentry(void *e, void *arg);
113static int dump_table_xentry(void *e, void *arg);
114
115static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
116 struct tid_info *b);
117
118static int check_table_name(const char *name);
119static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
120 struct table_config *tc, struct table_info *ti, uint32_t count);
121static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);
122
123static struct table_algo *find_table_algo(struct tables_config *tableconf,
124 struct tid_info *ti, char *name);
125
126static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
127static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
128
129#define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash)
130#define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k]))
131
132#define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */
133
134void
135rollback_toperation_state(struct ip_fw_chain *ch, void *object)
136{
137 struct tables_config *tcfg;
138 struct op_state *os;
139
140 tcfg = CHAIN_TO_TCFG(ch);
141 TAILQ_FOREACH(os, &tcfg->state_list, next)
142 os->func(object, os);
143}
144
145void
146add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
147{
148 struct tables_config *tcfg;
149
150 tcfg = CHAIN_TO_TCFG(ch);
151 TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next);
152}
153
154void
155del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
156{
157 struct tables_config *tcfg;
158
159 tcfg = CHAIN_TO_TCFG(ch);
160 TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next);
161}
162
163void
164tc_ref(struct table_config *tc)
165{
166
167 tc->no.refcnt++;
168}
169
170void
171tc_unref(struct table_config *tc)
172{
173
174 tc->no.refcnt--;
175}
176
177static struct table_value *
178get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
179{
180 struct table_value *pval;
181
182 pval = (struct table_value *)ch->valuestate;
183
184 return (&pval[kidx]);
185}
186
187
188/*
189 * Checks if we're able to insert/update entry @tei into table
190 * w.r.t @tc limits.
191 * May alter @tei to indicate insertion error / insert
192 * options.
193 *
194 * Returns 0 if operation can be performed/
195 */
196static int
197check_table_limit(struct table_config *tc, struct tentry_info *tei)
198{
199
200 if (tc->limit == 0 || tc->count < tc->limit)
201 return (0);
202
203 if ((tei->flags & TEI_FLAGS_UPDATE) == 0) {
204 /* Notify userland on error cause */
205 tei->flags |= TEI_FLAGS_LIMIT;
206 return (EFBIG);
207 }
208
209 /*
210 * We have UPDATE flag set.
211 * Permit updating record (if found),
212 * but restrict adding new one since we've
213 * already hit the limit.
214 */
215 tei->flags |= TEI_FLAGS_DONTADD;
216
217 return (0);
218}
219
220/*
221 * Convert algorithm callback return code into
222 * one of pre-defined states known by userland.
223 */
224static void
225store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
226{
227 int flag;
228
229 flag = 0;
230
231 switch (error) {
232 case 0:
233 if (op == OP_ADD && num != 0)
234 flag = TEI_FLAGS_ADDED;
235 if (op == OP_DEL)
236 flag = TEI_FLAGS_DELETED;
237 break;
238 case ENOENT:
239 flag = TEI_FLAGS_NOTFOUND;
240 break;
241 case EEXIST:
242 flag = TEI_FLAGS_EXISTS;
243 break;
244 default:
245 flag = TEI_FLAGS_ERROR;
246 }
247
248 tei->flags |= flag;
249}
250
251/*
252 * Creates and references table with default parameters.
253 * Saves table config, algo and allocated kidx info @ptc, @pta and
254 * @pkidx if non-zero.
255 * Used for table auto-creation to support old binaries.
256 *
257 * Returns 0 on success.
258 */
259static int
260create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
261 uint16_t *pkidx)
262{
263 ipfw_xtable_info xi;
264 int error;
265
266 memset(&xi, 0, sizeof(xi));
267 /* Set default value mask for legacy clients */
268 xi.vmask = IPFW_VTYPE_LEGACY;
269
270 error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1);
271 if (error != 0)
272 return (error);
273
274 return (0);
275}
276
277/*
278 * Find and reference existing table optionally
279 * creating new one.
280 *
281 * Saves found table config into @ptc.
282 * Note function may drop/acquire UH_WLOCK.
283 * Returns 0 if table was found/created and referenced
284 * or non-zero return code.
285 */
286static int
287find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
288 struct tentry_info *tei, uint32_t count, int op,
289 struct table_config **ptc)
290{
291 struct namedobj_instance *ni;
292 struct table_config *tc;
293 uint16_t kidx;
294 int error;
295
296 IPFW_UH_WLOCK_ASSERT(ch);
297
298 ni = CHAIN_TO_NI(ch);
299 tc = NULL;
300 if ((tc = find_table(ni, ti)) != NULL) {
301 /* check table type */
302 if (tc->no.subtype != ti->type)
303 return (EINVAL);
304
305 if (tc->locked != 0)
306 return (EACCES);
307
308 /* Try to exit early on limit hit */
309 if (op == OP_ADD && count == 1 &&
310 check_table_limit(tc, tei) != 0)
311 return (EFBIG);
312
313 /* Reference and return */
314 tc->no.refcnt++;
315 *ptc = tc;
316 return (0);
317 }
318
319 if (op == OP_DEL)
320 return (ESRCH);
321
322 /* Compatibility mode: create new table for old clients */
323 if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
324 return (ESRCH);
325
326 IPFW_UH_WUNLOCK(ch);
327 error = create_table_compat(ch, ti, &kidx);
328 IPFW_UH_WLOCK(ch);
329
330 if (error != 0)
331 return (error);
332
333 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
334 KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx));
335
336 /* OK, now we've got referenced table. */
337 *ptc = tc;
338 return (0);
339}
340
341/*
342 * Rolls back already @added to @tc entries using state array @ta_buf_m.
343 * Assume the following layout:
344 * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases
345 * 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1])
346 * for storing deleted state
347 */
348static void
349rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
350 struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
351 uint32_t count, uint32_t added)
352{
353 struct table_algo *ta;
354 struct tentry_info *ptei;
355 caddr_t v, vv;
356 size_t ta_buf_sz;
357 int error, i;
358 uint32_t num;
359
360 IPFW_UH_WLOCK_ASSERT(ch);
361
362 ta = tc->ta;
363 ta_buf_sz = ta->ta_buf_size;
364 v = ta_buf_m;
365 vv = v + count * ta_buf_sz;
366 for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
367 ptei = &tei[i];
368 if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {
369
370 /*
371 * We have old value stored by previous
372 * call in @ptei->value. Do add once again
373 * to restore it.
374 */
375 error = ta->add(tc->astate, tinfo, ptei, v, &num);
376 KASSERT(error == 0, ("rollback UPDATE fail"));
377 KASSERT(num == 0, ("rollback UPDATE fail2"));
378 continue;
379 }
380
381 error = ta->prepare_del(ch, ptei, vv);
382 KASSERT(error == 0, ("pre-rollback INSERT failed"));
383 error = ta->del(tc->astate, tinfo, ptei, vv, &num);
384 KASSERT(error == 0, ("rollback INSERT failed"));
385 tc->count -= num;
386 }
387}
388
389/*
390 * Prepares add/del state for all @count entries in @tei.
391 * Uses either stack buffer (@ta_buf) or allocates a new one.
392 * Stores pointer to allocated buffer back to @ta_buf.
393 *
394 * Returns 0 on success.
395 */
396static int
397prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
398 struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
399{
400 caddr_t ta_buf_m, v;
401 size_t ta_buf_sz, sz;
402 struct tentry_info *ptei;
403 int error, i;
404
405 error = 0;
406 ta_buf_sz = ta->ta_buf_size;
407 if (count == 1) {
408 /* Sigle add/delete, use on-stack buffer */
409 memset(*ta_buf, 0, TA_BUF_SZ);
410 ta_buf_m = *ta_buf;
411 } else {
412
413 /*
414 * Multiple adds/deletes, allocate larger buffer
415 *
416 * Note we need 2xcount buffer for add case:
417 * we have hold both ADD state
418 * and DELETE state (this may be needed
419 * if we need to rollback all changes)
420 */
421 sz = count * ta_buf_sz;
422 ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
423 M_WAITOK | M_ZERO);
424 }
425
426 v = ta_buf_m;
427 for (i = 0; i < count; i++, v += ta_buf_sz) {
428 ptei = &tei[i];
429 error = (op == OP_ADD) ?
430 ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);
431
432 /*
433 * Some syntax error (incorrect mask, or address, or
434 * anything). Return error regardless of atomicity
435 * settings.
436 */
437 if (error != 0)
438 break;
439 }
440
441 *ta_buf = ta_buf_m;
442 return (error);
443}
444
445/*
446 * Flushes allocated state for each @count entries in @tei.
447 * Frees @ta_buf_m if differs from stack buffer @ta_buf.
448 */
449static void
450flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
451 struct tentry_info *tei, uint32_t count, int rollback,
452 caddr_t ta_buf_m, caddr_t ta_buf)
453{
454 caddr_t v;
455 struct tentry_info *ptei;
456 size_t ta_buf_sz;
457 int i;
458
459 ta_buf_sz = ta->ta_buf_size;
460
461 /* Run cleaning callback anyway */
462 v = ta_buf_m;
463 for (i = 0; i < count; i++, v += ta_buf_sz) {
464 ptei = &tei[i];
465 ta->flush_entry(ch, ptei, v);
466 if (ptei->ptv != NULL) {
467 free(ptei->ptv, M_IPFW);
468 ptei->ptv = NULL;
469 }
470 }
471
472 /* Clean up "deleted" state in case of rollback */
473 if (rollback != 0) {
474 v = ta_buf_m + count * ta_buf_sz;
475 for (i = 0; i < count; i++, v += ta_buf_sz)
476 ta->flush_entry(ch, &tei[i], v);
477 }
478
479 if (ta_buf_m != ta_buf)
480 free(ta_buf_m, M_TEMP);
481}
482
483
484static void
485rollback_add_entry(void *object, struct op_state *_state)
486{
487 struct ip_fw_chain *ch;
488 struct tableop_state *ts;
489
490 ts = (struct tableop_state *)_state;
491
492 if (ts->tc != object && ts->ch != object)
493 return;
494
495 ch = ts->ch;
496
497 IPFW_UH_WLOCK_ASSERT(ch);
498
499 /* Call specifid unlockers */
500 rollback_table_values(ts);
501
502 /* Indicate we've called */
503 ts->modified = 1;
504}
505
506/*
507 * Adds/updates one or more entries in table @ti.
508 *
509 * Function may drop/reacquire UH wlock multiple times due to
510 * items alloc, algorithm callbacks (check_space), value linkage
511 * (new values, value storage realloc), etc..
512 * Other processes like other adds (which may involve storage resize),
513 * table swaps (which changes table data and may change algo type),
514 * table modify (which may change value mask) may be executed
515 * simultaneously so we need to deal with it.
516 *
517 * The following approach was implemented:
518 * we have a per-chain linked list, protected with UH lock.
519 * add_table_entry prepares a special on-stack structure which is passed
520 * to its descendants. Users add this structure to the list before unlock.
521 * After performing needed operations and acquiring UH lock back, each user
522 * checks if the structure has changed. If true, it rolls local state back
523 * and returns without error to the caller.
524 * add_table_entry() on its own checks if the structure has changed and
525 * restarts its operation from the beginning (goto restart).
526 *
527 * Functions which are modifying fields of interest (currently
528 * resize_shared_value_storage() and swap_tables() )
529 * traverse the given list while holding UH lock immediately before
530 * performing their operations, calling the function provided by each list
531 * entry ( currently rollback_add_entry ) which performs rollback for all
532 * necessary state and sets appropriate values in the structure indicating
533 * that rollback has happened.
534 *
535 * Algo interaction:
536 * Function references @ti first to ensure table won't
537 * disappear or change its type.
538 * After that, prepare_add callback is called for each @tei entry.
539 * Next, we try to add each entry under UH+WLOCK
540 * using add() callback.
541 * Finally, we free all state by calling flush_entry callback
542 * for each @tei.
543 *
544 * Returns 0 on success.
545 */
546int
547add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
548 struct tentry_info *tei, uint8_t flags, uint32_t count)
549{
550 struct table_config *tc;
551 struct table_algo *ta;
552 uint16_t kidx;
553 int error, first_error, i, rollback;
554 uint32_t num, numadd;
555 struct tentry_info *ptei;
556 struct tableop_state ts;
557 char ta_buf[TA_BUF_SZ];
558 caddr_t ta_buf_m, v;
559
560 memset(&ts, 0, sizeof(ts));
561 ta = NULL;
562 IPFW_UH_WLOCK(ch);
563
564 /*
565 * Find and reference existing table.
566 */
567restart:
568 if (ts.modified != 0) {
569 IPFW_UH_WUNLOCK(ch);
570 flush_batch_buffer(ch, ta, tei, count, rollback,
571 ta_buf_m, ta_buf);
572 memset(&ts, 0, sizeof(ts));
573 ta = NULL;
574 IPFW_UH_WLOCK(ch);
575 }
576
577 error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
578 if (error != 0) {
579 IPFW_UH_WUNLOCK(ch);
580 return (error);
581 }
582 ta = tc->ta;
583
584 /* Fill in tablestate */
585 ts.ch = ch;
586 ts.opstate.func = rollback_add_entry;
587 ts.tc = tc;
588 ts.vshared = tc->vshared;
589 ts.vmask = tc->vmask;
590 ts.ta = ta;
591 ts.tei = tei;
592 ts.count = count;
593 rollback = 0;
594 add_toperation_state(ch, &ts);
595 IPFW_UH_WUNLOCK(ch);
596
597 /* Allocate memory and prepare record(s) */
598 /* Pass stack buffer by default */
599 ta_buf_m = ta_buf;
600 error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);
601
602 IPFW_UH_WLOCK(ch);
603 del_toperation_state(ch, &ts);
604 /* Drop reference we've used in first search */
605 tc->no.refcnt--;
606
607 /* Check prepare_batch_buffer() error */
608 if (error != 0)
609 goto cleanup;
610
611 /*
612 * Check if table swap has happened.
613 * (so table algo might be changed).
614 * Restart operation to achieve consistent behavior.
615 */
616 if (ts.modified != 0)
617 goto restart;
618
619 /*
620 * Link all values values to shared/per-table value array.
621 *
622 * May release/reacquire UH_WLOCK.
623 */
624 error = ipfw_link_table_values(ch, &ts);
625 if (error != 0)
626 goto cleanup;
627 if (ts.modified != 0)
628 goto restart;
629
630 /*
631 * Ensure we are able to add all entries without additional
632 * memory allocations. May release/reacquire UH_WLOCK.
633 */
634 kidx = tc->no.kidx;
635 error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count);
636 if (error != 0)
637 goto cleanup;
638 if (ts.modified != 0)
639 goto restart;
640
641 /* We've got valid table in @tc. Let's try to add data */
642 kidx = tc->no.kidx;
643 ta = tc->ta;
644 numadd = 0;
645 first_error = 0;
646
647 IPFW_WLOCK(ch);
648
649 v = ta_buf_m;
650 for (i = 0; i < count; i++, v += ta->ta_buf_size) {
651 ptei = &tei[i];
652 num = 0;
653 /* check limit before adding */
654 if ((error = check_table_limit(tc, ptei)) == 0) {
655 error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
656 ptei, v, &num);
657 /* Set status flag to inform userland */
658 store_tei_result(ptei, OP_ADD, error, num);
659 }
660 if (error == 0) {
661 /* Update number of records to ease limit checking */
662 tc->count += num;
663 numadd += num;
664 continue;
665 }
666
667 if (first_error == 0)
668 first_error = error;
669
670 /*
671 * Some error have happened. Check our atomicity
672 * settings: continue if atomicity is not required,
673 * rollback changes otherwise.
674 */
675 if ((flags & IPFW_CTF_ATOMIC) == 0)
676 continue;
677
678 rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
679 tei, ta_buf_m, count, i);
680
681 rollback = 1;
682 break;
683 }
684
685 IPFW_WUNLOCK(ch);
686
687 ipfw_garbage_table_values(ch, tc, tei, count, rollback);
688
689 /* Permit post-add algorithm grow/rehash. */
690 if (numadd != 0)
691 check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
692
693 /* Return first error to user, if any */
694 error = first_error;
695
696cleanup:
697 IPFW_UH_WUNLOCK(ch);
698
699 flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);
700
701 return (error);
702}
703
704/*
705 * Deletes one or more entries in table @ti.
706 *
707 * Returns 0 on success.
708 */
709int
710del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
711 struct tentry_info *tei, uint8_t flags, uint32_t count)
712{
713 struct table_config *tc;
714 struct table_algo *ta;
715 struct tentry_info *ptei;
716 uint16_t kidx;
717 int error, first_error, i;
718 uint32_t num, numdel;
719 char ta_buf[TA_BUF_SZ];
720 caddr_t ta_buf_m, v;
721
722 /*
723 * Find and reference existing table.
724 */
725 IPFW_UH_WLOCK(ch);
726 error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
727 if (error != 0) {
728 IPFW_UH_WUNLOCK(ch);
729 return (error);
730 }
731 ta = tc->ta;
732 IPFW_UH_WUNLOCK(ch);
733
734 /* Allocate memory and prepare record(s) */
735 /* Pass stack buffer by default */
736 ta_buf_m = ta_buf;
737 error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
738 if (error != 0)
739 goto cleanup;
740
741 IPFW_UH_WLOCK(ch);
742
743 /* Drop reference we've used in first search */
744 tc->no.refcnt--;
745
746 /*
747 * Check if table algo is still the same.
748 * (changed ta may be the result of table swap).
749 */
750 if (ta != tc->ta) {
751 IPFW_UH_WUNLOCK(ch);
752 error = EINVAL;
753 goto cleanup;
754 }
755
756 kidx = tc->no.kidx;
757 numdel = 0;
758 first_error = 0;
759
760 IPFW_WLOCK(ch);
761 v = ta_buf_m;
762 for (i = 0; i < count; i++, v += ta->ta_buf_size) {
763 ptei = &tei[i];
764 num = 0;
765 error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
766 &num);
767 /* Save state for userland */
768 store_tei_result(ptei, OP_DEL, error, num);
769 if (error != 0 && first_error == 0)
770 first_error = error;
771 tc->count -= num;
772 numdel += num;
773 }
774 IPFW_WUNLOCK(ch);
775
776 /* Unlink non-used values */
777 ipfw_garbage_table_values(ch, tc, tei, count, 0);
778
779 if (numdel != 0) {
780 /* Run post-del hook to permit shrinking */
781 check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
782 }
783
784 IPFW_UH_WUNLOCK(ch);
785
786 /* Return first error to user, if any */
787 error = first_error;
788
789cleanup:
790 flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);
791
792 return (error);
793}
794
795/*
796 * Ensure that table @tc has enough space to add @count entries without
797 * need for reallocation.
798 *
799 * Callbacks order:
800 * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize.
801 *
802 * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags.
803 * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage
804 * 3) modify (UH_WLOCK + WLOCK) - switch pointers
805 * 4) flush_modify (UH_WLOCK) - free state, if needed
806 *
807 * Returns 0 on success.
808 */
809static int
810check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
811 struct table_config *tc, struct table_info *ti, uint32_t count)
812{
813 struct table_algo *ta;
814 uint64_t pflags;
815 char ta_buf[TA_BUF_SZ];
816 int error;
817
818 IPFW_UH_WLOCK_ASSERT(ch);
819
820 error = 0;
821 ta = tc->ta;
822 if (ta->need_modify == NULL)
823 return (0);
824
825 /* Acquire reference not to loose @tc between locks/unlocks */
826 tc->no.refcnt++;
827
828 /*
829 * TODO: think about avoiding race between large add/large delete
830 * operation on algorithm which implements shrinking along with
831 * growing.
832 */
833 while (true) {
834 pflags = 0;
835 if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
836 error = 0;
837 break;
838 }
839
840 /* We have to shrink/grow table */
841 if (ts != NULL)
842 add_toperation_state(ch, ts);
843 IPFW_UH_WUNLOCK(ch);
844
845 memset(&ta_buf, 0, sizeof(ta_buf));
846 error = ta->prepare_mod(ta_buf, &pflags);
847
848 IPFW_UH_WLOCK(ch);
849 if (ts != NULL)
850 del_toperation_state(ch, ts);
851
852 if (error != 0)
853 break;
854
855 if (ts != NULL && ts->modified != 0) {
856
857 /*
858 * Swap operation has happened
859 * so we're currently operating on other
860 * table data. Stop doing this.
861 */
862 ta->flush_mod(ta_buf);
863 break;
864 }
865
866 /* Check if we still need to alter table */
867 ti = KIDX_TO_TI(ch, tc->no.kidx);
868 if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
869 IPFW_UH_WUNLOCK(ch);
870
871 /*
872 * Other thread has already performed resize.
873 * Flush our state and return.
874 */
875 ta->flush_mod(ta_buf);
876 break;
877 }
878
879 error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
880 if (error == 0) {
881 /* Do actual modification */
882 IPFW_WLOCK(ch);
883 ta->modify(tc->astate, ti, ta_buf, pflags);
884 IPFW_WUNLOCK(ch);
885 }
886
887 /* Anyway, flush data and retry */
888 ta->flush_mod(ta_buf);
889 }
890
891 tc->no.refcnt--;
892 return (error);
893}
894
895/*
896 * Adds or deletes record in table.
897 * Data layout (v0):
898 * Request: [ ip_fw3_opheader ipfw_table_xentry ]
899 *
900 * Returns 0 on success
901 */
902static int
903manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
904 struct sockopt_data *sd)
905{
906 ipfw_table_xentry *xent;
907 struct tentry_info tei;
908 struct tid_info ti;
909 struct table_value v;
910 int error, hdrlen, read;
911
912 hdrlen = offsetof(ipfw_table_xentry, k);
913
914 /* Check minimum header size */
915 if (sd->valsize < (sizeof(*op3) + hdrlen))
916 return (EINVAL);
917
918 read = sizeof(ip_fw3_opheader);
919
920 /* Check if xentry len field is valid */
921 xent = (ipfw_table_xentry *)(op3 + 1);
922 if (xent->len < hdrlen || xent->len + read > sd->valsize)
923 return (EINVAL);
924
925 memset(&tei, 0, sizeof(tei));
926 tei.paddr = &xent->k;
927 tei.masklen = xent->masklen;
928 ipfw_import_table_value_legacy(xent->value, &v);
929 tei.pvalue = &v;
930 /* Old requests compatibility */
931 tei.flags = TEI_FLAGS_COMPAT;
932 if (xent->type == IPFW_TABLE_ADDR) {
933 if (xent->len - hdrlen == sizeof(in_addr_t))
934 tei.subtype = AF_INET;
935 else
936 tei.subtype = AF_INET6;
937 }
938
939 memset(&ti, 0, sizeof(ti));
940 ti.uidx = xent->tbl;
941 ti.type = xent->type;
942
943 error = (op3->opcode == IP_FW_TABLE_XADD) ?
944 add_table_entry(ch, &ti, &tei, 0, 1) :
945 del_table_entry(ch, &ti, &tei, 0, 1);
946
947 return (error);
948}
949
950/*
951 * Adds or deletes record in table.
952 * Data layout (v1)(current):
953 * Request: [ ipfw_obj_header
954 * ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
955 * ]
956 *
957 * Returns 0 on success
958 */
959static int
960manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
961 struct sockopt_data *sd)
962{
963 ipfw_obj_tentry *tent, *ptent;
964 ipfw_obj_ctlv *ctlv;
965 ipfw_obj_header *oh;
966 struct tentry_info *ptei, tei, *tei_buf;
967 struct tid_info ti;
968 int error, i, kidx, read;
969
970 /* Check minimum header size */
971 if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
972 return (EINVAL);
973
974 /* Check if passed data is too long */
975 if (sd->valsize != sd->kavail)
976 return (EINVAL);
977
978 oh = (ipfw_obj_header *)sd->kbuf;
979
980 /* Basic length checks for TLVs */
981 if (oh->ntlv.head.length != sizeof(oh->ntlv))
982 return (EINVAL);
983
984 read = sizeof(*oh);
985
986 ctlv = (ipfw_obj_ctlv *)(oh + 1);
987 if (ctlv->head.length + read != sd->valsize)
988 return (EINVAL);
989
990 read += sizeof(*ctlv);
991 tent = (ipfw_obj_tentry *)(ctlv + 1);
992 if (ctlv->count * sizeof(*tent) + read != sd->valsize)
993 return (EINVAL);
994
995 if (ctlv->count == 0)
996 return (0);
997
998 /*
999 * Mark entire buffer as "read".
1000 * This instructs sopt api write it back
1001 * after function return.
1002 */
1003 ipfw_get_sopt_header(sd, sd->valsize);
1004
1005 /* Perform basic checks for each entry */
1006 ptent = tent;
1007 kidx = tent->idx;
1008 for (i = 0; i < ctlv->count; i++, ptent++) {
1009 if (ptent->head.length != sizeof(*ptent))
1010 return (EINVAL);
1011 if (ptent->idx != kidx)
1012 return (ENOTSUP);
1013 }
1014
1015 /* Convert data into kernel request objects */
1016 objheader_to_ti(oh, &ti);
1017 ti.type = oh->ntlv.type;
1018 ti.uidx = kidx;
1019
1020 /* Use on-stack buffer for single add/del */
1021 if (ctlv->count == 1) {
1022 memset(&tei, 0, sizeof(tei));
1023 tei_buf = &tei;
1024 } else
1025 tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
1026 M_WAITOK | M_ZERO);
1027
1028 ptei = tei_buf;
1029 ptent = tent;
1030 for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
1031 ptei->paddr = &ptent->k;
1032 ptei->subtype = ptent->subtype;
1033 ptei->masklen = ptent->masklen;
1034 if (ptent->head.flags & IPFW_TF_UPDATE)
1035 ptei->flags |= TEI_FLAGS_UPDATE;
1036
1037 ipfw_import_table_value_v1(&ptent->v.value);
1038 ptei->pvalue = (struct table_value *)&ptent->v.value;
1039 }
1040
1041 error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
1042 add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
1043 del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);
1044
1045 /* Translate result back to userland */
1046 ptei = tei_buf;
1047 ptent = tent;
1048 for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
1049 if (ptei->flags & TEI_FLAGS_ADDED)
1050 ptent->result = IPFW_TR_ADDED;
1051 else if (ptei->flags & TEI_FLAGS_DELETED)
1052 ptent->result = IPFW_TR_DELETED;
1053 else if (ptei->flags & TEI_FLAGS_UPDATED)
1054 ptent->result = IPFW_TR_UPDATED;
1055 else if (ptei->flags & TEI_FLAGS_LIMIT)
1056 ptent->result = IPFW_TR_LIMIT;
1057 else if (ptei->flags & TEI_FLAGS_ERROR)
1058 ptent->result = IPFW_TR_ERROR;
1059 else if (ptei->flags & TEI_FLAGS_NOTFOUND)
1060 ptent->result = IPFW_TR_NOTFOUND;
1061 else if (ptei->flags & TEI_FLAGS_EXISTS)
1062 ptent->result = IPFW_TR_EXISTS;
1063 ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
1064 }
1065
1066 if (tei_buf != &tei)
1067 free(tei_buf, M_TEMP);
1068
1069 return (error);
1070}
1071
1072/*
1073 * Looks up an entry in given table.
1074 * Data layout (v0)(current):
1075 * Request: [ ipfw_obj_header ipfw_obj_tentry ]
1076 * Reply: [ ipfw_obj_header ipfw_obj_tentry ]
1077 *
1078 * Returns 0 on success
1079 */
1080static int
1081find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1082 struct sockopt_data *sd)
1083{
1084 ipfw_obj_tentry *tent;
1085 ipfw_obj_header *oh;
1086 struct tid_info ti;
1087 struct table_config *tc;
1088 struct table_algo *ta;
1089 struct table_info *kti;
1090 struct table_value *pval;
1090 struct namedobj_instance *ni;
1091 int error;
1092 size_t sz;
1093
1094 /* Check minimum header size */
1095 sz = sizeof(*oh) + sizeof(*tent);
1096 if (sd->valsize != sz)
1097 return (EINVAL);
1098
1099 oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1100 tent = (ipfw_obj_tentry *)(oh + 1);
1101
1102 /* Basic length checks for TLVs */
1103 if (oh->ntlv.head.length != sizeof(oh->ntlv))
1104 return (EINVAL);
1105
1106 objheader_to_ti(oh, &ti);
1107 ti.type = oh->ntlv.type;
1108 ti.uidx = tent->idx;
1109
1110 IPFW_UH_RLOCK(ch);
1111 ni = CHAIN_TO_NI(ch);
1112
1113 /*
1114 * Find existing table and check its type .
1115 */
1116 ta = NULL;
1117 if ((tc = find_table(ni, &ti)) == NULL) {
1118 IPFW_UH_RUNLOCK(ch);
1119 return (ESRCH);
1120 }
1121
1122 /* check table type */
1123 if (tc->no.subtype != ti.type) {
1124 IPFW_UH_RUNLOCK(ch);
1125 return (EINVAL);
1126 }
1127
1128 kti = KIDX_TO_TI(ch, tc->no.kidx);
1129 ta = tc->ta;
1130
1131 if (ta->find_tentry == NULL)
1132 return (ENOTSUP);
1133
1134 error = ta->find_tentry(tc->astate, kti, tent);
1091 struct namedobj_instance *ni;
1092 int error;
1093 size_t sz;
1094
1095 /* Check minimum header size */
1096 sz = sizeof(*oh) + sizeof(*tent);
1097 if (sd->valsize != sz)
1098 return (EINVAL);
1099
1100 oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1101 tent = (ipfw_obj_tentry *)(oh + 1);
1102
1103 /* Basic length checks for TLVs */
1104 if (oh->ntlv.head.length != sizeof(oh->ntlv))
1105 return (EINVAL);
1106
1107 objheader_to_ti(oh, &ti);
1108 ti.type = oh->ntlv.type;
1109 ti.uidx = tent->idx;
1110
1111 IPFW_UH_RLOCK(ch);
1112 ni = CHAIN_TO_NI(ch);
1113
1114 /*
1115 * Find existing table and check its type .
1116 */
1117 ta = NULL;
1118 if ((tc = find_table(ni, &ti)) == NULL) {
1119 IPFW_UH_RUNLOCK(ch);
1120 return (ESRCH);
1121 }
1122
1123 /* check table type */
1124 if (tc->no.subtype != ti.type) {
1125 IPFW_UH_RUNLOCK(ch);
1126 return (EINVAL);
1127 }
1128
1129 kti = KIDX_TO_TI(ch, tc->no.kidx);
1130 ta = tc->ta;
1131
1132 if (ta->find_tentry == NULL)
1133 return (ENOTSUP);
1134
1135 error = ta->find_tentry(tc->astate, kti, tent);
1135
1136 if (error == 0) {
1137 pval = get_table_value(ch, tc, tent->v.kidx);
1138 ipfw_export_table_value_v1(pval, &tent->v.value);
1139 }
1136 IPFW_UH_RUNLOCK(ch);
1137
1138 return (error);
1139}
1140
1141/*
1142 * Flushes all entries or destroys given table.
1143 * Data layout (v0)(current):
1144 * Request: [ ipfw_obj_header ]
1145 *
1146 * Returns 0 on success
1147 */
1148static int
1149flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1150 struct sockopt_data *sd)
1151{
1152 int error;
1153 struct _ipfw_obj_header *oh;
1154 struct tid_info ti;
1155
1156 if (sd->valsize != sizeof(*oh))
1157 return (EINVAL);
1158
1159 oh = (struct _ipfw_obj_header *)op3;
1160 objheader_to_ti(oh, &ti);
1161
1162 if (op3->opcode == IP_FW_TABLE_XDESTROY)
1163 error = destroy_table(ch, &ti);
1164 else if (op3->opcode == IP_FW_TABLE_XFLUSH)
1165 error = flush_table(ch, &ti);
1166 else
1167 return (ENOTSUP);
1168
1169 return (error);
1170}
1171
1172static void
1173restart_flush(void *object, struct op_state *_state)
1174{
1175 struct tableop_state *ts;
1176
1177 ts = (struct tableop_state *)_state;
1178
1179 if (ts->tc != object)
1180 return;
1181
1182 /* Indicate we've called */
1183 ts->modified = 1;
1184}
1185
1186/*
1187 * Flushes given table.
1188 *
1189 * Function create new table instance with the same
1190 * parameters, swaps it with old one and
1191 * flushes state without holding runtime WLOCK.
1192 *
1193 * Returns 0 on success.
1194 */
1195int
1196flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
1197{
1198 struct namedobj_instance *ni;
1199 struct table_config *tc;
1200 struct table_algo *ta;
1201 struct table_info ti_old, ti_new, *tablestate;
1202 void *astate_old, *astate_new;
1203 char algostate[64], *pstate;
1204 struct tableop_state ts;
1205 int error, need_gc;
1206 uint16_t kidx;
1207 uint8_t tflags;
1208
1209 /*
1210 * Stage 1: save table algorithm.
1211 * Reference found table to ensure it won't disappear.
1212 */
1213 IPFW_UH_WLOCK(ch);
1214 ni = CHAIN_TO_NI(ch);
1215 if ((tc = find_table(ni, ti)) == NULL) {
1216 IPFW_UH_WUNLOCK(ch);
1217 return (ESRCH);
1218 }
1219 need_gc = 0;
1220 astate_new = NULL;
1221 memset(&ti_new, 0, sizeof(ti_new));
1222restart:
1223 /* Set up swap handler */
1224 memset(&ts, 0, sizeof(ts));
1225 ts.opstate.func = restart_flush;
1226 ts.tc = tc;
1227
1228 ta = tc->ta;
1229 /* Do not flush readonly tables */
1230 if ((ta->flags & TA_FLAG_READONLY) != 0) {
1231 IPFW_UH_WUNLOCK(ch);
1232 return (EACCES);
1233 }
1234 /* Save startup algo parameters */
1235 if (ta->print_config != NULL) {
1236 ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
1237 algostate, sizeof(algostate));
1238 pstate = algostate;
1239 } else
1240 pstate = NULL;
1241 tflags = tc->tflags;
1242 tc->no.refcnt++;
1243 add_toperation_state(ch, &ts);
1244 IPFW_UH_WUNLOCK(ch);
1245
1246 /*
1247 * Stage 1.5: if this is not the first attempt, destroy previous state
1248 */
1249 if (need_gc != 0) {
1250 ta->destroy(astate_new, &ti_new);
1251 need_gc = 0;
1252 }
1253
1254 /*
1255 * Stage 2: allocate new table instance using same algo.
1256 */
1257 memset(&ti_new, 0, sizeof(struct table_info));
1258 error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
1259
1260 /*
1261 * Stage 3: swap old state pointers with newly-allocated ones.
1262 * Decrease refcount.
1263 */
1264 IPFW_UH_WLOCK(ch);
1265 tc->no.refcnt--;
1266 del_toperation_state(ch, &ts);
1267
1268 if (error != 0) {
1269 IPFW_UH_WUNLOCK(ch);
1270 return (error);
1271 }
1272
1273 /*
1274 * Restart operation if table swap has happened:
1275 * even if algo may be the same, algo init parameters
1276 * may change. Restart operation instead of doing
1277 * complex checks.
1278 */
1279 if (ts.modified != 0) {
1280 /* Delay destroying data since we're holding UH lock */
1281 need_gc = 1;
1282 goto restart;
1283 }
1284
1285 ni = CHAIN_TO_NI(ch);
1286 kidx = tc->no.kidx;
1287 tablestate = (struct table_info *)ch->tablestate;
1288
1289 IPFW_WLOCK(ch);
1290 ti_old = tablestate[kidx];
1291 tablestate[kidx] = ti_new;
1292 IPFW_WUNLOCK(ch);
1293
1294 astate_old = tc->astate;
1295 tc->astate = astate_new;
1296 tc->ti_copy = ti_new;
1297 tc->count = 0;
1298
1299 /* Notify algo on real @ti address */
1300 if (ta->change_ti != NULL)
1301 ta->change_ti(tc->astate, &tablestate[kidx]);
1302
1303 /*
1304 * Stage 4: unref values.
1305 */
1306 ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
1307 IPFW_UH_WUNLOCK(ch);
1308
1309 /*
1310 * Stage 5: perform real flush/destroy.
1311 */
1312 ta->destroy(astate_old, &ti_old);
1313
1314 return (0);
1315}
1316
1317/*
1318 * Swaps two tables.
1319 * Data layout (v0)(current):
1320 * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
1321 *
1322 * Returns 0 on success
1323 */
1324static int
1325swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1326 struct sockopt_data *sd)
1327{
1328 int error;
1329 struct _ipfw_obj_header *oh;
1330 struct tid_info ti_a, ti_b;
1331
1332 if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
1333 return (EINVAL);
1334
1335 oh = (struct _ipfw_obj_header *)op3;
1336 ntlv_to_ti(&oh->ntlv, &ti_a);
1337 ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b);
1338
1339 error = swap_tables(ch, &ti_a, &ti_b);
1340
1341 return (error);
1342}
1343
1344/*
1345 * Swaps two tables of the same type/valtype.
1346 *
1347 * Checks if tables are compatible and limits
1348 * permits swap, than actually perform swap.
1349 *
1350 * Each table consists of 2 different parts:
1351 * config:
1352 * @tc (with name, set, kidx) and rule bindings, which is "stable".
1353 * number of items
1354 * table algo
1355 * runtime:
1356 * runtime data @ti (ch->tablestate)
1357 * runtime cache in @tc
1358 * algo-specific data (@tc->astate)
1359 *
1360 * So we switch:
1361 * all runtime data
1362 * number of items
1363 * table algo
1364 *
1365 * After that we call @ti change handler for each table.
1366 *
1367 * Note that referencing @tc won't protect tc->ta from change.
1368 * XXX: Do we need to restrict swap between locked tables?
1369 * XXX: Do we need to exchange ftype?
1370 *
1371 * Returns 0 on success.
1372 */
1373static int
1374swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
1375 struct tid_info *b)
1376{
1377 struct namedobj_instance *ni;
1378 struct table_config *tc_a, *tc_b;
1379 struct table_algo *ta;
1380 struct table_info ti, *tablestate;
1381 void *astate;
1382 uint32_t count;
1383
1384 /*
1385 * Stage 1: find both tables and ensure they are of
1386 * the same type.
1387 */
1388 IPFW_UH_WLOCK(ch);
1389 ni = CHAIN_TO_NI(ch);
1390 if ((tc_a = find_table(ni, a)) == NULL) {
1391 IPFW_UH_WUNLOCK(ch);
1392 return (ESRCH);
1393 }
1394 if ((tc_b = find_table(ni, b)) == NULL) {
1395 IPFW_UH_WUNLOCK(ch);
1396 return (ESRCH);
1397 }
1398
1399 /* It is very easy to swap between the same table */
1400 if (tc_a == tc_b) {
1401 IPFW_UH_WUNLOCK(ch);
1402 return (0);
1403 }
1404
1405 /* Check type and value are the same */
1406 if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) {
1407 IPFW_UH_WUNLOCK(ch);
1408 return (EINVAL);
1409 }
1410
1411 /* Check limits before swap */
1412 if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
1413 (tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
1414 IPFW_UH_WUNLOCK(ch);
1415 return (EFBIG);
1416 }
1417
1418 /* Check if one of the tables is readonly */
1419 if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
1420 IPFW_UH_WUNLOCK(ch);
1421 return (EACCES);
1422 }
1423
1424 /* Notify we're going to swap */
1425 rollback_toperation_state(ch, tc_a);
1426 rollback_toperation_state(ch, tc_b);
1427
1428 /* Everything is fine, prepare to swap */
1429 tablestate = (struct table_info *)ch->tablestate;
1430 ti = tablestate[tc_a->no.kidx];
1431 ta = tc_a->ta;
1432 astate = tc_a->astate;
1433 count = tc_a->count;
1434
1435 IPFW_WLOCK(ch);
1436 /* a <- b */
1437 tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
1438 tc_a->ta = tc_b->ta;
1439 tc_a->astate = tc_b->astate;
1440 tc_a->count = tc_b->count;
1441 /* b <- a */
1442 tablestate[tc_b->no.kidx] = ti;
1443 tc_b->ta = ta;
1444 tc_b->astate = astate;
1445 tc_b->count = count;
1446 IPFW_WUNLOCK(ch);
1447
1448 /* Ensure tc.ti copies are in sync */
1449 tc_a->ti_copy = tablestate[tc_a->no.kidx];
1450 tc_b->ti_copy = tablestate[tc_b->no.kidx];
1451
1452 /* Notify both tables on @ti change */
1453 if (tc_a->ta->change_ti != NULL)
1454 tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
1455 if (tc_b->ta->change_ti != NULL)
1456 tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);
1457
1458 IPFW_UH_WUNLOCK(ch);
1459
1460 return (0);
1461}
1462
1463/*
1464 * Destroys table specified by @ti.
1465 * Data layout (v0)(current):
1466 * Request: [ ip_fw3_opheader ]
1467 *
1468 * Returns 0 on success
1469 */
1470static int
1471destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
1472{
1473 struct namedobj_instance *ni;
1474 struct table_config *tc;
1475
1476 IPFW_UH_WLOCK(ch);
1477
1478 ni = CHAIN_TO_NI(ch);
1479 if ((tc = find_table(ni, ti)) == NULL) {
1480 IPFW_UH_WUNLOCK(ch);
1481 return (ESRCH);
1482 }
1483
1484 /* Do not permit destroying referenced tables */
1485 if (tc->no.refcnt > 0) {
1486 IPFW_UH_WUNLOCK(ch);
1487 return (EBUSY);
1488 }
1489
1490 IPFW_WLOCK(ch);
1491 unlink_table(ch, tc);
1492 IPFW_WUNLOCK(ch);
1493
1494 /* Free obj index */
1495 if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
1496 printf("Error unlinking kidx %d from table %s\n",
1497 tc->no.kidx, tc->tablename);
1498
1499 /* Unref values used in tables while holding UH lock */
1500 ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
1501 IPFW_UH_WUNLOCK(ch);
1502
1503 free_table_config(ni, tc);
1504
1505 return (0);
1506}
1507
/*
 * Rounds @v up to the nearest power of two.
 * Values that already are a power of two are returned unchanged.
 * An input of 0 wraps around and yields 0.
 */
static uint32_t
roundup2p(uint32_t v)
{
	unsigned int shift;

	/* Smear the highest set bit of (v - 1) into all lower bits */
	v--;
	for (shift = 1; shift < 32; shift <<= 1)
		v |= v >> shift;

	return (v + 1);
}
1522
1523/*
1524 * Grow tables index.
1525 *
1526 * Returns 0 on success.
1527 */
1528int
1529ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
1530{
1531 unsigned int ntables_old, tbl;
1532 struct namedobj_instance *ni;
1533 void *new_idx, *old_tablestate, *tablestate;
1534 struct table_info *ti;
1535 struct table_config *tc;
1536 int i, new_blocks;
1537
1538 /* Check new value for validity */
1539 if (ntables == 0)
1540 return (EINVAL);
1541 if (ntables > IPFW_TABLES_MAX)
1542 ntables = IPFW_TABLES_MAX;
1543 /* Alight to nearest power of 2 */
1544 ntables = (unsigned int)roundup2p(ntables);
1545
1546 /* Allocate new pointers */
1547 tablestate = malloc(ntables * sizeof(struct table_info),
1548 M_IPFW, M_WAITOK | M_ZERO);
1549
1550 ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
1551
1552 IPFW_UH_WLOCK(ch);
1553
1554 tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
1555 ni = CHAIN_TO_NI(ch);
1556
1557 /* Temporary restrict decreasing max_tables */
1558 if (ntables < V_fw_tables_max) {
1559
1560 /*
1561 * FIXME: Check if we really can shrink
1562 */
1563 IPFW_UH_WUNLOCK(ch);
1564 return (EINVAL);
1565 }
1566
1567 /* Copy table info/indices */
1568 memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
1569 ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
1570
1571 IPFW_WLOCK(ch);
1572
1573 /* Change pointers */
1574 old_tablestate = ch->tablestate;
1575 ch->tablestate = tablestate;
1576 ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
1577
1578 ntables_old = V_fw_tables_max;
1579 V_fw_tables_max = ntables;
1580
1581 IPFW_WUNLOCK(ch);
1582
1583 /* Notify all consumers that their @ti pointer has changed */
1584 ti = (struct table_info *)ch->tablestate;
1585 for (i = 0; i < tbl; i++, ti++) {
1586 if (ti->lookup == NULL)
1587 continue;
1588 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
1589 if (tc == NULL || tc->ta->change_ti == NULL)
1590 continue;
1591
1592 tc->ta->change_ti(tc->astate, ti);
1593 }
1594
1595 IPFW_UH_WUNLOCK(ch);
1596
1597 /* Free old pointers */
1598 free(old_tablestate, M_IPFW);
1599 ipfw_objhash_bitmap_free(new_idx, new_blocks);
1600
1601 return (0);
1602}
1603
1604/*
1605 * Lookup an IP @addr in table @tbl.
1606 * Stores found value in @val.
1607 *
1608 * Returns 1 if @addr was found.
1609 */
1610int
1611ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
1612 uint32_t *val)
1613{
1614 struct table_info *ti;
1615
1616 ti = KIDX_TO_TI(ch, tbl);
1617
1618 return (ti->lookup(ti, &addr, sizeof(in_addr_t), val));
1619}
1620
1621/*
1622 * Lookup an arbtrary key @paddr of legth @plen in table @tbl.
1623 * Stores found value in @val.
1624 *
1625 * Returns 1 if key was found.
1626 */
1627int
1628ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
1629 void *paddr, uint32_t *val)
1630{
1631 struct table_info *ti;
1632
1633 ti = KIDX_TO_TI(ch, tbl);
1634
1635 return (ti->lookup(ti, paddr, plen, val));
1636}
1637
1638/*
1639 * Info/List/dump support for tables.
1640 *
1641 */
1642
1643/*
1644 * High-level 'get' cmds sysctl handlers
1645 */
1646
1647/*
1648 * Lists all tables currently available in kernel.
1649 * Data layout (v0)(current):
1650 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
1651 * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
1652 *
1653 * Returns 0 on success
1654 */
1655static int
1656list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1657 struct sockopt_data *sd)
1658{
1659 struct _ipfw_obj_lheader *olh;
1660 int error;
1661
1662 olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
1663 if (olh == NULL)
1664 return (EINVAL);
1665 if (sd->valsize < olh->size)
1666 return (EINVAL);
1667
1668 IPFW_UH_RLOCK(ch);
1669 error = export_tables(ch, olh, sd);
1670 IPFW_UH_RUNLOCK(ch);
1671
1672 return (error);
1673}
1674
1675/*
1676 * Store table info to buffer provided by @sd.
1677 * Data layout (v0)(current):
1678 * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
1679 * Reply: [ ipfw_obj_header ipfw_xtable_info ]
1680 *
1681 * Returns 0 on success.
1682 */
1683static int
1684describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1685 struct sockopt_data *sd)
1686{
1687 struct _ipfw_obj_header *oh;
1688 struct table_config *tc;
1689 struct tid_info ti;
1690 size_t sz;
1691
1692 sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
1693 oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1694 if (oh == NULL)
1695 return (EINVAL);
1696
1697 objheader_to_ti(oh, &ti);
1698
1699 IPFW_UH_RLOCK(ch);
1700 if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
1701 IPFW_UH_RUNLOCK(ch);
1702 return (ESRCH);
1703 }
1704
1705 export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
1706 IPFW_UH_RUNLOCK(ch);
1707
1708 return (0);
1709}
1710
1711/*
1712 * Modifies existing table.
1713 * Data layout (v0)(current):
1714 * Request: [ ipfw_obj_header ipfw_xtable_info ]
1715 *
1716 * Returns 0 on success
1717 */
1718static int
1719modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1720 struct sockopt_data *sd)
1721{
1722 struct _ipfw_obj_header *oh;
1723 ipfw_xtable_info *i;
1724 char *tname;
1725 struct tid_info ti;
1726 struct namedobj_instance *ni;
1727 struct table_config *tc;
1728
1729 if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1730 return (EINVAL);
1731
1732 oh = (struct _ipfw_obj_header *)sd->kbuf;
1733 i = (ipfw_xtable_info *)(oh + 1);
1734
1735 /*
1736 * Verify user-supplied strings.
1737 * Check for null-terminated/zero-length strings/
1738 */
1739 tname = oh->ntlv.name;
1740 if (check_table_name(tname) != 0)
1741 return (EINVAL);
1742
1743 objheader_to_ti(oh, &ti);
1744 ti.type = i->type;
1745
1746 IPFW_UH_WLOCK(ch);
1747 ni = CHAIN_TO_NI(ch);
1748 if ((tc = find_table(ni, &ti)) == NULL) {
1749 IPFW_UH_WUNLOCK(ch);
1750 return (ESRCH);
1751 }
1752
1753 /* Do not support any modifications for readonly tables */
1754 if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
1755 IPFW_UH_WUNLOCK(ch);
1756 return (EACCES);
1757 }
1758
1759 if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
1760 tc->limit = i->limit;
1761 if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
1762 tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
1763 IPFW_UH_WUNLOCK(ch);
1764
1765 return (0);
1766}
1767
1768/*
1769 * Creates new table.
1770 * Data layout (v0)(current):
1771 * Request: [ ipfw_obj_header ipfw_xtable_info ]
1772 *
1773 * Returns 0 on success
1774 */
1775static int
1776create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1777 struct sockopt_data *sd)
1778{
1779 struct _ipfw_obj_header *oh;
1780 ipfw_xtable_info *i;
1781 char *tname, *aname;
1782 struct tid_info ti;
1783 struct namedobj_instance *ni;
1784
1785 if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1786 return (EINVAL);
1787
1788 oh = (struct _ipfw_obj_header *)sd->kbuf;
1789 i = (ipfw_xtable_info *)(oh + 1);
1790
1791 /*
1792 * Verify user-supplied strings.
1793 * Check for null-terminated/zero-length strings/
1794 */
1795 tname = oh->ntlv.name;
1796 aname = i->algoname;
1797 if (check_table_name(tname) != 0 ||
1798 strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname))
1799 return (EINVAL);
1800
1801 if (aname[0] == '\0') {
1802 /* Use default algorithm */
1803 aname = NULL;
1804 }
1805
1806 objheader_to_ti(oh, &ti);
1807 ti.type = i->type;
1808
1809 ni = CHAIN_TO_NI(ch);
1810
1811 IPFW_UH_RLOCK(ch);
1812 if (find_table(ni, &ti) != NULL) {
1813 IPFW_UH_RUNLOCK(ch);
1814 return (EEXIST);
1815 }
1816 IPFW_UH_RUNLOCK(ch);
1817
1818 return (create_table_internal(ch, &ti, aname, i, NULL, 0));
1819}
1820
1821/*
1822 * Creates new table based on @ti and @aname.
1823 *
1824 * Assume @aname to be checked and valid.
1825 * Stores allocated table kidx inside @pkidx (if non-NULL).
1826 * Reference created table if @compat is non-zero.
1827 *
1828 * Returns 0 on success.
1829 */
1830static int
1831create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
1832 char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat)
1833{
1834 struct namedobj_instance *ni;
1835 struct table_config *tc, *tc_new, *tmp;
1836 struct table_algo *ta;
1837 uint16_t kidx;
1838
1839 ni = CHAIN_TO_NI(ch);
1840
1841 ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
1842 if (ta == NULL)
1843 return (ENOTSUP);
1844
1845 tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
1846 if (tc == NULL)
1847 return (ENOMEM);
1848
1849 tc->vmask = i->vmask;
1850 tc->limit = i->limit;
1851 if (ta->flags & TA_FLAG_READONLY)
1852 tc->locked = 1;
1853 else
1854 tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;
1855
1856 IPFW_UH_WLOCK(ch);
1857
1858 /* Check if table has been already created */
1859 tc_new = find_table(ni, ti);
1860 if (tc_new != NULL) {
1861
1862 /*
1863 * Compat: do not fail if we're
1864 * requesting to create existing table
1865 * which has the same type
1866 */
1867 if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
1868 IPFW_UH_WUNLOCK(ch);
1869 free_table_config(ni, tc);
1870 return (EEXIST);
1871 }
1872
1873 /* Exchange tc and tc_new for proper refcounting & freeing */
1874 tmp = tc;
1875 tc = tc_new;
1876 tc_new = tmp;
1877 } else {
1878 /* New table */
1879 if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
1880 IPFW_UH_WUNLOCK(ch);
1881 printf("Unable to allocate table index."
1882 " Consider increasing net.inet.ip.fw.tables_max");
1883 free_table_config(ni, tc);
1884 return (EBUSY);
1885 }
1886 tc->no.kidx = kidx;
1887 tc->no.etlv = IPFW_TLV_TBL_NAME;
1888
1889 IPFW_WLOCK(ch);
1890 link_table(ch, tc);
1891 IPFW_WUNLOCK(ch);
1892 }
1893
1894 if (compat != 0)
1895 tc->no.refcnt++;
1896 if (pkidx != NULL)
1897 *pkidx = tc->no.kidx;
1898
1899 IPFW_UH_WUNLOCK(ch);
1900
1901 if (tc_new != NULL)
1902 free_table_config(ni, tc_new);
1903
1904 return (0);
1905}
1906
1907static void
1908ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
1909{
1910
1911 memset(ti, 0, sizeof(struct tid_info));
1912 ti->set = ntlv->set;
1913 ti->uidx = ntlv->idx;
1914 ti->tlvs = ntlv;
1915 ti->tlen = ntlv->head.length;
1916}
1917
1918static void
1919objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
1920{
1921
1922 ntlv_to_ti(&oh->ntlv, ti);
1923}
1924
/*
 * Returns the named-object instance that CHAIN_TO_NI() yields for @ch.
 */
struct namedobj_instance *
ipfw_get_table_objhash(struct ip_fw_chain *ch)
{
	struct namedobj_instance *ni;

	ni = CHAIN_TO_NI(ch);

	return (ni);
}
1931
1932/*
1933 * Exports basic table info as name TLV.
1934 * Used inside dump_static_rules() to provide info
1935 * about all tables referenced by current ruleset.
1936 *
1937 * Returns 0 on success.
1938 */
1939int
1940ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
1941 struct sockopt_data *sd)
1942{
1943 struct namedobj_instance *ni;
1944 struct named_object *no;
1945 ipfw_obj_ntlv *ntlv;
1946
1947 ni = CHAIN_TO_NI(ch);
1948
1949 no = ipfw_objhash_lookup_kidx(ni, kidx);
1950 KASSERT(no != NULL, ("invalid table kidx passed"));
1951
1952 ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
1953 if (ntlv == NULL)
1954 return (ENOMEM);
1955
1956 ntlv->head.type = IPFW_TLV_TBL_NAME;
1957 ntlv->head.length = sizeof(*ntlv);
1958 ntlv->idx = no->kidx;
1959 strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
1960
1961 return (0);
1962}
1963
1964struct dump_args {
1965 struct ip_fw_chain *ch;
1966 struct table_info *ti;
1967 struct table_config *tc;
1968 struct sockopt_data *sd;
1969 uint32_t cnt;
1970 uint16_t uidx;
1971 int error;
1972 uint32_t size;
1973 ipfw_table_entry *ent;
1974 ta_foreach_f *f;
1975 void *farg;
1976 ipfw_obj_tentry tent;
1977};
1978
1979static int
1980count_ext_entries(void *e, void *arg)
1981{
1982 struct dump_args *da;
1983
1984 da = (struct dump_args *)arg;
1985 da->cnt++;
1986
1987 return (0);
1988}
1989
1990/*
1991 * Gets number of items from table either using
1992 * internal counter or calling algo callback for
1993 * externally-managed tables.
1994 *
1995 * Returns number of records.
1996 */
1997static uint32_t
1998table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
1999{
2000 struct table_info *ti;
2001 struct table_algo *ta;
2002 struct dump_args da;
2003
2004 ti = KIDX_TO_TI(ch, tc->no.kidx);
2005 ta = tc->ta;
2006
2007 /* Use internal counter for self-managed tables */
2008 if ((ta->flags & TA_FLAG_READONLY) == 0)
2009 return (tc->count);
2010
2011 /* Use callback to quickly get number of items */
2012 if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
2013 return (ta->get_count(tc->astate, ti));
2014
2015 /* Count number of iterms ourselves */
2016 memset(&da, 0, sizeof(da));
2017 ta->foreach(tc->astate, ti, count_ext_entries, &da);
2018
2019 return (da.cnt);
2020}
2021
2022/*
2023 * Exports table @tc info into standard ipfw_xtable_info format.
2024 */
2025static void
2026export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
2027 ipfw_xtable_info *i)
2028{
2029 struct table_info *ti;
2030 struct table_algo *ta;
2031
2032 i->type = tc->no.subtype;
2033 i->tflags = tc->tflags;
2034 i->vmask = tc->vmask;
2035 i->set = tc->no.set;
2036 i->kidx = tc->no.kidx;
2037 i->refcnt = tc->no.refcnt;
2038 i->count = table_get_count(ch, tc);
2039 i->limit = tc->limit;
2040 i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0;
2041 i->size = i->count * sizeof(ipfw_obj_tentry);
2042 i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
2043 strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
2044 ti = KIDX_TO_TI(ch, tc->no.kidx);
2045 ta = tc->ta;
2046 if (ta->print_config != NULL) {
2047 /* Use algo function to print table config to string */
2048 ta->print_config(tc->astate, ti, i->algoname,
2049 sizeof(i->algoname));
2050 } else
2051 strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2052 /* Dump algo-specific data, if possible */
2053 if (ta->dump_tinfo != NULL) {
2054 ta->dump_tinfo(tc->astate, ti, &i->ta_info);
2055 i->ta_info.flags |= IPFW_TATFLAGS_DATA;
2056 }
2057}
2058
/*
 * Arguments handed to export_table_internal() while walking all tables.
 */
struct dump_table_args {
	struct ip_fw_chain *ch;		/* chain being exported */
	struct sockopt_data *sd;	/* buffer receiving the table infos */
};
2063
2064static int
2065export_table_internal(struct namedobj_instance *ni, struct named_object *no,
2066 void *arg)
2067{
2068 ipfw_xtable_info *i;
2069 struct dump_table_args *dta;
2070
2071 dta = (struct dump_table_args *)arg;
2072
2073 i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
2074 KASSERT(i != NULL, ("previously checked buffer is not enough"));
2075
2076 export_table_info(dta->ch, (struct table_config *)no, i);
2077 return (0);
2078}
2079
2080/*
2081 * Export all tables as ipfw_xtable_info structures to
2082 * storage provided by @sd.
2083 *
2084 * If supplied buffer is too small, fills in required size
2085 * and returns ENOMEM.
2086 * Returns 0 on success.
2087 */
2088static int
2089export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
2090 struct sockopt_data *sd)
2091{
2092 uint32_t size;
2093 uint32_t count;
2094 struct dump_table_args dta;
2095
2096 count = ipfw_objhash_count(CHAIN_TO_NI(ch));
2097 size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
2098
2099 /* Fill in header regadless of buffer size */
2100 olh->count = count;
2101 olh->objsize = sizeof(ipfw_xtable_info);
2102
2103 if (size > olh->size) {
2104 olh->size = size;
2105 return (ENOMEM);
2106 }
2107
2108 olh->size = size;
2109
2110 dta.ch = ch;
2111 dta.sd = sd;
2112
2113 ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);
2114
2115 return (0);
2116}
2117
2118/*
2119 * Dumps all table data
2120 * Data layout (v1)(current):
2121 * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
2122 * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
2123 *
2124 * Returns 0 on success
2125 */
2126static int
2127dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2128 struct sockopt_data *sd)
2129{
2130 struct _ipfw_obj_header *oh;
2131 ipfw_xtable_info *i;
2132 struct tid_info ti;
2133 struct table_config *tc;
2134 struct table_algo *ta;
2135 struct dump_args da;
2136 uint32_t sz;
2137
2138 sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
2139 oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
2140 if (oh == NULL)
2141 return (EINVAL);
2142
2143 i = (ipfw_xtable_info *)(oh + 1);
2144 objheader_to_ti(oh, &ti);
2145
2146 IPFW_UH_RLOCK(ch);
2147 if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
2148 IPFW_UH_RUNLOCK(ch);
2149 return (ESRCH);
2150 }
2151 export_table_info(ch, tc, i);
2152
2153 if (sd->valsize < i->size) {
2154
2155 /*
2156 * Submitted buffer size is not enough.
2157 * WE've already filled in @i structure with
2158 * relevant table info including size, so we
2159 * can return. Buffer will be flushed automatically.
2160 */
2161 IPFW_UH_RUNLOCK(ch);
2162 return (ENOMEM);
2163 }
2164
2165 /*
2166 * Do the actual dump in eXtended format
2167 */
2168 memset(&da, 0, sizeof(da));
2169 da.ch = ch;
2170 da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2171 da.tc = tc;
2172 da.sd = sd;
2173
2174 ta = tc->ta;
2175
2176 ta->foreach(tc->astate, da.ti, dump_table_tentry, &da);
2177 IPFW_UH_RUNLOCK(ch);
2178
2179 return (da.error);
2180}
2181
2182/*
2183 * Dumps all table data
2184 * Data layout (version 0)(legacy):
2185 * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE()
2186 * Reply: [ ipfw_xtable ipfw_table_xentry x N ]
2187 *
2188 * Returns 0 on success
2189 */
2190static int
2191dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2192 struct sockopt_data *sd)
2193{
2194 ipfw_xtable *xtbl;
2195 struct tid_info ti;
2196 struct table_config *tc;
2197 struct table_algo *ta;
2198 struct dump_args da;
2199 size_t sz, count;
2200
2201 xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable));
2202 if (xtbl == NULL)
2203 return (EINVAL);
2204
2205 memset(&ti, 0, sizeof(ti));
2206 ti.uidx = xtbl->tbl;
2207
2208 IPFW_UH_RLOCK(ch);
2209 if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
2210 IPFW_UH_RUNLOCK(ch);
2211 return (0);
2212 }
2213 count = table_get_count(ch, tc);
2214 sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable);
2215
2216 xtbl->cnt = count;
2217 xtbl->size = sz;
2218 xtbl->type = tc->no.subtype;
2219 xtbl->tbl = ti.uidx;
2220
2221 if (sd->valsize < sz) {
2222
2223 /*
2224 * Submitted buffer size is not enough.
2225 * WE've already filled in @i structure with
2226 * relevant table info including size, so we
2227 * can return. Buffer will be flushed automatically.
2228 */
2229 IPFW_UH_RUNLOCK(ch);
2230 return (ENOMEM);
2231 }
2232
2233 /* Do the actual dump in eXtended format */
2234 memset(&da, 0, sizeof(da));
2235 da.ch = ch;
2236 da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2237 da.tc = tc;
2238 da.sd = sd;
2239
2240 ta = tc->ta;
2241
2242 ta->foreach(tc->astate, da.ti, dump_table_xentry, &da);
2243 IPFW_UH_RUNLOCK(ch);
2244
2245 return (0);
2246}
2247
2248/*
2249 * Legacy function to retrieve number of items in table.
2250 */
2251static int
2252get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2253 struct sockopt_data *sd)
2254{
2255 uint32_t *tbl;
2256 struct tid_info ti;
2257 size_t sz;
2258 int error;
2259
2260 sz = sizeof(*op3) + sizeof(uint32_t);
2261 op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz);
2262 if (op3 == NULL)
2263 return (EINVAL);
2264
2265 tbl = (uint32_t *)(op3 + 1);
2266 memset(&ti, 0, sizeof(ti));
2267 ti.uidx = *tbl;
2268 IPFW_UH_RLOCK(ch);
2269 error = ipfw_count_xtable(ch, &ti, tbl);
2270 IPFW_UH_RUNLOCK(ch);
2271 return (error);
2272}
2273
2274/*
2275 * Legacy IP_FW_TABLE_GETSIZE handler
2276 */
2277int
2278ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
2279{
2280 struct table_config *tc;
2281
2282 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
2283 return (ESRCH);
2284 *cnt = table_get_count(ch, tc);
2285 return (0);
2286}
2287
2288/*
2289 * Legacy IP_FW_TABLE_XGETSIZE handler
2290 */
2291int
2292ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
2293{
2294 struct table_config *tc;
2295 uint32_t count;
2296
2297 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) {
2298 *cnt = 0;
2299 return (0); /* 'table all list' requires success */
2300 }
2301
2302 count = table_get_count(ch, tc);
2303 *cnt = count * sizeof(ipfw_table_xentry);
2304 if (count > 0)
2305 *cnt += sizeof(ipfw_xtable);
2306 return (0);
2307}
2308
2309static int
2310dump_table_entry(void *e, void *arg)
2311{
2312 struct dump_args *da;
2313 struct table_config *tc;
2314 struct table_algo *ta;
2315 ipfw_table_entry *ent;
2316 struct table_value *pval;
2317 int error;
2318
2319 da = (struct dump_args *)arg;
2320
2321 tc = da->tc;
2322 ta = tc->ta;
2323
2324 /* Out of memory, returning */
2325 if (da->cnt == da->size)
2326 return (1);
2327 ent = da->ent++;
2328 ent->tbl = da->uidx;
2329 da->cnt++;
2330
2331 error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
2332 if (error != 0)
2333 return (error);
2334
2335 ent->addr = da->tent.k.addr.s_addr;
2336 ent->masklen = da->tent.masklen;
2337 pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
2338 ent->value = ipfw_export_table_value_legacy(pval);
2339
2340 return (0);
2341}
2342
2343/*
2344 * Dumps table in pre-8.1 legacy format.
2345 */
2346int
2347ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
2348 ipfw_table *tbl)
2349{
2350 struct table_config *tc;
2351 struct table_algo *ta;
2352 struct dump_args da;
2353
2354 tbl->cnt = 0;
2355
2356 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
2357 return (0); /* XXX: We should return ESRCH */
2358
2359 ta = tc->ta;
2360
2361 /* This dump format supports IPv4 only */
2362 if (tc->no.subtype != IPFW_TABLE_ADDR)
2363 return (0);
2364
2365 memset(&da, 0, sizeof(da));
2366 da.ch = ch;
2367 da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2368 da.tc = tc;
2369 da.ent = &tbl->ent[0];
2370 da.size = tbl->size;
2371
2372 tbl->cnt = 0;
2373 ta->foreach(tc->astate, da.ti, dump_table_entry, &da);
2374 tbl->cnt = da.cnt;
2375
2376 return (0);
2377}
2378
2379/*
2380 * Dumps table entry in eXtended format (v1)(current).
2381 */
2382static int
2383dump_table_tentry(void *e, void *arg)
2384{
2385 struct dump_args *da;
2386 struct table_config *tc;
2387 struct table_algo *ta;
2388 struct table_value *pval;
2389 ipfw_obj_tentry *tent;
2390 int error;
2391
2392 da = (struct dump_args *)arg;
2393
2394 tc = da->tc;
2395 ta = tc->ta;
2396
2397 tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent));
2398 /* Out of memory, returning */
2399 if (tent == NULL) {
2400 da->error = ENOMEM;
2401 return (1);
2402 }
2403 tent->head.length = sizeof(ipfw_obj_tentry);
2404 tent->idx = da->uidx;
2405
2406 error = ta->dump_tentry(tc->astate, da->ti, e, tent);
2407 if (error != 0)
2408 return (error);
2409
2410 pval = get_table_value(da->ch, da->tc, tent->v.kidx);
2411 ipfw_export_table_value_v1(pval, &tent->v.value);
2412
2413 return (0);
2414}
2415
2416/*
2417 * Dumps table entry in eXtended format (v0).
2418 */
2419static int
2420dump_table_xentry(void *e, void *arg)
2421{
2422 struct dump_args *da;
2423 struct table_config *tc;
2424 struct table_algo *ta;
2425 ipfw_table_xentry *xent;
2426 ipfw_obj_tentry *tent;
2427 struct table_value *pval;
2428 int error;
2429
2430 da = (struct dump_args *)arg;
2431
2432 tc = da->tc;
2433 ta = tc->ta;
2434
2435 xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent));
2436 /* Out of memory, returning */
2437 if (xent == NULL)
2438 return (1);
2439 xent->len = sizeof(ipfw_table_xentry);
2440 xent->tbl = da->uidx;
2441
2442 memset(&da->tent, 0, sizeof(da->tent));
2443 tent = &da->tent;
2444 error = ta->dump_tentry(tc->astate, da->ti, e, tent);
2445 if (error != 0)
2446 return (error);
2447
2448 /* Convert current format to previous one */
2449 xent->masklen = tent->masklen;
2450 pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
2451 xent->value = ipfw_export_table_value_legacy(pval);
2452 /* Apply some hacks */
2453 if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) {
2454 xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr;
2455 xent->flags = IPFW_TCF_INET;
2456 } else
2457 memcpy(&xent->k, &tent->k, sizeof(xent->k));
2458
2459 return (0);
2460}
2461
2462/*
2463 * Helper function to export table algo data
2464 * to tentry format before calling user function.
2465 *
2466 * Returns 0 on success.
2467 */
2468static int
2469prepare_table_tentry(void *e, void *arg)
2470{
2471 struct dump_args *da;
2472 struct table_config *tc;
2473 struct table_algo *ta;
2474 int error;
2475
2476 da = (struct dump_args *)arg;
2477
2478 tc = da->tc;
2479 ta = tc->ta;
2480
2481 error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
2482 if (error != 0)
2483 return (error);
2484
2485 da->f(&da->tent, da->farg);
2486
2487 return (0);
2488}
2489
2490/*
2491 * Allow external consumers to read table entries in standard format.
2492 */
2493int
2494ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
2495 ta_foreach_f *f, void *arg)
2496{
2497 struct namedobj_instance *ni;
2498 struct table_config *tc;
2499 struct table_algo *ta;
2500 struct dump_args da;
2501
2502 ni = CHAIN_TO_NI(ch);
2503
2504 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
2505 if (tc == NULL)
2506 return (ESRCH);
2507
2508 ta = tc->ta;
2509
2510 memset(&da, 0, sizeof(da));
2511 da.ch = ch;
2512 da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2513 da.tc = tc;
2514 da.f = f;
2515 da.farg = arg;
2516
2517 ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da);
2518
2519 return (0);
2520}
2521
2522/*
2523 * Table algorithms
2524 */
2525
2526/*
2527 * Finds algorithm by index, table type or supplied name.
2528 *
2529 * Returns pointer to algo or NULL.
2530 */
2531static struct table_algo *
2532find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name)
2533{
2534 int i, l;
2535 struct table_algo *ta;
2536
2537 if (ti->type > IPFW_TABLE_MAXTYPE)
2538 return (NULL);
2539
2540 /* Search by index */
2541 if (ti->atype != 0) {
2542 if (ti->atype > tcfg->algo_count)
2543 return (NULL);
2544 return (tcfg->algo[ti->atype]);
2545 }
2546
2547 if (name == NULL) {
2548 /* Return default algorithm for given type if set */
2549 return (tcfg->def_algo[ti->type]);
2550 }
2551
2552 /* Search by name */
2553 /* TODO: better search */
2554 for (i = 1; i <= tcfg->algo_count; i++) {
2555 ta = tcfg->algo[i];
2556
2557 /*
2558 * One can supply additional algorithm
2559 * parameters so we compare only the first word
2560 * of supplied name:
2561 * 'addr:chash hsize=32'
2562 * '^^^^^^^^^'
2563 *
2564 */
2565 l = strlen(ta->name);
2566 if (strncmp(name, ta->name, l) != 0)
2567 continue;
2568 if (name[l] != '\0' && name[l] != ' ')
2569 continue;
2570 /* Check if we're requesting proper table type */
2571 if (ti->type != 0 && ti->type != ta->type)
2572 return (NULL);
2573 return (ta);
2574 }
2575
2576 return (NULL);
2577}
2578
2579/*
2580 * Register new table algo @ta.
2581 * Stores algo id inside @idx.
2582 *
2583 * Returns 0 on success.
2584 */
2585int
2586ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size,
2587 int *idx)
2588{
2589 struct tables_config *tcfg;
2590 struct table_algo *ta_new;
2591 size_t sz;
2592
2593 if (size > sizeof(struct table_algo))
2594 return (EINVAL);
2595
2596 /* Check for the required on-stack size for add/del */
2597 sz = roundup2(ta->ta_buf_size, sizeof(void *));
2598 if (sz > TA_BUF_SZ)
2599 return (EINVAL);
2600
2601 KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE"));
2602
2603 /* Copy algorithm data to stable storage. */
2604 ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO);
2605 memcpy(ta_new, ta, size);
2606
2607 tcfg = CHAIN_TO_TCFG(ch);
2608
2609 KASSERT(tcfg->algo_count < 255, ("Increase algo array size"));
2610
2611 tcfg->algo[++tcfg->algo_count] = ta_new;
2612 ta_new->idx = tcfg->algo_count;
2613
2614 /* Set algorithm as default one for given type */
2615 if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 &&
2616 tcfg->def_algo[ta_new->type] == NULL)
2617 tcfg->def_algo[ta_new->type] = ta_new;
2618
2619 *idx = ta_new->idx;
2620
2621 return (0);
2622}
2623
2624/*
2625 * Unregisters table algo using @idx as id.
2626 * XXX: It is NOT safe to call this function in any place
2627 * other than ipfw instance destroy handler.
2628 */
2629void
2630ipfw_del_table_algo(struct ip_fw_chain *ch, int idx)
2631{
2632 struct tables_config *tcfg;
2633 struct table_algo *ta;
2634
2635 tcfg = CHAIN_TO_TCFG(ch);
2636
2637 KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d",
2638 idx, tcfg->algo_count));
2639
2640 ta = tcfg->algo[idx];
2641 KASSERT(ta != NULL, ("algo idx %d is NULL", idx));
2642
2643 if (tcfg->def_algo[ta->type] == ta)
2644 tcfg->def_algo[ta->type] = NULL;
2645
2646 free(ta, M_IPFW);
2647}
2648
2649/*
2650 * Lists all table algorithms currently available.
2651 * Data layout (v0)(current):
2652 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
2653 * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
2654 *
2655 * Returns 0 on success
2656 */
2657static int
2658list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2659 struct sockopt_data *sd)
2660{
2661 struct _ipfw_obj_lheader *olh;
2662 struct tables_config *tcfg;
2663 ipfw_ta_info *i;
2664 struct table_algo *ta;
2665 uint32_t count, n, size;
2666
2667 olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
2668 if (olh == NULL)
2669 return (EINVAL);
2670 if (sd->valsize < olh->size)
2671 return (EINVAL);
2672
2673 IPFW_UH_RLOCK(ch);
2674 tcfg = CHAIN_TO_TCFG(ch);
2675 count = tcfg->algo_count;
2676 size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
2677
2678 /* Fill in header regadless of buffer size */
2679 olh->count = count;
2680 olh->objsize = sizeof(ipfw_ta_info);
2681
2682 if (size > olh->size) {
2683 olh->size = size;
2684 IPFW_UH_RUNLOCK(ch);
2685 return (ENOMEM);
2686 }
2687 olh->size = size;
2688
2689 for (n = 1; n <= count; n++) {
2690 i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i));
2691 KASSERT(i != NULL, ("previously checked buffer is not enough"));
2692 ta = tcfg->algo[n];
2693 strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2694 i->type = ta->type;
2695 i->refcnt = ta->refcnt;
2696 }
2697
2698 IPFW_UH_RUNLOCK(ch);
2699
2700 return (0);
2701}
2702
2703static int
2704classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
2705{
2706 /* Basic IPv4/IPv6 or u32 lookups */
2707 *puidx = cmd->arg1;
2708 /* Assume ADDR by default */
2709 *ptype = IPFW_TABLE_ADDR;
2710 int v;
2711
2712 if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) {
2713 /*
2714 * generic lookup. The key must be
2715 * in 32bit big-endian format.
2716 */
2717 v = ((ipfw_insn_u32 *)cmd)->d[1];
2718 switch (v) {
2719 case 0:
2720 case 1:
2721 /* IPv4 src/dst */
2722 break;
2723 case 2:
2724 case 3:
2725 /* src/dst port */
2726 *ptype = IPFW_TABLE_NUMBER;
2727 break;
2728 case 4:
2729 /* uid/gid */
2730 *ptype = IPFW_TABLE_NUMBER;
2731 break;
2732 case 5:
2733 /* jid */
2734 *ptype = IPFW_TABLE_NUMBER;
2735 break;
2736 case 6:
2737 /* dscp */
2738 *ptype = IPFW_TABLE_NUMBER;
2739 break;
2740 }
2741 }
2742
2743 return (0);
2744}
2745
2746static int
2747classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
2748{
2749 ipfw_insn_if *cmdif;
2750
2751 /* Interface table, possibly */
2752 cmdif = (ipfw_insn_if *)cmd;
2753 if (cmdif->name[0] != '\1')
2754 return (1);
2755
2756 *ptype = IPFW_TABLE_INTERFACE;
2757 *puidx = cmdif->p.kidx;
2758
2759 return (0);
2760}
2761
2762static int
2763classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
2764{
2765
2766 *puidx = cmd->arg1;
2767 *ptype = IPFW_TABLE_FLOW;
2768
2769 return (0);
2770}
2771
2772static void
2773update_arg1(ipfw_insn *cmd, uint16_t idx)
2774{
2775
2776 cmd->arg1 = idx;
2777}
2778
2779static void
2780update_via(ipfw_insn *cmd, uint16_t idx)
2781{
2782 ipfw_insn_if *cmdif;
2783
2784 cmdif = (ipfw_insn_if *)cmd;
2785 cmdif->p.kidx = idx;
2786}
2787
2788static int
2789table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
2790 struct named_object **pno)
2791{
2792 struct table_config *tc;
2793 int error;
2794
2795 IPFW_UH_WLOCK_ASSERT(ch);
2796
2797 error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
2798 if (error != 0)
2799 return (error);
2800
2801 *pno = &tc->no;
2802 return (0);
2803}
2804
2805/* XXX: sets-sets! */
2806static struct named_object *
2807table_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
2808{
2809 struct namedobj_instance *ni;
2810 struct table_config *tc;
2811
2812 IPFW_UH_WLOCK_ASSERT(ch);
2813 ni = CHAIN_TO_NI(ch);
2814 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx);
2815 KASSERT(tc != NULL, ("Table with index %d not found", idx));
2816
2817 return (&tc->no);
2818}
2819
2820static int
2821table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
2822 enum ipfw_sets_cmd cmd)
2823{
2824
2825 switch (cmd) {
2826 case SWAP_ALL:
2827 case TEST_ALL:
2828 case MOVE_ALL:
2829 /*
2830 * Always return success, the real action and decision
2831 * should make table_manage_sets_all().
2832 */
2833 return (0);
2834 case TEST_ONE:
2835 case MOVE_ONE:
2836 /*
2837 * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add
2838 * if set number will be used in hash function. Currently
2839 * we can just use generic handler that replaces set value.
2840 */
2841 if (V_fw_tables_sets == 0)
2842 return (0);
2843 break;
2844 case COUNT_ONE:
2845 /*
2846 * Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is
2847 * disabled. This allow skip table's opcodes from additional
2848 * checks when specific rules moved to another set.
2849 */
2850 if (V_fw_tables_sets == 0)
2851 return (EOPNOTSUPP);
2852 }
2853 /* Use generic sets handler when per-set sysctl is enabled. */
2854 return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
2855 set, new_set, cmd));
2856}
2857
2858/*
2859 * We register several opcode rewriters for lookup tables.
2860 * All tables opcodes have the same ETLV type, but different subtype.
2861 * To avoid invoking sets handler several times for XXX_ALL commands,
2862 * we use separate manage_sets handler. O_RECV has the lowest value,
2863 * so it should be called first.
2864 */
2865static int
2866table_manage_sets_all(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
2867 enum ipfw_sets_cmd cmd)
2868{
2869
2870 switch (cmd) {
2871 case SWAP_ALL:
2872 case TEST_ALL:
2873 /*
2874 * Return success for TEST_ALL, since nothing prevents
2875 * move rules from one set to another. All tables are
2876 * accessible from all sets when per-set tables sysctl
2877 * is disabled.
2878 */
2879 case MOVE_ALL:
2880 if (V_fw_tables_sets == 0)
2881 return (0);
2882 break;
2883 default:
2884 return (table_manage_sets(ch, set, new_set, cmd));
2885 }
2886 /* Use generic sets handler when per-set sysctl is enabled. */
2887 return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
2888 set, new_set, cmd));
2889}
2890
2891static struct opcode_obj_rewrite opcodes[] = {
2892 {
2893 .opcode = O_IP_SRC_LOOKUP,
2894 .etlv = IPFW_TLV_TBL_NAME,
2895 .classifier = classify_srcdst,
2896 .update = update_arg1,
2897 .find_byname = table_findbyname,
2898 .find_bykidx = table_findbykidx,
2899 .create_object = create_table_compat,
2900 .manage_sets = table_manage_sets,
2901 },
2902 {
2903 .opcode = O_IP_DST_LOOKUP,
2904 .etlv = IPFW_TLV_TBL_NAME,
2905 .classifier = classify_srcdst,
2906 .update = update_arg1,
2907 .find_byname = table_findbyname,
2908 .find_bykidx = table_findbykidx,
2909 .create_object = create_table_compat,
2910 .manage_sets = table_manage_sets,
2911 },
2912 {
2913 .opcode = O_IP_FLOW_LOOKUP,
2914 .etlv = IPFW_TLV_TBL_NAME,
2915 .classifier = classify_flow,
2916 .update = update_arg1,
2917 .find_byname = table_findbyname,
2918 .find_bykidx = table_findbykidx,
2919 .create_object = create_table_compat,
2920 .manage_sets = table_manage_sets,
2921 },
2922 {
2923 .opcode = O_XMIT,
2924 .etlv = IPFW_TLV_TBL_NAME,
2925 .classifier = classify_via,
2926 .update = update_via,
2927 .find_byname = table_findbyname,
2928 .find_bykidx = table_findbykidx,
2929 .create_object = create_table_compat,
2930 .manage_sets = table_manage_sets,
2931 },
2932 {
2933 .opcode = O_RECV,
2934 .etlv = IPFW_TLV_TBL_NAME,
2935 .classifier = classify_via,
2936 .update = update_via,
2937 .find_byname = table_findbyname,
2938 .find_bykidx = table_findbykidx,
2939 .create_object = create_table_compat,
2940 .manage_sets = table_manage_sets_all,
2941 },
2942 {
2943 .opcode = O_VIA,
2944 .etlv = IPFW_TLV_TBL_NAME,
2945 .classifier = classify_via,
2946 .update = update_via,
2947 .find_byname = table_findbyname,
2948 .find_bykidx = table_findbykidx,
2949 .create_object = create_table_compat,
2950 .manage_sets = table_manage_sets,
2951 },
2952};
2953
2954static int
2955test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no,
2956 void *arg __unused)
2957{
2958
2959 /* Check that there aren't any tables in not default set */
2960 if (no->set != 0)
2961 return (EBUSY);
2962 return (0);
2963}
2964
2965/*
2966 * Switch between "set 0" and "rule's set" table binding,
2967 * Check all ruleset bindings and permits changing
2968 * IFF each binding has both rule AND table in default set (set 0).
2969 *
2970 * Returns 0 on success.
2971 */
2972int
2973ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
2974{
2975 struct opcode_obj_rewrite *rw;
2976 struct namedobj_instance *ni;
2977 struct named_object *no;
2978 struct ip_fw *rule;
2979 ipfw_insn *cmd;
2980 int cmdlen, i, l;
2981 uint16_t kidx;
2982 uint8_t subtype;
2983
2984 IPFW_UH_WLOCK(ch);
2985
2986 if (V_fw_tables_sets == sets) {
2987 IPFW_UH_WUNLOCK(ch);
2988 return (0);
2989 }
2990 ni = CHAIN_TO_NI(ch);
2991 if (sets == 0) {
2992 /*
2993 * Prevent disabling sets support if we have some tables
2994 * in not default sets.
2995 */
2996 if (ipfw_objhash_foreach_type(ni, test_sets_cb,
2997 NULL, IPFW_TLV_TBL_NAME) != 0) {
2998 IPFW_UH_WUNLOCK(ch);
2999 return (EBUSY);
3000 }
3001 }
3002 /*
3003 * Scan all rules and examine tables opcodes.
3004 */
3005 for (i = 0; i < ch->n_rules; i++) {
3006 rule = ch->map[i];
3007
3008 l = rule->cmd_len;
3009 cmd = rule->cmd;
3010 cmdlen = 0;
3011 for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
3012 cmdlen = F_LEN(cmd);
3013 /* Check only tables opcodes */
3014 for (kidx = 0, rw = opcodes;
3015 rw < opcodes + nitems(opcodes); rw++) {
3016 if (rw->opcode != cmd->opcode)
3017 continue;
3018 if (rw->classifier(cmd, &kidx, &subtype) == 0)
3019 break;
3020 }
3021 if (kidx == 0)
3022 continue;
3023 no = ipfw_objhash_lookup_kidx(ni, kidx);
3024 /* Check if both table object and rule has the set 0 */
3025 if (no->set != 0 || rule->set != 0) {
3026 IPFW_UH_WUNLOCK(ch);
3027 return (EBUSY);
3028 }
3029
3030 }
3031 }
3032 V_fw_tables_sets = sets;
3033 IPFW_UH_WUNLOCK(ch);
3034 return (0);
3035}
3036
/*
 * Checks table name for validity.
 * Only the generic object name check is enforced here; the rest
 * should be done in userland.
 *
 * Returns 0 if name is considered valid.
 */
static int
check_table_name(const char *name)
{
	int rc;

	/*
	 * TODO: do some more complicated checks
	 */
	rc = ipfw_check_object_name_generic(name);

	return (rc);
}
3053
3054/*
3055 * Finds table config based on either legacy index
3056 * or name in ntlv.
3057 * Note @ti structure contains unchecked data from userland.
3058 *
3059 * Returns 0 in success and fills in @tc with found config
3060 */
3061static int
3062find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
3063 struct table_config **tc)
3064{
3065 char *name, bname[16];
3066 struct named_object *no;
3067 ipfw_obj_ntlv *ntlv;
3068 uint32_t set;
3069
3070 if (ti->tlvs != NULL) {
3071 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
3072 IPFW_TLV_TBL_NAME);
3073 if (ntlv == NULL)
3074 return (EINVAL);
3075 name = ntlv->name;
3076
3077 /*
3078 * Use set provided by @ti instead of @ntlv one.
3079 * This is needed due to different sets behavior
3080 * controlled by V_fw_tables_sets.
3081 */
3082 set = (V_fw_tables_sets != 0) ? ti->set : 0;
3083 } else {
3084 snprintf(bname, sizeof(bname), "%d", ti->uidx);
3085 name = bname;
3086 set = 0;
3087 }
3088
3089 no = ipfw_objhash_lookup_name(ni, set, name);
3090 *tc = (struct table_config *)no;
3091
3092 return (0);
3093}
3094
3095/*
3096 * Finds table config based on either legacy index
3097 * or name in ntlv.
3098 * Note @ti structure contains unchecked data from userland.
3099 *
3100 * Returns pointer to table_config or NULL.
3101 */
3102static struct table_config *
3103find_table(struct namedobj_instance *ni, struct tid_info *ti)
3104{
3105 struct table_config *tc;
3106
3107 if (find_table_err(ni, ti, &tc) != 0)
3108 return (NULL);
3109
3110 return (tc);
3111}
3112
3113/*
3114 * Allocate new table config structure using
3115 * specified @algo and @aname.
3116 *
3117 * Returns pointer to config or NULL.
3118 */
3119static struct table_config *
3120alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
3121 struct table_algo *ta, char *aname, uint8_t tflags)
3122{
3123 char *name, bname[16];
3124 struct table_config *tc;
3125 int error;
3126 ipfw_obj_ntlv *ntlv;
3127 uint32_t set;
3128
3129 if (ti->tlvs != NULL) {
3130 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
3131 IPFW_TLV_TBL_NAME);
3132 if (ntlv == NULL)
3133 return (NULL);
3134 name = ntlv->name;
3135 set = ntlv->set;
3136 } else {
3137 /* Compat part: convert number to string representation */
3138 snprintf(bname, sizeof(bname), "%d", ti->uidx);
3139 name = bname;
3140 set = 0;
3141 }
3142
3143 tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
3144 tc->no.name = tc->tablename;
3145 tc->no.subtype = ta->type;
3146 tc->no.set = set;
3147 tc->tflags = tflags;
3148 tc->ta = ta;
3149 strlcpy(tc->tablename, name, sizeof(tc->tablename));
3150 /* Set "shared" value type by default */
3151 tc->vshared = 1;
3152
3153 /* Preallocate data structures for new tables */
3154 error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags);
3155 if (error != 0) {
3156 free(tc, M_IPFW);
3157 return (NULL);
3158 }
3159
3160 return (tc);
3161}
3162
3163/*
3164 * Destroys table state and config.
3165 */
3166static void
3167free_table_config(struct namedobj_instance *ni, struct table_config *tc)
3168{
3169
3170 KASSERT(tc->linked == 0, ("free() on linked config"));
3171 /* UH lock MUST NOT be held */
3172
3173 /*
3174 * We're using ta without any locking/referencing.
3175 * TODO: fix this if we're going to use unloadable algos.
3176 */
3177 tc->ta->destroy(tc->astate, &tc->ti_copy);
3178 free(tc, M_IPFW);
3179}
3180
3181/*
3182 * Links @tc to @chain table named instance.
3183 * Sets appropriate type/states in @chain table info.
3184 */
3185static void
3186link_table(struct ip_fw_chain *ch, struct table_config *tc)
3187{
3188 struct namedobj_instance *ni;
3189 struct table_info *ti;
3190 uint16_t kidx;
3191
3192 IPFW_UH_WLOCK_ASSERT(ch);
3193 IPFW_WLOCK_ASSERT(ch);
3194
3195 ni = CHAIN_TO_NI(ch);
3196 kidx = tc->no.kidx;
3197
3198 ipfw_objhash_add(ni, &tc->no);
3199
3200 ti = KIDX_TO_TI(ch, kidx);
3201 *ti = tc->ti_copy;
3202
3203 /* Notify algo on real @ti address */
3204 if (tc->ta->change_ti != NULL)
3205 tc->ta->change_ti(tc->astate, ti);
3206
3207 tc->linked = 1;
3208 tc->ta->refcnt++;
3209}
3210
3211/*
3212 * Unlinks @tc from @chain table named instance.
3213 * Zeroes states in @chain and stores them in @tc.
3214 */
3215static void
3216unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
3217{
3218 struct namedobj_instance *ni;
3219 struct table_info *ti;
3220 uint16_t kidx;
3221
3222 IPFW_UH_WLOCK_ASSERT(ch);
3223 IPFW_WLOCK_ASSERT(ch);
3224
3225 ni = CHAIN_TO_NI(ch);
3226 kidx = tc->no.kidx;
3227
3228 /* Clear state. @ti copy is already saved inside @tc */
3229 ipfw_objhash_del(ni, &tc->no);
3230 ti = KIDX_TO_TI(ch, kidx);
3231 memset(ti, 0, sizeof(struct table_info));
3232 tc->linked = 0;
3233 tc->ta->refcnt--;
3234
3235 /* Notify algo on real @ti address */
3236 if (tc->ta->change_ti != NULL)
3237 tc->ta->change_ti(tc->astate, NULL);
3238}
3239
3240static struct ipfw_sopt_handler scodes[] = {
3241 { IP_FW_TABLE_XCREATE, 0, HDIR_SET, create_table },
3242 { IP_FW_TABLE_XDESTROY, 0, HDIR_SET, flush_table_v0 },
3243 { IP_FW_TABLE_XFLUSH, 0, HDIR_SET, flush_table_v0 },
3244 { IP_FW_TABLE_XMODIFY, 0, HDIR_BOTH, modify_table },
3245 { IP_FW_TABLE_XINFO, 0, HDIR_GET, describe_table },
3246 { IP_FW_TABLES_XLIST, 0, HDIR_GET, list_tables },
3247 { IP_FW_TABLE_XLIST, 0, HDIR_GET, dump_table_v0 },
3248 { IP_FW_TABLE_XLIST, 1, HDIR_GET, dump_table_v1 },
3249 { IP_FW_TABLE_XADD, 0, HDIR_BOTH, manage_table_ent_v0 },
3250 { IP_FW_TABLE_XADD, 1, HDIR_BOTH, manage_table_ent_v1 },
3251 { IP_FW_TABLE_XDEL, 0, HDIR_BOTH, manage_table_ent_v0 },
3252 { IP_FW_TABLE_XDEL, 1, HDIR_BOTH, manage_table_ent_v1 },
3253 { IP_FW_TABLE_XFIND, 0, HDIR_GET, find_table_entry },
3254 { IP_FW_TABLE_XSWAP, 0, HDIR_SET, swap_table },
3255 { IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo },
3256 { IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size },
3257};
3258
3259static int
3260destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
3261 void *arg)
3262{
3263
3264 unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no);
3265 if (ipfw_objhash_free_idx(ni, no->kidx) != 0)
3266 printf("Error unlinking kidx %d from table %s\n",
3267 no->kidx, no->name);
3268 free_table_config(ni, (struct table_config *)no);
3269 return (0);
3270}
3271
3272/*
3273 * Shuts tables module down.
3274 */
3275void
3276ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
3277{
3278
3279 IPFW_DEL_SOPT_HANDLER(last, scodes);
3280 IPFW_DEL_OBJ_REWRITER(last, opcodes);
3281
3282 /* Remove all tables from working set */
3283 IPFW_UH_WLOCK(ch);
3284 IPFW_WLOCK(ch);
3285 ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
3286 IPFW_WUNLOCK(ch);
3287 IPFW_UH_WUNLOCK(ch);
3288
3289 /* Free pointers itself */
3290 free(ch->tablestate, M_IPFW);
3291
3292 ipfw_table_value_destroy(ch, last);
3293 ipfw_table_algo_destroy(ch);
3294
3295 ipfw_objhash_destroy(CHAIN_TO_NI(ch));
3296 free(CHAIN_TO_TCFG(ch), M_IPFW);
3297}
3298
3299/*
3300 * Starts tables module.
3301 */
3302int
3303ipfw_init_tables(struct ip_fw_chain *ch, int first)
3304{
3305 struct tables_config *tcfg;
3306
3307 /* Allocate pointers */
3308 ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info),
3309 M_IPFW, M_WAITOK | M_ZERO);
3310
3311 tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO);
3312 tcfg->namehash = ipfw_objhash_create(V_fw_tables_max);
3313 ch->tblcfg = tcfg;
3314
3315 ipfw_table_value_init(ch, first);
3316 ipfw_table_algo_init(ch);
3317
3318 IPFW_ADD_OBJ_REWRITER(first, opcodes);
3319 IPFW_ADD_SOPT_HANDLER(first, scodes);
3320 return (0);
3321}
3322
3323
3324
1140 IPFW_UH_RUNLOCK(ch);
1141
1142 return (error);
1143}
1144
1145/*
1146 * Flushes all entries or destroys given table.
1147 * Data layout (v0)(current):
1148 * Request: [ ipfw_obj_header ]
1149 *
1150 * Returns 0 on success
1151 */
1152static int
1153flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1154 struct sockopt_data *sd)
1155{
1156 int error;
1157 struct _ipfw_obj_header *oh;
1158 struct tid_info ti;
1159
1160 if (sd->valsize != sizeof(*oh))
1161 return (EINVAL);
1162
1163 oh = (struct _ipfw_obj_header *)op3;
1164 objheader_to_ti(oh, &ti);
1165
1166 if (op3->opcode == IP_FW_TABLE_XDESTROY)
1167 error = destroy_table(ch, &ti);
1168 else if (op3->opcode == IP_FW_TABLE_XFLUSH)
1169 error = flush_table(ch, &ti);
1170 else
1171 return (ENOTSUP);
1172
1173 return (error);
1174}
1175
1176static void
1177restart_flush(void *object, struct op_state *_state)
1178{
1179 struct tableop_state *ts;
1180
1181 ts = (struct tableop_state *)_state;
1182
1183 if (ts->tc != object)
1184 return;
1185
1186 /* Indicate we've called */
1187 ts->modified = 1;
1188}
1189
1190/*
1191 * Flushes given table.
1192 *
1193 * Function create new table instance with the same
1194 * parameters, swaps it with old one and
1195 * flushes state without holding runtime WLOCK.
1196 *
1197 * Returns 0 on success.
1198 */
1199int
1200flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
1201{
1202 struct namedobj_instance *ni;
1203 struct table_config *tc;
1204 struct table_algo *ta;
1205 struct table_info ti_old, ti_new, *tablestate;
1206 void *astate_old, *astate_new;
1207 char algostate[64], *pstate;
1208 struct tableop_state ts;
1209 int error, need_gc;
1210 uint16_t kidx;
1211 uint8_t tflags;
1212
1213 /*
1214 * Stage 1: save table algorithm.
1215 * Reference found table to ensure it won't disappear.
1216 */
1217 IPFW_UH_WLOCK(ch);
1218 ni = CHAIN_TO_NI(ch);
1219 if ((tc = find_table(ni, ti)) == NULL) {
1220 IPFW_UH_WUNLOCK(ch);
1221 return (ESRCH);
1222 }
1223 need_gc = 0;
1224 astate_new = NULL;
1225 memset(&ti_new, 0, sizeof(ti_new));
1226restart:
1227 /* Set up swap handler */
1228 memset(&ts, 0, sizeof(ts));
1229 ts.opstate.func = restart_flush;
1230 ts.tc = tc;
1231
1232 ta = tc->ta;
1233 /* Do not flush readonly tables */
1234 if ((ta->flags & TA_FLAG_READONLY) != 0) {
1235 IPFW_UH_WUNLOCK(ch);
1236 return (EACCES);
1237 }
1238 /* Save startup algo parameters */
1239 if (ta->print_config != NULL) {
1240 ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
1241 algostate, sizeof(algostate));
1242 pstate = algostate;
1243 } else
1244 pstate = NULL;
1245 tflags = tc->tflags;
1246 tc->no.refcnt++;
1247 add_toperation_state(ch, &ts);
1248 IPFW_UH_WUNLOCK(ch);
1249
1250 /*
1251 * Stage 1.5: if this is not the first attempt, destroy previous state
1252 */
1253 if (need_gc != 0) {
1254 ta->destroy(astate_new, &ti_new);
1255 need_gc = 0;
1256 }
1257
1258 /*
1259 * Stage 2: allocate new table instance using same algo.
1260 */
1261 memset(&ti_new, 0, sizeof(struct table_info));
1262 error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
1263
1264 /*
1265 * Stage 3: swap old state pointers with newly-allocated ones.
1266 * Decrease refcount.
1267 */
1268 IPFW_UH_WLOCK(ch);
1269 tc->no.refcnt--;
1270 del_toperation_state(ch, &ts);
1271
1272 if (error != 0) {
1273 IPFW_UH_WUNLOCK(ch);
1274 return (error);
1275 }
1276
1277 /*
1278 * Restart operation if table swap has happened:
1279 * even if algo may be the same, algo init parameters
1280 * may change. Restart operation instead of doing
1281 * complex checks.
1282 */
1283 if (ts.modified != 0) {
1284 /* Delay destroying data since we're holding UH lock */
1285 need_gc = 1;
1286 goto restart;
1287 }
1288
1289 ni = CHAIN_TO_NI(ch);
1290 kidx = tc->no.kidx;
1291 tablestate = (struct table_info *)ch->tablestate;
1292
1293 IPFW_WLOCK(ch);
1294 ti_old = tablestate[kidx];
1295 tablestate[kidx] = ti_new;
1296 IPFW_WUNLOCK(ch);
1297
1298 astate_old = tc->astate;
1299 tc->astate = astate_new;
1300 tc->ti_copy = ti_new;
1301 tc->count = 0;
1302
1303 /* Notify algo on real @ti address */
1304 if (ta->change_ti != NULL)
1305 ta->change_ti(tc->astate, &tablestate[kidx]);
1306
1307 /*
1308 * Stage 4: unref values.
1309 */
1310 ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
1311 IPFW_UH_WUNLOCK(ch);
1312
1313 /*
1314 * Stage 5: perform real flush/destroy.
1315 */
1316 ta->destroy(astate_old, &ti_old);
1317
1318 return (0);
1319}
1320
1321/*
1322 * Swaps two tables.
1323 * Data layout (v0)(current):
1324 * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
1325 *
1326 * Returns 0 on success
1327 */
1328static int
1329swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1330 struct sockopt_data *sd)
1331{
1332 int error;
1333 struct _ipfw_obj_header *oh;
1334 struct tid_info ti_a, ti_b;
1335
1336 if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
1337 return (EINVAL);
1338
1339 oh = (struct _ipfw_obj_header *)op3;
1340 ntlv_to_ti(&oh->ntlv, &ti_a);
1341 ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b);
1342
1343 error = swap_tables(ch, &ti_a, &ti_b);
1344
1345 return (error);
1346}
1347
1348/*
1349 * Swaps two tables of the same type/valtype.
1350 *
1351 * Checks if tables are compatible and limits
1352 * permits swap, than actually perform swap.
1353 *
1354 * Each table consists of 2 different parts:
1355 * config:
1356 * @tc (with name, set, kidx) and rule bindings, which is "stable".
1357 * number of items
1358 * table algo
1359 * runtime:
1360 * runtime data @ti (ch->tablestate)
1361 * runtime cache in @tc
1362 * algo-specific data (@tc->astate)
1363 *
1364 * So we switch:
1365 * all runtime data
1366 * number of items
1367 * table algo
1368 *
1369 * After that we call @ti change handler for each table.
1370 *
1371 * Note that referencing @tc won't protect tc->ta from change.
1372 * XXX: Do we need to restrict swap between locked tables?
1373 * XXX: Do we need to exchange ftype?
1374 *
1375 * Returns 0 on success.
1376 */
1377static int
1378swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
1379 struct tid_info *b)
1380{
1381 struct namedobj_instance *ni;
1382 struct table_config *tc_a, *tc_b;
1383 struct table_algo *ta;
1384 struct table_info ti, *tablestate;
1385 void *astate;
1386 uint32_t count;
1387
1388 /*
1389 * Stage 1: find both tables and ensure they are of
1390 * the same type.
1391 */
1392 IPFW_UH_WLOCK(ch);
1393 ni = CHAIN_TO_NI(ch);
1394 if ((tc_a = find_table(ni, a)) == NULL) {
1395 IPFW_UH_WUNLOCK(ch);
1396 return (ESRCH);
1397 }
1398 if ((tc_b = find_table(ni, b)) == NULL) {
1399 IPFW_UH_WUNLOCK(ch);
1400 return (ESRCH);
1401 }
1402
1403 /* It is very easy to swap between the same table */
1404 if (tc_a == tc_b) {
1405 IPFW_UH_WUNLOCK(ch);
1406 return (0);
1407 }
1408
1409 /* Check type and value are the same */
1410 if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) {
1411 IPFW_UH_WUNLOCK(ch);
1412 return (EINVAL);
1413 }
1414
1415 /* Check limits before swap */
1416 if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
1417 (tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
1418 IPFW_UH_WUNLOCK(ch);
1419 return (EFBIG);
1420 }
1421
1422 /* Check if one of the tables is readonly */
1423 if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
1424 IPFW_UH_WUNLOCK(ch);
1425 return (EACCES);
1426 }
1427
1428 /* Notify we're going to swap */
1429 rollback_toperation_state(ch, tc_a);
1430 rollback_toperation_state(ch, tc_b);
1431
1432 /* Everything is fine, prepare to swap */
1433 tablestate = (struct table_info *)ch->tablestate;
1434 ti = tablestate[tc_a->no.kidx];
1435 ta = tc_a->ta;
1436 astate = tc_a->astate;
1437 count = tc_a->count;
1438
1439 IPFW_WLOCK(ch);
1440 /* a <- b */
1441 tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
1442 tc_a->ta = tc_b->ta;
1443 tc_a->astate = tc_b->astate;
1444 tc_a->count = tc_b->count;
1445 /* b <- a */
1446 tablestate[tc_b->no.kidx] = ti;
1447 tc_b->ta = ta;
1448 tc_b->astate = astate;
1449 tc_b->count = count;
1450 IPFW_WUNLOCK(ch);
1451
1452 /* Ensure tc.ti copies are in sync */
1453 tc_a->ti_copy = tablestate[tc_a->no.kidx];
1454 tc_b->ti_copy = tablestate[tc_b->no.kidx];
1455
1456 /* Notify both tables on @ti change */
1457 if (tc_a->ta->change_ti != NULL)
1458 tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
1459 if (tc_b->ta->change_ti != NULL)
1460 tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);
1461
1462 IPFW_UH_WUNLOCK(ch);
1463
1464 return (0);
1465}
1466
1467/*
1468 * Destroys table specified by @ti.
1469 * Data layout (v0)(current):
1470 * Request: [ ip_fw3_opheader ]
1471 *
1472 * Returns 0 on success
1473 */
1474static int
1475destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
1476{
1477 struct namedobj_instance *ni;
1478 struct table_config *tc;
1479
1480 IPFW_UH_WLOCK(ch);
1481
1482 ni = CHAIN_TO_NI(ch);
1483 if ((tc = find_table(ni, ti)) == NULL) {
1484 IPFW_UH_WUNLOCK(ch);
1485 return (ESRCH);
1486 }
1487
1488 /* Do not permit destroying referenced tables */
1489 if (tc->no.refcnt > 0) {
1490 IPFW_UH_WUNLOCK(ch);
1491 return (EBUSY);
1492 }
1493
1494 IPFW_WLOCK(ch);
1495 unlink_table(ch, tc);
1496 IPFW_WUNLOCK(ch);
1497
1498 /* Free obj index */
1499 if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
1500 printf("Error unlinking kidx %d from table %s\n",
1501 tc->no.kidx, tc->tablename);
1502
1503 /* Unref values used in tables while holding UH lock */
1504 ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
1505 IPFW_UH_WUNLOCK(ch);
1506
1507 free_table_config(ni, tc);
1508
1509 return (0);
1510}
1511
/*
 * Rounds @v up to the nearest power of 2.
 *
 * Values that already are a power of 2 are returned unchanged.
 * 0 and anything above 2^31 wrap around and yield 0.
 */
static uint32_t
roundup2p(uint32_t v)
{
	unsigned int shift;

	/* Smear the highest set bit of (v - 1) into all lower bits */
	v--;
	for (shift = 1; shift < 32; shift <<= 1)
		v |= v >> shift;

	return (v + 1);
}
1526
1527/*
1528 * Grow tables index.
1529 *
1530 * Returns 0 on success.
1531 */
1532int
1533ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
1534{
1535 unsigned int ntables_old, tbl;
1536 struct namedobj_instance *ni;
1537 void *new_idx, *old_tablestate, *tablestate;
1538 struct table_info *ti;
1539 struct table_config *tc;
1540 int i, new_blocks;
1541
1542 /* Check new value for validity */
1543 if (ntables == 0)
1544 return (EINVAL);
1545 if (ntables > IPFW_TABLES_MAX)
1546 ntables = IPFW_TABLES_MAX;
1547 /* Alight to nearest power of 2 */
1548 ntables = (unsigned int)roundup2p(ntables);
1549
1550 /* Allocate new pointers */
1551 tablestate = malloc(ntables * sizeof(struct table_info),
1552 M_IPFW, M_WAITOK | M_ZERO);
1553
1554 ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
1555
1556 IPFW_UH_WLOCK(ch);
1557
1558 tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
1559 ni = CHAIN_TO_NI(ch);
1560
1561 /* Temporary restrict decreasing max_tables */
1562 if (ntables < V_fw_tables_max) {
1563
1564 /*
1565 * FIXME: Check if we really can shrink
1566 */
1567 IPFW_UH_WUNLOCK(ch);
1568 return (EINVAL);
1569 }
1570
1571 /* Copy table info/indices */
1572 memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
1573 ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
1574
1575 IPFW_WLOCK(ch);
1576
1577 /* Change pointers */
1578 old_tablestate = ch->tablestate;
1579 ch->tablestate = tablestate;
1580 ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
1581
1582 ntables_old = V_fw_tables_max;
1583 V_fw_tables_max = ntables;
1584
1585 IPFW_WUNLOCK(ch);
1586
1587 /* Notify all consumers that their @ti pointer has changed */
1588 ti = (struct table_info *)ch->tablestate;
1589 for (i = 0; i < tbl; i++, ti++) {
1590 if (ti->lookup == NULL)
1591 continue;
1592 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
1593 if (tc == NULL || tc->ta->change_ti == NULL)
1594 continue;
1595
1596 tc->ta->change_ti(tc->astate, ti);
1597 }
1598
1599 IPFW_UH_WUNLOCK(ch);
1600
1601 /* Free old pointers */
1602 free(old_tablestate, M_IPFW);
1603 ipfw_objhash_bitmap_free(new_idx, new_blocks);
1604
1605 return (0);
1606}
1607
1608/*
1609 * Lookup an IP @addr in table @tbl.
1610 * Stores found value in @val.
1611 *
1612 * Returns 1 if @addr was found.
1613 */
1614int
1615ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
1616 uint32_t *val)
1617{
1618 struct table_info *ti;
1619
1620 ti = KIDX_TO_TI(ch, tbl);
1621
1622 return (ti->lookup(ti, &addr, sizeof(in_addr_t), val));
1623}
1624
1625/*
1626 * Lookup an arbtrary key @paddr of legth @plen in table @tbl.
1627 * Stores found value in @val.
1628 *
1629 * Returns 1 if key was found.
1630 */
1631int
1632ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
1633 void *paddr, uint32_t *val)
1634{
1635 struct table_info *ti;
1636
1637 ti = KIDX_TO_TI(ch, tbl);
1638
1639 return (ti->lookup(ti, paddr, plen, val));
1640}
1641
1642/*
1643 * Info/List/dump support for tables.
1644 *
1645 */
1646
1647/*
1648 * High-level 'get' cmds sysctl handlers
1649 */
1650
1651/*
1652 * Lists all tables currently available in kernel.
1653 * Data layout (v0)(current):
1654 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
1655 * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
1656 *
1657 * Returns 0 on success
1658 */
1659static int
1660list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1661 struct sockopt_data *sd)
1662{
1663 struct _ipfw_obj_lheader *olh;
1664 int error;
1665
1666 olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
1667 if (olh == NULL)
1668 return (EINVAL);
1669 if (sd->valsize < olh->size)
1670 return (EINVAL);
1671
1672 IPFW_UH_RLOCK(ch);
1673 error = export_tables(ch, olh, sd);
1674 IPFW_UH_RUNLOCK(ch);
1675
1676 return (error);
1677}
1678
1679/*
1680 * Store table info to buffer provided by @sd.
1681 * Data layout (v0)(current):
1682 * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
1683 * Reply: [ ipfw_obj_header ipfw_xtable_info ]
1684 *
1685 * Returns 0 on success.
1686 */
1687static int
1688describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1689 struct sockopt_data *sd)
1690{
1691 struct _ipfw_obj_header *oh;
1692 struct table_config *tc;
1693 struct tid_info ti;
1694 size_t sz;
1695
1696 sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
1697 oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1698 if (oh == NULL)
1699 return (EINVAL);
1700
1701 objheader_to_ti(oh, &ti);
1702
1703 IPFW_UH_RLOCK(ch);
1704 if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
1705 IPFW_UH_RUNLOCK(ch);
1706 return (ESRCH);
1707 }
1708
1709 export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
1710 IPFW_UH_RUNLOCK(ch);
1711
1712 return (0);
1713}
1714
1715/*
1716 * Modifies existing table.
1717 * Data layout (v0)(current):
1718 * Request: [ ipfw_obj_header ipfw_xtable_info ]
1719 *
1720 * Returns 0 on success
1721 */
1722static int
1723modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1724 struct sockopt_data *sd)
1725{
1726 struct _ipfw_obj_header *oh;
1727 ipfw_xtable_info *i;
1728 char *tname;
1729 struct tid_info ti;
1730 struct namedobj_instance *ni;
1731 struct table_config *tc;
1732
1733 if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1734 return (EINVAL);
1735
1736 oh = (struct _ipfw_obj_header *)sd->kbuf;
1737 i = (ipfw_xtable_info *)(oh + 1);
1738
1739 /*
1740 * Verify user-supplied strings.
1741 * Check for null-terminated/zero-length strings/
1742 */
1743 tname = oh->ntlv.name;
1744 if (check_table_name(tname) != 0)
1745 return (EINVAL);
1746
1747 objheader_to_ti(oh, &ti);
1748 ti.type = i->type;
1749
1750 IPFW_UH_WLOCK(ch);
1751 ni = CHAIN_TO_NI(ch);
1752 if ((tc = find_table(ni, &ti)) == NULL) {
1753 IPFW_UH_WUNLOCK(ch);
1754 return (ESRCH);
1755 }
1756
1757 /* Do not support any modifications for readonly tables */
1758 if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
1759 IPFW_UH_WUNLOCK(ch);
1760 return (EACCES);
1761 }
1762
1763 if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
1764 tc->limit = i->limit;
1765 if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
1766 tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
1767 IPFW_UH_WUNLOCK(ch);
1768
1769 return (0);
1770}
1771
1772/*
1773 * Creates new table.
1774 * Data layout (v0)(current):
1775 * Request: [ ipfw_obj_header ipfw_xtable_info ]
1776 *
1777 * Returns 0 on success
1778 */
1779static int
1780create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1781 struct sockopt_data *sd)
1782{
1783 struct _ipfw_obj_header *oh;
1784 ipfw_xtable_info *i;
1785 char *tname, *aname;
1786 struct tid_info ti;
1787 struct namedobj_instance *ni;
1788
1789 if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1790 return (EINVAL);
1791
1792 oh = (struct _ipfw_obj_header *)sd->kbuf;
1793 i = (ipfw_xtable_info *)(oh + 1);
1794
1795 /*
1796 * Verify user-supplied strings.
1797 * Check for null-terminated/zero-length strings/
1798 */
1799 tname = oh->ntlv.name;
1800 aname = i->algoname;
1801 if (check_table_name(tname) != 0 ||
1802 strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname))
1803 return (EINVAL);
1804
1805 if (aname[0] == '\0') {
1806 /* Use default algorithm */
1807 aname = NULL;
1808 }
1809
1810 objheader_to_ti(oh, &ti);
1811 ti.type = i->type;
1812
1813 ni = CHAIN_TO_NI(ch);
1814
1815 IPFW_UH_RLOCK(ch);
1816 if (find_table(ni, &ti) != NULL) {
1817 IPFW_UH_RUNLOCK(ch);
1818 return (EEXIST);
1819 }
1820 IPFW_UH_RUNLOCK(ch);
1821
1822 return (create_table_internal(ch, &ti, aname, i, NULL, 0));
1823}
1824
1825/*
1826 * Creates new table based on @ti and @aname.
1827 *
1828 * Assume @aname to be checked and valid.
1829 * Stores allocated table kidx inside @pkidx (if non-NULL).
1830 * Reference created table if @compat is non-zero.
1831 *
1832 * Returns 0 on success.
1833 */
1834static int
1835create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
1836 char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat)
1837{
1838 struct namedobj_instance *ni;
1839 struct table_config *tc, *tc_new, *tmp;
1840 struct table_algo *ta;
1841 uint16_t kidx;
1842
1843 ni = CHAIN_TO_NI(ch);
1844
1845 ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
1846 if (ta == NULL)
1847 return (ENOTSUP);
1848
1849 tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
1850 if (tc == NULL)
1851 return (ENOMEM);
1852
1853 tc->vmask = i->vmask;
1854 tc->limit = i->limit;
1855 if (ta->flags & TA_FLAG_READONLY)
1856 tc->locked = 1;
1857 else
1858 tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;
1859
1860 IPFW_UH_WLOCK(ch);
1861
1862 /* Check if table has been already created */
1863 tc_new = find_table(ni, ti);
1864 if (tc_new != NULL) {
1865
1866 /*
1867 * Compat: do not fail if we're
1868 * requesting to create existing table
1869 * which has the same type
1870 */
1871 if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
1872 IPFW_UH_WUNLOCK(ch);
1873 free_table_config(ni, tc);
1874 return (EEXIST);
1875 }
1876
1877 /* Exchange tc and tc_new for proper refcounting & freeing */
1878 tmp = tc;
1879 tc = tc_new;
1880 tc_new = tmp;
1881 } else {
1882 /* New table */
1883 if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
1884 IPFW_UH_WUNLOCK(ch);
1885 printf("Unable to allocate table index."
1886 " Consider increasing net.inet.ip.fw.tables_max");
1887 free_table_config(ni, tc);
1888 return (EBUSY);
1889 }
1890 tc->no.kidx = kidx;
1891 tc->no.etlv = IPFW_TLV_TBL_NAME;
1892
1893 IPFW_WLOCK(ch);
1894 link_table(ch, tc);
1895 IPFW_WUNLOCK(ch);
1896 }
1897
1898 if (compat != 0)
1899 tc->no.refcnt++;
1900 if (pkidx != NULL)
1901 *pkidx = tc->no.kidx;
1902
1903 IPFW_UH_WUNLOCK(ch);
1904
1905 if (tc_new != NULL)
1906 free_table_config(ni, tc_new);
1907
1908 return (0);
1909}
1910
1911static void
1912ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
1913{
1914
1915 memset(ti, 0, sizeof(struct tid_info));
1916 ti->set = ntlv->set;
1917 ti->uidx = ntlv->idx;
1918 ti->tlvs = ntlv;
1919 ti->tlen = ntlv->head.length;
1920}
1921
1922static void
1923objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
1924{
1925
1926 ntlv_to_ti(&oh->ntlv, ti);
1927}
1928
/*
 * Returns the named object instance CHAIN_TO_NI() yields for @ch.
 */
struct namedobj_instance *
ipfw_get_table_objhash(struct ip_fw_chain *ch)
{
	struct namedobj_instance *ni;

	ni = CHAIN_TO_NI(ch);

	return (ni);
}
1935
1936/*
1937 * Exports basic table info as name TLV.
1938 * Used inside dump_static_rules() to provide info
1939 * about all tables referenced by current ruleset.
1940 *
1941 * Returns 0 on success.
1942 */
1943int
1944ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
1945 struct sockopt_data *sd)
1946{
1947 struct namedobj_instance *ni;
1948 struct named_object *no;
1949 ipfw_obj_ntlv *ntlv;
1950
1951 ni = CHAIN_TO_NI(ch);
1952
1953 no = ipfw_objhash_lookup_kidx(ni, kidx);
1954 KASSERT(no != NULL, ("invalid table kidx passed"));
1955
1956 ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
1957 if (ntlv == NULL)
1958 return (ENOMEM);
1959
1960 ntlv->head.type = IPFW_TLV_TBL_NAME;
1961 ntlv->head.length = sizeof(*ntlv);
1962 ntlv->idx = no->kidx;
1963 strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
1964
1965 return (0);
1966}
1967
1968struct dump_args {
1969 struct ip_fw_chain *ch;
1970 struct table_info *ti;
1971 struct table_config *tc;
1972 struct sockopt_data *sd;
1973 uint32_t cnt;
1974 uint16_t uidx;
1975 int error;
1976 uint32_t size;
1977 ipfw_table_entry *ent;
1978 ta_foreach_f *f;
1979 void *farg;
1980 ipfw_obj_tentry tent;
1981};
1982
1983static int
1984count_ext_entries(void *e, void *arg)
1985{
1986 struct dump_args *da;
1987
1988 da = (struct dump_args *)arg;
1989 da->cnt++;
1990
1991 return (0);
1992}
1993
1994/*
1995 * Gets number of items from table either using
1996 * internal counter or calling algo callback for
1997 * externally-managed tables.
1998 *
1999 * Returns number of records.
2000 */
2001static uint32_t
2002table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
2003{
2004 struct table_info *ti;
2005 struct table_algo *ta;
2006 struct dump_args da;
2007
2008 ti = KIDX_TO_TI(ch, tc->no.kidx);
2009 ta = tc->ta;
2010
2011 /* Use internal counter for self-managed tables */
2012 if ((ta->flags & TA_FLAG_READONLY) == 0)
2013 return (tc->count);
2014
2015 /* Use callback to quickly get number of items */
2016 if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
2017 return (ta->get_count(tc->astate, ti));
2018
2019 /* Count number of iterms ourselves */
2020 memset(&da, 0, sizeof(da));
2021 ta->foreach(tc->astate, ti, count_ext_entries, &da);
2022
2023 return (da.cnt);
2024}
2025
2026/*
2027 * Exports table @tc info into standard ipfw_xtable_info format.
2028 */
2029static void
2030export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
2031 ipfw_xtable_info *i)
2032{
2033 struct table_info *ti;
2034 struct table_algo *ta;
2035
2036 i->type = tc->no.subtype;
2037 i->tflags = tc->tflags;
2038 i->vmask = tc->vmask;
2039 i->set = tc->no.set;
2040 i->kidx = tc->no.kidx;
2041 i->refcnt = tc->no.refcnt;
2042 i->count = table_get_count(ch, tc);
2043 i->limit = tc->limit;
2044 i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0;
2045 i->size = i->count * sizeof(ipfw_obj_tentry);
2046 i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
2047 strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
2048 ti = KIDX_TO_TI(ch, tc->no.kidx);
2049 ta = tc->ta;
2050 if (ta->print_config != NULL) {
2051 /* Use algo function to print table config to string */
2052 ta->print_config(tc->astate, ti, i->algoname,
2053 sizeof(i->algoname));
2054 } else
2055 strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2056 /* Dump algo-specific data, if possible */
2057 if (ta->dump_tinfo != NULL) {
2058 ta->dump_tinfo(tc->astate, ti, &i->ta_info);
2059 i->ta_info.flags |= IPFW_TATFLAGS_DATA;
2060 }
2061}
2062
/*
 * Argument bundle passed to export_table_internal() while iterating
 * over all tables.
 */
struct dump_table_args {
	struct ip_fw_chain	*ch;	/* chain whose tables are exported */
	struct sockopt_data	*sd;	/* reply buffer being filled */
};
2067
2068static int
2069export_table_internal(struct namedobj_instance *ni, struct named_object *no,
2070 void *arg)
2071{
2072 ipfw_xtable_info *i;
2073 struct dump_table_args *dta;
2074
2075 dta = (struct dump_table_args *)arg;
2076
2077 i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
2078 KASSERT(i != NULL, ("previously checked buffer is not enough"));
2079
2080 export_table_info(dta->ch, (struct table_config *)no, i);
2081 return (0);
2082}
2083
2084/*
2085 * Export all tables as ipfw_xtable_info structures to
2086 * storage provided by @sd.
2087 *
2088 * If supplied buffer is too small, fills in required size
2089 * and returns ENOMEM.
2090 * Returns 0 on success.
2091 */
2092static int
2093export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
2094 struct sockopt_data *sd)
2095{
2096 uint32_t size;
2097 uint32_t count;
2098 struct dump_table_args dta;
2099
2100 count = ipfw_objhash_count(CHAIN_TO_NI(ch));
2101 size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
2102
2103 /* Fill in header regadless of buffer size */
2104 olh->count = count;
2105 olh->objsize = sizeof(ipfw_xtable_info);
2106
2107 if (size > olh->size) {
2108 olh->size = size;
2109 return (ENOMEM);
2110 }
2111
2112 olh->size = size;
2113
2114 dta.ch = ch;
2115 dta.sd = sd;
2116
2117 ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);
2118
2119 return (0);
2120}
2121
2122/*
2123 * Dumps all table data
2124 * Data layout (v1)(current):
2125 * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
2126 * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
2127 *
2128 * Returns 0 on success
2129 */
2130static int
2131dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2132 struct sockopt_data *sd)
2133{
2134 struct _ipfw_obj_header *oh;
2135 ipfw_xtable_info *i;
2136 struct tid_info ti;
2137 struct table_config *tc;
2138 struct table_algo *ta;
2139 struct dump_args da;
2140 uint32_t sz;
2141
2142 sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
2143 oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
2144 if (oh == NULL)
2145 return (EINVAL);
2146
2147 i = (ipfw_xtable_info *)(oh + 1);
2148 objheader_to_ti(oh, &ti);
2149
2150 IPFW_UH_RLOCK(ch);
2151 if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
2152 IPFW_UH_RUNLOCK(ch);
2153 return (ESRCH);
2154 }
2155 export_table_info(ch, tc, i);
2156
2157 if (sd->valsize < i->size) {
2158
2159 /*
2160 * Submitted buffer size is not enough.
2161 * WE've already filled in @i structure with
2162 * relevant table info including size, so we
2163 * can return. Buffer will be flushed automatically.
2164 */
2165 IPFW_UH_RUNLOCK(ch);
2166 return (ENOMEM);
2167 }
2168
2169 /*
2170 * Do the actual dump in eXtended format
2171 */
2172 memset(&da, 0, sizeof(da));
2173 da.ch = ch;
2174 da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2175 da.tc = tc;
2176 da.sd = sd;
2177
2178 ta = tc->ta;
2179
2180 ta->foreach(tc->astate, da.ti, dump_table_tentry, &da);
2181 IPFW_UH_RUNLOCK(ch);
2182
2183 return (da.error);
2184}
2185
2186/*
2187 * Dumps all table data
2188 * Data layout (version 0)(legacy):
2189 * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE()
2190 * Reply: [ ipfw_xtable ipfw_table_xentry x N ]
2191 *
2192 * Returns 0 on success
2193 */
2194static int
2195dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2196 struct sockopt_data *sd)
2197{
2198 ipfw_xtable *xtbl;
2199 struct tid_info ti;
2200 struct table_config *tc;
2201 struct table_algo *ta;
2202 struct dump_args da;
2203 size_t sz, count;
2204
2205 xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable));
2206 if (xtbl == NULL)
2207 return (EINVAL);
2208
2209 memset(&ti, 0, sizeof(ti));
2210 ti.uidx = xtbl->tbl;
2211
2212 IPFW_UH_RLOCK(ch);
2213 if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
2214 IPFW_UH_RUNLOCK(ch);
2215 return (0);
2216 }
2217 count = table_get_count(ch, tc);
2218 sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable);
2219
2220 xtbl->cnt = count;
2221 xtbl->size = sz;
2222 xtbl->type = tc->no.subtype;
2223 xtbl->tbl = ti.uidx;
2224
2225 if (sd->valsize < sz) {
2226
2227 /*
2228 * Submitted buffer size is not enough.
2229 * WE've already filled in @i structure with
2230 * relevant table info including size, so we
2231 * can return. Buffer will be flushed automatically.
2232 */
2233 IPFW_UH_RUNLOCK(ch);
2234 return (ENOMEM);
2235 }
2236
2237 /* Do the actual dump in eXtended format */
2238 memset(&da, 0, sizeof(da));
2239 da.ch = ch;
2240 da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2241 da.tc = tc;
2242 da.sd = sd;
2243
2244 ta = tc->ta;
2245
2246 ta->foreach(tc->astate, da.ti, dump_table_xentry, &da);
2247 IPFW_UH_RUNLOCK(ch);
2248
2249 return (0);
2250}
2251
2252/*
2253 * Legacy function to retrieve number of items in table.
2254 */
2255static int
2256get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2257 struct sockopt_data *sd)
2258{
2259 uint32_t *tbl;
2260 struct tid_info ti;
2261 size_t sz;
2262 int error;
2263
2264 sz = sizeof(*op3) + sizeof(uint32_t);
2265 op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz);
2266 if (op3 == NULL)
2267 return (EINVAL);
2268
2269 tbl = (uint32_t *)(op3 + 1);
2270 memset(&ti, 0, sizeof(ti));
2271 ti.uidx = *tbl;
2272 IPFW_UH_RLOCK(ch);
2273 error = ipfw_count_xtable(ch, &ti, tbl);
2274 IPFW_UH_RUNLOCK(ch);
2275 return (error);
2276}
2277
2278/*
2279 * Legacy IP_FW_TABLE_GETSIZE handler
2280 */
2281int
2282ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
2283{
2284 struct table_config *tc;
2285
2286 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
2287 return (ESRCH);
2288 *cnt = table_get_count(ch, tc);
2289 return (0);
2290}
2291
2292/*
2293 * Legacy IP_FW_TABLE_XGETSIZE handler
2294 */
2295int
2296ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
2297{
2298 struct table_config *tc;
2299 uint32_t count;
2300
2301 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) {
2302 *cnt = 0;
2303 return (0); /* 'table all list' requires success */
2304 }
2305
2306 count = table_get_count(ch, tc);
2307 *cnt = count * sizeof(ipfw_table_xentry);
2308 if (count > 0)
2309 *cnt += sizeof(ipfw_xtable);
2310 return (0);
2311}
2312
2313static int
2314dump_table_entry(void *e, void *arg)
2315{
2316 struct dump_args *da;
2317 struct table_config *tc;
2318 struct table_algo *ta;
2319 ipfw_table_entry *ent;
2320 struct table_value *pval;
2321 int error;
2322
2323 da = (struct dump_args *)arg;
2324
2325 tc = da->tc;
2326 ta = tc->ta;
2327
2328 /* Out of memory, returning */
2329 if (da->cnt == da->size)
2330 return (1);
2331 ent = da->ent++;
2332 ent->tbl = da->uidx;
2333 da->cnt++;
2334
2335 error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
2336 if (error != 0)
2337 return (error);
2338
2339 ent->addr = da->tent.k.addr.s_addr;
2340 ent->masklen = da->tent.masklen;
2341 pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
2342 ent->value = ipfw_export_table_value_legacy(pval);
2343
2344 return (0);
2345}
2346
2347/*
2348 * Dumps table in pre-8.1 legacy format.
2349 */
2350int
2351ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
2352 ipfw_table *tbl)
2353{
2354 struct table_config *tc;
2355 struct table_algo *ta;
2356 struct dump_args da;
2357
2358 tbl->cnt = 0;
2359
2360 if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
2361 return (0); /* XXX: We should return ESRCH */
2362
2363 ta = tc->ta;
2364
2365 /* This dump format supports IPv4 only */
2366 if (tc->no.subtype != IPFW_TABLE_ADDR)
2367 return (0);
2368
2369 memset(&da, 0, sizeof(da));
2370 da.ch = ch;
2371 da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2372 da.tc = tc;
2373 da.ent = &tbl->ent[0];
2374 da.size = tbl->size;
2375
2376 tbl->cnt = 0;
2377 ta->foreach(tc->astate, da.ti, dump_table_entry, &da);
2378 tbl->cnt = da.cnt;
2379
2380 return (0);
2381}
2382
2383/*
2384 * Dumps table entry in eXtended format (v1)(current).
2385 */
2386static int
2387dump_table_tentry(void *e, void *arg)
2388{
2389 struct dump_args *da;
2390 struct table_config *tc;
2391 struct table_algo *ta;
2392 struct table_value *pval;
2393 ipfw_obj_tentry *tent;
2394 int error;
2395
2396 da = (struct dump_args *)arg;
2397
2398 tc = da->tc;
2399 ta = tc->ta;
2400
2401 tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent));
2402 /* Out of memory, returning */
2403 if (tent == NULL) {
2404 da->error = ENOMEM;
2405 return (1);
2406 }
2407 tent->head.length = sizeof(ipfw_obj_tentry);
2408 tent->idx = da->uidx;
2409
2410 error = ta->dump_tentry(tc->astate, da->ti, e, tent);
2411 if (error != 0)
2412 return (error);
2413
2414 pval = get_table_value(da->ch, da->tc, tent->v.kidx);
2415 ipfw_export_table_value_v1(pval, &tent->v.value);
2416
2417 return (0);
2418}
2419
2420/*
2421 * Dumps table entry in eXtended format (v0).
2422 */
2423static int
2424dump_table_xentry(void *e, void *arg)
2425{
2426 struct dump_args *da;
2427 struct table_config *tc;
2428 struct table_algo *ta;
2429 ipfw_table_xentry *xent;
2430 ipfw_obj_tentry *tent;
2431 struct table_value *pval;
2432 int error;
2433
2434 da = (struct dump_args *)arg;
2435
2436 tc = da->tc;
2437 ta = tc->ta;
2438
2439 xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent));
2440 /* Out of memory, returning */
2441 if (xent == NULL)
2442 return (1);
2443 xent->len = sizeof(ipfw_table_xentry);
2444 xent->tbl = da->uidx;
2445
2446 memset(&da->tent, 0, sizeof(da->tent));
2447 tent = &da->tent;
2448 error = ta->dump_tentry(tc->astate, da->ti, e, tent);
2449 if (error != 0)
2450 return (error);
2451
2452 /* Convert current format to previous one */
2453 xent->masklen = tent->masklen;
2454 pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
2455 xent->value = ipfw_export_table_value_legacy(pval);
2456 /* Apply some hacks */
2457 if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) {
2458 xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr;
2459 xent->flags = IPFW_TCF_INET;
2460 } else
2461 memcpy(&xent->k, &tent->k, sizeof(xent->k));
2462
2463 return (0);
2464}
2465
2466/*
2467 * Helper function to export table algo data
2468 * to tentry format before calling user function.
2469 *
2470 * Returns 0 on success.
2471 */
2472static int
2473prepare_table_tentry(void *e, void *arg)
2474{
2475 struct dump_args *da;
2476 struct table_config *tc;
2477 struct table_algo *ta;
2478 int error;
2479
2480 da = (struct dump_args *)arg;
2481
2482 tc = da->tc;
2483 ta = tc->ta;
2484
2485 error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
2486 if (error != 0)
2487 return (error);
2488
2489 da->f(&da->tent, da->farg);
2490
2491 return (0);
2492}
2493
2494/*
2495 * Allow external consumers to read table entries in standard format.
2496 */
2497int
2498ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
2499 ta_foreach_f *f, void *arg)
2500{
2501 struct namedobj_instance *ni;
2502 struct table_config *tc;
2503 struct table_algo *ta;
2504 struct dump_args da;
2505
2506 ni = CHAIN_TO_NI(ch);
2507
2508 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
2509 if (tc == NULL)
2510 return (ESRCH);
2511
2512 ta = tc->ta;
2513
2514 memset(&da, 0, sizeof(da));
2515 da.ch = ch;
2516 da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2517 da.tc = tc;
2518 da.f = f;
2519 da.farg = arg;
2520
2521 ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da);
2522
2523 return (0);
2524}
2525
2526/*
2527 * Table algorithms
2528 */
2529
2530/*
2531 * Finds algorithm by index, table type or supplied name.
2532 *
2533 * Returns pointer to algo or NULL.
2534 */
2535static struct table_algo *
2536find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name)
2537{
2538 int i, l;
2539 struct table_algo *ta;
2540
2541 if (ti->type > IPFW_TABLE_MAXTYPE)
2542 return (NULL);
2543
2544 /* Search by index */
2545 if (ti->atype != 0) {
2546 if (ti->atype > tcfg->algo_count)
2547 return (NULL);
2548 return (tcfg->algo[ti->atype]);
2549 }
2550
2551 if (name == NULL) {
2552 /* Return default algorithm for given type if set */
2553 return (tcfg->def_algo[ti->type]);
2554 }
2555
2556 /* Search by name */
2557 /* TODO: better search */
2558 for (i = 1; i <= tcfg->algo_count; i++) {
2559 ta = tcfg->algo[i];
2560
2561 /*
2562 * One can supply additional algorithm
2563 * parameters so we compare only the first word
2564 * of supplied name:
2565 * 'addr:chash hsize=32'
2566 * '^^^^^^^^^'
2567 *
2568 */
2569 l = strlen(ta->name);
2570 if (strncmp(name, ta->name, l) != 0)
2571 continue;
2572 if (name[l] != '\0' && name[l] != ' ')
2573 continue;
2574 /* Check if we're requesting proper table type */
2575 if (ti->type != 0 && ti->type != ta->type)
2576 return (NULL);
2577 return (ta);
2578 }
2579
2580 return (NULL);
2581}
2582
2583/*
2584 * Register new table algo @ta.
2585 * Stores algo id inside @idx.
2586 *
2587 * Returns 0 on success.
2588 */
2589int
2590ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size,
2591 int *idx)
2592{
2593 struct tables_config *tcfg;
2594 struct table_algo *ta_new;
2595 size_t sz;
2596
2597 if (size > sizeof(struct table_algo))
2598 return (EINVAL);
2599
2600 /* Check for the required on-stack size for add/del */
2601 sz = roundup2(ta->ta_buf_size, sizeof(void *));
2602 if (sz > TA_BUF_SZ)
2603 return (EINVAL);
2604
2605 KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE"));
2606
2607 /* Copy algorithm data to stable storage. */
2608 ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO);
2609 memcpy(ta_new, ta, size);
2610
2611 tcfg = CHAIN_TO_TCFG(ch);
2612
2613 KASSERT(tcfg->algo_count < 255, ("Increase algo array size"));
2614
2615 tcfg->algo[++tcfg->algo_count] = ta_new;
2616 ta_new->idx = tcfg->algo_count;
2617
2618 /* Set algorithm as default one for given type */
2619 if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 &&
2620 tcfg->def_algo[ta_new->type] == NULL)
2621 tcfg->def_algo[ta_new->type] = ta_new;
2622
2623 *idx = ta_new->idx;
2624
2625 return (0);
2626}
2627
2628/*
2629 * Unregisters table algo using @idx as id.
2630 * XXX: It is NOT safe to call this function in any place
2631 * other than ipfw instance destroy handler.
2632 */
2633void
2634ipfw_del_table_algo(struct ip_fw_chain *ch, int idx)
2635{
2636 struct tables_config *tcfg;
2637 struct table_algo *ta;
2638
2639 tcfg = CHAIN_TO_TCFG(ch);
2640
2641 KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d",
2642 idx, tcfg->algo_count));
2643
2644 ta = tcfg->algo[idx];
2645 KASSERT(ta != NULL, ("algo idx %d is NULL", idx));
2646
2647 if (tcfg->def_algo[ta->type] == ta)
2648 tcfg->def_algo[ta->type] = NULL;
2649
2650 free(ta, M_IPFW);
2651}
2652
2653/*
2654 * Lists all table algorithms currently available.
2655 * Data layout (v0)(current):
2656 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
2657 * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
2658 *
2659 * Returns 0 on success
2660 */
2661static int
2662list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2663 struct sockopt_data *sd)
2664{
2665 struct _ipfw_obj_lheader *olh;
2666 struct tables_config *tcfg;
2667 ipfw_ta_info *i;
2668 struct table_algo *ta;
2669 uint32_t count, n, size;
2670
2671 olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
2672 if (olh == NULL)
2673 return (EINVAL);
2674 if (sd->valsize < olh->size)
2675 return (EINVAL);
2676
2677 IPFW_UH_RLOCK(ch);
2678 tcfg = CHAIN_TO_TCFG(ch);
2679 count = tcfg->algo_count;
2680 size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
2681
2682 /* Fill in header regadless of buffer size */
2683 olh->count = count;
2684 olh->objsize = sizeof(ipfw_ta_info);
2685
2686 if (size > olh->size) {
2687 olh->size = size;
2688 IPFW_UH_RUNLOCK(ch);
2689 return (ENOMEM);
2690 }
2691 olh->size = size;
2692
2693 for (n = 1; n <= count; n++) {
2694 i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i));
2695 KASSERT(i != NULL, ("previously checked buffer is not enough"));
2696 ta = tcfg->algo[n];
2697 strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2698 i->type = ta->type;
2699 i->refcnt = ta->refcnt;
2700 }
2701
2702 IPFW_UH_RUNLOCK(ch);
2703
2704 return (0);
2705}
2706
2707static int
2708classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
2709{
2710 /* Basic IPv4/IPv6 or u32 lookups */
2711 *puidx = cmd->arg1;
2712 /* Assume ADDR by default */
2713 *ptype = IPFW_TABLE_ADDR;
2714 int v;
2715
2716 if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) {
2717 /*
2718 * generic lookup. The key must be
2719 * in 32bit big-endian format.
2720 */
2721 v = ((ipfw_insn_u32 *)cmd)->d[1];
2722 switch (v) {
2723 case 0:
2724 case 1:
2725 /* IPv4 src/dst */
2726 break;
2727 case 2:
2728 case 3:
2729 /* src/dst port */
2730 *ptype = IPFW_TABLE_NUMBER;
2731 break;
2732 case 4:
2733 /* uid/gid */
2734 *ptype = IPFW_TABLE_NUMBER;
2735 break;
2736 case 5:
2737 /* jid */
2738 *ptype = IPFW_TABLE_NUMBER;
2739 break;
2740 case 6:
2741 /* dscp */
2742 *ptype = IPFW_TABLE_NUMBER;
2743 break;
2744 }
2745 }
2746
2747 return (0);
2748}
2749
2750static int
2751classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
2752{
2753 ipfw_insn_if *cmdif;
2754
2755 /* Interface table, possibly */
2756 cmdif = (ipfw_insn_if *)cmd;
2757 if (cmdif->name[0] != '\1')
2758 return (1);
2759
2760 *ptype = IPFW_TABLE_INTERFACE;
2761 *puidx = cmdif->p.kidx;
2762
2763 return (0);
2764}
2765
2766static int
2767classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
2768{
2769
2770 *puidx = cmd->arg1;
2771 *ptype = IPFW_TABLE_FLOW;
2772
2773 return (0);
2774}
2775
2776static void
2777update_arg1(ipfw_insn *cmd, uint16_t idx)
2778{
2779
2780 cmd->arg1 = idx;
2781}
2782
2783static void
2784update_via(ipfw_insn *cmd, uint16_t idx)
2785{
2786 ipfw_insn_if *cmdif;
2787
2788 cmdif = (ipfw_insn_if *)cmd;
2789 cmdif->p.kidx = idx;
2790}
2791
2792static int
2793table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
2794 struct named_object **pno)
2795{
2796 struct table_config *tc;
2797 int error;
2798
2799 IPFW_UH_WLOCK_ASSERT(ch);
2800
2801 error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
2802 if (error != 0)
2803 return (error);
2804
2805 *pno = &tc->no;
2806 return (0);
2807}
2808
2809/* XXX: sets-sets! */
2810static struct named_object *
2811table_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
2812{
2813 struct namedobj_instance *ni;
2814 struct table_config *tc;
2815
2816 IPFW_UH_WLOCK_ASSERT(ch);
2817 ni = CHAIN_TO_NI(ch);
2818 tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx);
2819 KASSERT(tc != NULL, ("Table with index %d not found", idx));
2820
2821 return (&tc->no);
2822}
2823
2824static int
2825table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
2826 enum ipfw_sets_cmd cmd)
2827{
2828
2829 switch (cmd) {
2830 case SWAP_ALL:
2831 case TEST_ALL:
2832 case MOVE_ALL:
2833 /*
2834 * Always return success, the real action and decision
2835 * should make table_manage_sets_all().
2836 */
2837 return (0);
2838 case TEST_ONE:
2839 case MOVE_ONE:
2840 /*
2841 * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add
2842 * if set number will be used in hash function. Currently
2843 * we can just use generic handler that replaces set value.
2844 */
2845 if (V_fw_tables_sets == 0)
2846 return (0);
2847 break;
2848 case COUNT_ONE:
2849 /*
2850 * Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is
2851 * disabled. This allow skip table's opcodes from additional
2852 * checks when specific rules moved to another set.
2853 */
2854 if (V_fw_tables_sets == 0)
2855 return (EOPNOTSUPP);
2856 }
2857 /* Use generic sets handler when per-set sysctl is enabled. */
2858 return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
2859 set, new_set, cmd));
2860}
2861
2862/*
2863 * We register several opcode rewriters for lookup tables.
2864 * All tables opcodes have the same ETLV type, but different subtype.
2865 * To avoid invoking sets handler several times for XXX_ALL commands,
2866 * we use separate manage_sets handler. O_RECV has the lowest value,
2867 * so it should be called first.
2868 */
2869static int
2870table_manage_sets_all(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
2871 enum ipfw_sets_cmd cmd)
2872{
2873
2874 switch (cmd) {
2875 case SWAP_ALL:
2876 case TEST_ALL:
2877 /*
2878 * Return success for TEST_ALL, since nothing prevents
2879 * move rules from one set to another. All tables are
2880 * accessible from all sets when per-set tables sysctl
2881 * is disabled.
2882 */
2883 case MOVE_ALL:
2884 if (V_fw_tables_sets == 0)
2885 return (0);
2886 break;
2887 default:
2888 return (table_manage_sets(ch, set, new_set, cmd));
2889 }
2890 /* Use generic sets handler when per-set sysctl is enabled. */
2891 return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME,
2892 set, new_set, cmd));
2893}
2894
2895static struct opcode_obj_rewrite opcodes[] = {
2896 {
2897 .opcode = O_IP_SRC_LOOKUP,
2898 .etlv = IPFW_TLV_TBL_NAME,
2899 .classifier = classify_srcdst,
2900 .update = update_arg1,
2901 .find_byname = table_findbyname,
2902 .find_bykidx = table_findbykidx,
2903 .create_object = create_table_compat,
2904 .manage_sets = table_manage_sets,
2905 },
2906 {
2907 .opcode = O_IP_DST_LOOKUP,
2908 .etlv = IPFW_TLV_TBL_NAME,
2909 .classifier = classify_srcdst,
2910 .update = update_arg1,
2911 .find_byname = table_findbyname,
2912 .find_bykidx = table_findbykidx,
2913 .create_object = create_table_compat,
2914 .manage_sets = table_manage_sets,
2915 },
2916 {
2917 .opcode = O_IP_FLOW_LOOKUP,
2918 .etlv = IPFW_TLV_TBL_NAME,
2919 .classifier = classify_flow,
2920 .update = update_arg1,
2921 .find_byname = table_findbyname,
2922 .find_bykidx = table_findbykidx,
2923 .create_object = create_table_compat,
2924 .manage_sets = table_manage_sets,
2925 },
2926 {
2927 .opcode = O_XMIT,
2928 .etlv = IPFW_TLV_TBL_NAME,
2929 .classifier = classify_via,
2930 .update = update_via,
2931 .find_byname = table_findbyname,
2932 .find_bykidx = table_findbykidx,
2933 .create_object = create_table_compat,
2934 .manage_sets = table_manage_sets,
2935 },
2936 {
2937 .opcode = O_RECV,
2938 .etlv = IPFW_TLV_TBL_NAME,
2939 .classifier = classify_via,
2940 .update = update_via,
2941 .find_byname = table_findbyname,
2942 .find_bykidx = table_findbykidx,
2943 .create_object = create_table_compat,
2944 .manage_sets = table_manage_sets_all,
2945 },
2946 {
2947 .opcode = O_VIA,
2948 .etlv = IPFW_TLV_TBL_NAME,
2949 .classifier = classify_via,
2950 .update = update_via,
2951 .find_byname = table_findbyname,
2952 .find_bykidx = table_findbykidx,
2953 .create_object = create_table_compat,
2954 .manage_sets = table_manage_sets,
2955 },
2956};
2957
2958static int
2959test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no,
2960 void *arg __unused)
2961{
2962
2963 /* Check that there aren't any tables in not default set */
2964 if (no->set != 0)
2965 return (EBUSY);
2966 return (0);
2967}
2968
2969/*
2970 * Switch between "set 0" and "rule's set" table binding,
2971 * Check all ruleset bindings and permits changing
2972 * IFF each binding has both rule AND table in default set (set 0).
2973 *
2974 * Returns 0 on success.
2975 */
2976int
2977ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
2978{
2979 struct opcode_obj_rewrite *rw;
2980 struct namedobj_instance *ni;
2981 struct named_object *no;
2982 struct ip_fw *rule;
2983 ipfw_insn *cmd;
2984 int cmdlen, i, l;
2985 uint16_t kidx;
2986 uint8_t subtype;
2987
2988 IPFW_UH_WLOCK(ch);
2989
2990 if (V_fw_tables_sets == sets) {
2991 IPFW_UH_WUNLOCK(ch);
2992 return (0);
2993 }
2994 ni = CHAIN_TO_NI(ch);
2995 if (sets == 0) {
2996 /*
2997 * Prevent disabling sets support if we have some tables
2998 * in not default sets.
2999 */
3000 if (ipfw_objhash_foreach_type(ni, test_sets_cb,
3001 NULL, IPFW_TLV_TBL_NAME) != 0) {
3002 IPFW_UH_WUNLOCK(ch);
3003 return (EBUSY);
3004 }
3005 }
3006 /*
3007 * Scan all rules and examine tables opcodes.
3008 */
3009 for (i = 0; i < ch->n_rules; i++) {
3010 rule = ch->map[i];
3011
3012 l = rule->cmd_len;
3013 cmd = rule->cmd;
3014 cmdlen = 0;
3015 for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) {
3016 cmdlen = F_LEN(cmd);
3017 /* Check only tables opcodes */
3018 for (kidx = 0, rw = opcodes;
3019 rw < opcodes + nitems(opcodes); rw++) {
3020 if (rw->opcode != cmd->opcode)
3021 continue;
3022 if (rw->classifier(cmd, &kidx, &subtype) == 0)
3023 break;
3024 }
3025 if (kidx == 0)
3026 continue;
3027 no = ipfw_objhash_lookup_kidx(ni, kidx);
3028 /* Check if both table object and rule has the set 0 */
3029 if (no->set != 0 || rule->set != 0) {
3030 IPFW_UH_WUNLOCK(ch);
3031 return (EBUSY);
3032 }
3033
3034 }
3035 }
3036 V_fw_tables_sets = sets;
3037 IPFW_UH_WUNLOCK(ch);
3038 return (0);
3039}
3040
/*
 * Checks table name for validity.
 * Currently this only delegates to the generic object name check;
 * anything stricter should be done in userland.
 *
 * Returns 0 if name is considered valid.
 */
static int
check_table_name(const char *name)
{
	int error;

	/*
	 * TODO: do some more complicated checks
	 */
	error = ipfw_check_object_name_generic(name);

	return (error);
}
3057
3058/*
3059 * Finds table config based on either legacy index
3060 * or name in ntlv.
3061 * Note @ti structure contains unchecked data from userland.
3062 *
3063 * Returns 0 in success and fills in @tc with found config
3064 */
3065static int
3066find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
3067 struct table_config **tc)
3068{
3069 char *name, bname[16];
3070 struct named_object *no;
3071 ipfw_obj_ntlv *ntlv;
3072 uint32_t set;
3073
3074 if (ti->tlvs != NULL) {
3075 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
3076 IPFW_TLV_TBL_NAME);
3077 if (ntlv == NULL)
3078 return (EINVAL);
3079 name = ntlv->name;
3080
3081 /*
3082 * Use set provided by @ti instead of @ntlv one.
3083 * This is needed due to different sets behavior
3084 * controlled by V_fw_tables_sets.
3085 */
3086 set = (V_fw_tables_sets != 0) ? ti->set : 0;
3087 } else {
3088 snprintf(bname, sizeof(bname), "%d", ti->uidx);
3089 name = bname;
3090 set = 0;
3091 }
3092
3093 no = ipfw_objhash_lookup_name(ni, set, name);
3094 *tc = (struct table_config *)no;
3095
3096 return (0);
3097}
3098
3099/*
3100 * Finds table config based on either legacy index
3101 * or name in ntlv.
3102 * Note @ti structure contains unchecked data from userland.
3103 *
3104 * Returns pointer to table_config or NULL.
3105 */
3106static struct table_config *
3107find_table(struct namedobj_instance *ni, struct tid_info *ti)
3108{
3109 struct table_config *tc;
3110
3111 if (find_table_err(ni, ti, &tc) != 0)
3112 return (NULL);
3113
3114 return (tc);
3115}
3116
3117/*
3118 * Allocate new table config structure using
3119 * specified @algo and @aname.
3120 *
3121 * Returns pointer to config or NULL.
3122 */
3123static struct table_config *
3124alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
3125 struct table_algo *ta, char *aname, uint8_t tflags)
3126{
3127 char *name, bname[16];
3128 struct table_config *tc;
3129 int error;
3130 ipfw_obj_ntlv *ntlv;
3131 uint32_t set;
3132
3133 if (ti->tlvs != NULL) {
3134 ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx,
3135 IPFW_TLV_TBL_NAME);
3136 if (ntlv == NULL)
3137 return (NULL);
3138 name = ntlv->name;
3139 set = ntlv->set;
3140 } else {
3141 /* Compat part: convert number to string representation */
3142 snprintf(bname, sizeof(bname), "%d", ti->uidx);
3143 name = bname;
3144 set = 0;
3145 }
3146
3147 tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
3148 tc->no.name = tc->tablename;
3149 tc->no.subtype = ta->type;
3150 tc->no.set = set;
3151 tc->tflags = tflags;
3152 tc->ta = ta;
3153 strlcpy(tc->tablename, name, sizeof(tc->tablename));
3154 /* Set "shared" value type by default */
3155 tc->vshared = 1;
3156
3157 /* Preallocate data structures for new tables */
3158 error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags);
3159 if (error != 0) {
3160 free(tc, M_IPFW);
3161 return (NULL);
3162 }
3163
3164 return (tc);
3165}
3166
3167/*
3168 * Destroys table state and config.
3169 */
3170static void
3171free_table_config(struct namedobj_instance *ni, struct table_config *tc)
3172{
3173
3174 KASSERT(tc->linked == 0, ("free() on linked config"));
3175 /* UH lock MUST NOT be held */
3176
3177 /*
3178 * We're using ta without any locking/referencing.
3179 * TODO: fix this if we're going to use unloadable algos.
3180 */
3181 tc->ta->destroy(tc->astate, &tc->ti_copy);
3182 free(tc, M_IPFW);
3183}
3184
3185/*
3186 * Links @tc to @chain table named instance.
3187 * Sets appropriate type/states in @chain table info.
3188 */
3189static void
3190link_table(struct ip_fw_chain *ch, struct table_config *tc)
3191{
3192 struct namedobj_instance *ni;
3193 struct table_info *ti;
3194 uint16_t kidx;
3195
3196 IPFW_UH_WLOCK_ASSERT(ch);
3197 IPFW_WLOCK_ASSERT(ch);
3198
3199 ni = CHAIN_TO_NI(ch);
3200 kidx = tc->no.kidx;
3201
3202 ipfw_objhash_add(ni, &tc->no);
3203
3204 ti = KIDX_TO_TI(ch, kidx);
3205 *ti = tc->ti_copy;
3206
3207 /* Notify algo on real @ti address */
3208 if (tc->ta->change_ti != NULL)
3209 tc->ta->change_ti(tc->astate, ti);
3210
3211 tc->linked = 1;
3212 tc->ta->refcnt++;
3213}
3214
3215/*
3216 * Unlinks @tc from @chain table named instance.
3217 * Zeroes states in @chain and stores them in @tc.
3218 */
3219static void
3220unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
3221{
3222 struct namedobj_instance *ni;
3223 struct table_info *ti;
3224 uint16_t kidx;
3225
3226 IPFW_UH_WLOCK_ASSERT(ch);
3227 IPFW_WLOCK_ASSERT(ch);
3228
3229 ni = CHAIN_TO_NI(ch);
3230 kidx = tc->no.kidx;
3231
3232 /* Clear state. @ti copy is already saved inside @tc */
3233 ipfw_objhash_del(ni, &tc->no);
3234 ti = KIDX_TO_TI(ch, kidx);
3235 memset(ti, 0, sizeof(struct table_info));
3236 tc->linked = 0;
3237 tc->ta->refcnt--;
3238
3239 /* Notify algo on real @ti address */
3240 if (tc->ta->change_ti != NULL)
3241 tc->ta->change_ti(tc->astate, NULL);
3242}
3243
3244static struct ipfw_sopt_handler scodes[] = {
3245 { IP_FW_TABLE_XCREATE, 0, HDIR_SET, create_table },
3246 { IP_FW_TABLE_XDESTROY, 0, HDIR_SET, flush_table_v0 },
3247 { IP_FW_TABLE_XFLUSH, 0, HDIR_SET, flush_table_v0 },
3248 { IP_FW_TABLE_XMODIFY, 0, HDIR_BOTH, modify_table },
3249 { IP_FW_TABLE_XINFO, 0, HDIR_GET, describe_table },
3250 { IP_FW_TABLES_XLIST, 0, HDIR_GET, list_tables },
3251 { IP_FW_TABLE_XLIST, 0, HDIR_GET, dump_table_v0 },
3252 { IP_FW_TABLE_XLIST, 1, HDIR_GET, dump_table_v1 },
3253 { IP_FW_TABLE_XADD, 0, HDIR_BOTH, manage_table_ent_v0 },
3254 { IP_FW_TABLE_XADD, 1, HDIR_BOTH, manage_table_ent_v1 },
3255 { IP_FW_TABLE_XDEL, 0, HDIR_BOTH, manage_table_ent_v0 },
3256 { IP_FW_TABLE_XDEL, 1, HDIR_BOTH, manage_table_ent_v1 },
3257 { IP_FW_TABLE_XFIND, 0, HDIR_GET, find_table_entry },
3258 { IP_FW_TABLE_XSWAP, 0, HDIR_SET, swap_table },
3259 { IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo },
3260 { IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size },
3261};
3262
3263static int
3264destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
3265 void *arg)
3266{
3267
3268 unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no);
3269 if (ipfw_objhash_free_idx(ni, no->kidx) != 0)
3270 printf("Error unlinking kidx %d from table %s\n",
3271 no->kidx, no->name);
3272 free_table_config(ni, (struct table_config *)no);
3273 return (0);
3274}
3275
3276/*
3277 * Shuts tables module down.
3278 */
3279void
3280ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
3281{
3282
3283 IPFW_DEL_SOPT_HANDLER(last, scodes);
3284 IPFW_DEL_OBJ_REWRITER(last, opcodes);
3285
3286 /* Remove all tables from working set */
3287 IPFW_UH_WLOCK(ch);
3288 IPFW_WLOCK(ch);
3289 ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
3290 IPFW_WUNLOCK(ch);
3291 IPFW_UH_WUNLOCK(ch);
3292
3293 /* Free pointers itself */
3294 free(ch->tablestate, M_IPFW);
3295
3296 ipfw_table_value_destroy(ch, last);
3297 ipfw_table_algo_destroy(ch);
3298
3299 ipfw_objhash_destroy(CHAIN_TO_NI(ch));
3300 free(CHAIN_TO_TCFG(ch), M_IPFW);
3301}
3302
3303/*
3304 * Starts tables module.
3305 */
3306int
3307ipfw_init_tables(struct ip_fw_chain *ch, int first)
3308{
3309 struct tables_config *tcfg;
3310
3311 /* Allocate pointers */
3312 ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info),
3313 M_IPFW, M_WAITOK | M_ZERO);
3314
3315 tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO);
3316 tcfg->namehash = ipfw_objhash_create(V_fw_tables_max);
3317 ch->tblcfg = tcfg;
3318
3319 ipfw_table_value_init(ch, first);
3320 ipfw_table_algo_init(ch);
3321
3322 IPFW_ADD_OBJ_REWRITER(first, opcodes);
3323 IPFW_ADD_SOPT_HANDLER(first, scodes);
3324 return (0);
3325}
3326
3327
3328