ip_fw_table.c revision 274087
1193326Sed/*-
2193326Sed * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
3193326Sed * Copyright (c) 2014 Yandex LLC
4193326Sed * Copyright (c) 2014 Alexander V. Chernikov
5193326Sed *
6193326Sed * Redistribution and use in source and binary forms, with or without
7193326Sed * modification, are permitted provided that the following conditions
8193326Sed * are met:
9193326Sed * 1. Redistributions of source code must retain the above copyright
10221345Sdim *    notice, this list of conditions and the following disclaimer.
11193326Sed * 2. Redistributions in binary form must reproduce the above copyright
12193326Sed *    notice, this list of conditions and the following disclaimer in the
13193326Sed *    documentation and/or other materials provided with the distribution.
14193326Sed *
15193326Sed * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16193326Sed * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17193326Sed * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18193326Sed * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19193326Sed * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20218893Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21193326Sed * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22193326Sed * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23193326Sed * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24193326Sed * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25193326Sed * SUCH DAMAGE.
26221345Sdim */
27221345Sdim
28193326Sed#include <sys/cdefs.h>
29193326Sed__FBSDID("$FreeBSD: head/sys/netpfil/ipfw/ip_fw_table.c 274087 2014-11-04 10:25:52Z melifaro $");
30193326Sed
31207619Srdivacky/*
32193326Sed * Lookup table support for ipfw.
33193326Sed *
34221345Sdim * This file contains handlers for all generic tables' operations:
35193326Sed * add/del/flush entries, list/dump tables etc..
36193326Sed *
37193326Sed * Table data modification is protected by both UH and runtime lock
38226633Sdim * while reading configuration/data is protected by UH lock.
39193326Sed *
40193326Sed * Lookup algorithms for all table types are located in ip_fw_table_algo.c
41193326Sed */
42193326Sed
43193326Sed#include "opt_ipfw.h"
44193326Sed
45193326Sed#include <sys/param.h>
46221345Sdim#include <sys/systm.h>
47221345Sdim#include <sys/malloc.h>
48221345Sdim#include <sys/kernel.h>
49221345Sdim#include <sys/lock.h>
50221345Sdim#include <sys/rwlock.h>
51224145Sdim#include <sys/rmlock.h>
52221345Sdim#include <sys/socket.h>
53223017Sdim#include <sys/socketvar.h>
54221345Sdim#include <sys/queue.h>
55221345Sdim#include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */
56221345Sdim
57221345Sdim#include <netinet/in.h>
58221345Sdim#include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
59221345Sdim#include <netinet/ip_fw.h>
60221345Sdim
61221345Sdim#include <netpfil/ipfw/ip_fw_private.h>
62221345Sdim#include <netpfil/ipfw/ip_fw_table.h>
63221345Sdim
64224145Sdim /*
65221345Sdim * Table has the following `type` concepts:
66221345Sdim *
67221345Sdim * `no.type` represents lookup key type (addr, ifp, uid, etc..)
68221345Sdim * vmask represents bitmask of table values which are present at the moment.
69221345Sdim * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old
70221345Sdim * single-value-for-all approach.
71224145Sdim */
72221345Sdimstruct table_config {
73221345Sdim	struct named_object	no;
74221345Sdim	uint8_t		tflags;		/* type flags */
75221345Sdim	uint8_t		locked;		/* 1 if locked from changes */
76221345Sdim	uint8_t		linked;		/* 1 if already linked */
77221345Sdim	uint8_t		ochanged;	/* used by set swapping */
78221345Sdim	uint8_t		vshared;	/* 1 if using shared value array */
79223017Sdim	uint8_t		spare[3];
80221345Sdim	uint32_t	count;		/* Number of records */
81221345Sdim	uint32_t	limit;		/* Max number of records */
82221345Sdim	uint32_t	vmask;		/* bitmask with supported values */
83226633Sdim	uint32_t	ocount;		/* used by set swapping */
84221345Sdim	uint64_t	gencnt;		/* generation count */
85221345Sdim	char		tablename[64];	/* table name */
86221345Sdim	struct table_algo	*ta;	/* Callbacks for given algo */
87223017Sdim	void		*astate;	/* algorithm state */
88221345Sdim	struct table_info	ti_copy;	/* data to put to table_info */
89221345Sdim	struct namedobj_instance	*vi;
90221345Sdim};
91221345Sdim
92221345Sdimstatic struct table_config *find_table(struct namedobj_instance *ni,
93221345Sdim    struct tid_info *ti);
94223017Sdimstatic struct table_config *alloc_table_config(struct ip_fw_chain *ch,
95223017Sdim    struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
96223017Sdimstatic void free_table_config(struct namedobj_instance *ni,
97223017Sdim    struct table_config *tc);
98221345Sdimstatic int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
99221345Sdim    char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref);
100221345Sdimstatic void link_table(struct ip_fw_chain *ch, struct table_config *tc);
101221345Sdimstatic void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
102221345Sdimstatic int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
103221345Sdim    struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
104221345Sdim#define	OP_ADD	1
105221345Sdim#define	OP_DEL	0
106221345Sdimstatic int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
107193326Sed    struct sockopt_data *sd);
108221345Sdimstatic void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
109221345Sdim    ipfw_xtable_info *i);
110226633Sdimstatic int dump_table_tentry(void *e, void *arg);
111221345Sdimstatic int dump_table_xentry(void *e, void *arg);
112221345Sdim
113226633Sdimstatic int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
114221345Sdim    struct tid_info *b);
115221345Sdim
116221345Sdimstatic int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
117224145Sdim    struct table_config *tc, struct table_info *ti, uint32_t count);
118221345Sdimstatic int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);
119221345Sdim
120226633Sdimstatic struct table_algo *find_table_algo(struct tables_config *tableconf,
121221345Sdim    struct tid_info *ti, char *name);
122221345Sdim
123224145Sdimstatic void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
124221345Sdimstatic void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
125221345Sdimstatic int classify_table_opcode(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype);
126221345Sdim
127221345Sdim#define	CHAIN_TO_NI(chain)	(CHAIN_TO_TCFG(chain)->namehash)
128221345Sdim#define	KIDX_TO_TI(ch, k)	(&(((struct table_info *)(ch)->tablestate)[k]))
129224145Sdim
130221345Sdim#define	TA_BUF_SZ	128	/* On-stack buffer for add/delete state */
131221345Sdim
132221345Sdimvoid
133221345Sdimrollback_toperation_state(struct ip_fw_chain *ch, void *object)
134224145Sdim{
135221345Sdim	struct tables_config *tcfg;
136221345Sdim	struct op_state *os;
137226633Sdim
138221345Sdim	tcfg = CHAIN_TO_TCFG(ch);
139221345Sdim	TAILQ_FOREACH(os, &tcfg->state_list, next)
140204643Srdivacky		os->func(object, os);
141221345Sdim}
142224145Sdim
143221345Sdimvoid
144221345Sdimadd_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
145221345Sdim{
146224145Sdim	struct tables_config *tcfg;
147221345Sdim
148221345Sdim	tcfg = CHAIN_TO_TCFG(ch);
149221345Sdim	TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next);
150224145Sdim}
151221345Sdim
152226633Sdimvoid
153226633Sdimdel_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
154226633Sdim{
155221345Sdim	struct tables_config *tcfg;
156226633Sdim
157221345Sdim	tcfg = CHAIN_TO_TCFG(ch);
158221345Sdim	TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next);
159226633Sdim}
160221345Sdim
161226633Sdimvoid
162226633Sdimtc_ref(struct table_config *tc)
163226633Sdim{
164226633Sdim
165226633Sdim	tc->no.refcnt++;
166221345Sdim}
167221345Sdim
168221345Sdimvoid
169221345Sdimtc_unref(struct table_config *tc)
170221345Sdim{
171221345Sdim
172221345Sdim	tc->no.refcnt--;
173221345Sdim}
174221345Sdim
175221345Sdimstatic struct table_value *
176226633Sdimget_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
177221345Sdim{
178221345Sdim	struct table_value *pval;
179221345Sdim
180221345Sdim	pval = (struct table_value *)ch->valuestate;
181221345Sdim
182221345Sdim	return (&pval[kidx]);
183221345Sdim}
184221345Sdim
185221345Sdim
186221345Sdim/*
187234353Sdim * Checks if we're able to insert/update entry @tei into table
188221345Sdim * w.r.t @tc limits.
189221345Sdim * May alter @tei to indicate insertion error / insert
190221345Sdim * options.
191226633Sdim *
192221345Sdim * Returns 0 if operation can be performed/
193221345Sdim */
194221345Sdimstatic int
195221345Sdimcheck_table_limit(struct table_config *tc, struct tentry_info *tei)
196226633Sdim{
197226633Sdim
198226633Sdim	if (tc->limit == 0 || tc->count < tc->limit)
199221345Sdim		return (0);
200221345Sdim
201221345Sdim	if ((tei->flags & TEI_FLAGS_UPDATE) == 0) {
202221345Sdim		/* Notify userland on error cause */
203221345Sdim		tei->flags |= TEI_FLAGS_LIMIT;
204221345Sdim		return (EFBIG);
205221345Sdim	}
206221345Sdim
207221345Sdim	/*
208226633Sdim	 * We have UPDATE flag set.
209226633Sdim	 * Permit updating record (if found),
210226633Sdim	 * but restrict adding new one since we've
211221345Sdim	 * already hit the limit.
212221345Sdim	 */
213221345Sdim	tei->flags |= TEI_FLAGS_DONTADD;
214221345Sdim
215221345Sdim	return (0);
216221345Sdim}
217221345Sdim
218221345Sdim/*
219226633Sdim * Convert algorithm callback return code into
220226633Sdim * one of pre-defined states known by userland.
221226633Sdim */
222221345Sdimstatic void
223221345Sdimstore_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
224221345Sdim{
225221345Sdim	int flag;
226221345Sdim
227221345Sdim	flag = 0;
228221345Sdim
229221345Sdim	switch (error) {
230226633Sdim	case 0:
231221345Sdim		if (op == OP_ADD && num != 0)
232221345Sdim			flag = TEI_FLAGS_ADDED;
233221345Sdim		if (op == OP_DEL)
234221345Sdim			flag = TEI_FLAGS_DELETED;
235221345Sdim		break;
236221345Sdim	case ENOENT:
237221345Sdim		flag = TEI_FLAGS_NOTFOUND;
238221345Sdim		break;
239221345Sdim	case EEXIST:
240221345Sdim		flag = TEI_FLAGS_EXISTS;
241221345Sdim		break;
242221345Sdim	default:
243221345Sdim		flag = TEI_FLAGS_ERROR;
244221345Sdim	}
245193326Sed
246221345Sdim	tei->flags |= flag;
247221345Sdim}
248221345Sdim
249221345Sdim/*
250193326Sed * Creates and references table with default parameters.
251221345Sdim * Saves table config, algo and allocated kidx info @ptc, @pta and
252193326Sed * @pkidx if non-zero.
253221345Sdim * Used for table auto-creation to support old binaries.
254193326Sed *
255221345Sdim * Returns 0 on success.
256221345Sdim */
257193326Sedstatic int
258221345Sdimcreate_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
259221345Sdim    uint16_t *pkidx)
260221345Sdim{
261203955Srdivacky	ipfw_xtable_info xi;
262221345Sdim	int error;
263193326Sed
264221345Sdim	memset(&xi, 0, sizeof(xi));
265221345Sdim	/* Set default value mask for legacy clients */
266221345Sdim	xi.vmask = IPFW_VTYPE_LEGACY;
267221345Sdim
268221345Sdim	error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1);
269221345Sdim	if (error != 0)
270221345Sdim		return (error);
271221345Sdim
272221345Sdim	return (0);
273226633Sdim}
274221345Sdim
275221345Sdim/*
276221345Sdim * Find and reference existing table optionally
277221345Sdim * creating new one.
278221345Sdim *
279221345Sdim * Saves found table config into @ptc.
280221345Sdim * Note function may drop/acquire UH_WLOCK.
281203955Srdivacky * Returns 0 if table was found/created and referenced
282221345Sdim * or non-zero return code.
283221345Sdim */
284221345Sdimstatic int
285221345Sdimfind_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
286221345Sdim    struct tentry_info *tei, uint32_t count, int op,
287221345Sdim    struct table_config **ptc)
288234353Sdim{
289234353Sdim	struct namedobj_instance *ni;
290234353Sdim	struct table_config *tc;
291234353Sdim	uint16_t kidx;
292221345Sdim	int error;
293221345Sdim
294221345Sdim	IPFW_UH_WLOCK_ASSERT(ch);
295221345Sdim
296221345Sdim	ni = CHAIN_TO_NI(ch);
297221345Sdim	tc = NULL;
298221345Sdim	if ((tc = find_table(ni, ti)) != NULL) {
299221345Sdim		/* check table type */
300221345Sdim		if (tc->no.type != ti->type)
301221345Sdim			return (EINVAL);
302221345Sdim
303221345Sdim		if (tc->locked != 0)
304221345Sdim			return (EACCES);
305221345Sdim
306221345Sdim		/* Try to exit early on limit hit */
307221345Sdim		if (op == OP_ADD && count == 1 &&
308221345Sdim		    check_table_limit(tc, tei) != 0)
309193326Sed			return (EFBIG);
310221345Sdim
311221345Sdim		/* Reference and return */
312221345Sdim		tc->no.refcnt++;
313221345Sdim		*ptc = tc;
314221345Sdim		return (0);
315221345Sdim	}
316221345Sdim
317221345Sdim	if (op == OP_DEL)
318221345Sdim		return (ESRCH);
319221345Sdim
320221345Sdim	/* Compability mode: create new table for old clients */
321221345Sdim	if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
322221345Sdim		return (ESRCH);
323221345Sdim
324221345Sdim	IPFW_UH_WUNLOCK(ch);
325221345Sdim	error = create_table_compat(ch, ti, &kidx);
326221345Sdim	IPFW_UH_WLOCK(ch);
327223017Sdim
328221345Sdim	if (error != 0)
329221345Sdim		return (error);
330221345Sdim
331221345Sdim	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
332221345Sdim	KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx));
333221345Sdim
334221345Sdim	/* OK, now we've got referenced table. */
335221345Sdim	*ptc = tc;
336221345Sdim	return (0);
337221345Sdim}
338221345Sdim
339221345Sdim/*
340234353Sdim * Rolls back already @added to @tc entries using state array @ta_buf_m.
341234353Sdim * Assume the following layout:
342234353Sdim * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases
343221345Sdim * 2) DEL state (ta_buf_m[count[ ... t_buf_m[count + added - 1])
344221345Sdim *   for storing deleted state
345221345Sdim */
346221345Sdimstatic void
347221345Sdimrollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
348226633Sdim    struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
349226633Sdim    uint32_t count, uint32_t added)
350234353Sdim{
351234353Sdim	struct table_algo *ta;
352193326Sed	struct tentry_info *ptei;
353221345Sdim	caddr_t v, vv;
354221345Sdim	size_t ta_buf_sz;
355221345Sdim	int error, i;
356193326Sed	uint32_t num;
357221345Sdim
358221345Sdim	IPFW_UH_WLOCK_ASSERT(ch);
359198092Srdivacky
360226633Sdim	ta = tc->ta;
361226633Sdim	ta_buf_sz = ta->ta_buf_size;
362221345Sdim	v = ta_buf_m;
363221345Sdim	vv = v + count * ta_buf_sz;
364234353Sdim	for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
365221345Sdim		ptei = &tei[i];
366221345Sdim		if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {
367221345Sdim
368221345Sdim			/*
369198092Srdivacky			 * We have old value stored by previous
370221345Sdim			 * call in @ptei->value. Do add once again
371193326Sed			 * to restore it.
372193326Sed			 */
373193326Sed			error = ta->add(tc->astate, tinfo, ptei, v, &num);
374193326Sed			KASSERT(error == 0, ("rollback UPDATE fail"));
375193326Sed			KASSERT(num == 0, ("rollback UPDATE fail2"));
376193326Sed			continue;
377193326Sed		}
378193326Sed
379193326Sed		error = ta->prepare_del(ch, ptei, vv);
380198092Srdivacky		KASSERT(error == 0, ("pre-rollback INSERT failed"));
381198092Srdivacky		error = ta->del(tc->astate, tinfo, ptei, vv, &num);
382207619Srdivacky		KASSERT(error == 0, ("rollback INSERT failed"));
383226633Sdim		tc->count -= num;
384226633Sdim	}
385207619Srdivacky}
386221345Sdim
387221345Sdim/*
388193326Sed * Prepares add/del state for all @count entries in @tei.
389234353Sdim * Uses either stack buffer (@ta_buf) or allocates a new one.
390234353Sdim * Stores pointer to allocated buffer back to @ta_buf.
391221345Sdim *
392221345Sdim * Returns 0 on success.
393221345Sdim */
394221345Sdimstatic int
395221345Sdimprepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
396193326Sed    struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
397193326Sed{
398221345Sdim	caddr_t ta_buf_m, v;
399221345Sdim	size_t ta_buf_sz, sz;
400194613Sed	struct tentry_info *ptei;
401224145Sdim	int error, i;
402224145Sdim
403224145Sdim	error = 0;
404221345Sdim	ta_buf_sz = ta->ta_buf_size;
405221345Sdim	if (count == 1) {
406221345Sdim		/* Sigle add/delete, use on-stack buffer */
407221345Sdim		memset(*ta_buf, 0, TA_BUF_SZ);
408221345Sdim		ta_buf_m = *ta_buf;
409221345Sdim	} else {
410221345Sdim
411221345Sdim		/*
412226633Sdim		 * Multiple adds/deletes, allocate larger buffer
413226633Sdim		 *
414226633Sdim		 * Note we need 2xcount buffer for add case:
415221345Sdim		 * we have hold both ADD state
416221345Sdim		 * and DELETE state (this may be needed
417221345Sdim		 * if we need to rollback all changes)
418226633Sdim		 */
419226633Sdim		sz = count * ta_buf_sz;
420226633Sdim		ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
421226633Sdim		    M_WAITOK | M_ZERO);
422226633Sdim	}
423226633Sdim
424226633Sdim	v = ta_buf_m;
425226633Sdim	for (i = 0; i < count; i++, v += ta_buf_sz) {
426226633Sdim		ptei = &tei[i];
427226633Sdim		error = (op == OP_ADD) ?
428226633Sdim		    ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);
429234353Sdim
430193326Sed		/*
431221345Sdim		 * Some syntax error (incorrect mask, or address, or
432221345Sdim		 * anything). Return error regardless of atomicity
433221345Sdim		 * settings.
434202879Srdivacky		 */
435221345Sdim		if (error != 0)
436221345Sdim			break;
437221345Sdim	}
438208600Srdivacky
439193326Sed	*ta_buf = ta_buf_m;
440193326Sed	return (error);
441193326Sed}
442193326Sed
443207619Srdivacky/*
444193326Sed * Flushes allocated state for each @count entries in @tei.
445221345Sdim * Frees @ta_buf_m if differs from stack buffer @ta_buf.
446221345Sdim */
447208600Srdivackystatic void
448193326Sedflush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
449193326Sed    struct tentry_info *tei, uint32_t count, int rollback,
450193326Sed    caddr_t ta_buf_m, caddr_t ta_buf)
451193326Sed{
452193326Sed	caddr_t v;
453193326Sed	struct tentry_info *ptei;
454198092Srdivacky	size_t ta_buf_sz;
455198092Srdivacky	int i;
456193326Sed
457193326Sed	ta_buf_sz = ta->ta_buf_size;
458210299Sed
459210299Sed	/* Run cleaning callback anyway */
460193326Sed	v = ta_buf_m;
461193326Sed	for (i = 0; i < count; i++, v += ta_buf_sz) {
462212904Sdim		ptei = &tei[i];
463198092Srdivacky		ta->flush_entry(ch, ptei, v);
464198092Srdivacky		if (ptei->ptv != NULL) {
465193326Sed			free(ptei->ptv, M_IPFW);
466193326Sed			ptei->ptv = NULL;
467193326Sed		}
468234353Sdim	}
469193326Sed
470193326Sed	/* Clean up "deleted" state in case of rollback */
471193326Sed	if (rollback != 0) {
472193326Sed		v = ta_buf_m + count * ta_buf_sz;
473223017Sdim		for (i = 0; i < count; i++, v += ta_buf_sz)
474223017Sdim			ta->flush_entry(ch, &tei[i], v);
475234353Sdim	}
476234353Sdim
477223017Sdim	if (ta_buf_m != ta_buf)
478234353Sdim		free(ta_buf_m, M_TEMP);
479223017Sdim}
480198092Srdivacky
481198092Srdivacky
482221345Sdimstatic void
483210299Sedrollback_add_entry(void *object, struct op_state *_state)
484221345Sdim{
485210299Sed	struct ip_fw_chain *ch;
486221345Sdim	struct tableop_state *ts;
487193326Sed
488221345Sdim	ts = (struct tableop_state *)_state;
489193326Sed
490221345Sdim	if (ts->tc != object && ts->ch != object)
491193326Sed		return;
492221345Sdim
493212904Sdim	ch = ts->ch;
494212904Sdim
495221345Sdim	IPFW_UH_WLOCK_ASSERT(ch);
496198092Srdivacky
497198092Srdivacky	/* Call specifid unlockers */
498221345Sdim	rollback_table_values(ts);
499193326Sed
500221345Sdim	/* Indicate we've called */
501198092Srdivacky	ts->modified = 1;
502198092Srdivacky}
503210299Sed
504221345Sdim/*
505193326Sed * Adds/updates one or more entries in table @ti.
506193326Sed *
507193326Sed * Function may drop/reacquire UH wlock multiple times due to
508193326Sed * items alloc, algorithm callbacks (check_space), value linkage
509221345Sdim * (new values, value storage realloc), etc..
510193326Sed * Other processes like other adds (which may involve storage resize),
511193326Sed * table swaps (which changes table data and may change algo type),
512224145Sdim * table modify (which may change value mask) may be executed
513221345Sdim * simultaneously so we need to deal with it.
514218893Sdim *
515212904Sdim * The following approach was implemented:
516212904Sdim * we have per-chain linked list, protected with UH lock.
517223017Sdim * add_table_entry prepares special on-stack structure wthich is passed
518223017Sdim * to its descendants. Users add this structure to this list before unlock.
519223017Sdim * After performing needed operations and acquiring UH lock back, each user
520223017Sdim * checks if structure has changed. If true, it rolls local state back and
521193326Sed * returns without error to the caller.
522221345Sdim * add_table_entry() on its own checks if structure has changed and restarts
523221345Sdim * its operation from the beginning (goto restart).
524221345Sdim *
525221345Sdim * Functions which are modifying fields of interest (currently
526221345Sdim *   resize_shared_value_storage() and swap_tables() )
527221345Sdim * traverses given list while holding UH lock immediately before
528221345Sdim * performing their operations calling function provided be list entry
529221345Sdim * ( currently rollback_add_entry  ) which performs rollback for all necessary
530221345Sdim * state and sets appropriate values in structure indicating rollback
531221345Sdim * has happened.
532221345Sdim *
533221345Sdim * Algo interaction:
534221345Sdim * Function references @ti first to ensure table won't
535221345Sdim * disappear or change its type.
536221345Sdim * After that, prepare_add callback is called for each @tei entry.
537221345Sdim * Next, we try to add each entry under UH+WHLOCK
538221345Sdim * using add() callback.
539221345Sdim * Finally, we free all state by calling flush_entry callback
540221345Sdim * for each @tei.
541221345Sdim *
542221345Sdim * Returns 0 on success.
543221345Sdim */
544228379Sdimint
545234353Sdimadd_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
546234353Sdim    struct tentry_info *tei, uint8_t flags, uint32_t count)
547228379Sdim{
548228379Sdim	struct table_config *tc;
549228379Sdim	struct table_algo *ta;
550221345Sdim	uint16_t kidx;
551221345Sdim	int error, first_error, i, rollback;
552221345Sdim	uint32_t num, numadd;
553221345Sdim	struct tentry_info *ptei;
554221345Sdim	struct tableop_state ts;
555221345Sdim	char ta_buf[TA_BUF_SZ];
556221345Sdim	caddr_t ta_buf_m, v;
557224145Sdim
558221345Sdim	memset(&ts, 0, sizeof(ts));
559221345Sdim	ta = NULL;
560221345Sdim	IPFW_UH_WLOCK(ch);
561221345Sdim
562221345Sdim	/*
563221345Sdim	 * Find and reference existing table.
564221345Sdim	 */
565221345Sdimrestart:
566221345Sdim	if (ts.modified != 0) {
567221345Sdim		IPFW_UH_WUNLOCK(ch);
568221345Sdim		flush_batch_buffer(ch, ta, tei, count, rollback,
569221345Sdim		    ta_buf_m, ta_buf);
570221345Sdim		memset(&ts, 0, sizeof(ts));
571221345Sdim		ta = NULL;
572221345Sdim		IPFW_UH_WLOCK(ch);
573221345Sdim	}
574221345Sdim
575221345Sdim	error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
576221345Sdim	if (error != 0) {
577221345Sdim		IPFW_UH_WUNLOCK(ch);
578221345Sdim		return (error);
579221345Sdim	}
580221345Sdim	ta = tc->ta;
581221345Sdim
582221345Sdim	/* Fill in tablestate */
583221345Sdim	ts.ch = ch;
584221345Sdim	ts.opstate.func = rollback_add_entry;
585221345Sdim	ts.tc = tc;
586221345Sdim	ts.vshared = tc->vshared;
587221345Sdim	ts.vmask = tc->vmask;
588221345Sdim	ts.ta = ta;
589221345Sdim	ts.tei = tei;
590221345Sdim	ts.count = count;
591221345Sdim	rollback = 0;
592221345Sdim	add_toperation_state(ch, &ts);
593221345Sdim	IPFW_UH_WUNLOCK(ch);
594221345Sdim
595221345Sdim	/* Allocate memory and prepare record(s) */
596221345Sdim	/* Pass stack buffer by default */
597221345Sdim	ta_buf_m = ta_buf;
598221345Sdim	error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);
599221345Sdim	if (error != 0)
600221345Sdim		goto cleanup;
601221345Sdim
602221345Sdim	IPFW_UH_WLOCK(ch);
603221345Sdim	/* Drop reference we've used in first search */
604228379Sdim	tc->no.refcnt--;
605234353Sdim
606234353Sdim	/*
607234353Sdim	 * Check if table swap has happened.
608228379Sdim	 * (so table algo might be changed).
609228379Sdim	 * Restart operation to achieve consistent behavior.
610221345Sdim	 */
611221345Sdim	del_toperation_state(ch, &ts);
612221345Sdim	if (ts.modified != 0)
613228379Sdim		goto restart;
614228379Sdim
615221345Sdim	/*
616221345Sdim	 * Link all values values to shared/per-table value array.
617221345Sdim	 *
618221345Sdim	 * May release/reacquire UH_WLOCK.
619221345Sdim	 */
620221345Sdim	error = ipfw_link_table_values(ch, &ts);
621221345Sdim	if (error != 0)
622221345Sdim		goto cleanup;
623221345Sdim	if (ts.modified != 0)
624221345Sdim		goto restart;
625221345Sdim
626221345Sdim	/*
627224145Sdim	 * Ensure we are able to add all entries without additional
628221345Sdim	 * memory allocations. May release/reacquire UH_WLOCK.
629221345Sdim	 */
630221345Sdim	kidx = tc->no.kidx;
631221345Sdim	error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count);
632221345Sdim	if (error != 0)
633221345Sdim		goto cleanup;
634224145Sdim	if (ts.modified != 0)
635221345Sdim		goto restart;
636221345Sdim
637221345Sdim	/* We've got valid table in @tc. Let's try to add data */
638221345Sdim	kidx = tc->no.kidx;
639221345Sdim	ta = tc->ta;
640221345Sdim	numadd = 0;
641221345Sdim	first_error = 0;
642221345Sdim
643221345Sdim	IPFW_WLOCK(ch);
644234353Sdim
645224145Sdim	v = ta_buf_m;
646221345Sdim	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
647221345Sdim		ptei = &tei[i];
648221345Sdim		num = 0;
649226633Sdim		/* check limit before adding */
650221345Sdim		if ((error = check_table_limit(tc, ptei)) == 0) {
651221345Sdim			error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
652221345Sdim			    ptei, v, &num);
653221345Sdim			/* Set status flag to inform userland */
654221345Sdim			store_tei_result(ptei, OP_ADD, error, num);
655221345Sdim		}
656193326Sed		if (error == 0) {
657193326Sed			/* Update number of records to ease limit checking */
658193326Sed			tc->count += num;
659194613Sed			numadd += num;
660194613Sed			continue;
661194613Sed		}
662198092Srdivacky
663194613Sed		if (first_error == 0)
664194613Sed			first_error = error;
665198092Srdivacky
666198092Srdivacky		/*
667194613Sed		 * Some error have happened. Check our atomicity
668194613Sed		 * settings: continue if atomicity is not required,
669194613Sed		 * rollback changes otherwise.
670198092Srdivacky		 */
671198092Srdivacky		if ((flags & IPFW_CTF_ATOMIC) == 0)
672194613Sed			continue;
673198092Srdivacky
674198092Srdivacky		rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
675194613Sed		    tei, ta_buf_m, count, i);
676193326Sed
677226633Sdim		rollback = 1;
678226633Sdim		break;
679221345Sdim	}
680221345Sdim
681202379Srdivacky	IPFW_WUNLOCK(ch);
682202379Srdivacky
683226633Sdim	ipfw_garbage_table_values(ch, tc, tei, count, rollback);
684221345Sdim
685193326Sed	/* Permit post-add algorithm grow/rehash. */
686193326Sed	if (numadd != 0)
687221345Sdim		check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
688221345Sdim
689234353Sdim	/* Return first error to user, if any */
690234353Sdim	error = first_error;
691234353Sdim
692207619Srdivackycleanup:
693207619Srdivacky	IPFW_UH_WUNLOCK(ch);
694207619Srdivacky
695221345Sdim	flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);
696193326Sed
697221345Sdim	return (error);
698193326Sed}
699221345Sdim
700218893Sdim/*
701221345Sdim * Deletes one or more entries in table @ti.
702218893Sdim *
703221345Sdim * Returns 0 on success.
704218893Sdim */
705198092Srdivackyint
706198092Srdivackydel_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
707198092Srdivacky    struct tentry_info *tei, uint8_t flags, uint32_t count)
708198092Srdivacky{
709198092Srdivacky	struct table_config *tc;
710193326Sed	struct table_algo *ta;
711193326Sed	struct tentry_info *ptei;
712198092Srdivacky	uint16_t kidx;
713193326Sed	int error, first_error, i;
714202879Srdivacky	uint32_t num, numdel;
715202879Srdivacky	char ta_buf[TA_BUF_SZ];
716202879Srdivacky	caddr_t ta_buf_m, v;
717202879Srdivacky
718202879Srdivacky	/*
719202879Srdivacky	 * Find and reference existing table.
720193326Sed	 */
721193326Sed	IPFW_UH_WLOCK(ch);
722193326Sed	error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
723198092Srdivacky	if (error != 0) {
724226633Sdim		IPFW_UH_WUNLOCK(ch);
725226633Sdim		return (error);
726226633Sdim	}
727226633Sdim	ta = tc->ta;
728226633Sdim	IPFW_UH_WUNLOCK(ch);
729226633Sdim
730193326Sed	/* Allocate memory and prepare record(s) */
731221345Sdim	/* Pass stack buffer by default */
732221345Sdim	ta_buf_m = ta_buf;
733221345Sdim	error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
734221345Sdim	if (error != 0)
735221345Sdim		goto cleanup;
736221345Sdim
737202879Srdivacky	IPFW_UH_WLOCK(ch);
738202879Srdivacky
739203955Srdivacky	/* Drop reference we've used in first search */
740198092Srdivacky	tc->no.refcnt--;
741224145Sdim
742221345Sdim	/*
743221345Sdim	 * Check if table algo is still the same.
744224145Sdim	 * (changed ta may be the result of table swap).
745221345Sdim	 */
746221345Sdim	if (ta != tc->ta) {
747221345Sdim		IPFW_UH_WUNLOCK(ch);
748221345Sdim		error = EINVAL;
749221345Sdim		goto cleanup;
750221345Sdim	}
751221345Sdim
752221345Sdim	kidx = tc->no.kidx;
753221345Sdim	numdel = 0;
754221345Sdim	first_error = 0;
755221345Sdim
756221345Sdim	IPFW_WLOCK(ch);
757221345Sdim	v = ta_buf_m;
758221345Sdim	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
759221345Sdim		ptei = &tei[i];
760221345Sdim		num = 0;
761221345Sdim		error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
762221345Sdim		    &num);
763221345Sdim		/* Save state for userland */
764221345Sdim		store_tei_result(ptei, OP_DEL, error, num);
765221345Sdim		if (error != 0 && first_error == 0)
766221345Sdim			first_error = error;
767221345Sdim		tc->count -= num;
768221345Sdim		numdel += num;
769221345Sdim	}
770221345Sdim	IPFW_WUNLOCK(ch);
771193326Sed
772224145Sdim	/* Unlink non-used values */
773221345Sdim	ipfw_garbage_table_values(ch, tc, tei, count, 0);
774221345Sdim
775203955Srdivacky	if (numdel != 0) {
776234353Sdim		/* Run post-del hook to permit shrinking */
777226633Sdim		check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
778224145Sdim	}
779224145Sdim
780221345Sdim	IPFW_UH_WUNLOCK(ch);
781226633Sdim
782223017Sdim	/* Return first error to user, if any */
783223017Sdim	error = first_error;
784223017Sdim
785203955Srdivackycleanup:
786203955Srdivacky	flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);
787203955Srdivacky
788203955Srdivacky	return (error);
789203955Srdivacky}
790203955Srdivacky
791203955Srdivacky/*
792203955Srdivacky * Ensure that table @tc has enough space to add @count entries without
793221345Sdim * need for reallocation.
794221345Sdim *
795203955Srdivacky * Callbacks order:
796221345Sdim * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize.
797221345Sdim *
798203955Srdivacky * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags.
 * 2) prepare_modify (UH_WLOCK) - copy old data into new storage
800221345Sdim * 3) modify (UH_WLOCK + WLOCK) - switch pointers
801203955Srdivacky * 4) flush_modify (UH_WLOCK) - free state, if needed
802221345Sdim *
803203955Srdivacky * Returns 0 on success.
804221345Sdim */
805203955Srdivackystatic int
806221345Sdimcheck_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
807221345Sdim    struct table_config *tc, struct table_info *ti, uint32_t count)
808203955Srdivacky{
809193326Sed	struct table_algo *ta;
810198092Srdivacky	uint64_t pflags;
811224145Sdim	char ta_buf[TA_BUF_SZ];
812224145Sdim	int error;
813224145Sdim
814224145Sdim	IPFW_UH_WLOCK_ASSERT(ch);
815202379Srdivacky
816202379Srdivacky	error = 0;
817202379Srdivacky	ta = tc->ta;
818202379Srdivacky	if (ta->need_modify == NULL)
819224145Sdim		return (0);
820224145Sdim
821224145Sdim	/* Acquire reference not to loose @tc between locks/unlocks */
822224145Sdim	tc->no.refcnt++;
823224145Sdim
824193326Sed	/*
825193326Sed	 * TODO: think about avoiding race between large add/large delete
826193326Sed	 * operation on algorithm which implements shrinking along with
827226633Sdim	 * growing.
828193326Sed	 */
829193326Sed	while (true) {
830193326Sed		pflags = 0;
831193326Sed		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
832224145Sdim			error = 0;
833224145Sdim			break;
834224145Sdim		}
835224145Sdim
836224145Sdim		/* We have to shrink/grow table */
837224145Sdim		if (ts != NULL)
838224145Sdim			add_toperation_state(ch, ts);
839224145Sdim		IPFW_UH_WUNLOCK(ch);
840224145Sdim
841224145Sdim		memset(&ta_buf, 0, sizeof(ta_buf));
842210299Sed		error = ta->prepare_mod(ta_buf, &pflags);
843221345Sdim
844221345Sdim		IPFW_UH_WLOCK(ch);
845226633Sdim		if (ts != NULL)
846221345Sdim			del_toperation_state(ch, ts);
847221345Sdim
848221345Sdim		if (error != 0)
849226633Sdim			break;
850221345Sdim
851221345Sdim		if (ts != NULL && ts->modified != 0) {
852221345Sdim
853221345Sdim			/*
854221345Sdim			 * Swap operation has happened
855221345Sdim			 * so we're currently operating on other
856221345Sdim			 * table data. Stop doing this.
857221345Sdim			 */
858221345Sdim			ta->flush_mod(ta_buf);
859221345Sdim			break;
860221345Sdim		}
861221345Sdim
862221345Sdim		/* Check if we still need to alter table */
863221345Sdim		ti = KIDX_TO_TI(ch, tc->no.kidx);
864221345Sdim		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
865221345Sdim			IPFW_UH_WUNLOCK(ch);
866221345Sdim
867221345Sdim			/*
868221345Sdim			 * Other thread has already performed resize.
869221345Sdim			 * Flush our state and return.
870221345Sdim			 */
871221345Sdim			ta->flush_mod(ta_buf);
872221345Sdim			break;
873210299Sed		}
874221345Sdim
875193326Sed		error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
876193326Sed		if (error == 0) {
877193326Sed			/* Do actual modification */
878193326Sed			IPFW_WLOCK(ch);
879193326Sed			ta->modify(tc->astate, ti, ta_buf, pflags);
880193326Sed			IPFW_WUNLOCK(ch);
881221345Sdim		}
882221345Sdim
883193326Sed		/* Anyway, flush data and retry */
884221345Sdim		ta->flush_mod(ta_buf);
885234353Sdim	}
886221345Sdim
887221345Sdim	tc->no.refcnt--;
888221345Sdim	return (error);
889221345Sdim}
890221345Sdim
891221345Sdim/*
892234353Sdim * Adds or deletes record in table.
893221345Sdim * Data layout (v0):
894221345Sdim * Request: [ ip_fw3_opheader ipfw_table_xentry ]
895221345Sdim *
896221345Sdim * Returns 0 on success
897221345Sdim */
898221345Sdimstatic int
899221345Sdimmanage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
900221345Sdim    struct sockopt_data *sd)
901221345Sdim{
902221345Sdim	ipfw_table_xentry *xent;
903221345Sdim	struct tentry_info tei;
904221345Sdim	struct tid_info ti;
905221345Sdim	struct table_value v;
906221345Sdim	int error, hdrlen, read;
907193326Sed
908221345Sdim	hdrlen = offsetof(ipfw_table_xentry, k);
909221345Sdim
910193326Sed	/* Check minimum header size */
911221345Sdim	if (sd->valsize < (sizeof(*op3) + hdrlen))
912221345Sdim		return (EINVAL);
913221345Sdim
914221345Sdim	read = sizeof(ip_fw3_opheader);
915221345Sdim
916221345Sdim	/* Check if xentry len field is valid */
917221345Sdim	xent = (ipfw_table_xentry *)(op3 + 1);
918221345Sdim	if (xent->len < hdrlen || xent->len + read > sd->valsize)
919221345Sdim		return (EINVAL);
920221345Sdim
921221345Sdim	memset(&tei, 0, sizeof(tei));
922221345Sdim	tei.paddr = &xent->k;
923221345Sdim	tei.masklen = xent->masklen;
924193326Sed	ipfw_import_table_value_legacy(xent->value, &v);
925221345Sdim	tei.pvalue = &v;
926221345Sdim	/* Old requests compability */
927221345Sdim	tei.flags = TEI_FLAGS_COMPAT;
928221345Sdim	if (xent->type == IPFW_TABLE_ADDR) {
929221345Sdim		if (xent->len - hdrlen == sizeof(in_addr_t))
930221345Sdim			tei.subtype = AF_INET;
931193326Sed		else
932221345Sdim			tei.subtype = AF_INET6;
933212904Sdim	}
934221345Sdim
935221345Sdim	memset(&ti, 0, sizeof(ti));
936234353Sdim	ti.uidx = xent->tbl;
937234353Sdim	ti.type = xent->type;
938221345Sdim
939221345Sdim	error = (op3->opcode == IP_FW_TABLE_XADD) ?
940221345Sdim	    add_table_entry(ch, &ti, &tei, 0, 1) :
941221345Sdim	    del_table_entry(ch, &ti, &tei, 0, 1);
942221345Sdim
943221345Sdim	return (error);
944221345Sdim}
945221345Sdim
946221345Sdim/*
947221345Sdim * Adds or deletes record in table.
948221345Sdim * Data layout (v1)(current):
949198092Srdivacky * Request: [ ipfw_obj_header
950221345Sdim *   ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
951226633Sdim * ]
952221345Sdim *
953198092Srdivacky * Returns 0 on success
954221345Sdim */
955221345Sdimstatic int
956198092Srdivackymanage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
957221345Sdim    struct sockopt_data *sd)
958221345Sdim{
959221345Sdim	ipfw_obj_tentry *tent, *ptent;
960221345Sdim	ipfw_obj_ctlv *ctlv;
961224145Sdim	ipfw_obj_header *oh;
962221345Sdim	struct tentry_info *ptei, tei, *tei_buf;
963221345Sdim	struct tid_info ti;
964221345Sdim	int error, i, kidx, read;
965193326Sed
966193326Sed	/* Check minimum header size */
967193326Sed	if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
968202879Srdivacky		return (EINVAL);
969203955Srdivacky
970212904Sdim	/* Check if passed data is too long */
971202879Srdivacky	if (sd->valsize != sd->kavail)
972203955Srdivacky		return (EINVAL);
973203955Srdivacky
974203955Srdivacky	oh = (ipfw_obj_header *)sd->kbuf;
975203955Srdivacky
976203955Srdivacky	/* Basic length checks for TLVs */
977234353Sdim	if (oh->ntlv.head.length != sizeof(oh->ntlv))
978234353Sdim		return (EINVAL);
979234353Sdim
980234353Sdim	read = sizeof(*oh);
981234353Sdim
982234353Sdim	ctlv = (ipfw_obj_ctlv *)(oh + 1);
983234353Sdim	if (ctlv->head.length + read != sd->valsize)
984234353Sdim		return (EINVAL);
985234353Sdim
986234353Sdim	read += sizeof(*ctlv);
987234353Sdim	tent = (ipfw_obj_tentry *)(ctlv + 1);
988234353Sdim	if (ctlv->count * sizeof(*tent) + read != sd->valsize)
989234353Sdim		return (EINVAL);
990234353Sdim
991234353Sdim	if (ctlv->count == 0)
992193326Sed		return (0);
993234353Sdim
994193326Sed	/*
995193326Sed	 * Mark entire buffer as "read".
996193326Sed	 * This instructs sopt api write it back
997234353Sdim	 * after function return.
998193326Sed	 */
999203955Srdivacky	ipfw_get_sopt_header(sd, sd->valsize);
1000203955Srdivacky
1001203955Srdivacky	/* Perform basic checks for each entry */
1002193326Sed	ptent = tent;
1003193326Sed	kidx = tent->idx;
1004193326Sed	for (i = 0; i < ctlv->count; i++, ptent++) {
1005193326Sed		if (ptent->head.length != sizeof(*ptent))
1006193326Sed			return (EINVAL);
1007193326Sed		if (ptent->idx != kidx)
1008221345Sdim			return (ENOTSUP);
1009221345Sdim	}
1010208600Srdivacky
1011193326Sed	/* Convert data into kernel request objects */
1012193326Sed	objheader_to_ti(oh, &ti);
1013193326Sed	ti.type = oh->ntlv.type;
1014193326Sed	ti.uidx = kidx;
1015193326Sed
1016193326Sed	/* Use on-stack buffer for single add/del */
1017198092Srdivacky	if (ctlv->count == 1) {
1018198092Srdivacky		memset(&tei, 0, sizeof(tei));
1019223017Sdim		tei_buf = &tei;
1020234353Sdim	} else
1021203955Srdivacky		tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
1022223017Sdim		    M_WAITOK | M_ZERO);
1023223017Sdim
1024203955Srdivacky	ptei = tei_buf;
1025203955Srdivacky	ptent = tent;
1026203955Srdivacky	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
1027203955Srdivacky		ptei->paddr = &ptent->k;
1028203955Srdivacky		ptei->subtype = ptent->subtype;
1029193326Sed		ptei->masklen = ptent->masklen;
1030207619Srdivacky		if (ptent->head.flags & IPFW_TF_UPDATE)
1031207619Srdivacky			ptei->flags |= TEI_FLAGS_UPDATE;
1032207619Srdivacky
1033193326Sed		ipfw_import_table_value_v1(&ptent->v.value);
1034193326Sed		ptei->pvalue = (struct table_value *)&ptent->v.value;
1035221345Sdim	}
1036221345Sdim
1037224145Sdim	error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
1038193326Sed	    add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
1039234353Sdim	    del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);
1040193326Sed
1041193326Sed	/* Translate result back to userland */
1042193326Sed	ptei = tei_buf;
1043193326Sed	ptent = tent;
1044193326Sed	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
1045193326Sed		if (ptei->flags & TEI_FLAGS_ADDED)
1046226633Sdim			ptent->result = IPFW_TR_ADDED;
1047193326Sed		else if (ptei->flags & TEI_FLAGS_DELETED)
1048193326Sed			ptent->result = IPFW_TR_DELETED;
1049226633Sdim		else if (ptei->flags & TEI_FLAGS_UPDATED)
1050193326Sed			ptent->result = IPFW_TR_UPDATED;
1051207619Srdivacky		else if (ptei->flags & TEI_FLAGS_LIMIT)
1052193326Sed			ptent->result = IPFW_TR_LIMIT;
1053193326Sed		else if (ptei->flags & TEI_FLAGS_ERROR)
1054193326Sed			ptent->result = IPFW_TR_ERROR;
1055193326Sed		else if (ptei->flags & TEI_FLAGS_NOTFOUND)
1056198092Srdivacky			ptent->result = IPFW_TR_NOTFOUND;
1057198092Srdivacky		else if (ptei->flags & TEI_FLAGS_EXISTS)
1058193326Sed			ptent->result = IPFW_TR_EXISTS;
1059193326Sed		ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
1060193326Sed	}
1061193326Sed
1062193326Sed	if (tei_buf != &tei)
1063193326Sed		free(tei_buf, M_TEMP);
1064193326Sed
1065193326Sed	return (error);
1066193326Sed}
1067193326Sed
1068193326Sed/*
1069193326Sed * Looks up an entry in given table.
1070193326Sed * Data layout (v0)(current):
1071193326Sed * Request: [ ipfw_obj_header ipfw_obj_tentry ]
1072193326Sed * Reply: [ ipfw_obj_header ipfw_obj_tentry ]
1073193326Sed *
1074193326Sed * Returns 0 on success
1075193326Sed */
1076207619Srdivackystatic int
1077198092Srdivackyfind_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1078224145Sdim    struct sockopt_data *sd)
1079193326Sed{
1080207619Srdivacky	ipfw_obj_tentry *tent;
1081193326Sed	ipfw_obj_header *oh;
1082207619Srdivacky	struct tid_info ti;
1083193326Sed	struct table_config *tc;
1084224145Sdim	struct table_algo *ta;
1085198092Srdivacky	struct table_info *kti;
1086207619Srdivacky	struct namedobj_instance *ni;
1087193326Sed	int error;
1088207619Srdivacky	size_t sz;
1089207619Srdivacky
1090193326Sed	/* Check minimum header size */
1091221345Sdim	sz = sizeof(*oh) + sizeof(*tent);
1092221345Sdim	if (sd->valsize != sz)
1093193326Sed		return (EINVAL);
1094221345Sdim
1095234353Sdim	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1096207619Srdivacky	tent = (ipfw_obj_tentry *)(oh + 1);
1097207619Srdivacky
1098207619Srdivacky	/* Basic length checks for TLVs */
1099207619Srdivacky	if (oh->ntlv.head.length != sizeof(oh->ntlv))
1100207619Srdivacky		return (EINVAL);
1101207619Srdivacky
1102221345Sdim	objheader_to_ti(oh, &ti);
1103193326Sed	ti.type = oh->ntlv.type;
1104207619Srdivacky	ti.uidx = tent->idx;
1105234353Sdim
1106207619Srdivacky	IPFW_UH_RLOCK(ch);
1107207619Srdivacky	ni = CHAIN_TO_NI(ch);
1108193326Sed
1109193326Sed	/*
1110198092Srdivacky	 * Find existing table and check its type .
1111221345Sdim	 */
1112221345Sdim	ta = NULL;
1113208600Srdivacky	if ((tc = find_table(ni, &ti)) == NULL) {
1114193326Sed		IPFW_UH_RUNLOCK(ch);
1115193326Sed		return (ESRCH);
1116193326Sed	}
1117193326Sed
1118207619Srdivacky	/* check table type */
1119193326Sed	if (tc->no.type != ti.type) {
1120223017Sdim		IPFW_UH_RUNLOCK(ch);
1121223017Sdim		return (EINVAL);
1122207619Srdivacky	}
1123234353Sdim
1124203955Srdivacky	kti = KIDX_TO_TI(ch, tc->no.kidx);
1125223017Sdim	ta = tc->ta;
1126223017Sdim
1127203955Srdivacky	if (ta->find_tentry == NULL)
1128203955Srdivacky		return (ENOTSUP);
1129203955Srdivacky
1130203955Srdivacky	error = ta->find_tentry(tc->astate, kti, tent);
1131203955Srdivacky
1132207619Srdivacky	IPFW_UH_RUNLOCK(ch);
1133207619Srdivacky
1134207619Srdivacky	return (error);
1135207619Srdivacky}
1136207619Srdivacky
1137207619Srdivacky/*
1138207619Srdivacky * Flushes all entries or destroys given table.
1139207619Srdivacky * Data layout (v0)(current):
1140207619Srdivacky * Request: [ ipfw_obj_header ]
1141207619Srdivacky *
1142221345Sdim * Returns 0 on success
1143221345Sdim */
1144221345Sdimstatic int
1145221345Sdimflush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1146207619Srdivacky    struct sockopt_data *sd)
1147207619Srdivacky{
1148207619Srdivacky	int error;
1149208600Srdivacky	struct _ipfw_obj_header *oh;
1150207619Srdivacky	struct tid_info ti;
1151207619Srdivacky
1152207619Srdivacky	if (sd->valsize != sizeof(*oh))
1153207619Srdivacky		return (EINVAL);
1154208600Srdivacky
1155207619Srdivacky	oh = (struct _ipfw_obj_header *)op3;
1156207619Srdivacky	objheader_to_ti(oh, &ti);
1157207619Srdivacky
1158208600Srdivacky	if (op3->opcode == IP_FW_TABLE_XDESTROY)
1159207619Srdivacky		error = destroy_table(ch, &ti);
1160207619Srdivacky	else if (op3->opcode == IP_FW_TABLE_XFLUSH)
1161207619Srdivacky		error = flush_table(ch, &ti);
1162198092Srdivacky	else
1163193326Sed		return (ENOTSUP);
1164193326Sed
1165198092Srdivacky	return (error);
1166193326Sed}
1167198092Srdivacky
1168221345Sdimstatic void
1169221345Sdimrestart_flush(void *object, struct op_state *_state)
1170221345Sdim{
1171221345Sdim	struct tableop_state *ts;
1172221345Sdim
1173221345Sdim	ts = (struct tableop_state *)_state;
1174221345Sdim
1175221345Sdim	if (ts->tc != object)
1176221345Sdim		return;
1177221345Sdim
1178193326Sed	/* Indicate we've called */
1179221345Sdim	ts->modified = 1;
1180221345Sdim}
1181224145Sdim
1182193326Sed/*
1183234353Sdim * Flushes given table.
1184193326Sed *
1185234353Sdim * Function create new table instance with the same
1186234353Sdim * parameters, swaps it with old one and
1187207619Srdivacky * flushes state without holding runtime WLOCK.
1188234353Sdim *
1189234353Sdim * Returns 0 on success.
1190234353Sdim */
1191234353Sdimint
1192234353Sdimflush_table(struct ip_fw_chain *ch, struct tid_info *ti)
1193234353Sdim{
1194234353Sdim	struct namedobj_instance *ni;
1195234353Sdim	struct table_config *tc;
1196234353Sdim	struct table_algo *ta;
1197234353Sdim	struct table_info ti_old, ti_new, *tablestate;
1198234353Sdim	void *astate_old, *astate_new;
1199234353Sdim	char algostate[64], *pstate;
1200234353Sdim	struct tableop_state ts;
1201234353Sdim	int error;
1202234353Sdim	uint16_t kidx;
1203234353Sdim	uint8_t tflags;
1204234353Sdim
1205234353Sdim	/*
1206234353Sdim	 * Stage 1: save table algoritm.
1207234353Sdim	 * Reference found table to ensure it won't disappear.
1208234353Sdim	 */
1209234353Sdim	IPFW_UH_WLOCK(ch);
1210234353Sdim	ni = CHAIN_TO_NI(ch);
1211234353Sdim	if ((tc = find_table(ni, ti)) == NULL) {
1212234353Sdim		IPFW_UH_WUNLOCK(ch);
1213234353Sdim		return (ESRCH);
1214234353Sdim	}
1215234353Sdimrestart:
1216207619Srdivacky	/* Set up swap handler */
1217234353Sdim	memset(&ts, 0, sizeof(ts));
1218207619Srdivacky	ts.opstate.func = restart_flush;
1219207619Srdivacky	ts.tc = tc;
1220193326Sed
1221208600Srdivacky	ta = tc->ta;
1222207619Srdivacky	/* Do not flush readonly tables */
1223208600Srdivacky	if ((ta->flags & TA_FLAG_READONLY) != 0) {
1224208600Srdivacky		IPFW_UH_WUNLOCK(ch);
1225208600Srdivacky		return (EACCES);
1226207619Srdivacky	}
1227207619Srdivacky	/* Save startup algo parameters */
1228221345Sdim	if (ta->print_config != NULL) {
1229207619Srdivacky		ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
1230207619Srdivacky		    algostate, sizeof(algostate));
1231207619Srdivacky		pstate = algostate;
1232207619Srdivacky	} else
1233207619Srdivacky		pstate = NULL;
1234221345Sdim	tflags = tc->tflags;
1235226633Sdim	tc->no.refcnt++;
1236207619Srdivacky	add_toperation_state(ch, &ts);
1237207619Srdivacky	IPFW_UH_WUNLOCK(ch);
1238207619Srdivacky
1239207619Srdivacky	/*
1240207619Srdivacky	 * Stage 2: allocate new table instance using same algo.
1241207619Srdivacky	 */
1242207619Srdivacky	memset(&ti_new, 0, sizeof(struct table_info));
1243207619Srdivacky	error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
1244207619Srdivacky
1245221345Sdim	/*
1246207619Srdivacky	 * Stage 3: swap old state pointers with newly-allocated ones.
1247207619Srdivacky	 * Decrease refcount.
1248207619Srdivacky	 */
1249221345Sdim	IPFW_UH_WLOCK(ch);
1250207619Srdivacky	tc->no.refcnt--;
1251207619Srdivacky	del_toperation_state(ch, &ts);
1252207619Srdivacky
1253207619Srdivacky	if (error != 0) {
1254207619Srdivacky		IPFW_UH_WUNLOCK(ch);
1255207619Srdivacky		return (error);
1256207619Srdivacky	}
1257193326Sed
1258193326Sed	/*
1259198092Srdivacky	 * Restart operation if table swap has happened:
1260193326Sed	 * even if algo may be the same, algo init parameters
1261234353Sdim	 * may change. Restart operation instead of doing
1262234353Sdim	 * complex checks.
1263234353Sdim	 */
1264234353Sdim	if (ts.modified != 0) {
1265234353Sdim		ta->destroy(astate_new, &ti_new);
1266234353Sdim		goto restart;
1267198092Srdivacky	}
1268198092Srdivacky
1269198092Srdivacky	ni = CHAIN_TO_NI(ch);
1270224145Sdim	kidx = tc->no.kidx;
1271193326Sed	tablestate = (struct table_info *)ch->tablestate;
1272193326Sed
1273221345Sdim	IPFW_WLOCK(ch);
1274193326Sed	ti_old = tablestate[kidx];
1275193326Sed	tablestate[kidx] = ti_new;
1276193326Sed	IPFW_WUNLOCK(ch);
1277193326Sed
1278193326Sed	astate_old = tc->astate;
1279221345Sdim	tc->astate = astate_new;
1280193326Sed	tc->ti_copy = ti_new;
1281221345Sdim	tc->count = 0;
1282221345Sdim
1283221345Sdim	/* Notify algo on real @ti address */
1284221345Sdim	if (ta->change_ti != NULL)
1285221345Sdim		ta->change_ti(tc->astate, &tablestate[kidx]);
1286221345Sdim
1287221345Sdim	/*
1288221345Sdim	 * Stage 4: unref values.
1289221345Sdim	 */
1290221345Sdim	ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
1291193326Sed	IPFW_UH_WUNLOCK(ch);
1292193326Sed
1293193326Sed	/*
1294193326Sed	 * Stage 5: perform real flush/destroy.
1295193326Sed	 */
1296193326Sed	ta->destroy(astate_old, &ti_old);
1297193326Sed
1298193326Sed	return (0);
1299193326Sed}
1300226633Sdim
1301224145Sdim/*
1302224145Sdim * Swaps two tables.
1303198092Srdivacky * Data layout (v0)(current):
1304198092Srdivacky * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
1305193326Sed *
1306193326Sed * Returns 0 on success
1307193326Sed */
1308193326Sedstatic int
1309193326Sedswap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1310193326Sed    struct sockopt_data *sd)
1311226633Sdim{
1312193326Sed	int error;
1313198092Srdivacky	struct _ipfw_obj_header *oh;
1314193326Sed	struct tid_info ti_a, ti_b;
1315193326Sed
1316193326Sed	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
1317193326Sed		return (EINVAL);
1318193326Sed
1319234353Sdim	oh = (struct _ipfw_obj_header *)op3;
1320234353Sdim	ntlv_to_ti(&oh->ntlv, &ti_a);
1321234353Sdim	ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b);
1322234353Sdim
1323199482Srdivacky	error = swap_tables(ch, &ti_a, &ti_b);
1324199482Srdivacky
1325198092Srdivacky	return (error);
1326224145Sdim}
1327193326Sed
1328193326Sed/*
1329193326Sed * Swaps two tables of the same type/valtype.
1330193326Sed *
1331193326Sed * Checks if tables are compatible and limits
1332193326Sed * permits swap, than actually perform swap.
1333193326Sed *
1334193326Sed * Each table consists of 2 different parts:
1335198092Srdivacky * config:
1336198092Srdivacky *   @tc (with name, set, kidx) and rule bindings, which is "stable".
1337193326Sed *   number of items
1338193326Sed *   table algo
1339193326Sed * runtime:
1340193326Sed *   runtime data @ti (ch->tablestate)
1341193326Sed *   runtime cache in @tc
1342193326Sed *   algo-specific data (@tc->astate)
1343193326Sed *
1344193326Sed * So we switch:
1345198092Srdivacky *  all runtime data
1346193326Sed *   number of items
1347193326Sed *   table algo
1348193326Sed *
1349193326Sed * After that we call @ti change handler for each table.
1350224145Sdim *
1351193326Sed * Note that referencing @tc won't protect tc->ta from change.
1352193326Sed * XXX: Do we need to restrict swap between locked tables?
1353193326Sed * XXX: Do we need to exchange ftype?
1354193326Sed *
1355193326Sed * Returns 0 on success.
1356193326Sed */
1357193326Sedstatic int
1358193326Sedswap_tables(struct ip_fw_chain *ch, struct tid_info *a,
1359193326Sed    struct tid_info *b)
1360193326Sed{
1361193326Sed	struct namedobj_instance *ni;
1362193326Sed	struct table_config *tc_a, *tc_b;
1363193326Sed	struct table_algo *ta;
1364193326Sed	struct table_info ti, *tablestate;
1365193326Sed	void *astate;
1366193326Sed	uint32_t count;
1367193326Sed
1368198092Srdivacky	/*
1369198092Srdivacky	 * Stage 1: find both tables and ensure they are of
1370207619Srdivacky	 * the same type.
1371226633Sdim	 */
1372226633Sdim	IPFW_UH_WLOCK(ch);
1373207619Srdivacky	ni = CHAIN_TO_NI(ch);
1374193326Sed	if ((tc_a = find_table(ni, a)) == NULL) {
1375193326Sed		IPFW_UH_WUNLOCK(ch);
1376193326Sed		return (ESRCH);
1377198092Srdivacky	}
1378198092Srdivacky	if ((tc_b = find_table(ni, b)) == NULL) {
1379198092Srdivacky		IPFW_UH_WUNLOCK(ch);
1380198092Srdivacky		return (ESRCH);
1381224145Sdim	}
1382234353Sdim
1383193326Sed	/* It is very easy to swap between the same table */
1384193326Sed	if (tc_a == tc_b) {
1385193326Sed		IPFW_UH_WUNLOCK(ch);
1386193326Sed		return (0);
1387193326Sed	}
1388193326Sed
1389193326Sed	/* Check type and value are the same */
1390198092Srdivacky	if (tc_a->no.type != tc_b->no.type || tc_a->tflags != tc_b->tflags) {
1391193326Sed		IPFW_UH_WUNLOCK(ch);
1392193326Sed		return (EINVAL);
1393193326Sed	}
1394193326Sed
1395193326Sed	/* Check limits before swap */
1396198092Srdivacky	if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
1397198092Srdivacky	    (tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
1398198092Srdivacky		IPFW_UH_WUNLOCK(ch);
1399198092Srdivacky		return (EFBIG);
1400228379Sdim	}
1401228379Sdim
1402193326Sed	/* Check if one of the tables is readonly */
1403193326Sed	if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
1404193326Sed		IPFW_UH_WUNLOCK(ch);
1405193326Sed		return (EACCES);
1406193326Sed	}
1407193326Sed
1408193326Sed	/* Notify we're going to swap */
1409193326Sed	rollback_toperation_state(ch, tc_a);
1410193326Sed	rollback_toperation_state(ch, tc_b);
1411219077Sdim
1412219077Sdim	/* Everything is fine, prepare to swap */
1413221345Sdim	tablestate = (struct table_info *)ch->tablestate;
1414221345Sdim	ti = tablestate[tc_a->no.kidx];
1415221345Sdim	ta = tc_a->ta;
1416221345Sdim	astate = tc_a->astate;
1417219077Sdim	count = tc_a->count;
1418219077Sdim
1419193326Sed	IPFW_WLOCK(ch);
1420193326Sed	/* a <- b */
1421198092Srdivacky	tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
1422198092Srdivacky	tc_a->ta = tc_b->ta;
1423198092Srdivacky	tc_a->astate = tc_b->astate;
1424193326Sed	tc_a->count = tc_b->count;
1425198092Srdivacky	/* b <- a */
1426226633Sdim	tablestate[tc_b->no.kidx] = ti;
1427198092Srdivacky	tc_b->ta = ta;
1428198092Srdivacky	tc_b->astate = astate;
1429226633Sdim	tc_b->count = count;
1430226633Sdim	IPFW_WUNLOCK(ch);
1431193326Sed
1432202379Srdivacky	/* Ensure tc.ti copies are in sync */
1433234353Sdim	tc_a->ti_copy = tablestate[tc_a->no.kidx];
1434234353Sdim	tc_b->ti_copy = tablestate[tc_b->no.kidx];
1435234353Sdim
1436234353Sdim	/* Notify both tables on @ti change */
1437234353Sdim	if (tc_a->ta->change_ti != NULL)
1438234353Sdim		tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
1439234353Sdim	if (tc_b->ta->change_ti != NULL)
1440234353Sdim		tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);
1441234353Sdim
1442234353Sdim	IPFW_UH_WUNLOCK(ch);
1443234353Sdim
1444234353Sdim	return (0);
1445234353Sdim}
1446234353Sdim
1447193326Sed/*
1448193326Sed * Destroys table specified by @ti.
1449234353Sdim * Data layout (v0)(current):
1450234353Sdim * Request: [ ip_fw3_opheader ]
1451234353Sdim *
1452198092Srdivacky * Returns 0 on success
1453224145Sdim */
1454193326Sedstatic int
1455193326Seddestroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
1456193326Sed{
1457193326Sed	struct namedobj_instance *ni;
1458193326Sed	struct table_config *tc;
1459193326Sed
1460193326Sed	IPFW_UH_WLOCK(ch);
1461198092Srdivacky
1462198092Srdivacky	ni = CHAIN_TO_NI(ch);
1463193326Sed	if ((tc = find_table(ni, ti)) == NULL) {
1464193326Sed		IPFW_UH_WUNLOCK(ch);
1465193326Sed		return (ESRCH);
1466193326Sed	}
1467198092Srdivacky
1468198092Srdivacky	/* Do not permit destroying referenced tables */
1469224145Sdim	if (tc->no.refcnt > 0) {
1470193326Sed		IPFW_UH_WUNLOCK(ch);
1471193326Sed		return (EBUSY);
1472193326Sed	}
1473193326Sed
1474193326Sed	IPFW_WLOCK(ch);
1475193326Sed	unlink_table(ch, tc);
1476198092Srdivacky	IPFW_WUNLOCK(ch);
1477193326Sed
1478234353Sdim	/* Free obj index */
1479193326Sed	if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
1480193326Sed		printf("Error unlinking kidx %d from table %s\n",
1481224145Sdim		    tc->no.kidx, tc->tablename);
1482193326Sed
1483218893Sdim	/* Unref values used in tables while holding UH lock */
1484193326Sed	ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
1485193326Sed	IPFW_UH_WUNLOCK(ch);
1486198092Srdivacky
1487193326Sed	free_table_config(ni, tc);
1488193326Sed
1489199990Srdivacky	return (0);
1490199990Srdivacky}
1491199990Srdivacky
/*
 * Rounds @v up to the nearest power of 2.
 *
 * Values that already are a power of 2 are returned unchanged.
 * The arithmetic wraps at 32 bits: 0 and values above 2^31 yield 0.
 */
static uint32_t
roundup2p(uint32_t v)
{
	unsigned int shift;

	/* Step back by one so exact powers of 2 map to themselves */
	v--;
	/* Smear the highest set bit into every lower bit position */
	for (shift = 1; shift < 32; shift <<= 1)
		v |= v >> shift;
	/* All-ones below the top bit plus one is the next power of 2 */
	v += 1;

	return (v);
}
1506193326Sed
1507193326Sed/*
1508193326Sed * Grow tables index.
1509193326Sed *
1510198092Srdivacky * Returns 0 on success.
1511193326Sed */
1512193326Sedint
1513226633Sdimipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
1514193326Sed{
1515193326Sed	unsigned int ntables_old, tbl;
1516193326Sed	struct namedobj_instance *ni;
1517193326Sed	void *new_idx, *old_tablestate, *tablestate;
1518193326Sed	struct table_info *ti;
1519193326Sed	struct table_config *tc;
1520226633Sdim	int i, new_blocks;
1521226633Sdim
1522193326Sed	/* Check new value for validity */
1523193326Sed	if (ntables == 0)
1524198092Srdivacky		return (EINVAL);
1525193326Sed	if (ntables > IPFW_TABLES_MAX)
1526193326Sed		ntables = IPFW_TABLES_MAX;
1527193326Sed	/* Alight to nearest power of 2 */
1528224145Sdim	ntables = (unsigned int)roundup2p(ntables);
1529193326Sed
1530193326Sed	/* Allocate new pointers */
1531198092Srdivacky	tablestate = malloc(ntables * sizeof(struct table_info),
1532198092Srdivacky	    M_IPFW, M_WAITOK | M_ZERO);
1533198092Srdivacky
1534198092Srdivacky	ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
1535193326Sed
1536198092Srdivacky	IPFW_UH_WLOCK(ch);
1537193326Sed
1538193326Sed	tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
1539193326Sed	ni = CHAIN_TO_NI(ch);
1540226633Sdim
1541193326Sed	/* Temporary restrict decreasing max_tables */
1542193326Sed	if (ntables < V_fw_tables_max) {
1543198092Srdivacky
1544198092Srdivacky		/*
1545198092Srdivacky		 * FIXME: Check if we really can shrink
1546198092Srdivacky		 */
1547193326Sed		IPFW_UH_WUNLOCK(ch);
1548193326Sed		return (EINVAL);
1549193326Sed	}
1550193326Sed
1551193326Sed	/* Copy table info/indices */
1552193326Sed	memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
1553234353Sdim	ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
1554234353Sdim
1555234353Sdim	IPFW_WLOCK(ch);
1556234353Sdim
1557234353Sdim	/* Change pointers */
1558226633Sdim	old_tablestate = ch->tablestate;
1559193326Sed	ch->tablestate = tablestate;
1560193326Sed	ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
1561193326Sed
1562226633Sdim	ntables_old = V_fw_tables_max;
1563226633Sdim	V_fw_tables_max = ntables;
1564226633Sdim
1565226633Sdim	IPFW_WUNLOCK(ch);
1566195341Sed
1567195341Sed	/* Notify all consumers that their @ti pointer has changed */
1568193326Sed	ti = (struct table_info *)ch->tablestate;
1569193326Sed	for (i = 0; i < tbl; i++, ti++) {
1570198092Srdivacky		if (ti->lookup == NULL)
1571198092Srdivacky			continue;
1572198092Srdivacky		tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
1573198092Srdivacky		if (tc == NULL || tc->ta->change_ti == NULL)
1574198092Srdivacky			continue;
1575198092Srdivacky
1576198092Srdivacky		tc->ta->change_ti(tc->astate, ti);
1577198092Srdivacky	}
1578198092Srdivacky
1579193326Sed	IPFW_UH_WUNLOCK(ch);
1580193326Sed
1581226633Sdim	/* Free old pointers */
1582226633Sdim	free(old_tablestate, M_IPFW);
1583226633Sdim	ipfw_objhash_bitmap_free(new_idx, new_blocks);
1584226633Sdim
1585198092Srdivacky	return (0);
1586195341Sed}
1587195341Sed
1588193326Sed/*
1589193326Sed * Switch between "set 0" and "rule's set" table binding,
1590198092Srdivacky * Check all ruleset bindings and permits changing
1591198092Srdivacky * IFF each binding has both rule AND table in default set (set 0).
1592198092Srdivacky *
1593198092Srdivacky * Returns 0 on success.
1594198092Srdivacky */
1595198092Srdivackyint
1596198092Srdivackyipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
1597198092Srdivacky{
1598198092Srdivacky	struct namedobj_instance *ni;
1599193326Sed	struct named_object *no;
1600193326Sed	struct ip_fw *rule;
1601193326Sed	ipfw_insn *cmd;
1602193326Sed	int cmdlen, i, l;
1603193326Sed	uint16_t kidx;
1604193326Sed	uint8_t type;
1605193326Sed
1606198092Srdivacky	IPFW_UH_WLOCK(ch);
1607198092Srdivacky
1608198092Srdivacky	if (V_fw_tables_sets == sets) {
1609198092Srdivacky		IPFW_UH_WUNLOCK(ch);
1610198092Srdivacky		return (0);
1611198092Srdivacky	}
1612198092Srdivacky
1613198092Srdivacky	ni = CHAIN_TO_NI(ch);
1614198092Srdivacky
1615198092Srdivacky	/*
1616198092Srdivacky	 * Scan all rules and examine tables opcodes.
1617198092Srdivacky	 */
1618224145Sdim	for (i = 0; i < ch->n_rules; i++) {
1619198092Srdivacky		rule = ch->map[i];
1620198092Srdivacky
1621198092Srdivacky		l = rule->cmd_len;
1622198092Srdivacky		cmd = rule->cmd;
1623198092Srdivacky		cmdlen = 0;
1624198092Srdivacky		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
1625198092Srdivacky			cmdlen = F_LEN(cmd);
1626198092Srdivacky
1627198092Srdivacky			if (classify_table_opcode(cmd, &kidx, &type) != 0)
1628198092Srdivacky				continue;
1629198092Srdivacky
1630198092Srdivacky			no = ipfw_objhash_lookup_kidx(ni, kidx);
1631198092Srdivacky
1632198092Srdivacky			/* Check if both table object and rule has the set 0 */
1633198092Srdivacky			if (no->set != 0 || rule->set != 0) {
1634198092Srdivacky				IPFW_UH_WUNLOCK(ch);
1635198092Srdivacky				return (EBUSY);
1636198092Srdivacky			}
1637198092Srdivacky
1638198092Srdivacky		}
1639198092Srdivacky	}
1640198092Srdivacky	V_fw_tables_sets = sets;
1641198092Srdivacky
1642198092Srdivacky	IPFW_UH_WUNLOCK(ch);
1643198092Srdivacky
1644198092Srdivacky	return (0);
1645198092Srdivacky}
1646198092Srdivacky
1647198092Srdivacky/*
1648198092Srdivacky * Lookup an IP @addr in table @tbl.
1649198092Srdivacky * Stores found value in @val.
1650198092Srdivacky *
1651198092Srdivacky * Returns 1 if @addr was found.
1652198092Srdivacky */
1653198092Srdivackyint
1654198092Srdivackyipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
1655198092Srdivacky    uint32_t *val)
1656198092Srdivacky{
1657198092Srdivacky	struct table_info *ti;
1658198092Srdivacky
1659198092Srdivacky	ti = KIDX_TO_TI(ch, tbl);
1660198092Srdivacky
1661198092Srdivacky	return (ti->lookup(ti, &addr, sizeof(in_addr_t), val));
1662198092Srdivacky}
1663198092Srdivacky
1664198092Srdivacky/*
1665198092Srdivacky * Lookup an arbtrary key @paddr of legth @plen in table @tbl.
1666198092Srdivacky * Stores found value in @val.
1667198092Srdivacky *
1668198092Srdivacky * Returns 1 if key was found.
1669198092Srdivacky */
1670224145Sdimint
1671198092Srdivackyipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
1672198092Srdivacky    void *paddr, uint32_t *val)
1673198092Srdivacky{
1674198092Srdivacky	struct table_info *ti;
1675198092Srdivacky
1676198092Srdivacky	ti = KIDX_TO_TI(ch, tbl);
1677198092Srdivacky
1678198092Srdivacky	return (ti->lookup(ti, paddr, plen, val));
1679198092Srdivacky}
1680198092Srdivacky
1681198092Srdivacky/*
1682198092Srdivacky * Info/List/dump support for tables.
1683224145Sdim *
1684198092Srdivacky */
1685198092Srdivacky
1686198092Srdivacky/*
1687198092Srdivacky * High-level 'get' cmds sysctl handlers
1688198092Srdivacky */
1689193326Sed
1690193326Sed/*
1691224145Sdim * Lists all tables currently available in kernel.
1692193326Sed * Data layout (v0)(current):
1693193326Sed * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
1694193326Sed * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
1695193326Sed *
1696198092Srdivacky * Returns 0 on success
1697198092Srdivacky */
1698198092Srdivackystatic int
1699198092Srdivackylist_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1700193326Sed    struct sockopt_data *sd)
1701198092Srdivacky{
1702193326Sed	struct _ipfw_obj_lheader *olh;
1703193326Sed	int error;
1704193326Sed
1705226633Sdim	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
1706193326Sed	if (olh == NULL)
1707193326Sed		return (EINVAL);
1708193326Sed	if (sd->valsize < olh->size)
1709193326Sed		return (EINVAL);
1710198092Srdivacky
1711198092Srdivacky	IPFW_UH_RLOCK(ch);
1712198092Srdivacky	error = export_tables(ch, olh, sd);
1713198092Srdivacky	IPFW_UH_RUNLOCK(ch);
1714198092Srdivacky
1715193326Sed	return (error);
1716193326Sed}
1717193326Sed
1718198092Srdivacky/*
1719198092Srdivacky * Store table info to buffer provided by @sd.
1720226633Sdim * Data layout (v0)(current):
1721226633Sdim * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
1722193326Sed * Reply: [ ipfw_obj_header ipfw_xtable_info ]
1723198092Srdivacky *
1724198092Srdivacky * Returns 0 on success.
1725198092Srdivacky */
1726198092Srdivackystatic int
1727198092Srdivackydescribe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1728198092Srdivacky    struct sockopt_data *sd)
1729198092Srdivacky{
1730198092Srdivacky	struct _ipfw_obj_header *oh;
1731198092Srdivacky	struct table_config *tc;
1732198092Srdivacky	struct tid_info ti;
1733198092Srdivacky	size_t sz;
1734198092Srdivacky
1735198092Srdivacky	sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
1736198092Srdivacky	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
1737224145Sdim	if (oh == NULL)
1738198092Srdivacky		return (EINVAL);
1739218893Sdim
1740198092Srdivacky	objheader_to_ti(oh, &ti);
1741198092Srdivacky
1742198092Srdivacky	IPFW_UH_RLOCK(ch);
1743198092Srdivacky	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
1744198092Srdivacky		IPFW_UH_RUNLOCK(ch);
1745198092Srdivacky		return (ESRCH);
1746198092Srdivacky	}
1747198092Srdivacky
1748198092Srdivacky	export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
1749198092Srdivacky	IPFW_UH_RUNLOCK(ch);
1750198092Srdivacky
1751198092Srdivacky	return (0);
1752198092Srdivacky}
1753198092Srdivacky
1754198092Srdivacky/*
1755198092Srdivacky * Modifies existing table.
1756198092Srdivacky * Data layout (v0)(current):
1757198092Srdivacky * Request: [ ipfw_obj_header ipfw_xtable_info ]
1758198092Srdivacky *
1759198092Srdivacky * Returns 0 on success
1760224145Sdim */
1761198092Srdivackystatic int
1762198092Srdivackymodify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1763198092Srdivacky    struct sockopt_data *sd)
1764226633Sdim{
1765226633Sdim	struct _ipfw_obj_header *oh;
1766226633Sdim	ipfw_xtable_info *i;
1767226633Sdim	char *tname;
1768226633Sdim	struct tid_info ti;
1769226633Sdim	struct namedobj_instance *ni;
1770226633Sdim	struct table_config *tc;
1771226633Sdim
1772226633Sdim	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1773226633Sdim		return (EINVAL);
1774226633Sdim
1775234353Sdim	oh = (struct _ipfw_obj_header *)sd->kbuf;
1776226633Sdim	i = (ipfw_xtable_info *)(oh + 1);
1777228379Sdim
1778228379Sdim	/*
1779228379Sdim	 * Verify user-supplied strings.
1780226633Sdim	 * Check for null-terminated/zero-length strings/
1781226633Sdim	 */
1782226633Sdim	tname = oh->ntlv.name;
1783226633Sdim	if (ipfw_check_table_name(tname) != 0)
1784228379Sdim		return (EINVAL);
1785226633Sdim
1786226633Sdim	objheader_to_ti(oh, &ti);
1787226633Sdim	ti.type = i->type;
1788226633Sdim
1789226633Sdim	IPFW_UH_WLOCK(ch);
1790226633Sdim	ni = CHAIN_TO_NI(ch);
1791226633Sdim	if ((tc = find_table(ni, &ti)) == NULL) {
1792226633Sdim		IPFW_UH_WUNLOCK(ch);
1793226633Sdim		return (ESRCH);
1794226633Sdim	}
1795226633Sdim
1796226633Sdim	/* Do not support any modifications for readonly tables */
1797226633Sdim	if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
1798226633Sdim		IPFW_UH_WUNLOCK(ch);
1799226633Sdim		return (EACCES);
1800226633Sdim	}
1801226633Sdim
1802226633Sdim	if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
1803226633Sdim		tc->limit = i->limit;
1804226633Sdim	if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
1805226633Sdim		tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
1806226633Sdim	IPFW_UH_WUNLOCK(ch);
1807193326Sed
1808193326Sed	return (0);
1809193326Sed}
1810193326Sed
1811226633Sdim/*
1812226633Sdim * Creates new table.
1813193326Sed * Data layout (v0)(current):
1814195341Sed * Request: [ ipfw_obj_header ipfw_xtable_info ]
1815193326Sed *
1816193326Sed * Returns 0 on success
1817193326Sed */
1818193326Sedstatic int
1819198092Srdivackycreate_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
1820193326Sed    struct sockopt_data *sd)
1821193326Sed{
1822193326Sed	struct _ipfw_obj_header *oh;
1823226633Sdim	ipfw_xtable_info *i;
1824226633Sdim	char *tname, *aname;
1825198092Srdivacky	struct tid_info ti;
1826195341Sed	struct namedobj_instance *ni;
1827193326Sed
1828193326Sed	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
1829193326Sed		return (EINVAL);
1830193326Sed
1831198092Srdivacky	oh = (struct _ipfw_obj_header *)sd->kbuf;
1832193326Sed	i = (ipfw_xtable_info *)(oh + 1);
1833193326Sed
1834193326Sed	/*
1835226633Sdim	 * Verify user-supplied strings.
1836205219Srdivacky	 * Check for null-terminated/zero-length strings/
1837205219Srdivacky	 */
1838193326Sed	tname = oh->ntlv.name;
1839193326Sed	aname = i->algoname;
1840193326Sed	if (ipfw_check_table_name(tname) != 0 ||
1841193326Sed	    strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname))
1842193326Sed		return (EINVAL);
1843193326Sed
1844193326Sed	if (aname[0] == '\0') {
1845198092Srdivacky		/* Use default algorithm */
1846193326Sed		aname = NULL;
1847193326Sed	}
1848193326Sed
1849193326Sed	objheader_to_ti(oh, &ti);
1850193326Sed	ti.type = i->type;
1851193326Sed
1852193326Sed	ni = CHAIN_TO_NI(ch);
1853193326Sed
1854193326Sed	IPFW_UH_RLOCK(ch);
1855193326Sed	if (find_table(ni, &ti) != NULL) {
1856193326Sed		IPFW_UH_RUNLOCK(ch);
1857224145Sdim		return (EEXIST);
1858198092Srdivacky	}
1859193326Sed	IPFW_UH_RUNLOCK(ch);
1860193326Sed
1861198092Srdivacky	return (create_table_internal(ch, &ti, aname, i, NULL, 0));
1862226633Sdim}
1863226633Sdim
1864198092Srdivacky/*
1865198092Srdivacky * Creates new table based on @ti and @aname.
1866198092Srdivacky *
1867198092Srdivacky * Relies on table name checking inside find_name_tlv()
1868224145Sdim * Assume @aname to be checked and valid.
1869198092Srdivacky * Stores allocated table kidx inside @pkidx (if non-NULL).
1870198092Srdivacky * Reference created table if @compat is non-zero.
1871198092Srdivacky *
1872198092Srdivacky * Returns 0 on success.
1873198092Srdivacky */
1874198092Srdivackystatic int
1875198092Srdivackycreate_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
1876198092Srdivacky    char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat)
1877198092Srdivacky{
1878198092Srdivacky	struct namedobj_instance *ni;
1879198092Srdivacky	struct table_config *tc, *tc_new, *tmp;
1880198092Srdivacky	struct table_algo *ta;
1881204643Srdivacky	uint16_t kidx;
1882204643Srdivacky
1883204643Srdivacky	ni = CHAIN_TO_NI(ch);
1884198092Srdivacky
1885198092Srdivacky	ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
1886198092Srdivacky	if (ta == NULL)
1887198092Srdivacky		return (ENOTSUP);
1888204643Srdivacky
1889198092Srdivacky	tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
1890198092Srdivacky	if (tc == NULL)
1891198092Srdivacky		return (ENOMEM);
1892198092Srdivacky
1893204643Srdivacky	tc->vmask = i->vmask;
1894204643Srdivacky	tc->limit = i->limit;
1895204643Srdivacky	if (ta->flags & TA_FLAG_READONLY)
1896204643Srdivacky		tc->locked = 1;
1897198092Srdivacky	else
1898198092Srdivacky		tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;
1899198092Srdivacky
1900198092Srdivacky	IPFW_UH_WLOCK(ch);
1901198092Srdivacky
1902198092Srdivacky	/* Check if table has been already created */
1903198092Srdivacky	tc_new = find_table(ni, ti);
1904198092Srdivacky	if (tc_new != NULL) {
1905198092Srdivacky
1906198092Srdivacky		/*
1907204643Srdivacky		 * Compat: do not fail if we're
1908204643Srdivacky		 * requesting to create existing table
1909204643Srdivacky		 * which has the same type
1910204643Srdivacky		 */
1911198092Srdivacky		if (compat == 0 || tc_new->no.type != tc->no.type) {
1912198092Srdivacky			IPFW_UH_WUNLOCK(ch);
1913198092Srdivacky			free_table_config(ni, tc);
1914198092Srdivacky			return (EEXIST);
1915198092Srdivacky		}
1916198092Srdivacky
1917198092Srdivacky		/* Exchange tc and tc_new for proper refcounting & freeing */
1918198092Srdivacky		tmp = tc;
1919198092Srdivacky		tc = tc_new;
1920198092Srdivacky		tc_new = tmp;
1921198092Srdivacky	} else {
1922198092Srdivacky		/* New table */
1923198092Srdivacky		if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
1924198092Srdivacky			IPFW_UH_WUNLOCK(ch);
1925198092Srdivacky			printf("Unable to allocate table index."
1926198092Srdivacky			    " Consider increasing net.inet.ip.fw.tables_max");
1927224145Sdim			free_table_config(ni, tc);
1928198092Srdivacky			return (EBUSY);
1929198092Srdivacky		}
1930198092Srdivacky		tc->no.kidx = kidx;
1931198092Srdivacky
1932198092Srdivacky		IPFW_WLOCK(ch);
1933234353Sdim		link_table(ch, tc);
1934234353Sdim		IPFW_WUNLOCK(ch);
1935234353Sdim	}
1936234353Sdim
1937234353Sdim	if (compat != 0)
1938234353Sdim		tc->no.refcnt++;
1939234353Sdim	if (pkidx != NULL)
1940234353Sdim		*pkidx = tc->no.kidx;
1941234353Sdim
1942193326Sed	IPFW_UH_WUNLOCK(ch);
1943193326Sed
1944193326Sed	if (tc_new != NULL)
1945193326Sed		free_table_config(ni, tc_new);
1946198092Srdivacky
1947193326Sed	return (0);
1948193326Sed}
1949194613Sed
1950193326Sedstatic void
1951194613Sedntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
1952194613Sed{
1953193326Sed
1954193326Sed	memset(ti, 0, sizeof(struct tid_info));
1955193326Sed	ti->set = ntlv->set;
1956193326Sed	ti->uidx = ntlv->idx;
1957193326Sed	ti->tlvs = ntlv;
1958194613Sed	ti->tlen = ntlv->head.length;
1959194613Sed}
1960195341Sed
1961198092Srdivackystatic void
1962195341Sedobjheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
1963195341Sed{
1964195341Sed
1965198092Srdivacky	ntlv_to_ti(&oh->ntlv, ti);
1966198092Srdivacky}
1967198092Srdivacky
1968195341Sed/*
1969198092Srdivacky * Exports basic table info as name TLV.
1970193326Sed * Used inside dump_static_rules() to provide info
1971218893Sdim * about all tables referenced by current ruleset.
1972218893Sdim *
1973193326Sed * Returns 0 on success.
1974193326Sed */
1975226633Sdimint
1976226633Sdimipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
1977226633Sdim    struct sockopt_data *sd)
1978198092Srdivacky{
1979198092Srdivacky	struct namedobj_instance *ni;
1980226633Sdim	struct named_object *no;
1981226633Sdim	ipfw_obj_ntlv *ntlv;
1982198092Srdivacky
1983198092Srdivacky	ni = CHAIN_TO_NI(ch);
1984218893Sdim
1985193326Sed	no = ipfw_objhash_lookup_kidx(ni, kidx);
1986193326Sed	KASSERT(no != NULL, ("invalid table kidx passed"));
1987234353Sdim
1988193326Sed	ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv));
1989193326Sed	if (ntlv == NULL)
1990207619Srdivacky		return (ENOMEM);
1991226633Sdim
1992226633Sdim	ntlv->head.type = IPFW_TLV_TBL_NAME;
1993193326Sed	ntlv->head.length = sizeof(*ntlv);
1994207619Srdivacky	ntlv->idx = no->kidx;
1995193326Sed	strlcpy(ntlv->name, no->name, sizeof(ntlv->name));
1996193326Sed
1997207619Srdivacky	return (0);
1998198092Srdivacky}
1999193326Sed
2000207619Srdivacky/*
2001199482Srdivacky * Marks every table kidx used in @rule with bit in @bmask.
2002234353Sdim * Used to generate bitmask of referenced tables for given ruleset.
2003198092Srdivacky *
2004193326Sed * Returns number of newly-referenced tables.
2005224145Sdim */
2006224145Sdimint
2007224145Sdimipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule,
2008224145Sdim    uint32_t *bmask)
2009224145Sdim{
2010224145Sdim	int cmdlen, l, count;
2011224145Sdim	ipfw_insn *cmd;
2012224145Sdim	uint16_t kidx;
2013224145Sdim	uint8_t type;
2014224145Sdim
2015224145Sdim	l = rule->cmd_len;
2016224145Sdim	cmd = rule->cmd;
2017224145Sdim	cmdlen = 0;
2018198092Srdivacky	count = 0;
2019224145Sdim	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
2020198092Srdivacky		cmdlen = F_LEN(cmd);
2021224145Sdim
2022224145Sdim		if (classify_table_opcode(cmd, &kidx, &type) != 0)
2023224145Sdim			continue;
2024226633Sdim
2025226633Sdim		if ((bmask[kidx / 32] & (1 << (kidx % 32))) == 0)
2026226633Sdim			count++;
2027226633Sdim
2028226633Sdim		bmask[kidx / 32] |= 1 << (kidx % 32);
2029226633Sdim	}
2030226633Sdim
2031226633Sdim	return (count);
2032226633Sdim}
2033226633Sdim
/*
 * State shared between the table dump/count/foreach entry points in
 * this file and the per-entry callbacks they hand to the algorithm's
 * foreach handler.  Callers zero the structure and fill in only the
 * fields their callback uses.
 */
struct dump_args {
	struct ip_fw_chain *ch;		/* chain, passed to get_table_value() */
	struct table_info *ti;		/* table info handed to algo callbacks */
	struct table_config *tc;	/* table being walked */
	struct sockopt_data *sd;	/* reply buffer for v0/v1 dumps */
	uint32_t cnt;			/* entries counted/emitted so far */
	uint16_t uidx;			/* copied into each emitted entry */
	int error;			/* ENOMEM set by dump_table_tentry() */
	uint32_t size;			/* legacy dump: stop when cnt reaches it */
	ipfw_table_entry *ent;		/* legacy dump: next output slot */
	ta_foreach_f *f;		/* consumer callback (foreach_table_tentry) */
	void *farg;			/* opaque argument passed to f */
	ipfw_obj_tentry tent;		/* scratch entry filled by dump_tentry */
};
2048198092Srdivacky
2049195341Sedstatic int
2050193326Sedcount_ext_entries(void *e, void *arg)
2051193326Sed{
2052193326Sed	struct dump_args *da;
2053193326Sed
2054198092Srdivacky	da = (struct dump_args *)arg;
2055193326Sed	da->cnt++;
2056193326Sed
2057198092Srdivacky	return (0);
2058198092Srdivacky}
2059198092Srdivacky
2060198092Srdivacky/*
2061193326Sed * Gets number of items from table either using
2062226633Sdim * internal counter or calling algo callback for
2063226633Sdim * externally-managed tables.
2064193326Sed *
2065195341Sed * Returns number of records.
2066193326Sed */
2067193326Sedstatic uint32_t
2068193326Sedtable_get_count(struct ip_fw_chain *ch, struct table_config *tc)
2069193326Sed{
2070198092Srdivacky	struct table_info *ti;
2071193326Sed	struct table_algo *ta;
2072193326Sed	struct dump_args da;
2073226633Sdim
2074234353Sdim	ti = KIDX_TO_TI(ch, tc->no.kidx);
2075234353Sdim	ta = tc->ta;
2076234353Sdim
2077193326Sed	/* Use internal counter for self-managed tables */
2078193326Sed	if ((ta->flags & TA_FLAG_READONLY) == 0)
2079193326Sed		return (tc->count);
2080193326Sed
2081193326Sed	/* Use callback to quickly get number of items */
2082193326Sed	if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
2083193326Sed		return (ta->get_count(tc->astate, ti));
2084193326Sed
2085193326Sed	/* Count number of iterms ourselves */
2086193326Sed	memset(&da, 0, sizeof(da));
2087193326Sed	ta->foreach(tc->astate, ti, count_ext_entries, &da);
2088193326Sed
2089226633Sdim	return (da.cnt);
2090193326Sed}
2091193326Sed
2092193326Sed/*
2093193326Sed * Exports table @tc info into standard ipfw_xtable_info format.
2094193326Sed */
2095193326Sedstatic void
2096193326Sedexport_table_info(struct ip_fw_chain *ch, struct table_config *tc,
2097198092Srdivacky    ipfw_xtable_info *i)
2098198092Srdivacky{
2099198092Srdivacky	struct table_info *ti;
2100198092Srdivacky	struct table_algo *ta;
2101198092Srdivacky
2102218893Sdim	i->type = tc->no.type;
2103198092Srdivacky	i->tflags = tc->tflags;
2104198092Srdivacky	i->vmask = tc->vmask;
2105198092Srdivacky	i->set = tc->no.set;
2106198092Srdivacky	i->kidx = tc->no.kidx;
2107226633Sdim	i->refcnt = tc->no.refcnt;
2108198092Srdivacky	i->count = table_get_count(ch, tc);
2109198092Srdivacky	i->limit = tc->limit;
2110198092Srdivacky	i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0;
2111198092Srdivacky	i->size = tc->count * sizeof(ipfw_obj_tentry);
2112226633Sdim	i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
2113226633Sdim	strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
2114226633Sdim	ti = KIDX_TO_TI(ch, tc->no.kidx);
2115198092Srdivacky	ta = tc->ta;
2116218893Sdim	if (ta->print_config != NULL) {
2117226633Sdim		/* Use algo function to print table config to string */
2118198092Srdivacky		ta->print_config(tc->astate, ti, i->algoname,
2119198092Srdivacky		    sizeof(i->algoname));
2120226633Sdim	} else
2121218893Sdim		strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2122198092Srdivacky	/* Dump algo-specific data, if possible */
2123198092Srdivacky	if (ta->dump_tinfo != NULL) {
2124234353Sdim		ta->dump_tinfo(tc->astate, ti, &i->ta_info);
2125234353Sdim		i->ta_info.flags |= IPFW_TATFLAGS_DATA;
2126234353Sdim	}
2127234353Sdim}
2128234353Sdim
/*
 * Argument bundle passed from export_tables() to the per-object
 * callback export_table_internal().
 */
struct dump_table_args {
	struct ip_fw_chain *ch;		/* chain, passed to export_table_info() */
	struct sockopt_data *sd;	/* buffer the info records are written to */
};
2133234353Sdim
2134198092Srdivackystatic void
2135226633Sdimexport_table_internal(struct namedobj_instance *ni, struct named_object *no,
2136198092Srdivacky    void *arg)
2137228379Sdim{
2138193326Sed	ipfw_xtable_info *i;
2139193326Sed	struct dump_table_args *dta;
2140199482Srdivacky
2141226633Sdim	dta = (struct dump_table_args *)arg;
2142228379Sdim
2143193326Sed	i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
2144193326Sed	KASSERT(i != 0, ("previously checked buffer is not enough"));
2145193326Sed
2146198092Srdivacky	export_table_info(dta->ch, (struct table_config *)no, i);
2147193326Sed}
2148193326Sed
2149198092Srdivacky/*
2150226633Sdim * Export all tables as ipfw_xtable_info structures to
2151193326Sed * storage provided by @sd.
2152193326Sed *
2153193326Sed * If supplied buffer is too small, fills in required size
2154218893Sdim * and returns ENOMEM.
2155193326Sed * Returns 0 on success.
2156218893Sdim */
2157193326Sedstatic int
2158193326Sedexport_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
2159193326Sed    struct sockopt_data *sd)
2160218893Sdim{
2161193326Sed	uint32_t size;
2162218893Sdim	uint32_t count;
2163193326Sed	struct dump_table_args dta;
2164193326Sed
2165193326Sed	count = ipfw_objhash_count(CHAIN_TO_NI(ch));
2166193326Sed	size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
2167193326Sed
2168193326Sed	/* Fill in header regadless of buffer size */
2169193326Sed	olh->count = count;
2170193326Sed	olh->objsize = sizeof(ipfw_xtable_info);
2171195099Sed
2172198092Srdivacky	if (size > olh->size) {
2173193326Sed		olh->size = size;
2174193326Sed		return (ENOMEM);
2175221345Sdim	}
2176193326Sed
2177193326Sed	olh->size = size;
2178198092Srdivacky
2179198092Srdivacky	dta.ch = ch;
2180198092Srdivacky	dta.sd = sd;
2181226633Sdim
2182193326Sed	ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta);
2183224145Sdim
2184193326Sed	return (0);
2185224145Sdim}
2186193326Sed
2187193326Sed/*
2188193326Sed * Dumps all table data
2189193326Sed * Data layout (v1)(current):
2190193326Sed * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
2191193326Sed * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
2192193326Sed *
2193193326Sed * Returns 0 on success
2194193326Sed */
2195193326Sedstatic int
2196198092Srdivackydump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2197234353Sdim    struct sockopt_data *sd)
2198221345Sdim{
2199199990Srdivacky	struct _ipfw_obj_header *oh;
2200221345Sdim	ipfw_xtable_info *i;
2201198092Srdivacky	struct tid_info ti;
2202198092Srdivacky	struct table_config *tc;
2203193326Sed	struct table_algo *ta;
2204193326Sed	struct dump_args da;
2205198092Srdivacky	uint32_t sz;
2206224145Sdim
2207198092Srdivacky	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
2208198092Srdivacky	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
2209193326Sed	if (oh == NULL)
2210193326Sed		return (EINVAL);
2211193326Sed
2212193326Sed	i = (ipfw_xtable_info *)(oh + 1);
2213193326Sed	objheader_to_ti(oh, &ti);
2214193326Sed
2215193326Sed	IPFW_UH_RLOCK(ch);
2216193326Sed	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
2217193326Sed		IPFW_UH_RUNLOCK(ch);
2218193326Sed		return (ESRCH);
2219193326Sed	}
2220193326Sed	export_table_info(ch, tc, i);
2221224145Sdim
2222198092Srdivacky	if (sd->valsize < i->size) {
2223198092Srdivacky
2224193326Sed		/*
2225193326Sed		 * Submitted buffer size is not enough.
2226193326Sed		 * WE've already filled in @i structure with
2227193326Sed		 * relevant table info including size, so we
2228193326Sed		 * can return. Buffer will be flushed automatically.
2229221345Sdim		 */
2230221345Sdim		IPFW_UH_RUNLOCK(ch);
2231221345Sdim		return (ENOMEM);
2232221345Sdim	}
2233221345Sdim
2234221345Sdim	/*
2235221345Sdim	 * Do the actual dump in eXtended format
2236226633Sdim	 */
2237226633Sdim	memset(&da, 0, sizeof(da));
2238221345Sdim	da.ch = ch;
2239221345Sdim	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2240221345Sdim	da.tc = tc;
2241221345Sdim	da.sd = sd;
2242221345Sdim
2243221345Sdim	ta = tc->ta;
2244221345Sdim
2245221345Sdim	ta->foreach(tc->astate, da.ti, dump_table_tentry, &da);
2246221345Sdim	IPFW_UH_RUNLOCK(ch);
2247221345Sdim
2248221345Sdim	return (da.error);
2249221345Sdim}
2250221345Sdim
2251221345Sdim/*
2252193326Sed * Dumps all table data
2253221345Sdim * Data layout (version 0)(legacy):
2254221345Sdim * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE()
2255221345Sdim * Reply: [ ipfw_xtable ipfw_table_xentry x N ]
2256221345Sdim *
2257221345Sdim * Returns 0 on success
2258193326Sed */
2259193326Sedstatic int
2260193326Seddump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2261193326Sed    struct sockopt_data *sd)
2262221345Sdim{
2263193326Sed	ipfw_xtable *xtbl;
2264221345Sdim	struct tid_info ti;
2265221345Sdim	struct table_config *tc;
2266193326Sed	struct table_algo *ta;
2267198092Srdivacky	struct dump_args da;
2268193326Sed	size_t sz, count;
2269193326Sed
2270193326Sed	xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable));
2271221345Sdim	if (xtbl == NULL)
2272221345Sdim		return (EINVAL);
2273193326Sed
2274226633Sdim	memset(&ti, 0, sizeof(ti));
2275221345Sdim	ti.uidx = xtbl->tbl;
2276221345Sdim
2277226633Sdim	IPFW_UH_RLOCK(ch);
2278193326Sed	if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) {
2279193326Sed		IPFW_UH_RUNLOCK(ch);
2280221345Sdim		return (0);
2281221345Sdim	}
2282221345Sdim	count = table_get_count(ch, tc);
2283193326Sed	sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable);
2284221345Sdim
2285193326Sed	xtbl->cnt = count;
2286198092Srdivacky	xtbl->size = sz;
2287193326Sed	xtbl->type = tc->no.type;
2288193326Sed	xtbl->tbl = ti.uidx;
2289198092Srdivacky
2290193326Sed	if (sd->valsize < sz) {
2291193326Sed
2292193326Sed		/*
2293193326Sed		 * Submitted buffer size is not enough.
2294193326Sed		 * WE've already filled in @i structure with
2295193326Sed		 * relevant table info including size, so we
2296193326Sed		 * can return. Buffer will be flushed automatically.
2297193326Sed		 */
2298198092Srdivacky		IPFW_UH_RUNLOCK(ch);
2299193326Sed		return (ENOMEM);
2300193326Sed	}
2301193326Sed
2302193326Sed	/* Do the actual dump in eXtended format */
2303224145Sdim	memset(&da, 0, sizeof(da));
2304223017Sdim	da.ch = ch;
2305224145Sdim	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2306193326Sed	da.tc = tc;
2307221345Sdim	da.sd = sd;
2308221345Sdim
2309193326Sed	ta = tc->ta;
2310203955Srdivacky
2311221345Sdim	ta->foreach(tc->astate, da.ti, dump_table_xentry, &da);
2312221345Sdim	IPFW_UH_RUNLOCK(ch);
2313221345Sdim
2314221345Sdim	return (0);
2315221345Sdim}
2316221345Sdim
2317221345Sdim/*
2318221345Sdim * Legacy function to retrieve number of items in table.
2319221345Sdim */
2320221345Sdimstatic int
2321221345Sdimget_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2322223017Sdim    struct sockopt_data *sd)
2323221345Sdim{
2324224145Sdim	uint32_t *tbl;
2325234353Sdim	struct tid_info ti;
2326224145Sdim	size_t sz;
2327223017Sdim	int error;
2328223017Sdim
2329224145Sdim	sz = sizeof(*op3) + sizeof(uint32_t);
2330234353Sdim	op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz);
2331224145Sdim	if (op3 == NULL)
2332224145Sdim		return (EINVAL);
2333224145Sdim
2334224145Sdim	tbl = (uint32_t *)(op3 + 1);
2335224145Sdim	memset(&ti, 0, sizeof(ti));
2336224145Sdim	ti.uidx = *tbl;
2337224145Sdim	IPFW_UH_RLOCK(ch);
2338224145Sdim	error = ipfw_count_xtable(ch, &ti, tbl);
2339223017Sdim	IPFW_UH_RUNLOCK(ch);
2340193326Sed	return (error);
2341193326Sed}
2342193326Sed
2343193326Sed/*
2344193326Sed * Legacy IP_FW_TABLE_GETSIZE handler
2345198092Srdivacky */
2346193326Sedint
2347193326Sedipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
2348198092Srdivacky{
2349198092Srdivacky	struct table_config *tc;
2350198092Srdivacky
2351193326Sed	if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
2352193326Sed		return (ESRCH);
2353223017Sdim	*cnt = table_get_count(ch, tc);
2354226633Sdim	return (0);
2355226633Sdim}
2356223017Sdim
2357193326Sed/*
2358234353Sdim * Legacy IP_FW_TABLE_XGETSIZE handler
2359234353Sdim */
2360234353Sdimint
2361234353Sdimipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
2362234353Sdim{
2363234353Sdim	struct table_config *tc;
2364234353Sdim	uint32_t count;
2365234353Sdim
2366234353Sdim	if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) {
2367234353Sdim		*cnt = 0;
2368234353Sdim		return (0); /* 'table all list' requires success */
2369234353Sdim	}
2370234353Sdim
2371234353Sdim	count = table_get_count(ch, tc);
2372234353Sdim	*cnt = count * sizeof(ipfw_table_xentry);
2373234353Sdim	if (count > 0)
2374234353Sdim		*cnt += sizeof(ipfw_xtable);
2375234353Sdim	return (0);
2376234353Sdim}
2377234353Sdim
2378234353Sdimstatic int
2379234353Sdimdump_table_entry(void *e, void *arg)
2380234353Sdim{
2381234353Sdim	struct dump_args *da;
2382234353Sdim	struct table_config *tc;
2383234353Sdim	struct table_algo *ta;
2384234353Sdim	ipfw_table_entry *ent;
2385234353Sdim	struct table_value *pval;
2386234353Sdim	int error;
2387234353Sdim
2388234353Sdim	da = (struct dump_args *)arg;
2389234353Sdim
2390234353Sdim	tc = da->tc;
2391234353Sdim	ta = tc->ta;
2392234353Sdim
2393234353Sdim	/* Out of memory, returning */
2394234353Sdim	if (da->cnt == da->size)
2395234353Sdim		return (1);
2396234353Sdim	ent = da->ent++;
2397234353Sdim	ent->tbl = da->uidx;
2398193326Sed	da->cnt++;
2399193326Sed
2400193326Sed	error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
2401193326Sed	if (error != 0)
2402193326Sed		return (error);
2403193326Sed
2404198092Srdivacky	ent->addr = da->tent.k.addr.s_addr;
2405198092Srdivacky	ent->masklen = da->tent.masklen;
2406193326Sed	pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
2407226633Sdim	ent->value = ipfw_export_table_value_legacy(pval);
2408226633Sdim
2409221345Sdim	return (0);
2410193326Sed}
2411193326Sed
2412193326Sed/*
2413226633Sdim * Dumps table in pre-8.1 legacy format.
2414234353Sdim */
2415193326Sedint
2416193326Sedipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
2417193326Sed    ipfw_table *tbl)
2418198092Srdivacky{
2419198092Srdivacky	struct table_config *tc;
2420198092Srdivacky	struct table_algo *ta;
2421198092Srdivacky	struct dump_args da;
2422198092Srdivacky
2423193326Sed	tbl->cnt = 0;
2424193326Sed
2425193326Sed	if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL)
2426223017Sdim		return (0);	/* XXX: We should return ESRCH */
2427221345Sdim
2428193326Sed	ta = tc->ta;
2429193326Sed
2430223017Sdim	/* This dump format supports IPv4 only */
2431221345Sdim	if (tc->no.type != IPFW_TABLE_ADDR)
2432193326Sed		return (0);
2433193326Sed
2434234353Sdim	memset(&da, 0, sizeof(da));
2435234353Sdim	da.ch = ch;
2436234353Sdim	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2437234353Sdim	da.tc = tc;
2438234353Sdim	da.ent = &tbl->ent[0];
2439223017Sdim	da.size = tbl->size;
2440221345Sdim
2441207619Srdivacky	tbl->cnt = 0;
2442223017Sdim	ta->foreach(tc->astate, da.ti, dump_table_entry, &da);
2443221345Sdim	tbl->cnt = da.cnt;
2444218893Sdim
2445234353Sdim	return (0);
2446234353Sdim}
2447234353Sdim
2448207619Srdivacky/*
2449198092Srdivacky * Dumps table entry in eXtended format (v1)(current).
2450221345Sdim */
2451193326Sedstatic int
2452193326Seddump_table_tentry(void *e, void *arg)
2453221345Sdim{
2454210299Sed	struct dump_args *da;
2455221345Sdim	struct table_config *tc;
2456210299Sed	struct table_algo *ta;
2457193326Sed	struct table_value *pval;
2458193326Sed	ipfw_obj_tentry *tent;
2459221345Sdim	int error;
2460210299Sed
2461210299Sed	da = (struct dump_args *)arg;
2462210299Sed
2463210299Sed	tc = da->tc;
2464210299Sed	ta = tc->ta;
2465210299Sed
2466210299Sed	tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent));
2467210299Sed	/* Out of memory, returning */
2468210299Sed	if (tent == NULL) {
2469221345Sdim		da->error = ENOMEM;
2470221345Sdim		return (1);
2471221345Sdim	}
2472221345Sdim	tent->head.length = sizeof(ipfw_obj_tentry);
2473221345Sdim	tent->idx = da->uidx;
2474193326Sed
2475193326Sed	error = ta->dump_tentry(tc->astate, da->ti, e, tent);
2476221345Sdim	if (error != 0)
2477193326Sed		return (error);
2478193326Sed
2479193326Sed	pval = get_table_value(da->ch, da->tc, tent->v.kidx);
2480193326Sed	ipfw_export_table_value_v1(pval, &tent->v.value);
2481226633Sdim
2482193326Sed	return (0);
2483193326Sed}
2484198092Srdivacky
2485193326Sed/*
2486193326Sed * Dumps table entry in eXtended format (v0).
2487193326Sed */
2488226633Sdimstatic int
2489198092Srdivackydump_table_xentry(void *e, void *arg)
2490193326Sed{
2491193326Sed	struct dump_args *da;
2492193326Sed	struct table_config *tc;
2493193326Sed	struct table_algo *ta;
2494193326Sed	ipfw_table_xentry *xent;
2495193326Sed	ipfw_obj_tentry *tent;
2496193326Sed	struct table_value *pval;
2497193326Sed	int error;
2498193326Sed
2499193326Sed	da = (struct dump_args *)arg;
2500193326Sed
2501221345Sdim	tc = da->tc;
2502193326Sed	ta = tc->ta;
2503193326Sed
2504224145Sdim	xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent));
2505224145Sdim	/* Out of memory, returning */
2506193326Sed	if (xent == NULL)
2507193326Sed		return (1);
2508193326Sed	xent->len = sizeof(ipfw_table_xentry);
2509193326Sed	xent->tbl = da->uidx;
2510193326Sed
2511221345Sdim	memset(&da->tent, 0, sizeof(da->tent));
2512198092Srdivacky	tent = &da->tent;
2513203955Srdivacky	error = ta->dump_tentry(tc->astate, da->ti, e, tent);
2514223017Sdim	if (error != 0)
2515203955Srdivacky		return (error);
2516193326Sed
2517193326Sed	/* Convert current format to previous one */
2518221345Sdim	xent->masklen = tent->masklen;
2519198092Srdivacky	pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
2520203955Srdivacky	xent->value = ipfw_export_table_value_legacy(pval);
2521203955Srdivacky	/* Apply some hacks */
2522203955Srdivacky	if (tc->no.type == IPFW_TABLE_ADDR && tent->subtype == AF_INET) {
2523203955Srdivacky		xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr;
2524193326Sed		xent->flags = IPFW_TCF_INET;
2525193326Sed	} else
2526221345Sdim		memcpy(&xent->k, &tent->k, sizeof(xent->k));
2527212904Sdim
2528212904Sdim	return (0);
2529203955Srdivacky}
2530203955Srdivacky
2531203955Srdivacky/*
2532212904Sdim * Helper function to export table algo data
2533212904Sdim * to tentry format before calling user function.
2534212904Sdim *
2535212904Sdim * Returns 0 on success.
2536226633Sdim */
2537193326Sedstatic int
2538193326Sedprepare_table_tentry(void *e, void *arg)
2539221345Sdim{
2540198092Srdivacky	struct dump_args *da;
2541198092Srdivacky	struct table_config *tc;
2542203955Srdivacky	struct table_algo *ta;
2543203955Srdivacky	int error;
2544223017Sdim
2545203955Srdivacky	da = (struct dump_args *)arg;
2546193326Sed
2547193326Sed	tc = da->tc;
2548221345Sdim	ta = tc->ta;
2549198092Srdivacky
2550203955Srdivacky	error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
2551203955Srdivacky	if (error != 0)
2552203955Srdivacky		return (error);
2553203955Srdivacky
2554193326Sed	da->f(&da->tent, da->farg);
2555193326Sed
2556221345Sdim	return (0);
2557198092Srdivacky}
2558198092Srdivacky
2559210299Sed/*
2560203955Srdivacky * Allow external consumers to read table entries in standard format.
2561223017Sdim */
2562223017Sdimint
2563203955Srdivackyipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
2564210299Sed    ta_foreach_f *f, void *arg)
2565198092Srdivacky{
2566198092Srdivacky	struct namedobj_instance *ni;
2567193326Sed	struct table_config *tc;
2568193326Sed	struct table_algo *ta;
2569193326Sed	struct dump_args da;
2570193326Sed
2571193326Sed	ni = CHAIN_TO_NI(ch);
2572193326Sed
2573193326Sed	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
2574193326Sed	if (tc == NULL)
2575198092Srdivacky		return (ESRCH);
2576198092Srdivacky
2577218893Sdim	ta = tc->ta;
2578218893Sdim
2579218893Sdim	memset(&da, 0, sizeof(da));
2580218893Sdim	da.ch = ch;
2581218893Sdim	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
2582218893Sdim	da.tc = tc;
2583218893Sdim	da.f = f;
2584218893Sdim	da.farg = arg;
2585218893Sdim
2586218893Sdim	ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da);
2587218893Sdim
2588207619Srdivacky	return (0);
2589207619Srdivacky}
2590207619Srdivacky
2591226633Sdim/*
2592226633Sdim * Table algorithms
2593198092Srdivacky */
2594198092Srdivacky
2595198092Srdivacky/*
2596198092Srdivacky * Finds algoritm by index, table type or supplied name.
2597234353Sdim *
2598198092Srdivacky * Returns pointer to algo or NULL.
2599226633Sdim */
2600198092Srdivackystatic struct table_algo *
2601198092Srdivackyfind_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name)
2602198092Srdivacky{
2603198092Srdivacky	int i, l;
2604198092Srdivacky	struct table_algo *ta;
2605198092Srdivacky
2606198092Srdivacky	if (ti->type > IPFW_TABLE_MAXTYPE)
2607198092Srdivacky		return (NULL);
2608198092Srdivacky
2609193326Sed	/* Search by index */
2610198092Srdivacky	if (ti->atype != 0) {
2611193326Sed		if (ti->atype > tcfg->algo_count)
2612193326Sed			return (NULL);
2613221345Sdim		return (tcfg->algo[ti->atype]);
2614193326Sed	}
2615193326Sed
2616193326Sed	if (name == NULL) {
2617193326Sed		/* Return default algorithm for given type if set */
2618208600Srdivacky		return (tcfg->def_algo[ti->type]);
2619208600Srdivacky	}
2620193326Sed
2621193326Sed	/* Search by name */
2622193326Sed	/* TODO: better search */
2623198092Srdivacky	for (i = 1; i <= tcfg->algo_count; i++) {
2624193326Sed		ta = tcfg->algo[i];
2625193326Sed
2626193326Sed		/*
2627226633Sdim		 * One can supply additional algorithm
2628226633Sdim		 * parameters so we compare only the first word
2629226633Sdim		 * of supplied name:
2630193326Sed		 * 'addr:chash hsize=32'
2631193326Sed		 * '^^^^^^^^^'
2632198092Srdivacky		 *
2633193326Sed		 */
2634193326Sed		l = strlen(ta->name);
2635193326Sed		if (strncmp(name, ta->name, l) != 0)
2636198092Srdivacky			continue;
2637193326Sed		if (name[l] != '\0' && name[l] != ' ')
2638193326Sed			continue;
2639193326Sed		/* Check if we're requesting proper table type */
2640221345Sdim		if (ti->type != 0 && ti->type != ta->type)
2641193326Sed			return (NULL);
2642193326Sed		return (ta);
2643234353Sdim	}
2644193326Sed
2645224145Sdim	return (NULL);
2646224145Sdim}
2647224145Sdim
2648224145Sdim/*
2649224145Sdim * Register new table algo @ta.
2650224145Sdim * Stores algo id inside @idx.
2651224145Sdim *
2652224145Sdim * Returns 0 on success.
2653224145Sdim */
2654224145Sdimint
2655226633Sdimipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size,
2656226633Sdim    int *idx)
2657234353Sdim{
2658234353Sdim	struct tables_config *tcfg;
2659234353Sdim	struct table_algo *ta_new;
2660234353Sdim	size_t sz;
2661193326Sed
2662193326Sed	if (size > sizeof(struct table_algo))
2663226633Sdim		return (EINVAL);
2664193326Sed
2665193326Sed	/* Check for the required on-stack size for add/del */
2666221345Sdim	sz = roundup2(ta->ta_buf_size, sizeof(void *));
2667221345Sdim	if (sz > TA_BUF_SZ)
2668234353Sdim		return (EINVAL);
2669221345Sdim
2670221345Sdim	KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE"));
2671193326Sed
2672	/* Copy algorithm data to stable storage. */
2673	ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO);
2674	memcpy(ta_new, ta, size);
2675
2676	tcfg = CHAIN_TO_TCFG(ch);
2677
2678	KASSERT(tcfg->algo_count < 255, ("Increase algo array size"));
2679
2680	tcfg->algo[++tcfg->algo_count] = ta_new;
2681	ta_new->idx = tcfg->algo_count;
2682
2683	/* Set algorithm as default one for given type */
2684	if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 &&
2685	    tcfg->def_algo[ta_new->type] == NULL)
2686		tcfg->def_algo[ta_new->type] = ta_new;
2687
2688	*idx = ta_new->idx;
2689
2690	return (0);
2691}
2692
2693/*
2694 * Unregisters table algo using @idx as id.
2695 * XXX: It is NOT safe to call this function in any place
2696 * other than ipfw instance destroy handler.
2697 */
2698void
2699ipfw_del_table_algo(struct ip_fw_chain *ch, int idx)
2700{
2701	struct tables_config *tcfg;
2702	struct table_algo *ta;
2703
2704	tcfg = CHAIN_TO_TCFG(ch);
2705
2706	KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d",
2707	    idx, tcfg->algo_count));
2708
2709	ta = tcfg->algo[idx];
2710	KASSERT(ta != NULL, ("algo idx %d is NULL", idx));
2711
2712	if (tcfg->def_algo[ta->type] == ta)
2713		tcfg->def_algo[ta->type] = NULL;
2714
2715	free(ta, M_IPFW);
2716}
2717
2718/*
2719 * Lists all table algorithms currently available.
2720 * Data layout (v0)(current):
2721 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
2722 * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
2723 *
2724 * Returns 0 on success
2725 */
2726static int
2727list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
2728    struct sockopt_data *sd)
2729{
2730	struct _ipfw_obj_lheader *olh;
2731	struct tables_config *tcfg;
2732	ipfw_ta_info *i;
2733	struct table_algo *ta;
2734	uint32_t count, n, size;
2735
2736	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
2737	if (olh == NULL)
2738		return (EINVAL);
2739	if (sd->valsize < olh->size)
2740		return (EINVAL);
2741
2742	IPFW_UH_RLOCK(ch);
2743	tcfg = CHAIN_TO_TCFG(ch);
2744	count = tcfg->algo_count;
2745	size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
2746
2747	/* Fill in header regadless of buffer size */
2748	olh->count = count;
2749	olh->objsize = sizeof(ipfw_ta_info);
2750
2751	if (size > olh->size) {
2752		olh->size = size;
2753		IPFW_UH_RUNLOCK(ch);
2754		return (ENOMEM);
2755	}
2756	olh->size = size;
2757
2758	for (n = 1; n <= count; n++) {
2759		i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i));
2760		KASSERT(i != 0, ("previously checked buffer is not enough"));
2761		ta = tcfg->algo[n];
2762		strlcpy(i->algoname, ta->name, sizeof(i->algoname));
2763		i->type = ta->type;
2764		i->refcnt = ta->refcnt;
2765	}
2766
2767	IPFW_UH_RUNLOCK(ch);
2768
2769	return (0);
2770}
2771
2772/*
2773 * Tables rewriting code
2774 */
2775
2776/*
2777 * Determine table number and lookup type for @cmd.
2778 * Fill @tbl and @type with appropriate values.
2779 * Returns 0 for relevant opcodes, 1 otherwise.
2780 */
2781static int
2782classify_table_opcode(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
2783{
2784	ipfw_insn_if *cmdif;
2785	int skip;
2786	uint16_t v;
2787
2788	skip = 1;
2789
2790	switch (cmd->opcode) {
2791	case O_IP_SRC_LOOKUP:
2792	case O_IP_DST_LOOKUP:
2793		/* Basic IPv4/IPv6 or u32 lookups */
2794		*puidx = cmd->arg1;
2795		/* Assume ADDR by default */
2796		*ptype = IPFW_TABLE_ADDR;
2797		skip = 0;
2798
2799		if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) {
2800			/*
2801			 * generic lookup. The key must be
2802			 * in 32bit big-endian format.
2803			 */
2804			v = ((ipfw_insn_u32 *)cmd)->d[1];
2805			switch (v) {
2806			case 0:
2807			case 1:
2808				/* IPv4 src/dst */
2809				break;
2810			case 2:
2811			case 3:
2812				/* src/dst port */
2813				*ptype = IPFW_TABLE_NUMBER;
2814				break;
2815			case 4:
2816				/* uid/gid */
2817				*ptype = IPFW_TABLE_NUMBER;
2818				break;
2819			case 5:
2820				/* jid */
2821				*ptype = IPFW_TABLE_NUMBER;
2822				break;
2823			case 6:
2824				/* dscp */
2825				*ptype = IPFW_TABLE_NUMBER;
2826				break;
2827			}
2828		}
2829		break;
2830	case O_XMIT:
2831	case O_RECV:
2832	case O_VIA:
2833		/* Interface table, possibly */
2834		cmdif = (ipfw_insn_if *)cmd;
2835		if (cmdif->name[0] != '\1')
2836			break;
2837
2838		*ptype = IPFW_TABLE_INTERFACE;
2839		*puidx = cmdif->p.kidx;
2840		skip = 0;
2841		break;
2842	case O_IP_FLOW_LOOKUP:
2843		*puidx = cmd->arg1;
2844		*ptype = IPFW_TABLE_FLOW;
2845		skip = 0;
2846		break;
2847	}
2848
2849	return (skip);
2850}
2851
2852/*
2853 * Sets new table value for given opcode.
2854 * Assume the same opcodes as classify_table_opcode()
2855 */
2856static void
2857update_table_opcode(ipfw_insn *cmd, uint16_t idx)
2858{
2859	ipfw_insn_if *cmdif;
2860
2861	switch (cmd->opcode) {
2862	case O_IP_SRC_LOOKUP:
2863	case O_IP_DST_LOOKUP:
2864		/* Basic IPv4/IPv6 or u32 lookups */
2865		cmd->arg1 = idx;
2866		break;
2867	case O_XMIT:
2868	case O_RECV:
2869	case O_VIA:
2870		/* Interface table, possibly */
2871		cmdif = (ipfw_insn_if *)cmd;
2872		cmdif->p.kidx = idx;
2873		break;
2874	case O_IP_FLOW_LOOKUP:
2875		cmd->arg1 = idx;
2876		break;
2877	}
2878}
2879
2880/*
2881 * Checks table name for validity.
2882 * Enforce basic length checks, the rest
2883 * should be done in userland.
2884 *
2885 * Returns 0 if name is considered valid.
2886 */
2887int
2888ipfw_check_table_name(char *name)
2889{
2890	int nsize;
2891	ipfw_obj_ntlv *ntlv = NULL;
2892
2893	nsize = sizeof(ntlv->name);
2894
2895	if (strnlen(name, nsize) == nsize)
2896		return (EINVAL);
2897
2898	if (name[0] == '\0')
2899		return (EINVAL);
2900
2901	/*
2902	 * TODO: do some more complicated checks
2903	 */
2904
2905	return (0);
2906}
2907
2908/*
2909 * Find tablename TLV by @uid.
2910 * Check @tlvs for valid data inside.
2911 *
2912 * Returns pointer to found TLV or NULL.
2913 */
2914static ipfw_obj_ntlv *
2915find_name_tlv(void *tlvs, int len, uint16_t uidx)
2916{
2917	ipfw_obj_ntlv *ntlv;
2918	uintptr_t pa, pe;
2919	int l;
2920
2921	pa = (uintptr_t)tlvs;
2922	pe = pa + len;
2923	l = 0;
2924	for (; pa < pe; pa += l) {
2925		ntlv = (ipfw_obj_ntlv *)pa;
2926		l = ntlv->head.length;
2927
2928		if (l != sizeof(*ntlv))
2929			return (NULL);
2930
2931		if (ntlv->head.type != IPFW_TLV_TBL_NAME)
2932			continue;
2933
2934		if (ntlv->idx != uidx)
2935			continue;
2936
2937		if (ipfw_check_table_name(ntlv->name) != 0)
2938			return (NULL);
2939
2940		return (ntlv);
2941	}
2942
2943	return (NULL);
2944}
2945
2946/*
2947 * Finds table config based on either legacy index
2948 * or name in ntlv.
2949 * Note @ti structure contains unchecked data from userland.
2950 *
2951 * Returns pointer to table_config or NULL.
2952 */
2953static struct table_config *
2954find_table(struct namedobj_instance *ni, struct tid_info *ti)
2955{
2956	char *name, bname[16];
2957	struct named_object *no;
2958	ipfw_obj_ntlv *ntlv;
2959	uint32_t set;
2960
2961	if (ti->tlvs != NULL) {
2962		ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx);
2963		if (ntlv == NULL)
2964			return (NULL);
2965		name = ntlv->name;
2966
2967		/*
2968		 * Use set provided by @ti instead of @ntlv one.
2969		 * This is needed due to different sets behavior
2970		 * controlled by V_fw_tables_sets.
2971		 */
2972		set = ti->set;
2973	} else {
2974		snprintf(bname, sizeof(bname), "%d", ti->uidx);
2975		name = bname;
2976		set = 0;
2977	}
2978
2979	no = ipfw_objhash_lookup_name(ni, set, name);
2980
2981	return ((struct table_config *)no);
2982}
2983
2984/*
2985 * Allocate new table config structure using
2986 * specified @algo and @aname.
2987 *
2988 * Returns pointer to config or NULL.
2989 */
2990static struct table_config *
2991alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
2992    struct table_algo *ta, char *aname, uint8_t tflags)
2993{
2994	char *name, bname[16];
2995	struct table_config *tc;
2996	int error;
2997	ipfw_obj_ntlv *ntlv;
2998	uint32_t set;
2999
3000	if (ti->tlvs != NULL) {
3001		ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx);
3002		if (ntlv == NULL)
3003			return (NULL);
3004		name = ntlv->name;
3005		set = ntlv->set;
3006	} else {
3007		snprintf(bname, sizeof(bname), "%d", ti->uidx);
3008		name = bname;
3009		set = 0;
3010	}
3011
3012	tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
3013	tc->no.name = tc->tablename;
3014	tc->no.type = ta->type;
3015	tc->no.set = set;
3016	tc->tflags = tflags;
3017	tc->ta = ta;
3018	strlcpy(tc->tablename, name, sizeof(tc->tablename));
3019	/* Set "shared" value type by default */
3020	tc->vshared = 1;
3021
3022	if (ti->tlvs == NULL) {
3023		tc->no.compat = 1;
3024		tc->no.uidx = ti->uidx;
3025	}
3026
3027	/* Preallocate data structures for new tables */
3028	error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags);
3029	if (error != 0) {
3030		free(tc, M_IPFW);
3031		return (NULL);
3032	}
3033
3034	return (tc);
3035}
3036
3037/*
3038 * Destroys table state and config.
3039 */
3040static void
3041free_table_config(struct namedobj_instance *ni, struct table_config *tc)
3042{
3043
3044	KASSERT(tc->linked == 0, ("free() on linked config"));
3045
3046	/*
3047	 * We're using ta without any locking/referencing.
3048	 * TODO: fix this if we're going to use unloadable algos.
3049	 */
3050	tc->ta->destroy(tc->astate, &tc->ti_copy);
3051	free(tc, M_IPFW);
3052}
3053
3054/*
3055 * Links @tc to @chain table named instance.
3056 * Sets appropriate type/states in @chain table info.
3057 */
3058static void
3059link_table(struct ip_fw_chain *ch, struct table_config *tc)
3060{
3061	struct namedobj_instance *ni;
3062	struct table_info *ti;
3063	uint16_t kidx;
3064
3065	IPFW_UH_WLOCK_ASSERT(ch);
3066	IPFW_WLOCK_ASSERT(ch);
3067
3068	ni = CHAIN_TO_NI(ch);
3069	kidx = tc->no.kidx;
3070
3071	ipfw_objhash_add(ni, &tc->no);
3072
3073	ti = KIDX_TO_TI(ch, kidx);
3074	*ti = tc->ti_copy;
3075
3076	/* Notify algo on real @ti address */
3077	if (tc->ta->change_ti != NULL)
3078		tc->ta->change_ti(tc->astate, ti);
3079
3080	tc->linked = 1;
3081	tc->ta->refcnt++;
3082}
3083
3084/*
3085 * Unlinks @tc from @chain table named instance.
3086 * Zeroes states in @chain and stores them in @tc.
3087 */
3088static void
3089unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
3090{
3091	struct namedobj_instance *ni;
3092	struct table_info *ti;
3093	uint16_t kidx;
3094
3095	IPFW_UH_WLOCK_ASSERT(ch);
3096	IPFW_WLOCK_ASSERT(ch);
3097
3098	ni = CHAIN_TO_NI(ch);
3099	kidx = tc->no.kidx;
3100
3101	/* Clear state. @ti copy is already saved inside @tc */
3102	ipfw_objhash_del(ni, &tc->no);
3103	ti = KIDX_TO_TI(ch, kidx);
3104	memset(ti, 0, sizeof(struct table_info));
3105	tc->linked = 0;
3106	tc->ta->refcnt--;
3107
3108	/* Notify algo on real @ti address */
3109	if (tc->ta->change_ti != NULL)
3110		tc->ta->change_ti(tc->astate, NULL);
3111}
3112
/*
 * Arguments for the objhash walkers run by ipfw_swap_tables_sets().
 */
struct swap_table_args {
	int set;	/* tables in this set go to @new_set */
	int new_set;	/* tables in this set go to @set, unless @mv is set */
	int mv;		/* non-zero: move only, leave @new_set tables alone */
};
3118
3119/*
3120 * Change set for each matching table.
3121 *
3122 * Ensure we dispatch each table once by setting/checking ochange
3123 * fields.
3124 */
3125static void
3126swap_table_set(struct namedobj_instance *ni, struct named_object *no,
3127    void *arg)
3128{
3129	struct table_config *tc;
3130	struct swap_table_args *sta;
3131
3132	tc = (struct table_config *)no;
3133	sta = (struct swap_table_args *)arg;
3134
3135	if (no->set != sta->set && (no->set != sta->new_set || sta->mv != 0))
3136		return;
3137
3138	if (tc->ochanged != 0)
3139		return;
3140
3141	tc->ochanged = 1;
3142	ipfw_objhash_del(ni, no);
3143	if (no->set == sta->set)
3144		no->set = sta->new_set;
3145	else
3146		no->set = sta->set;
3147	ipfw_objhash_add(ni, no);
3148}
3149
3150/*
3151 * Cleans up ochange field for all tables.
3152 */
3153static void
3154clean_table_set_data(struct namedobj_instance *ni, struct named_object *no,
3155    void *arg)
3156{
3157	struct table_config *tc;
3158	struct swap_table_args *sta;
3159
3160	tc = (struct table_config *)no;
3161	sta = (struct swap_table_args *)arg;
3162
3163	tc->ochanged = 0;
3164}
3165
3166/*
3167 * Swaps tables within two sets.
3168 */
3169void
3170ipfw_swap_tables_sets(struct ip_fw_chain *ch, uint32_t set,
3171    uint32_t new_set, int mv)
3172{
3173	struct swap_table_args sta;
3174
3175	IPFW_UH_WLOCK_ASSERT(ch);
3176
3177	sta.set = set;
3178	sta.new_set = new_set;
3179	sta.mv = mv;
3180
3181	ipfw_objhash_foreach(CHAIN_TO_NI(ch), swap_table_set, &sta);
3182	ipfw_objhash_foreach(CHAIN_TO_NI(ch), clean_table_set_data, &sta);
3183}
3184
3185/*
3186 * Move all tables which are reference by rules in @rr to set @new_set.
3187 * Makes sure that all relevant tables are referenced ONLLY by given rules.
3188 *
3189 * Retuns 0 on success,
3190 */
3191int
3192ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt,
3193    uint32_t new_set)
3194{
3195	struct ip_fw *rule;
3196	struct table_config *tc;
3197	struct named_object *no;
3198	struct namedobj_instance *ni;
3199	int bad, i, l, cmdlen;
3200	uint16_t kidx;
3201	uint8_t type;
3202	ipfw_insn *cmd;
3203
3204	IPFW_UH_WLOCK_ASSERT(ch);
3205
3206	ni = CHAIN_TO_NI(ch);
3207
3208	/* Stage 1: count number of references by given rules */
3209	for (i = 0; i < ch->n_rules - 1; i++) {
3210		rule = ch->map[i];
3211		if (ipfw_match_range(rule, rt) == 0)
3212			continue;
3213
3214		l = rule->cmd_len;
3215		cmd = rule->cmd;
3216		cmdlen = 0;
3217		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
3218			cmdlen = F_LEN(cmd);
3219			if (classify_table_opcode(cmd, &kidx, &type) != 0)
3220				continue;
3221			no = ipfw_objhash_lookup_kidx(ni, kidx);
3222			KASSERT(no != NULL,
3223			    ("objhash lookup failed on index %d", kidx));
3224			tc = (struct table_config *)no;
3225			tc->ocount++;
3226		}
3227
3228	}
3229
3230	/* Stage 2: verify "ownership" */
3231	bad = 0;
3232	for (i = 0; i < ch->n_rules - 1; i++) {
3233		rule = ch->map[i];
3234		if (ipfw_match_range(rule, rt) == 0)
3235			continue;
3236
3237		l = rule->cmd_len;
3238		cmd = rule->cmd;
3239		cmdlen = 0;
3240		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
3241			cmdlen = F_LEN(cmd);
3242			if (classify_table_opcode(cmd, &kidx, &type) != 0)
3243				continue;
3244			no = ipfw_objhash_lookup_kidx(ni, kidx);
3245			KASSERT(no != NULL,
3246			    ("objhash lookup failed on index %d", kidx));
3247			tc = (struct table_config *)no;
3248			if (tc->no.refcnt != tc->ocount) {
3249
3250				/*
3251				 * Number of references differ:
3252				 * Other rule(s) are holding reference to given
3253				 * table, so it is not possible to change its set.
3254				 *
3255				 * Note that refcnt may account
3256				 * references to some going-to-be-added rules.
3257				 * Since we don't know their numbers (and event
3258				 * if they will be added) it is perfectly OK
3259				 * to return error here.
3260				 */
3261				bad = 1;
3262				break;
3263			}
3264		}
3265
3266		if (bad != 0)
3267			break;
3268	}
3269
3270	/* Stage 3: change set or cleanup */
3271	for (i = 0; i < ch->n_rules - 1; i++) {
3272		rule = ch->map[i];
3273		if (ipfw_match_range(rule, rt) == 0)
3274			continue;
3275
3276		l = rule->cmd_len;
3277		cmd = rule->cmd;
3278		cmdlen = 0;
3279		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
3280			cmdlen = F_LEN(cmd);
3281			if (classify_table_opcode(cmd, &kidx, &type) != 0)
3282				continue;
3283			no = ipfw_objhash_lookup_kidx(ni, kidx);
3284			KASSERT(no != NULL,
3285			    ("objhash lookup failed on index %d", kidx));
3286			tc = (struct table_config *)no;
3287
3288			tc->ocount = 0;
3289			if (bad != 0)
3290				continue;
3291
3292			/* Actually change set. */
3293			ipfw_objhash_del(ni, no);
3294			no->set = new_set;
3295			ipfw_objhash_add(ni, no);
3296		}
3297	}
3298
3299	return (bad);
3300}
3301
3302/*
3303 * Finds and bumps refcount for tables referenced by given @rule.
3304 * Auto-creates non-existing tables.
3305 * Fills in @oib array with userland/kernel indexes.
3306 * First free oidx pointer is saved back in @oib.
3307 *
3308 * Returns 0 on success.
3309 */
3310static int
3311find_ref_rule_tables(struct ip_fw_chain *ch, struct ip_fw *rule,
3312    struct rule_check_info *ci, struct obj_idx **oib, struct tid_info *ti)
3313{
3314	struct table_config *tc;
3315	struct namedobj_instance *ni;
3316	struct named_object *no;
3317	int cmdlen, error, l, numnew;
3318	uint16_t kidx;
3319	ipfw_insn *cmd;
3320	struct obj_idx *pidx, *pidx_first, *p;
3321
3322	pidx_first = *oib;
3323	pidx = pidx_first;
3324	l = rule->cmd_len;
3325	cmd = rule->cmd;
3326	cmdlen = 0;
3327	error = 0;
3328	numnew = 0;
3329
3330	IPFW_UH_WLOCK(ch);
3331	ni = CHAIN_TO_NI(ch);
3332
3333	/* Increase refcount on each existing referenced table. */
3334	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
3335		cmdlen = F_LEN(cmd);
3336
3337		if (classify_table_opcode(cmd, &ti->uidx, &ti->type) != 0)
3338			continue;
3339
3340		pidx->uidx = ti->uidx;
3341		pidx->type = ti->type;
3342
3343		if ((tc = find_table(ni, ti)) != NULL) {
3344			if (tc->no.type != ti->type) {
3345				/* Incompatible types */
3346				error = EINVAL;
3347				break;
3348			}
3349
3350			/* Reference found table and save kidx */
3351			tc->no.refcnt++;
3352			pidx->kidx = tc->no.kidx;
3353			pidx++;
3354			continue;
3355		}
3356
3357		/*
3358		 * Compability stuff for old clients:
3359		 * prepare to manually create non-existing tables.
3360		 */
3361		pidx++;
3362		numnew++;
3363	}
3364
3365	if (error != 0) {
3366		/* Unref everything we have already done */
3367		for (p = *oib; p < pidx; p++) {
3368			if (p->kidx == 0)
3369				continue;
3370
3371			/* Find & unref by existing idx */
3372			no = ipfw_objhash_lookup_kidx(ni, p->kidx);
3373			KASSERT(no != NULL, ("Ref'd table %d disappeared",
3374			    p->kidx));
3375
3376			no->refcnt--;
3377		}
3378	}
3379
3380	IPFW_UH_WUNLOCK(ch);
3381
3382	if (numnew == 0) {
3383		*oib = pidx;
3384		return (error);
3385	}
3386
3387	/*
3388	 * Compatibility stuff: do actual creation for non-existing,
3389	 * but referenced tables.
3390	 */
3391	for (p = pidx_first; p < pidx; p++) {
3392		if (p->kidx != 0)
3393			continue;
3394
3395		ti->uidx = p->uidx;
3396		ti->type = p->type;
3397		ti->atype = 0;
3398
3399		error = create_table_compat(ch, ti, &kidx);
3400		if (error == 0) {
3401			p->kidx = kidx;
3402			continue;
3403		}
3404
3405		/* Error. We have to drop references */
3406		IPFW_UH_WLOCK(ch);
3407		for (p = pidx_first; p < pidx; p++) {
3408			if (p->kidx == 0)
3409				continue;
3410
3411			/* Find & unref by existing idx */
3412			no = ipfw_objhash_lookup_kidx(ni, p->kidx);
3413			KASSERT(no != NULL, ("Ref'd table %d disappeared",
3414			    p->kidx));
3415
3416			no->refcnt--;
3417		}
3418		IPFW_UH_WUNLOCK(ch);
3419
3420		return (error);
3421	}
3422
3423	*oib = pidx;
3424
3425	return (error);
3426}
3427
3428/*
3429 * Remove references from every table used in @rule.
3430 */
3431void
3432ipfw_unref_rule_tables(struct ip_fw_chain *chain, struct ip_fw *rule)
3433{
3434	int cmdlen, l;
3435	ipfw_insn *cmd;
3436	struct namedobj_instance *ni;
3437	struct named_object *no;
3438	uint16_t kidx;
3439	uint8_t type;
3440
3441	IPFW_UH_WLOCK_ASSERT(chain);
3442	ni = CHAIN_TO_NI(chain);
3443
3444	l = rule->cmd_len;
3445	cmd = rule->cmd;
3446	cmdlen = 0;
3447	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
3448		cmdlen = F_LEN(cmd);
3449
3450		if (classify_table_opcode(cmd, &kidx, &type) != 0)
3451			continue;
3452
3453		no = ipfw_objhash_lookup_kidx(ni, kidx);
3454
3455		KASSERT(no != NULL, ("table id %d not found", kidx));
3456		KASSERT(no->type == type, ("wrong type %d (%d) for table id %d",
3457		    no->type, type, kidx));
3458		KASSERT(no->refcnt > 0, ("refcount for table %d is %d",
3459		    kidx, no->refcnt));
3460
3461		no->refcnt--;
3462	}
3463}
3464
3465/*
3466 * Compatibility function for old ipfw(8) binaries.
3467 * Rewrites table kernel indices with userland ones.
3468 * Convert tables matching '/^\d+$/' to their atoi() value.
3469 * Use number 65535 for other tables.
3470 *
3471 * Returns 0 on success.
3472 */
3473int
3474ipfw_rewrite_table_kidx(struct ip_fw_chain *chain, struct ip_fw_rule0 *rule)
3475{
3476	int cmdlen, error, l;
3477	ipfw_insn *cmd;
3478	uint16_t kidx, uidx;
3479	uint8_t type;
3480	struct named_object *no;
3481	struct namedobj_instance *ni;
3482
3483	ni = CHAIN_TO_NI(chain);
3484	error = 0;
3485
3486	l = rule->cmd_len;
3487	cmd = rule->cmd;
3488	cmdlen = 0;
3489	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
3490		cmdlen = F_LEN(cmd);
3491
3492		if (classify_table_opcode(cmd, &kidx, &type) != 0)
3493			continue;
3494
3495		if ((no = ipfw_objhash_lookup_kidx(ni, kidx)) == NULL)
3496			return (1);
3497
3498		uidx = no->uidx;
3499		if (no->compat == 0) {
3500
3501			/*
3502			 * We are called via legacy opcode.
3503			 * Save error and show table as fake number
3504			 * not to make ipfw(8) hang.
3505			 */
3506			uidx = 65535;
3507			error = 2;
3508		}
3509
3510		update_table_opcode(cmd, uidx);
3511	}
3512
3513	return (error);
3514}
3515
3516/*
3517 * Checks is opcode is referencing table of appropriate type.
3518 * Adds reference count for found table if true.
3519 * Rewrites user-supplied opcode values with kernel ones.
3520 *
3521 * Returns 0 on success and appropriate error code otherwise.
3522 */
3523int
3524ipfw_rewrite_table_uidx(struct ip_fw_chain *chain,
3525    struct rule_check_info *ci)
3526{
3527	int cmdlen, error, l;
3528	ipfw_insn *cmd;
3529	uint16_t uidx;
3530	uint8_t type;
3531	struct namedobj_instance *ni;
3532	struct obj_idx *p, *pidx_first, *pidx_last;
3533	struct tid_info ti;
3534
3535	ni = CHAIN_TO_NI(chain);
3536
3537	/*
3538	 * Prepare an array for storing opcode indices.
3539	 * Use stack allocation by default.
3540	 */
3541	if (ci->table_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) {
3542		/* Stack */
3543		pidx_first = ci->obuf;
3544	} else
3545		pidx_first = malloc(ci->table_opcodes * sizeof(struct obj_idx),
3546		    M_IPFW, M_WAITOK | M_ZERO);
3547
3548	pidx_last = pidx_first;
3549	error = 0;
3550	type = 0;
3551	memset(&ti, 0, sizeof(ti));
3552
3553	/*
3554	 * Use default set for looking up tables (old way) or
3555	 * use set rule is assigned to (new way).
3556	 */
3557	ti.set = (V_fw_tables_sets != 0) ? ci->krule->set : 0;
3558	if (ci->ctlv != NULL) {
3559		ti.tlvs = (void *)(ci->ctlv + 1);
3560		ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv);
3561	}
3562
3563	/* Reference all used tables */
3564	error = find_ref_rule_tables(chain, ci->krule, ci, &pidx_last, &ti);
3565	if (error != 0)
3566		goto free;
3567
3568	IPFW_UH_WLOCK(chain);
3569
3570	/* Perform rule rewrite */
3571	l = ci->krule->cmd_len;
3572	cmd = ci->krule->cmd;
3573	cmdlen = 0;
3574	p = pidx_first;
3575	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
3576		cmdlen = F_LEN(cmd);
3577		if (classify_table_opcode(cmd, &uidx, &type) != 0)
3578			continue;
3579		update_table_opcode(cmd, p->kidx);
3580		p++;
3581	}
3582
3583	IPFW_UH_WUNLOCK(chain);
3584
3585free:
3586	if (pidx_first != ci->obuf)
3587		free(pidx_first, M_IPFW);
3588
3589	return (error);
3590}
3591
3592static struct ipfw_sopt_handler	scodes[] = {
3593	{ IP_FW_TABLE_XCREATE,	0,	HDIR_SET,	create_table },
3594	{ IP_FW_TABLE_XDESTROY,	0,	HDIR_SET,	flush_table_v0 },
3595	{ IP_FW_TABLE_XFLUSH,	0,	HDIR_SET,	flush_table_v0 },
3596	{ IP_FW_TABLE_XMODIFY,	0,	HDIR_BOTH,	modify_table },
3597	{ IP_FW_TABLE_XINFO,	0,	HDIR_GET,	describe_table },
3598	{ IP_FW_TABLES_XLIST,	0,	HDIR_GET,	list_tables },
3599	{ IP_FW_TABLE_XLIST,	0,	HDIR_GET,	dump_table_v0 },
3600	{ IP_FW_TABLE_XLIST,	1,	HDIR_GET,	dump_table_v1 },
3601	{ IP_FW_TABLE_XADD,	0,	HDIR_BOTH,	manage_table_ent_v0 },
3602	{ IP_FW_TABLE_XADD,	1,	HDIR_BOTH,	manage_table_ent_v1 },
3603	{ IP_FW_TABLE_XDEL,	0,	HDIR_BOTH,	manage_table_ent_v0 },
3604	{ IP_FW_TABLE_XDEL,	1,	HDIR_BOTH,	manage_table_ent_v1 },
3605	{ IP_FW_TABLE_XFIND,	0,	HDIR_GET,	find_table_entry },
3606	{ IP_FW_TABLE_XSWAP,	0,	HDIR_SET,	swap_table },
3607	{ IP_FW_TABLES_ALIST,	0,	HDIR_GET,	list_table_algo },
3608	{ IP_FW_TABLE_XGETSIZE,	0,	HDIR_GET,	get_table_size },
3609};
3610
3611static void
3612destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
3613    void *arg)
3614{
3615
3616	unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no);
3617	if (ipfw_objhash_free_idx(ni, no->kidx) != 0)
3618		printf("Error unlinking kidx %d from table %s\n",
3619		    no->kidx, no->name);
3620	free_table_config(ni, (struct table_config *)no);
3621}
3622
3623/*
3624 * Shuts tables module down.
3625 */
3626void
3627ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
3628{
3629
3630	IPFW_DEL_SOPT_HANDLER(last, scodes);
3631
3632	/* Remove all tables from working set */
3633	IPFW_UH_WLOCK(ch);
3634	IPFW_WLOCK(ch);
3635	ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
3636	IPFW_WUNLOCK(ch);
3637	IPFW_UH_WUNLOCK(ch);
3638
3639	/* Free pointers itself */
3640	free(ch->tablestate, M_IPFW);
3641
3642	ipfw_table_value_destroy(ch, last);
3643	ipfw_table_algo_destroy(ch);
3644
3645	ipfw_objhash_destroy(CHAIN_TO_NI(ch));
3646	free(CHAIN_TO_TCFG(ch), M_IPFW);
3647}
3648
3649/*
3650 * Starts tables module.
3651 */
3652int
3653ipfw_init_tables(struct ip_fw_chain *ch, int first)
3654{
3655	struct tables_config *tcfg;
3656
3657	/* Allocate pointers */
3658	ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info),
3659	    M_IPFW, M_WAITOK | M_ZERO);
3660
3661	tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO);
3662	tcfg->namehash = ipfw_objhash_create(V_fw_tables_max);
3663	ch->tblcfg = tcfg;
3664
3665	ipfw_table_value_init(ch, first);
3666	ipfw_table_algo_init(ch);
3667
3668	IPFW_ADD_SOPT_HANDLER(first, scodes);
3669	return (0);
3670}
3671
3672
3673
3674