1179595Sbenno/*-
2179595Sbenno * Copyright (c) 2014 Yandex LLC
3179595Sbenno * Copyright (c) 2014 Alexander V. Chernikov
4179595Sbenno *
5179595Sbenno * Redistribution and use in source and binary forms, with or without
6179595Sbenno * modification, are permitted provided that the following conditions
7179595Sbenno * are met:
8179595Sbenno * 1. Redistributions of source code must retain the above copyright
9179595Sbenno *    notice, this list of conditions and the following disclaimer.
10179595Sbenno * 2. Redistributions in binary form must reproduce the above copyright
11179595Sbenno *    notice, this list of conditions and the following disclaimer in the
12179595Sbenno *    documentation and/or other materials provided with the distribution.
13179595Sbenno *
14179595Sbenno * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15179595Sbenno * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16179595Sbenno * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17179595Sbenno * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18179595Sbenno * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19179595Sbenno * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20179595Sbenno * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21179595Sbenno * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22179595Sbenno * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23179595Sbenno * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24179595Sbenno * SUCH DAMAGE.
25179595Sbenno */
26179595Sbenno
27179595Sbenno#include <sys/cdefs.h>
28179595Sbenno__FBSDID("$FreeBSD: releng/11.0/sys/netpfil/ipfw/ip_fw_table_value.c 299152 2016-05-06 03:18:51Z ae $");
29179595Sbenno
/*
 * Multi-field value support for ipfw tables.
 *
 * This file contains the functions needed to convert large multi-field
 * values into u32 indices suitable for feeding to the various table
 * algorithms. Supporting machinery, such as value reference counting and
 * resizing of the internal structures, is also kept here.
 */
38179595Sbenno
39179595Sbenno#include "opt_ipfw.h"
40179595Sbenno
41179595Sbenno#include <sys/param.h>
42179595Sbenno#include <sys/systm.h>
43179595Sbenno#include <sys/malloc.h>
44179595Sbenno#include <sys/kernel.h>
45179595Sbenno#include <sys/hash.h>
46179595Sbenno#include <sys/lock.h>
47179595Sbenno#include <sys/rwlock.h>
48179595Sbenno#include <sys/rmlock.h>
49179595Sbenno#include <sys/socket.h>
50179595Sbenno#include <sys/socketvar.h>
51179595Sbenno#include <sys/queue.h>
52179595Sbenno#include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */
53294883Sjhibbits
54179595Sbenno#include <netinet/in.h>
55179595Sbenno#include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
56179595Sbenno#include <netinet/ip_fw.h>
57179595Sbenno
58179595Sbenno#include <netpfil/ipfw/ip_fw_private.h>
59179595Sbenno#include <netpfil/ipfw/ip_fw_table.h>
60294883Sjhibbits
61179595Sbennostatic uint32_t hash_table_value(struct namedobj_instance *ni, const void *key,
62179595Sbenno    uint32_t kopt);
63179595Sbennostatic int cmp_table_value(struct named_object *no, const void *key,
64179595Sbenno    uint32_t kopt);
65179595Sbenno
66179595Sbennostatic int list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
67179595Sbenno    struct sockopt_data *sd);
68179595Sbenno
69179595Sbennostatic struct ipfw_sopt_handler	scodes[] = {
70179595Sbenno	{ IP_FW_TABLE_VLIST,	0,	HDIR_GET,	list_table_values },
71179595Sbenno};
72179595Sbenno
73179595Sbenno#define	CHAIN_TO_VI(chain)	(CHAIN_TO_TCFG(chain)->valhash)
74179595Sbenno
75179595Sbennostruct table_val_link
76179595Sbenno{
77179595Sbenno	struct named_object	no;
78179595Sbenno	struct table_value	*pval;	/* Pointer to real table value */
79179595Sbenno};
80179595Sbenno#define	VALDATA_START_SIZE	64	/* Allocate 64-items array by default */
81179595Sbenno
/*
 * Argument bundle handed to the ipfw_objhash_foreach() callbacks
 * update_tvalue() and dump_tvalue().
 */
struct vdump_args {
	struct ip_fw_chain	*ch;	/* Chain; filled by list_table_values() */
	struct sockopt_data	*sd;	/* Output buffer used by dump_tvalue() */
	struct table_value	*pval;	/* Value array used by update_tvalue() */
	int			error;	/* Set to ENOMEM by dump_tvalue() */
};
88179595Sbenno
89179595Sbenno
/*
 * Hash callback of the value hash: hashes the first 56 bytes of the
 * object @key points to. @ni and @kopt are not used.
 *
 * NOTE(review): 56 presumably is the size of the data part of
 * struct table_value (everything before the reference counter) --
 * confirm against the structure definition. cmp_table_value() has to
 * use the same length.
 */
static uint32_t
hash_table_value(struct namedobj_instance *ni, const void *key, uint32_t kopt)
{
	uint32_t hval;

	hval = hash32_buf(key, 56, 0);

	return (hval);
}
96179595Sbenno
97179595Sbennostatic int
98179595Sbennocmp_table_value(struct named_object *no, const void *key, uint32_t kopt)
99179595Sbenno{
100179595Sbenno
101179595Sbenno	return (memcmp(((struct table_val_link *)no)->pval, key, 56));
102179595Sbenno}
103179595Sbenno
104179595Sbennostatic void
105179595Sbennomask_table_value(struct table_value *src, struct table_value *dst,
106179595Sbenno    uint32_t mask)
107179595Sbenno{
108179595Sbenno#define	_MCPY(f, b)	if ((mask & (b)) != 0) { dst->f = src->f; }
109179595Sbenno
110179595Sbenno	memset(dst, 0, sizeof(*dst));
111179595Sbenno	_MCPY(tag, IPFW_VTYPE_TAG);
112179595Sbenno	_MCPY(pipe, IPFW_VTYPE_PIPE);
113179595Sbenno	_MCPY(divert, IPFW_VTYPE_DIVERT);
114179595Sbenno	_MCPY(skipto, IPFW_VTYPE_SKIPTO);
115179595Sbenno	_MCPY(netgraph, IPFW_VTYPE_NETGRAPH);
116179595Sbenno	_MCPY(fib, IPFW_VTYPE_FIB);
117179595Sbenno	_MCPY(nat, IPFW_VTYPE_NAT);
118179595Sbenno	_MCPY(dscp, IPFW_VTYPE_DSCP);
119179595Sbenno	_MCPY(nh4, IPFW_VTYPE_NH4);
120179595Sbenno	_MCPY(nh6, IPFW_VTYPE_NH6);
121179595Sbenno	_MCPY(zoneid, IPFW_VTYPE_NH6);
122179595Sbenno#undef	_MCPY
123179595Sbenno}
124179595Sbenno
125179595Sbennostatic void
126179595Sbennoget_value_ptrs(struct ip_fw_chain *ch, struct table_config *tc, int vshared,
127179595Sbenno    struct table_value **ptv, struct namedobj_instance **pvi)
128179595Sbenno{
129179595Sbenno	struct table_value *pval;
130179595Sbenno	struct namedobj_instance *vi;
131179595Sbenno
132179595Sbenno	if (vshared != 0) {
133179595Sbenno		pval = (struct table_value *)ch->valuestate;
134179595Sbenno		vi = CHAIN_TO_VI(ch);
135179595Sbenno	} else {
136179595Sbenno		pval = NULL;
137179595Sbenno		vi = NULL;
138179595Sbenno		//pval = (struct table_value *)&tc->ti.data;
139179595Sbenno	}
140179595Sbenno
141179595Sbenno	if (ptv != NULL)
142179595Sbenno		*ptv = pval;
143179595Sbenno	if (pvi != NULL)
144179595Sbenno		*pvi = vi;
145179595Sbenno}
146179595Sbenno
147179595Sbenno/*
148179595Sbenno * Update pointers to real vaues after @pval change.
149179595Sbenno */
150179595Sbennostatic int
151179595Sbennoupdate_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)
152179595Sbenno{
153179595Sbenno	struct vdump_args *da;
154179595Sbenno	struct table_val_link *ptv;
155179595Sbenno	struct table_value *pval;
156179595Sbenno
157179595Sbenno	da = (struct vdump_args *)arg;
158179595Sbenno	ptv = (struct table_val_link *)no;
159179595Sbenno
160179595Sbenno	pval = da->pval;
161179595Sbenno	ptv->pval = &pval[ptv->no.kidx];
162297199Sjhibbits	ptv->no.name = (char *)&pval[ptv->no.kidx];
163179595Sbenno	return (0);
164297199Sjhibbits}
165179595Sbenno
166179595Sbenno/*
167179595Sbenno * Grows value storage shared among all tables.
168179595Sbenno * Drops/reacquires UH locks.
169179595Sbenno * Notifies other running adds on @ch shared storage resize.
170179595Sbenno * Note function does not guarantee that free space
171179595Sbenno * will be available after invocation, so one caller needs
172179595Sbenno * to roll cycle himself.
173179595Sbenno *
174179595Sbenno * Returns 0 if case of no errors.
175179595Sbenno */
176226832Skevlostatic int
177179595Sbennoresize_shared_value_storage(struct ip_fw_chain *ch)
178179595Sbenno{
179179595Sbenno	struct tables_config *tcfg;
180226832Skevlo	struct namedobj_instance *vi;
181226832Skevlo	struct table_value *pval, *valuestate, *old_valuestate;
182226832Skevlo	void *new_idx;
183226832Skevlo	struct vdump_args da;
184179595Sbenno	int new_blocks;
185179595Sbenno	int val_size, val_size_old;
186179595Sbenno
187179595Sbenno	IPFW_UH_WLOCK_ASSERT(ch);
188179701Skevlo
189179701Skevlo	valuestate = NULL;
190179701Skevlo	new_idx = NULL;
191179701Skevlo
192179701Skevlo	pval = (struct table_value *)ch->valuestate;
193179701Skevlo	vi = CHAIN_TO_VI(ch);
194179595Sbenno	tcfg = CHAIN_TO_TCFG(ch);
195179595Sbenno
196179595Sbenno	val_size = tcfg->val_size * 2;
197179595Sbenno
198179595Sbenno	if (val_size == (1 << 30))
199179595Sbenno		return (ENOSPC);
200179595Sbenno
201179595Sbenno	IPFW_UH_WUNLOCK(ch);
202179595Sbenno
203179595Sbenno	valuestate = malloc(sizeof(struct table_value) * val_size, M_IPFW,
204179595Sbenno	    M_WAITOK | M_ZERO);
205179595Sbenno	ipfw_objhash_bitmap_alloc(val_size, (void *)&new_idx,
206179595Sbenno	    &new_blocks);
207179595Sbenno
208179595Sbenno	IPFW_UH_WLOCK(ch);
209179595Sbenno
210179595Sbenno	/*
211179595Sbenno	 * Check if we still need to resize
212179595Sbenno	 */
213179595Sbenno	if (tcfg->val_size >= val_size)
214179595Sbenno		goto done;
215179595Sbenno
216179595Sbenno	/* Update pointers and notify everyone we're changing @ch */
217179595Sbenno	pval = (struct table_value *)ch->valuestate;
218179595Sbenno	rollback_toperation_state(ch, ch);
219179595Sbenno
220179595Sbenno	/* Good. Let's merge */
221179595Sbenno	memcpy(valuestate, pval, sizeof(struct table_value) * tcfg->val_size);
222179595Sbenno	ipfw_objhash_bitmap_merge(CHAIN_TO_VI(ch), &new_idx, &new_blocks);
223179595Sbenno
224179595Sbenno	IPFW_WLOCK(ch);
225179595Sbenno	/* Change pointers */
226179595Sbenno	old_valuestate = ch->valuestate;
227294883Sjhibbits	ch->valuestate = valuestate;
228179595Sbenno	valuestate = old_valuestate;
229179595Sbenno	ipfw_objhash_bitmap_swap(CHAIN_TO_VI(ch), &new_idx, &new_blocks);
230179595Sbenno
231179595Sbenno	val_size_old = tcfg->val_size;
232179595Sbenno	tcfg->val_size = val_size;
233179595Sbenno	val_size = val_size_old;
234179595Sbenno	IPFW_WUNLOCK(ch);
235179595Sbenno	/* Update pointers to reflect resize */
236179595Sbenno	memset(&da, 0, sizeof(da));
237179595Sbenno	da.pval = (struct table_value *)ch->valuestate;
238179595Sbenno	ipfw_objhash_foreach(vi, update_tvalue, &da);
239179595Sbenno
240179595Sbennodone:
241179595Sbenno	free(valuestate, M_IPFW);
242179595Sbenno	ipfw_objhash_bitmap_free(new_idx, new_blocks);
243179595Sbenno
244179595Sbenno	return (0);
245179595Sbenno}
246179595Sbenno
247179595Sbenno/*
248179595Sbenno * Drops reference for table value with index @kidx, stored in @pval and
249179595Sbenno * @vi. Frees value if it has no references.
250179595Sbenno */
251179595Sbennostatic void
252179595Sbennounref_table_value(struct namedobj_instance *vi, struct table_value *pval,
253179595Sbenno    uint32_t kidx)
254179595Sbenno{
255179595Sbenno	struct table_val_link *ptvl;
256179595Sbenno
257179595Sbenno	KASSERT(pval[kidx].refcnt > 0, ("Refcount is 0 on kidx %d", kidx));
258179595Sbenno	if (--pval[kidx].refcnt > 0)
259179595Sbenno		return;
260179595Sbenno
261179595Sbenno	/* Last reference, delete item */
262179595Sbenno	ptvl = (struct table_val_link *)ipfw_objhash_lookup_kidx(vi, kidx);
263179595Sbenno	KASSERT(ptvl != NULL, ("lookup on value kidx %d failed", kidx));
264179595Sbenno	ipfw_objhash_del(vi, &ptvl->no);
265179595Sbenno	ipfw_objhash_free_idx(vi, kidx);
266179595Sbenno	free(ptvl, M_IPFW);
267179595Sbenno}
268179595Sbenno
269179595Sbennostruct flush_args {
270179595Sbenno	struct ip_fw_chain *ch;
271179595Sbenno	struct table_algo *ta;
272179595Sbenno	struct table_info *ti;
273179595Sbenno	void *astate;
274179595Sbenno	ipfw_obj_tentry tent;
275179595Sbenno};
276179595Sbenno
277179595Sbennostatic int
278179595Sbennounref_table_value_cb(void *e, void *arg)
279179595Sbenno{
280179595Sbenno	struct flush_args *fa;
281179595Sbenno	struct ip_fw_chain *ch;
282179595Sbenno	struct table_algo *ta;
283179595Sbenno	ipfw_obj_tentry *tent;
284179595Sbenno	int error;
285179595Sbenno
286179595Sbenno	fa = (struct flush_args *)arg;
287179595Sbenno
288179595Sbenno	ta = fa->ta;
289179595Sbenno	memset(&fa->tent, 0, sizeof(fa->tent));
290179595Sbenno	tent = &fa->tent;
291179595Sbenno	error = ta->dump_tentry(fa->astate, fa->ti, e, tent);
292179595Sbenno	if (error != 0)
293179595Sbenno		return (error);
294179595Sbenno
295179595Sbenno	ch = fa->ch;
296179595Sbenno
297179595Sbenno	unref_table_value(CHAIN_TO_VI(ch),
298179595Sbenno	    (struct table_value *)ch->valuestate, tent->v.kidx);
299179595Sbenno
300179595Sbenno	return (0);
301179595Sbenno}
302179595Sbenno
303179595Sbenno/*
304179595Sbenno * Drop references for each value used in @tc.
305179595Sbenno */
306179595Sbennovoid
307179595Sbennoipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc,
308179595Sbenno    struct table_algo *ta, void *astate, struct table_info *ti)
309179595Sbenno{
310179595Sbenno	struct flush_args fa;
311179595Sbenno
312179595Sbenno	IPFW_UH_WLOCK_ASSERT(ch);
313179595Sbenno
314179595Sbenno	memset(&fa, 0, sizeof(fa));
315179595Sbenno	fa.ch = ch;
316179595Sbenno	fa.ta = ta;
317179595Sbenno	fa.astate = astate;
318179595Sbenno	fa.ti = ti;
319179595Sbenno
320179595Sbenno	ta->foreach(astate, ti, unref_table_value_cb, &fa);
321179595Sbenno}
322179595Sbenno
323179595Sbenno/*
324179701Skevlo * Table operation state handler.
325179701Skevlo * Called when we are going to change something in @tc which
326179701Skevlo * may lead to inconsistencies in on-going table data addition.
327179595Sbenno *
328179701Skevlo * Here we rollback all already committed state (table values, currently)
329179595Sbenno * and set "modified" field to non-zero value to indicate
330179701Skevlo * that we need to restart original operation.
331179701Skevlo */
332179701Skevlovoid
333179595Sbennorollback_table_values(struct tableop_state *ts)
334179701Skevlo{
335179701Skevlo	struct ip_fw_chain *ch;
336179701Skevlo	struct table_value *pval;
337179701Skevlo	struct tentry_info *ptei;
338179595Sbenno	struct namedobj_instance *vi;
339179595Sbenno	int i;
340179595Sbenno
341179595Sbenno	ch = ts->ch;
342179595Sbenno
343179595Sbenno	IPFW_UH_WLOCK_ASSERT(ch);
344179595Sbenno
345179595Sbenno	/* Get current table value pointer */
346179595Sbenno	get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi);
347179595Sbenno
348179595Sbenno	for (i = 0; i < ts->count; i++) {
349179595Sbenno		ptei = &ts->tei[i];
350179595Sbenno
351179595Sbenno		if (ptei->value == 0)
352179595Sbenno			continue;
353179595Sbenno
354294883Sjhibbits		unref_table_value(vi, pval, ptei->value);
355179595Sbenno	}
356179595Sbenno}
357179595Sbenno
358179595Sbenno/*
359179595Sbenno * Allocate new value index in either shared or per-table array.
360179595Sbenno * Function may drop/reacquire UH lock.
361179595Sbenno *
362179595Sbenno * Returns 0 on success.
363179595Sbenno */
364179595Sbennostatic int
365179595Sbennoalloc_table_vidx(struct ip_fw_chain *ch, struct tableop_state *ts,
366179595Sbenno    struct namedobj_instance *vi, uint16_t *pvidx)
367179595Sbenno{
368179595Sbenno	int error, vlimit;
369179595Sbenno	uint16_t vidx;
370179595Sbenno
371179595Sbenno	IPFW_UH_WLOCK_ASSERT(ch);
372179595Sbenno
373179595Sbenno	error = ipfw_objhash_alloc_idx(vi, &vidx);
374179595Sbenno	if (error != 0) {
375179595Sbenno
376179595Sbenno		/*
377179595Sbenno		 * We need to resize array. This involves
378179595Sbenno		 * lock/unlock, so we need to check "modified"
379179595Sbenno		 * state.
380179595Sbenno		 */
381179595Sbenno		ts->opstate.func(ts->tc, &ts->opstate);
382179595Sbenno		error = resize_shared_value_storage(ch);
383179595Sbenno		return (error); /* ts->modified should be set, we will restart */
384179595Sbenno	}
385179595Sbenno
386179595Sbenno	vlimit = ts->ta->vlimit;
387179595Sbenno	if (vlimit != 0 && vidx >= vlimit) {
388179595Sbenno
389179595Sbenno		/*
390179595Sbenno		 * Algorithm is not able to store given index.
391179595Sbenno		 * We have to rollback state, start using
392179595Sbenno		 * per-table value array or return error
393297000Sjhibbits		 * if we're already using it.
394179595Sbenno		 *
395179595Sbenno		 * TODO: do not rollback state if
396179595Sbenno		 * atomicity is not required.
397179595Sbenno		 */
398		if (ts->vshared != 0) {
399			/* shared -> per-table  */
400			return (ENOSPC); /* TODO: proper error */
401		}
402
403		/* per-table. Fail for now. */
404		return (ENOSPC); /* TODO: proper error */
405	}
406
407	*pvidx = vidx;
408	return (0);
409}
410
411/*
412 * Drops value reference for unused values (updates, deletes, partially
413 * successful adds or rollbacks).
414 */
415void
416ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc,
417    struct tentry_info *tei, uint32_t count, int rollback)
418{
419	int i;
420	struct tentry_info *ptei;
421	struct table_value *pval;
422	struct namedobj_instance *vi;
423
424	/*
425	 * We have two slightly different ADD cases here:
426	 * either (1) we are successful / partially successful,
427	 * in that case we need
428	 * * to ignore ADDED entries values
429	 * * rollback every other values (either UPDATED since
430	 *   old value has been stored there, or some failure like
431	 *   EXISTS or LIMIT or simply "ignored" case.
432	 *
433	 * (2): atomic rollback of partially successful operation
434	 * in that case we simply need to unref all entries.
435	 *
436	 * DELETE case is simpler: no atomic support there, so
437	 * we simply unref all non-zero values.
438	 */
439
440	/*
441	 * Get current table value pointers.
442	 * XXX: Properly read vshared
443	 */
444	get_value_ptrs(ch, tc, 1, &pval, &vi);
445
446	for (i = 0; i < count; i++) {
447		ptei = &tei[i];
448
449		if (ptei->value == 0) {
450
451			/*
452			 * We may be deleting non-existing record.
453			 * Skip.
454			 */
455			continue;
456		}
457
458		if ((ptei->flags & TEI_FLAGS_ADDED) != 0 && rollback == 0) {
459			ptei->value = 0;
460			continue;
461		}
462
463		unref_table_value(vi, pval, ptei->value);
464		ptei->value = 0;
465	}
466}
467
468/*
469 * Main function used to link values of entries going to be added,
470 * to the index. Since we may perform many UH locks drops/acquires,
471 * handle changes by checking tablestate "modified" field.
472 *
473 * Success: return 0.
474 */
475int
476ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts)
477{
478	int error, i, found;
479	struct namedobj_instance *vi;
480	struct table_config *tc;
481	struct tentry_info *tei, *ptei;
482	uint32_t count, vlimit;
483	uint16_t vidx;
484	struct table_val_link *ptv;
485	struct table_value tval, *pval;
486
487	/*
488	 * Stage 1: reference all existing values and
489	 * save their indices.
490	 */
491	IPFW_UH_WLOCK_ASSERT(ch);
492	get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi);
493
494	error = 0;
495	found = 0;
496	vlimit = ts->ta->vlimit;
497	vidx = 0;
498	tc = ts->tc;
499	tei = ts->tei;
500	count = ts->count;
501	for (i = 0; i < count; i++) {
502		ptei = &tei[i];
503		ptei->value = 0; /* Ensure value is always 0 in the beginning */
504		mask_table_value(ptei->pvalue, &tval, ts->vmask);
505		ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,
506		    (char *)&tval);
507		if (ptv == NULL)
508			continue;
509		/* Deal with vlimit later */
510		if (vlimit > 0 && vlimit <= ptv->no.kidx)
511			continue;
512
513		/* Value found. Bump refcount */
514		ptv->pval->refcnt++;
515		ptei->value = ptv->no.kidx;
516		found++;
517	}
518
519	if (ts->count == found) {
520		/* We've found all values , no need ts create new ones */
521		return (0);
522	}
523
524	/*
525	 * we have added some state here, let's attach operation
526	 * state ts the list ts be able ts rollback if necessary.
527	 */
528	add_toperation_state(ch, ts);
529	/* Ensure table won't disappear */
530	tc_ref(tc);
531	IPFW_UH_WUNLOCK(ch);
532
533	/*
534	 * Stage 2: allocate objects for non-existing values.
535	 */
536	for (i = 0; i < count; i++) {
537		ptei = &tei[i];
538		if (ptei->value != 0)
539			continue;
540		if (ptei->ptv != NULL)
541			continue;
542		ptei->ptv = malloc(sizeof(struct table_val_link), M_IPFW,
543		    M_WAITOK | M_ZERO);
544	}
545
546	/*
547	 * Stage 3: allocate index numbers for new values
548	 * and link them to index.
549	 */
550	IPFW_UH_WLOCK(ch);
551	tc_unref(tc);
552	del_toperation_state(ch, ts);
553	if (ts->modified != 0) {
554
555		/*
556		 * In general, we should free all state/indexes here
557		 * and return. However, we keep allocated state instead
558		 * to ensure we achieve some progress on each restart.
559		 */
560		return (0);
561	}
562
563	KASSERT(pval == ch->valuestate, ("resize_storage() notify failure"));
564
565	/* Let's try to link values */
566	for (i = 0; i < count; i++) {
567		ptei = &tei[i];
568
569		/* Check if record has appeared */
570		mask_table_value(ptei->pvalue, &tval, ts->vmask);
571		ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0,
572		    (char *)&tval);
573		if (ptv != NULL) {
574			ptv->pval->refcnt++;
575			ptei->value = ptv->no.kidx;
576			continue;
577		}
578
579		/* May perform UH unlock/lock */
580		error = alloc_table_vidx(ch, ts, vi, &vidx);
581		if (error != 0) {
582			ts->opstate.func(ts->tc, &ts->opstate);
583			return (error);
584		}
585		/* value storage resize has happened, return */
586		if (ts->modified != 0)
587			return (0);
588
589		/* Finally, we have allocated valid index, let's add entry */
590		ptei->value = vidx;
591		ptv = (struct table_val_link *)ptei->ptv;
592		ptei->ptv = NULL;
593
594		ptv->no.kidx = vidx;
595		ptv->no.name = (char *)&pval[vidx];
596		ptv->pval = &pval[vidx];
597		memcpy(ptv->pval, &tval, sizeof(struct table_value));
598		pval[vidx].refcnt = 1;
599		ipfw_objhash_add(vi, &ptv->no);
600	}
601
602	return (0);
603}
604
605/*
606 * Compatibility function used to import data from old
607 * IP_FW_TABLE_ADD / IP_FW_TABLE_XADD opcodes.
608 */
609void
610ipfw_import_table_value_legacy(uint32_t value, struct table_value *v)
611{
612
613	memset(v, 0, sizeof(*v));
614	v->tag = value;
615	v->pipe = value;
616	v->divert = value;
617	v->skipto = value;
618	v->netgraph = value;
619	v->fib = value;
620	v->nat = value;
621	v->nh4 = value; /* host format */
622	v->dscp = value;
623	v->limit = value;
624}
625
626/*
627 * Export data to legacy table dumps opcodes.
628 */
629uint32_t
630ipfw_export_table_value_legacy(struct table_value *v)
631{
632
633	/*
634	 * TODO: provide more compatibility depending on
635	 * vmask value.
636	 */
637	return (v->tag);
638}
639
640/*
641 * Imports table value from current userland format.
642 * Saves value in kernel format to the same place.
643 */
644void
645ipfw_import_table_value_v1(ipfw_table_value *iv)
646{
647	struct table_value v;
648
649	memset(&v, 0, sizeof(v));
650	v.tag = iv->tag;
651	v.pipe = iv->pipe;
652	v.divert = iv->divert;
653	v.skipto = iv->skipto;
654	v.netgraph = iv->netgraph;
655	v.fib = iv->fib;
656	v.nat = iv->nat;
657	v.dscp = iv->dscp;
658	v.nh4 = iv->nh4;
659	v.nh6 = iv->nh6;
660	v.limit = iv->limit;
661	v.zoneid = iv->zoneid;
662
663	memcpy(iv, &v, sizeof(ipfw_table_value));
664}
665
666/*
667 * Export real table value @v to current userland format.
668 * Note that @v and @piv may point to the same memory.
669 */
670void
671ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *piv)
672{
673	ipfw_table_value iv;
674
675	memset(&iv, 0, sizeof(iv));
676	iv.tag = v->tag;
677	iv.pipe = v->pipe;
678	iv.divert = v->divert;
679	iv.skipto = v->skipto;
680	iv.netgraph = v->netgraph;
681	iv.fib = v->fib;
682	iv.nat = v->nat;
683	iv.dscp = v->dscp;
684	iv.limit = v->limit;
685	iv.nh4 = v->nh4;
686	iv.nh6 = v->nh6;
687	iv.zoneid = v->zoneid;
688
689	memcpy(piv, &iv, sizeof(iv));
690}
691
692/*
693 * Exports real value data into ipfw_table_value structure.
694 * Utilizes "spare1" field to store kernel index.
695 */
696static int
697dump_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)
698{
699	struct vdump_args *da;
700	struct table_val_link *ptv;
701	struct table_value *v;
702
703	da = (struct vdump_args *)arg;
704	ptv = (struct table_val_link *)no;
705
706	v = (struct table_value *)ipfw_get_sopt_space(da->sd, sizeof(*v));
707	/* Out of memory, returning */
708	if (v == NULL) {
709		da->error = ENOMEM;
710		return (ENOMEM);
711	}
712
713	memcpy(v, ptv->pval, sizeof(*v));
714	v->spare1 = ptv->no.kidx;
715	return (0);
716}
717
718/*
719 * Dumps all shared/table value data
720 * Data layout (v1)(current):
721 * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
722 * Reply: [ ipfw_obj_lheader ipfw_table_value x N ]
723 *
724 * Returns 0 on success
725 */
726static int
727list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
728    struct sockopt_data *sd)
729{
730	struct _ipfw_obj_lheader *olh;
731	struct namedobj_instance *vi;
732	struct vdump_args da;
733	uint32_t count, size;
734
735	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
736	if (olh == NULL)
737		return (EINVAL);
738	if (sd->valsize < olh->size)
739		return (EINVAL);
740
741	IPFW_UH_RLOCK(ch);
742	vi = CHAIN_TO_VI(ch);
743
744	count = ipfw_objhash_count(vi);
745	size = count * sizeof(ipfw_table_value) + sizeof(ipfw_obj_lheader);
746
747	/* Fill in header regadless of buffer size */
748	olh->count = count;
749	olh->objsize = sizeof(ipfw_table_value);
750
751	if (size > olh->size) {
752		olh->size = size;
753		IPFW_UH_RUNLOCK(ch);
754		return (ENOMEM);
755	}
756	olh->size = size;
757
758	/*
759	 * Do the actual value dump
760	 */
761	memset(&da, 0, sizeof(da));
762	da.ch = ch;
763	da.sd = sd;
764	ipfw_objhash_foreach(vi, dump_tvalue, &da);
765
766	IPFW_UH_RUNLOCK(ch);
767
768	return (0);
769}
770
771void
772ipfw_table_value_init(struct ip_fw_chain *ch, int first)
773{
774	struct tables_config *tcfg;
775
776	ch->valuestate = malloc(VALDATA_START_SIZE * sizeof(struct table_value),
777	    M_IPFW, M_WAITOK | M_ZERO);
778
779	tcfg = ch->tblcfg;
780
781	tcfg->val_size = VALDATA_START_SIZE;
782	tcfg->valhash = ipfw_objhash_create(tcfg->val_size);
783	ipfw_objhash_set_funcs(tcfg->valhash, hash_table_value,
784	    cmp_table_value);
785
786	IPFW_ADD_SOPT_HANDLER(first, scodes);
787}
788
789static int
790destroy_value(struct namedobj_instance *ni, struct named_object *no,
791    void *arg)
792{
793
794	free(no, M_IPFW);
795	return (0);
796}
797
798void
799ipfw_table_value_destroy(struct ip_fw_chain *ch, int last)
800{
801
802	IPFW_DEL_SOPT_HANDLER(last, scodes);
803
804	free(ch->valuestate, M_IPFW);
805	ipfw_objhash_foreach(CHAIN_TO_VI(ch), destroy_value, ch);
806	ipfw_objhash_destroy(CHAIN_TO_VI(ch));
807}
808
809