ip_sync.c revision 1.2
1/*	$NetBSD: ip_sync.c,v 1.2 2012/02/15 17:55:04 riz Exp $	*/
2
3/*
4 * Copyright (C) 1995-1998 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 */
8#if defined(KERNEL) || defined(_KERNEL)
9# undef KERNEL
10# undef _KERNEL
11# define        KERNEL	1
12# define        _KERNEL	1
13#endif
14#include <sys/errno.h>
15#include <sys/types.h>
16#include <sys/param.h>
17#include <sys/file.h>
18#if !defined(_KERNEL) && !defined(__KERNEL__)
19# include <stdio.h>
20# include <stdlib.h>
21# include <string.h>
22# define _KERNEL
23# define KERNEL
24# ifdef __OpenBSD__
25struct file;
26# endif
27# include <sys/uio.h>
28# undef _KERNEL
29# undef KERNEL
30#else
31# include <sys/systm.h>
32# if !defined(__SVR4) && !defined(__svr4__)
33#  include <sys/mbuf.h>
34# endif
35# include <sys/select.h>
36# if __FreeBSD_version >= 500000
37#  include <sys/selinfo.h>
38# endif
39#endif
40#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
41# include <sys/proc.h>
42#endif
43#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
44# include <sys/filio.h>
45# include <sys/fcntl.h>
46# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
47#  include "opt_ipfilter.h"
48# endif
49#else
50# include <sys/ioctl.h>
51#endif
52#include <sys/time.h>
53#if !defined(linux)
54# include <sys/protosw.h>
55#endif
56#include <sys/socket.h>
57#if defined(__SVR4) || defined(__svr4__)
58# include <sys/filio.h>
59# include <sys/byteorder.h>
60# ifdef _KERNEL
61#  include <sys/dditypes.h>
62# endif
63# include <sys/stream.h>
64# include <sys/kmem.h>
65#endif
66
67#include <net/if.h>
68#ifdef sun
69# include <net/af.h>
70#endif
71#include <netinet/in.h>
72#include <netinet/in_systm.h>
73#include <netinet/ip.h>
74#include <netinet/tcp.h>
75#if !defined(linux)
76# include <netinet/ip_var.h>
77#endif
78#if !defined(__hpux) && !defined(linux)
79# include <netinet/tcp_fsm.h>
80#endif
81#include <netinet/udp.h>
82#include <netinet/ip_icmp.h>
83#include "netinet/ip_compat.h"
84#include <netinet/tcpip.h>
85#include "netinet/ip_fil.h"
86#include "netinet/ip_nat.h"
87#include "netinet/ip_frag.h"
88#include "netinet/ip_state.h"
89#include "netinet/ip_proxy.h"
90#include "netinet/ip_sync.h"
91#ifdef  USE_INET6
92#include <netinet/icmp6.h>
93#endif
94#if (__FreeBSD_version >= 300000)
95# include <sys/malloc.h>
96# if defined(_KERNEL) && !defined(IPFILTER_LKM)
97#  include <sys/libkern.h>
98#  include <sys/systm.h>
99# endif
100#endif
101/* END OF INCLUDES */
102
103#if !defined(lint)
104static const char rcsid[] = "@(#)Id: ip_sync.c,v 2.40.2.17 2009/12/27 06:55:22 darrenr Exp";
105#endif
106
107#define	SYNC_STATETABSZ	256
108#define	SYNC_NATTABSZ	256
109
110#ifdef	IPFILTER_SYNC
111# if SOLARIS && defined(_KERNEL)
112extern	struct pollhead	iplpollhead[IPL_LOGSIZE];
113# endif
114
115ipfmutex_t	ipf_syncadd, ipsl_mutex;
116ipfrwlock_t	ipf_syncstate, ipf_syncnat;
117#if SOLARIS && defined(_KERNEL)
118kcondvar_t	ipslwait;
119#endif
120synclist_t	*syncstatetab[SYNC_STATETABSZ];
121synclist_t	*syncnattab[SYNC_NATTABSZ];
122synclogent_t	synclog[SYNCLOG_SZ];
123syncupdent_t	syncupd[SYNCLOG_SZ];
124u_int		ipf_syncnum = 1;
125u_int		ipf_syncwrap = 0;
126u_int		sl_idx = 0,	/* next available sync log entry */
127		su_idx = 0,	/* next available sync update entry */
128		sl_tail = 0,	/* next sync log entry to read */
129		su_tail = 0;	/* next sync update entry to read */
130int		ipf_sync_debug = 0;
131
132
133# if !defined(sparc) && !defined(__hppa)
134void ipfsync_tcporder(int, struct tcpdata *);
135void ipfsync_natorder(int, struct nat *);
136void ipfsync_storder(int, struct ipstate *);
137# endif
138
139
140/* ------------------------------------------------------------------------ */
141/* Function:    ipfsync_init                                                */
142/* Returns:     int - 0 == success, -1 == failure                           */
143/* Parameters:  Nil                                                         */
144/*                                                                          */
145/* Initialise all of the locks required for the sync code and initialise    */
146/* any data structures, as required.                                        */
147/* ------------------------------------------------------------------------ */
148int
149ipfsync_init(void)
150{
151	RWLOCK_INIT(&ipf_syncstate, "add things to state sync table");
152	RWLOCK_INIT(&ipf_syncnat, "add things to nat sync table");
153	MUTEX_INIT(&ipf_syncadd, "add things to sync table");
154	MUTEX_INIT(&ipsl_mutex, "add things to sync table");
155# if SOLARIS && defined(_KERNEL)
156	cv_init(&ipslwait, "ipsl condvar", CV_DRIVER, NULL);
157# endif
158
159	bzero((char *)syncnattab, sizeof(syncnattab));
160	bzero((char *)syncstatetab, sizeof(syncstatetab));
161
162	return 0;
163}
164
165
166# if !defined(sparc) && !defined(__hppa)
167/* ------------------------------------------------------------------------ */
168/* Function:    ipfsync_tcporder                                            */
169/* Returns:     Nil                                                         */
170/* Parameters:  way(I) - direction of byte order conversion.                */
171/*              td(IO) - pointer to data to be converted.                   */
172/*                                                                          */
173/* Do byte swapping on values in the TCP state information structure that   */
174/* need to be used at both ends by the host in their native byte order.     */
175/* ------------------------------------------------------------------------ */
176void
177ipfsync_tcporder(int way, tcpdata_t *td)
178{
179	if (way) {
180		td->td_maxwin = htons(td->td_maxwin);
181		td->td_end = htonl(td->td_end);
182		td->td_maxend = htonl(td->td_maxend);
183	} else {
184		td->td_maxwin = ntohs(td->td_maxwin);
185		td->td_end = ntohl(td->td_end);
186		td->td_maxend = ntohl(td->td_maxend);
187	}
188}
189
190
191/* ------------------------------------------------------------------------ */
192/* Function:    ipfsync_natorder                                            */
193/* Returns:     Nil                                                         */
194/* Parameters:  way(I)  - direction of byte order conversion.               */
195/*              nat(IO) - pointer to data to be converted.                  */
196/*                                                                          */
197/* Do byte swapping on values in the NAT data structure that need to be     */
198/* used at both ends by the host in their native byte order.                */
199/* ------------------------------------------------------------------------ */
200void
201ipfsync_natorder(int way, nat_t *n)
202{
203	if (way) {
204		n->nat_age = htonl(n->nat_age);
205		n->nat_flags = htonl(n->nat_flags);
206		n->nat_ipsumd = htonl(n->nat_ipsumd);
207		n->nat_use = htonl(n->nat_use);
208		n->nat_dir = htonl(n->nat_dir);
209	} else {
210		n->nat_age = ntohl(n->nat_age);
211		n->nat_flags = ntohl(n->nat_flags);
212		n->nat_ipsumd = ntohl(n->nat_ipsumd);
213		n->nat_use = ntohl(n->nat_use);
214		n->nat_dir = ntohl(n->nat_dir);
215	}
216}
217
218
219/* ------------------------------------------------------------------------ */
220/* Function:    ipfsync_storder                                             */
221/* Returns:     Nil                                                         */
222/* Parameters:  way(I)  - direction of byte order conversion.               */
223/*              ips(IO) - pointer to data to be converted.                  */
224/*                                                                          */
225/* Do byte swapping on values in the IP state data structure that need to   */
226/* be used at both ends by the host in their native byte order.             */
227/* ------------------------------------------------------------------------ */
228void
229ipfsync_storder(int way, ipstate_t *ips)
230{
231	ipfsync_tcporder(way, &ips->is_tcp.ts_data[0]);
232	ipfsync_tcporder(way, &ips->is_tcp.ts_data[1]);
233
234	if (way) {
235		ips->is_hv = htonl(ips->is_hv);
236		ips->is_die = htonl(ips->is_die);
237		ips->is_pass = htonl(ips->is_pass);
238		ips->is_flags = htonl(ips->is_flags);
239		ips->is_opt[0] = htonl(ips->is_opt[0]);
240		ips->is_opt[1] = htonl(ips->is_opt[1]);
241		ips->is_optmsk[0] = htonl(ips->is_optmsk[0]);
242		ips->is_optmsk[1] = htonl(ips->is_optmsk[1]);
243		ips->is_sec = htons(ips->is_sec);
244		ips->is_secmsk = htons(ips->is_secmsk);
245		ips->is_auth = htons(ips->is_auth);
246		ips->is_authmsk = htons(ips->is_authmsk);
247		ips->is_s0[0] = htonl(ips->is_s0[0]);
248		ips->is_s0[1] = htonl(ips->is_s0[1]);
249		ips->is_smsk[0] = htons(ips->is_smsk[0]);
250		ips->is_smsk[1] = htons(ips->is_smsk[1]);
251	} else {
252		ips->is_hv = ntohl(ips->is_hv);
253		ips->is_die = ntohl(ips->is_die);
254		ips->is_pass = ntohl(ips->is_pass);
255		ips->is_flags = ntohl(ips->is_flags);
256		ips->is_opt[0] = ntohl(ips->is_opt[0]);
257		ips->is_opt[1] = ntohl(ips->is_opt[1]);
258		ips->is_optmsk[0] = ntohl(ips->is_optmsk[0]);
259		ips->is_optmsk[1] = ntohl(ips->is_optmsk[1]);
260		ips->is_sec = ntohs(ips->is_sec);
261		ips->is_secmsk = ntohs(ips->is_secmsk);
262		ips->is_auth = ntohs(ips->is_auth);
263		ips->is_authmsk = ntohs(ips->is_authmsk);
264		ips->is_s0[0] = ntohl(ips->is_s0[0]);
265		ips->is_s0[1] = ntohl(ips->is_s0[1]);
266		ips->is_smsk[0] = ntohl(ips->is_smsk[0]);
267		ips->is_smsk[1] = ntohl(ips->is_smsk[1]);
268	}
269}
270# else /* !defined(sparc) && !defined(__hppa) */
/* Selected when sparc or __hppa is defined: the helpers expand to nothing. */
#  define	ipfsync_tcporder(way, td)
#  define	ipfsync_natorder(way, n)
#  define	ipfsync_storder(way, ips)
274# endif /* !defined(sparc) && !defined(__hppa) */
275
276/* enable this for debugging */
277
278# ifdef _KERNEL
279/* ------------------------------------------------------------------------ */
280/* Function:    ipfsync_write                                               */
281/* Returns:     int    - 0 == success, else error value.                    */
282/* Parameters:  uio(I) - pointer to information about data to write         */
283/*                                                                          */
284/* Moves data from user space into the kernel and uses it for updating data */
285/* structures in the state/NAT tables.                                      */
286/* ------------------------------------------------------------------------ */
287int
288ipfsync_write(struct uio *uio)
289{
290	synchdr_t sh;
291
292	/*
293	 * THIS MUST BE SUFFICIENT LARGE TO STORE
294	 * ANY POSSIBLE DATA TYPE
295	 */
296	char data[2048];
297
298	int err = 0;
299
300#  if (BSD >= 199306) || defined(__FreeBSD__) || defined(__osf__)
301	uio->uio_rw = UIO_WRITE;
302#  endif
303
304	/* Try to get bytes */
305	while (uio->uio_resid > 0) {
306
307		if (uio->uio_resid >= sizeof(sh)) {
308
309			err = UIOMOVE(&sh, sizeof(sh), UIO_WRITE, uio);
310
311			if (err) {
312				if (ipf_sync_debug > 2)
313					printf("uiomove(header) failed: %d\n",
314						err);
315				return err;
316			}
317
318			/* convert to host order */
319			sh.sm_magic = ntohl(sh.sm_magic);
320			sh.sm_len = ntohl(sh.sm_len);
321			sh.sm_num = ntohl(sh.sm_num);
322
323			if (ipf_sync_debug > 8)
324				printf("[%d] Read v:%d p:%d cmd:%d table:%d rev:%d len:%d magic:%x\n",
325					sh.sm_num, sh.sm_v, sh.sm_p, sh.sm_cmd,
326					sh.sm_table, sh.sm_rev, sh.sm_len,
327					sh.sm_magic);
328
329			if (sh.sm_magic != SYNHDRMAGIC) {
330				if (ipf_sync_debug > 2)
331					printf("uiomove(header) invalud %s\n",
332						"magic");
333				return EINVAL;
334			}
335
336			if (sh.sm_v != 4 && sh.sm_v != 6) {
337				if (ipf_sync_debug > 2)
338					printf("uiomove(header) invalid %s\n",
339						"protocol");
340				return EINVAL;
341			}
342
343			if (sh.sm_cmd > SMC_MAXCMD) {
344				if (ipf_sync_debug > 2)
345					printf("uiomove(header) invalid %s\n",
346						"command");
347				return EINVAL;
348			}
349
350
351			if (sh.sm_table > SMC_MAXTBL) {
352				if (ipf_sync_debug > 2)
353					printf("uiomove(header) invalid %s\n",
354						"table");
355				return EINVAL;
356			}
357
358		} else {
359			/* unsufficient data, wait until next call */
360			if (ipf_sync_debug > 2)
361				printf("uiomove(header) insufficient data");
362			return EAGAIN;
363	 	}
364
365
366		/*
367		 * We have a header, so try to read the amount of data
368		 * needed for the request
369		 */
370
371		/* not supported */
372		if (sh.sm_len == 0) {
373			if (ipf_sync_debug > 2)
374				printf("uiomove(data zero length %s\n",
375					"not supported");
376			return EINVAL;
377		}
378
379		if (uio->uio_resid >= sh.sm_len) {
380
381			err = UIOMOVE(data, sh.sm_len, UIO_WRITE, uio);
382
383			if (err) {
384				if (ipf_sync_debug > 2)
385					printf("uiomove(data) failed: %d\n",
386						err);
387				return err;
388			}
389
390			if (ipf_sync_debug > 7)
391				printf("uiomove(data) %d bytes read\n",
392					sh.sm_len);
393
394			if (sh.sm_table == SMC_STATE)
395				err = ipfsync_state(&sh, data);
396			else if (sh.sm_table == SMC_NAT)
397				err = ipfsync_nat(&sh, data);
398			if (ipf_sync_debug > 7)
399				printf("[%d] Finished with error %d\n",
400					sh.sm_num, err);
401
402		} else {
403			/* insufficient data, wait until next call */
404			if (ipf_sync_debug > 2)
405				printf("uiomove(data) %s %d bytes, got %d\n",
406					"insufficient data, need",
407					sh.sm_len, uio->uio_resid);
408			return EAGAIN;
409		}
410	}
411
412	/* no more data */
413	return 0;
414}
415
416
417/* ------------------------------------------------------------------------ */
418/* Function:    ipfsync_read                                                */
419/* Returns:     int    - 0 == success, else error value.                    */
420/* Parameters:  uio(O) - pointer to information about where to store data   */
421/*                                                                          */
422/* This function is called when a user program wants to read some data      */
423/* for pending state/NAT updates.  If no data is available, the caller is   */
424/* put to sleep, pending a wakeup from the "lower half" of this code.       */
425/* ------------------------------------------------------------------------ */
426int
427ipfsync_read(struct uio *uio)
428{
429	syncupdent_t *su;
430	synclogent_t *sl;
431	int err = 0;
432
433	if ((uio->uio_resid & 3) || (uio->uio_resid < 8))
434		return EINVAL;
435
436#  if (BSD >= 199306) || defined(__FreeBSD__) || defined(__osf__)
437	uio->uio_rw = UIO_READ;
438#  endif
439
440	MUTEX_ENTER(&ipsl_mutex);
441	while ((sl_tail == sl_idx) && (su_tail == su_idx)) {
442#  if SOLARIS && defined(_KERNEL)
443		if (!cv_wait_sig(&ipslwait, &ipsl_mutex)) {
444			MUTEX_EXIT(&ipsl_mutex);
445			return EINTR;
446		}
447#  else
448#   ifdef __hpux
449		{
450		lock_t *l;
451
452		l = get_sleep_lock(&sl_tail);
453		err = sleep(&sl_tail, PZERO+1);
454		if (err) {
455			MUTEX_EXIT(&ipsl_mutex);
456			return EINTR;
457		}
458		spinunlock(l);
459		}
460#   else /* __hpux */
461#    ifdef __osf__
462		err = mpsleep(&sl_tail, PSUSP|PCATCH,  "ipl sleep", 0,
463			      &ipsl_mutex, MS_LOCK_SIMPLE);
464		if (err)
465			return EINTR;
466#    else
467		MUTEX_EXIT(&ipsl_mutex);
468		err = SLEEP(&sl_tail, "ipl sleep");
469		if (err)
470			return EINTR;
471		MUTEX_ENTER(&ipsl_mutex);
472#    endif /* __osf__ */
473#   endif /* __hpux */
474#  endif /* SOLARIS */
475	}
476	MUTEX_EXIT(&ipsl_mutex);
477
478	READ_ENTER(&ipf_syncstate);
479	while ((sl_tail < sl_idx)  && (uio->uio_resid > sizeof(*sl))) {
480		sl = synclog + sl_tail++;
481		err = UIOMOVE(sl, sizeof(*sl), UIO_READ, uio);
482		if (err != 0)
483			break;
484	}
485
486	while ((su_tail < su_idx)  && (uio->uio_resid > sizeof(*su))) {
487		su = syncupd + su_tail;
488		su_tail++;
489		err = UIOMOVE(su, sizeof(*su), UIO_READ, uio);
490		if (err != 0)
491			break;
492		if (su->sup_hdr.sm_sl != NULL)
493			su->sup_hdr.sm_sl->sl_idx = -1;
494	}
495
496	MUTEX_ENTER(&ipf_syncadd);
497	if (su_tail == su_idx)
498		su_tail = su_idx = 0;
499	if (sl_tail == sl_idx)
500		sl_tail = sl_idx = 0;
501	MUTEX_EXIT(&ipf_syncadd);
502	RWLOCK_EXIT(&ipf_syncstate);
503	return err;
504}
505
506
507/* ------------------------------------------------------------------------ */
508/* Function:    ipfsync_state                                               */
509/* Returns:     int    - 0 == success, else error value.                    */
510/* Parameters:  sp(I)  - pointer to sync packet data header                 */
511/*              uio(I) - pointer to user data for further information       */
512/*                                                                          */
513/* Updates the state table according to information passed in the sync      */
514/* header.  As required, more data is fetched from the uio structure but    */
515/* varies depending on the contents of the sync header.  This function can  */
516/* create a new state entry or update one.  Deletion is left to the state   */
517/* structures being timed out correctly.                                    */
518/* ------------------------------------------------------------------------ */
519int
520ipfsync_state(synchdr_t *sp, void *data)
521{
522	synctcp_update_t su;
523	ipstate_t *is, sn;
524	synclist_t *sl;
525	frentry_t *fr;
526	u_int hv;
527	int err = 0;
528
529	hv = sp->sm_num & (SYNC_STATETABSZ - 1);
530
531	switch (sp->sm_cmd)
532	{
533	case SMC_CREATE :
534
535		bcopy(data, &sn, sizeof(sn));
536		KMALLOC(is, ipstate_t *);
537		if (is == NULL) {
538			err = ENOMEM;
539			break;
540		}
541
542		KMALLOC(sl, synclist_t *);
543		if (sl == NULL) {
544			err = ENOMEM;
545			KFREE(is);
546			break;
547		}
548
549		bzero((char *)is, offsetof(ipstate_t, is_die));
550		bcopy((char *)&sn.is_die, (char *)&is->is_die,
551		      sizeof(*is) - offsetof(ipstate_t, is_die));
552		ipfsync_storder(0, is);
553
554		/*
555		 * We need to find the same rule on the slave as was used on
556		 * the master to create this state entry.
557		 */
558		READ_ENTER(&ipf_mutex);
559		fr = fr_getrulen(IPL_LOGIPF, sn.is_group, sn.is_rulen);
560		if (fr != NULL) {
561			MUTEX_ENTER(&fr->fr_lock);
562			fr->fr_ref++;
563			fr->fr_statecnt++;
564			MUTEX_EXIT(&fr->fr_lock);
565		}
566		RWLOCK_EXIT(&ipf_mutex);
567
568		if (ipf_sync_debug > 4)
569			printf("[%d] Filter rules = %p\n", sp->sm_num, fr);
570
571		is->is_rule = fr;
572		is->is_sync = sl;
573
574		sl->sl_idx = -1;
575		sl->sl_ips = is;
576		bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
577
578		WRITE_ENTER(&ipf_syncstate);
579		WRITE_ENTER(&ipf_state);
580
581		sl->sl_pnext = syncstatetab + hv;
582		sl->sl_next = syncstatetab[hv];
583		if (syncstatetab[hv] != NULL)
584			syncstatetab[hv]->sl_pnext = &sl->sl_next;
585		syncstatetab[hv] = sl;
586		MUTEX_DOWNGRADE(&ipf_syncstate);
587		fr_stinsert(is, sp->sm_rev);
588		/*
589		 * Do not initialise the interface pointers for the state
590		 * entry as the full complement of interface names may not
591		 * be present.
592		 *
593		 * Put this state entry on its timeout queue.
594		 */
595		/*fr_setstatequeue(is, sp->sm_rev);*/
596		break;
597
598	case SMC_UPDATE :
599		bcopy(data, &su, sizeof(su));
600
601		if (ipf_sync_debug > 4)
602			printf("[%d] Update age %lu state %d/%d \n",
603				sp->sm_num, su.stu_age, su.stu_state[0],
604				su.stu_state[1]);
605
606		READ_ENTER(&ipf_syncstate);
607		for (sl = syncstatetab[hv]; (sl != NULL); sl = sl->sl_next)
608			if (sl->sl_hdr.sm_num == sp->sm_num)
609				break;
610		if (sl == NULL) {
611			if (ipf_sync_debug > 1)
612				printf("[%d] State not found - can't update\n",
613					sp->sm_num);
614			RWLOCK_EXIT(&ipf_syncstate);
615			err = ENOENT;
616			break;
617		}
618
619		READ_ENTER(&ipf_state);
620
621		if (ipf_sync_debug > 6)
622			printf("[%d] Data from state v:%d p:%d cmd:%d table:%d rev:%d\n",
623				sp->sm_num, sl->sl_hdr.sm_v, sl->sl_hdr.sm_p,
624				sl->sl_hdr.sm_cmd, sl->sl_hdr.sm_table,
625				sl->sl_hdr.sm_rev);
626
627		is = sl->sl_ips;
628
629		MUTEX_ENTER(&is->is_lock);
630		switch (sp->sm_p)
631		{
632		case IPPROTO_TCP :
633			/* XXX FV --- shouldn't we do ntohl/htonl???? XXX */
634			is->is_send = su.stu_data[0].td_end;
635			is->is_maxsend = su.stu_data[0].td_maxend;
636			is->is_maxswin = su.stu_data[0].td_maxwin;
637			is->is_state[0] = su.stu_state[0];
638			is->is_dend = su.stu_data[1].td_end;
639			is->is_maxdend = su.stu_data[1].td_maxend;
640			is->is_maxdwin = su.stu_data[1].td_maxwin;
641			is->is_state[1] = su.stu_state[1];
642			break;
643		default :
644			break;
645		}
646
647		if (ipf_sync_debug > 6)
648			printf("[%d] Setting timers for state\n", sp->sm_num);
649
650		fr_setstatequeue(is, sp->sm_rev);
651
652		MUTEX_EXIT(&is->is_lock);
653		break;
654
655	default :
656		err = EINVAL;
657		break;
658	}
659
660	if (err == 0) {
661		RWLOCK_EXIT(&ipf_state);
662		RWLOCK_EXIT(&ipf_syncstate);
663	}
664
665	if (ipf_sync_debug > 6)
666		printf("[%d] Update completed with error %d\n",
667			sp->sm_num, err);
668
669	return err;
670}
671# endif /* _KERNEL */
672
673
674/* ------------------------------------------------------------------------ */
675/* Function:    ipfsync_del                                                 */
676/* Returns:     Nil                                                         */
677/* Parameters:  sl(I) - pointer to synclist object to delete                */
678/*                                                                          */
679/* Deletes an object from the synclist table and free's its memory.         */
680/* ------------------------------------------------------------------------ */
681void
682ipfsync_del(synclist_t *sl)
683{
684	WRITE_ENTER(&ipf_syncstate);
685	*sl->sl_pnext = sl->sl_next;
686	if (sl->sl_next != NULL)
687		sl->sl_next->sl_pnext = sl->sl_pnext;
688	if (sl->sl_idx != -1)
689		syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
690	RWLOCK_EXIT(&ipf_syncstate);
691	KFREE(sl);
692}
693
694
695/* ------------------------------------------------------------------------ */
696/* Function:    ipfsync_nat                                                 */
697/* Returns:     int    - 0 == success, else error value.                    */
698/* Parameters:  sp(I)  - pointer to sync packet data header                 */
699/*              uio(I) - pointer to user data for further information       */
700/*                                                                          */
701/* Updates the NAT  table according to information passed in the sync       */
702/* header.  As required, more data is fetched from the uio structure but    */
703/* varies depending on the contents of the sync header.  This function can  */
704/* create a new NAT entry or update one.  Deletion is left to the NAT       */
705/* structures being timed out correctly.                                    */
706/* ------------------------------------------------------------------------ */
707int
708ipfsync_nat(synchdr_t *sp, void *data)
709{
710	syncupdent_t su;
711	nat_t *n, *nat;
712	synclist_t *sl;
713	u_int hv = 0;
714	int err;
715
716	READ_ENTER(&ipf_syncnat);
717
718	switch (sp->sm_cmd)
719	{
720	case SMC_CREATE :
721		KMALLOC(n, nat_t *);
722		if (n == NULL) {
723			err = ENOMEM;
724			break;
725		}
726
727		KMALLOC(sl, synclist_t *);
728		if (sl == NULL) {
729			err = ENOMEM;
730			KFREE(n);
731			break;
732		}
733
734		nat = (nat_t *)data;
735		bzero((char *)n, offsetof(nat_t, nat_age));
736		bcopy((char *)&nat->nat_age, (char *)&n->nat_age,
737		      sizeof(*n) - offsetof(nat_t, nat_age));
738		ipfsync_natorder(0, n);
739		n->nat_sync = sl;
740
741		sl->sl_idx = -1;
742		sl->sl_ipn = n;
743		sl->sl_num = ntohl(sp->sm_num);
744
745		WRITE_ENTER(&ipf_nat);
746		sl->sl_pnext = syncnattab + hv;
747		sl->sl_next = syncnattab[hv];
748		if (syncnattab[hv] != NULL)
749			syncnattab[hv]->sl_pnext = &sl->sl_next;
750		syncnattab[hv] = sl;
751		nat_insert(n, sl->sl_rev);
752		RWLOCK_EXIT(&ipf_nat);
753		break;
754
755	case SMC_UPDATE :
756		bcopy(data, &su, sizeof(su));
757
758		for (sl = syncnattab[hv]; (sl != NULL); sl = sl->sl_next)
759			if (sl->sl_hdr.sm_num == sp->sm_num)
760				break;
761		if (sl == NULL) {
762			err = ENOENT;
763			break;
764		}
765
766		READ_ENTER(&ipf_nat);
767
768		nat = sl->sl_ipn;
769
770		MUTEX_ENTER(&nat->nat_lock);
771		fr_setnatqueue(nat, sl->sl_rev);
772		MUTEX_EXIT(&nat->nat_lock);
773
774		RWLOCK_EXIT(&ipf_nat);
775
776		break;
777
778	default :
779		err = EINVAL;
780		break;
781	}
782
783	RWLOCK_EXIT(&ipf_syncnat);
784	return 0;
785}
786
787
788/* ------------------------------------------------------------------------ */
789/* Function:    ipfsync_new                                                 */
790/* Returns:     synclist_t* - NULL == failure, else pointer to new synclist */
791/*                            data structure.                               */
792/* Parameters:  tab(I) - type of synclist_t to create                       */
793/*              fin(I) - pointer to packet information                      */
794/*              ptr(I) - pointer to owning object                           */
795/*                                                                          */
796/* Creates a new sync table entry and notifies any sleepers that it's there */
797/* waiting to be processed.                                                 */
798/* ------------------------------------------------------------------------ */
799synclist_t *
800ipfsync_new(int tab, fr_info_t *fin, void *ptr)
801{
802	synclist_t *sl, *ss;
803	synclogent_t *sle;
804	u_int hv, sz;
805
806	if (sl_idx == SYNCLOG_SZ)
807		return NULL;
808	KMALLOC(sl, synclist_t *);
809	if (sl == NULL)
810		return NULL;
811
812	MUTEX_ENTER(&ipf_syncadd);
813	/*
814	 * Get a unique number for this synclist_t.  The number is only meant
815	 * to be unique for the lifetime of the structure and may be reused
816	 * later.
817	 */
818	ipf_syncnum++;
819	if (ipf_syncnum == 0) {
820		ipf_syncnum = 1;
821		ipf_syncwrap = 1;
822	}
823
824	/*
825	 * Use the synch number of the object as the hash key.  Should end up
826	 * with relatively even distribution over time.
827	 * XXX - an attacker could lunch an DoS attack, of sorts, if they are
828	 * the only one causing new table entries by only keeping open every
829	 * nth connection they make, where n is a value in the interval
830	 * [0, SYNC_STATETABSZ-1].
831	 */
832	 if (tab == SMC_STATE) {
833		hv = ipf_syncnum & (SYNC_STATETABSZ - 1);
834		while (ipf_syncwrap != 0) {
835			for (ss = syncstatetab[hv]; ss; ss = ss->sl_next)
836				if (ss->sl_hdr.sm_num == ipf_syncnum)
837					break;
838			if (ss == NULL)
839				break;
840			ipf_syncnum++;
841			hv = ipf_syncnum & (SYNC_STATETABSZ - 1);
842		}
843		sl->sl_pnext = syncstatetab + hv;
844		sl->sl_next = syncstatetab[hv];
845		syncstatetab[hv] = sl;
846	} else {
847		hv = ipf_syncnum & (SYNC_NATTABSZ - 1);
848		while (ipf_syncwrap != 0) {
849			for (ss = syncnattab[hv]; ss; ss = ss->sl_next)
850				if (ss->sl_hdr.sm_num == ipf_syncnum)
851					break;
852			if (ss == NULL)
853				break;
854			ipf_syncnum++;
855			hv = ipf_syncnum & (SYNC_STATETABSZ - 1);
856		}
857		sl->sl_pnext = syncnattab + hv;
858		sl->sl_next = syncnattab[hv];
859		syncnattab[hv] = sl;
860	}
861	sl->sl_num = ipf_syncnum;
862	MUTEX_EXIT(&ipf_syncadd);
863
864	sl->sl_magic = htonl(SYNHDRMAGIC);
865	sl->sl_v = fin->fin_v;
866	sl->sl_p = fin->fin_p;
867	sl->sl_cmd = SMC_CREATE;
868	sl->sl_idx = -1;
869	sl->sl_table = tab;
870	sl->sl_rev = fin->fin_rev;
871	if (tab == SMC_STATE) {
872		sl->sl_ips = ptr;
873		sz = sizeof(*sl->sl_ips);
874	} else {
875		sl->sl_ipn = ptr;
876		sz = sizeof(*sl->sl_ipn);
877	}
878	sl->sl_len = sz;
879
880	/*
881	 * Create the log entry to be read by a user daemon.  When it has been
882	 * finished and put on the queue, send a signal to wakeup any waiters.
883	 */
884	MUTEX_ENTER(&ipf_syncadd);
885	sle = synclog + sl_idx++;
886	bcopy((char *)&sl->sl_hdr, (char *)&sle->sle_hdr,
887	      sizeof(sle->sle_hdr));
888	sle->sle_hdr.sm_num = htonl(sle->sle_hdr.sm_num);
889	sle->sle_hdr.sm_len = htonl(sle->sle_hdr.sm_len);
890	if (ptr != NULL) {
891		bcopy((char *)ptr, (char *)&sle->sle_un, sz);
892		if (tab == SMC_STATE) {
893			ipfsync_storder(1, &sle->sle_un.sleu_ips);
894		} else if (tab == SMC_NAT) {
895			ipfsync_natorder(1, &sle->sle_un.sleu_ipn);
896		}
897	}
898	MUTEX_EXIT(&ipf_syncadd);
899
900	MUTEX_ENTER(&ipsl_mutex);
901# if SOLARIS
902#  ifdef _KERNEL
903	cv_signal(&ipslwait);
904	pollwakeup(&iplpollhead[IPL_LOGSYNC], POLLIN|POLLRDNORM);
905#  endif
906	MUTEX_EXIT(&ipsl_mutex);
907# else
908	MUTEX_EXIT(&ipsl_mutex);
909#  ifdef _KERNEL
910	WAKEUP(&sl_tail, 0);
911	POLLWAKEUP(IPL_LOGSYNC);
912#  endif
913# endif
914	return sl;
915}
916
917
918/* ------------------------------------------------------------------------ */
919/* Function:    ipfsync_update                                              */
920/* Returns:     Nil                                                         */
921/* Parameters:  tab(I) - type of synclist_t to create                       */
922/*              fin(I) - pointer to packet information                      */
923/*              sl(I)  - pointer to synchronisation object                  */
924/*                                                                          */
925/* For outbound packets, only, create an sync update record for the user    */
926/* process to read.                                                         */
927/* ------------------------------------------------------------------------ */
928void
929ipfsync_update(int tab, fr_info_t *fin, synclist_t *sl)
930{
931	synctcp_update_t *st;
932	syncupdent_t *slu;
933	ipstate_t *ips;
934	nat_t *nat;
935
936	if (fin->fin_out == 0 || sl == NULL)
937		return;
938
939	WRITE_ENTER(&ipf_syncstate);
940	MUTEX_ENTER(&ipf_syncadd);
941	if (sl->sl_idx == -1) {
942		slu = syncupd + su_idx;
943		sl->sl_idx = su_idx++;
944		bcopy((char *)&sl->sl_hdr, (char *)&slu->sup_hdr,
945		      sizeof(slu->sup_hdr));
946		slu->sup_hdr.sm_magic = htonl(SYNHDRMAGIC);
947		slu->sup_hdr.sm_sl = sl;
948		slu->sup_hdr.sm_cmd = SMC_UPDATE;
949		slu->sup_hdr.sm_table = tab;
950		slu->sup_hdr.sm_num = htonl(sl->sl_num);
951		slu->sup_hdr.sm_len = htonl(sizeof(struct synctcp_update));
952		slu->sup_hdr.sm_rev = fin->fin_rev;
953# if 0
954		if (fin->fin_p == IPPROTO_TCP) {
955			st->stu_len[0] = 0;
956			st->stu_len[1] = 0;
957		}
958# endif
959	} else
960		slu = syncupd + sl->sl_idx;
961	MUTEX_EXIT(&ipf_syncadd);
962	MUTEX_DOWNGRADE(&ipf_syncstate);
963
964	/*
965	 * Only TCP has complex timeouts, others just use default timeouts.
966	 * For TCP, we only need to track the connection state and window.
967	 */
968	if (fin->fin_p == IPPROTO_TCP) {
969		st = &slu->sup_tcp;
970		if (tab == SMC_STATE) {
971			ips = sl->sl_ips;
972			st->stu_age = htonl(ips->is_die);
973			st->stu_data[0].td_end = ips->is_send;
974			st->stu_data[0].td_maxend = ips->is_maxsend;
975			st->stu_data[0].td_maxwin = ips->is_maxswin;
976			st->stu_state[0] = ips->is_state[0];
977			st->stu_data[1].td_end = ips->is_dend;
978			st->stu_data[1].td_maxend = ips->is_maxdend;
979			st->stu_data[1].td_maxwin = ips->is_maxdwin;
980			st->stu_state[1] = ips->is_state[1];
981		} else if (tab == SMC_NAT) {
982			nat = sl->sl_ipn;
983			st->stu_age = htonl(nat->nat_age);
984		}
985	}
986	RWLOCK_EXIT(&ipf_syncstate);
987
988	MUTEX_ENTER(&ipsl_mutex);
989# if SOLARIS
990#  ifdef _KERNEL
991	cv_signal(&ipslwait);
992	pollwakeup(&iplpollhead[IPL_LOGSYNC], POLLIN|POLLRDNORM);
993#  endif
994	MUTEX_EXIT(&ipsl_mutex);
995# else
996	MUTEX_EXIT(&ipsl_mutex);
997#  ifdef _KERNEL
998	WAKEUP(&sl_tail, 0);
999	POLLWAKEUP(IPL_LOGSYNC);
1000#  endif
1001# endif
1002}
1003
1004
1005/* ------------------------------------------------------------------------ */
1006/* Function:    fr_sync_ioctl                                               */
1007/* Returns:     int - 0 == success, != 0 == failure                         */
1008/* Parameters:  data(I) - pointer to ioctl data                             */
1009/*              cmd(I)  - ioctl command integer                             */
1010/*              mode(I) - file mode bits used with open                     */
1011/*                                                                          */
1012/* This function currently does not handle any ioctls and so just returns   */
1013/* EINVAL on all occasions.                                                 */
1014/* ------------------------------------------------------------------------ */
1015int
1016fr_sync_ioctl(caddr_t data, ioctlcmd_t cmd, int mode, int uid, void *ctx)
1017{
1018	return EINVAL;
1019}
1020
1021
1022int
1023ipfsync_canread(void)
1024{
1025	return !((sl_tail == sl_idx) && (su_tail == su_idx));
1026}
1027
1028
/*
 * Report whether a write can be accepted; this always answers yes (1).
 */
int
ipfsync_canwrite(void)
{
	return 1;
}
1034#endif /* IPFILTER_SYNC */
1035