ip_sync.c revision 172776
1/*	$FreeBSD: head/sys/contrib/ipfilter/netinet/ip_sync.c 172776 2007-10-18 21:52:14Z darrenr $	*/
2
3/*
4 * Copyright (C) 1995-1998 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 */
8#if defined(KERNEL) || defined(_KERNEL)
9# undef KERNEL
10# undef _KERNEL
11# define        KERNEL	1
12# define        _KERNEL	1
13#endif
14#include <sys/errno.h>
15#include <sys/types.h>
16#include <sys/param.h>
17#include <sys/file.h>
18#if !defined(_KERNEL) && !defined(__KERNEL__)
19# include <stdio.h>
20# include <stdlib.h>
21# include <string.h>
22# define _KERNEL
23# define KERNEL
24# ifdef __OpenBSD__
25struct file;
26# endif
27# include <sys/uio.h>
28# undef _KERNEL
29# undef KERNEL
30#else
31# include <sys/systm.h>
32# if !defined(__SVR4) && !defined(__svr4__)
33#  include <sys/mbuf.h>
34# endif
35#endif
36#if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
37# include <sys/proc.h>
38#endif
39#if defined(_KERNEL) && (__FreeBSD_version >= 220000)
40# include <sys/filio.h>
41# include <sys/fcntl.h>
42# if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM)
43#  include "opt_ipfilter.h"
44# endif
45#else
46# include <sys/ioctl.h>
47#endif
48#include <sys/time.h>
49#if !defined(linux)
50# include <sys/protosw.h>
51#endif
52#include <sys/socket.h>
53#if defined(__SVR4) || defined(__svr4__)
54# include <sys/filio.h>
55# include <sys/byteorder.h>
56# ifdef _KERNEL
57#  include <sys/dditypes.h>
58# endif
59# include <sys/stream.h>
60# include <sys/kmem.h>
61#endif
62
63#include <net/if.h>
64#ifdef sun
65# include <net/af.h>
66#endif
67#include <net/route.h>
68#include <netinet/in.h>
69#include <netinet/in_systm.h>
70#include <netinet/ip.h>
71#include <netinet/tcp.h>
72#if !defined(linux)
73# include <netinet/ip_var.h>
74#endif
75#if !defined(__hpux) && !defined(linux)
76# include <netinet/tcp_fsm.h>
77#endif
78#include <netinet/udp.h>
79#include <netinet/ip_icmp.h>
80#include "netinet/ip_compat.h"
81#include <netinet/tcpip.h>
82#include "netinet/ip_fil.h"
83#include "netinet/ip_nat.h"
84#include "netinet/ip_frag.h"
85#include "netinet/ip_state.h"
86#include "netinet/ip_proxy.h"
87#include "netinet/ip_sync.h"
88#ifdef  USE_INET6
89#include <netinet/icmp6.h>
90#endif
91#if (__FreeBSD_version >= 300000)
92# include <sys/malloc.h>
93# if defined(_KERNEL) && !defined(IPFILTER_LKM)
94#  include <sys/libkern.h>
95#  include <sys/systm.h>
96# endif
97#endif
98/* END OF INCLUDES */
99
100#if !defined(lint)
101static const char rcsid[] = "@(#)$Id: ip_sync.c,v 2.40.2.9 2007/06/02 21:22:28 darrenr Exp $";
102#endif
103
104#define	SYNC_STATETABSZ	256
105#define	SYNC_NATTABSZ	256
106
107#ifdef	IPFILTER_SYNC
/*
 * ipf_syncadd   - held while sync numbers are allocated and while entries
 *                 are appended to synclog[]/syncupd[] (sl_idx/su_idx).
 * ipsl_mutex    - held around sleeping for, and signalling, readers.
 */
ipfmutex_t	ipf_syncadd, ipsl_mutex;
/*
 * ipf_syncstate - read/write lock taken around walks and changes of
 *                 syncstatetab[].
 * ipf_syncnat   - initialised in ipfsync_init(); not otherwise used in the
 *                 code visible in this file.
 */
ipfrwlock_t	ipf_syncstate, ipf_syncnat;
#if SOLARIS && defined(_KERNEL)
/* Condition variable readers wait on (Solaris kernels only). */
kcondvar_t	ipslwait;
#endif
/* Hash chains of synclist_t, indexed by sync number & (table size - 1). */
synclist_t	*syncstatetab[SYNC_STATETABSZ];
/* Zeroed in ipfsync_init(); not otherwise used in the visible code. */
synclist_t	*syncnattab[SYNC_NATTABSZ];
/* Queue of "create" records waiting to be read by the user process. */
synclogent_t	synclog[SYNCLOG_SZ];
/* Queue of "update" records waiting to be read by the user process. */
syncupdent_t	syncupd[SYNCLOG_SZ];
/* Most recently allocated sync number; 0 is skipped when it wraps. */
u_int		ipf_syncnum = 1;
/* Set once ipf_syncnum has wrapped, enabling the uniqueness search. */
u_int		ipf_syncwrap = 0;
u_int		sl_idx = 0,	/* next available sync log entry */
		su_idx = 0,	/* next available sync update entry */
		sl_tail = 0,	/* next sync log entry to read */
		su_tail = 0;	/* next sync update entry to read */
/* Verbosity of the printf() diagnostics; higher values print more. */
int		ipf_sync_debug = 0;
124
125
126# if !defined(sparc) && !defined(__hppa)
127void ipfsync_tcporder __P((int, struct tcpdata *));
128void ipfsync_natorder __P((int, struct nat *));
129void ipfsync_storder __P((int, struct ipstate *));
130# endif
131
132
133/* ------------------------------------------------------------------------ */
134/* Function:    ipfsync_init                                                */
135/* Returns:     int - 0 == success, -1 == failure                           */
136/* Parameters:  Nil                                                         */
137/*                                                                          */
138/* Initialise all of the locks required for the sync code and initialise    */
139/* any data structures, as required.                                        */
140/* ------------------------------------------------------------------------ */
141int ipfsync_init()
142{
143	RWLOCK_INIT(&ipf_syncstate, "add things to state sync table");
144	RWLOCK_INIT(&ipf_syncnat, "add things to nat sync table");
145	MUTEX_INIT(&ipf_syncadd, "add things to sync table");
146	MUTEX_INIT(&ipsl_mutex, "add things to sync table");
147# if SOLARIS && defined(_KERNEL)
148	cv_init(&ipslwait, "ipsl condvar", CV_DRIVER, NULL);
149# endif
150
151	bzero((char *)syncnattab, sizeof(syncnattab));
152	bzero((char *)syncstatetab, sizeof(syncstatetab));
153
154	return 0;
155}
156
157
158# if !defined(sparc) && !defined(__hppa)
159/* ------------------------------------------------------------------------ */
160/* Function:    ipfsync_tcporder                                            */
161/* Returns:     Nil                                                         */
162/* Parameters:  way(I) - direction of byte order conversion.                */
163/*              td(IO) - pointer to data to be converted.                   */
164/*                                                                          */
165/* Do byte swapping on values in the TCP state information structure that   */
166/* need to be used at both ends by the host in their native byte order.     */
167/* ------------------------------------------------------------------------ */
168void ipfsync_tcporder(way, td)
169int way;
170tcpdata_t *td;
171{
172	if (way) {
173		td->td_maxwin = htons(td->td_maxwin);
174		td->td_end = htonl(td->td_end);
175		td->td_maxend = htonl(td->td_maxend);
176	} else {
177		td->td_maxwin = ntohs(td->td_maxwin);
178		td->td_end = ntohl(td->td_end);
179		td->td_maxend = ntohl(td->td_maxend);
180	}
181}
182
183
184/* ------------------------------------------------------------------------ */
185/* Function:    ipfsync_natorder                                            */
186/* Returns:     Nil                                                         */
187/* Parameters:  way(I)  - direction of byte order conversion.               */
188/*              nat(IO) - pointer to data to be converted.                  */
189/*                                                                          */
190/* Do byte swapping on values in the NAT data structure that need to be     */
191/* used at both ends by the host in their native byte order.                */
192/* ------------------------------------------------------------------------ */
193void ipfsync_natorder(way, n)
194int way;
195nat_t *n;
196{
197	if (way) {
198		n->nat_age = htonl(n->nat_age);
199		n->nat_flags = htonl(n->nat_flags);
200		n->nat_ipsumd = htonl(n->nat_ipsumd);
201		n->nat_use = htonl(n->nat_use);
202		n->nat_dir = htonl(n->nat_dir);
203	} else {
204		n->nat_age = ntohl(n->nat_age);
205		n->nat_flags = ntohl(n->nat_flags);
206		n->nat_ipsumd = ntohl(n->nat_ipsumd);
207		n->nat_use = ntohl(n->nat_use);
208		n->nat_dir = ntohl(n->nat_dir);
209	}
210}
211
212
213/* ------------------------------------------------------------------------ */
214/* Function:    ipfsync_storder                                             */
215/* Returns:     Nil                                                         */
216/* Parameters:  way(I)  - direction of byte order conversion.               */
217/*              ips(IO) - pointer to data to be converted.                  */
218/*                                                                          */
219/* Do byte swapping on values in the IP state data structure that need to   */
220/* be used at both ends by the host in their native byte order.             */
221/* ------------------------------------------------------------------------ */
222void ipfsync_storder(way, ips)
223int way;
224ipstate_t *ips;
225{
226	ipfsync_tcporder(way, &ips->is_tcp.ts_data[0]);
227	ipfsync_tcporder(way, &ips->is_tcp.ts_data[1]);
228
229	if (way) {
230		ips->is_hv = htonl(ips->is_hv);
231		ips->is_die = htonl(ips->is_die);
232		ips->is_pass = htonl(ips->is_pass);
233		ips->is_flags = htonl(ips->is_flags);
234		ips->is_opt[0] = htonl(ips->is_opt[0]);
235		ips->is_opt[1] = htonl(ips->is_opt[1]);
236		ips->is_optmsk[0] = htonl(ips->is_optmsk[0]);
237		ips->is_optmsk[1] = htonl(ips->is_optmsk[1]);
238		ips->is_sec = htons(ips->is_sec);
239		ips->is_secmsk = htons(ips->is_secmsk);
240		ips->is_auth = htons(ips->is_auth);
241		ips->is_authmsk = htons(ips->is_authmsk);
242		ips->is_s0[0] = htonl(ips->is_s0[0]);
243		ips->is_s0[1] = htonl(ips->is_s0[1]);
244		ips->is_smsk[0] = htons(ips->is_smsk[0]);
245		ips->is_smsk[1] = htons(ips->is_smsk[1]);
246	} else {
247		ips->is_hv = ntohl(ips->is_hv);
248		ips->is_die = ntohl(ips->is_die);
249		ips->is_pass = ntohl(ips->is_pass);
250		ips->is_flags = ntohl(ips->is_flags);
251		ips->is_opt[0] = ntohl(ips->is_opt[0]);
252		ips->is_opt[1] = ntohl(ips->is_opt[1]);
253		ips->is_optmsk[0] = ntohl(ips->is_optmsk[0]);
254		ips->is_optmsk[1] = ntohl(ips->is_optmsk[1]);
255		ips->is_sec = ntohs(ips->is_sec);
256		ips->is_secmsk = ntohs(ips->is_secmsk);
257		ips->is_auth = ntohs(ips->is_auth);
258		ips->is_authmsk = ntohs(ips->is_authmsk);
259		ips->is_s0[0] = ntohl(ips->is_s0[0]);
260		ips->is_s0[1] = ntohl(ips->is_s0[1]);
261		ips->is_smsk[0] = ntohl(ips->is_smsk[0]);
262		ips->is_smsk[1] = ntohl(ips->is_smsk[1]);
263	}
264}
265# else /* !defined(sparc) && !defined(__hppa) */
266#  define	ipfsync_tcporder(x,y)
267#  define	ipfsync_natorder(x,y)
268#  define	ipfsync_storder(x,y)
269# endif /* !defined(sparc) && !defined(__hppa) */
270
271/* enable this for debugging */
272
273# ifdef _KERNEL
274/* ------------------------------------------------------------------------ */
275/* Function:    ipfsync_write                                               */
276/* Returns:     int    - 0 == success, else error value.                    */
277/* Parameters:  uio(I) - pointer to information about data to write         */
278/*                                                                          */
279/* Moves data from user space into the kernel and uses it for updating data */
280/* structures in the state/NAT tables.                                      */
281/* ------------------------------------------------------------------------ */
282int ipfsync_write(uio)
283struct uio *uio;
284{
285	synchdr_t sh;
286
287	/*
288	 * THIS MUST BE SUFFICIENT LARGE TO STORE
289	 * ANY POSSIBLE DATA TYPE
290	 */
291	char data[2048];
292
293	int err = 0;
294
295#  if (BSD >= 199306) || defined(__FreeBSD__) || defined(__osf__)
296	uio->uio_rw = UIO_WRITE;
297#  endif
298
299	/* Try to get bytes */
300	while (uio->uio_resid > 0) {
301
302		if (uio->uio_resid >= sizeof(sh)) {
303
304			err = UIOMOVE(&sh, sizeof(sh), UIO_WRITE, uio);
305
306			if (err) {
307				if (ipf_sync_debug > 2)
308					printf("uiomove(header) failed: %d\n",
309						err);
310				return err;
311			}
312
313			/* convert to host order */
314			sh.sm_magic = ntohl(sh.sm_magic);
315			sh.sm_len = ntohl(sh.sm_len);
316			sh.sm_num = ntohl(sh.sm_num);
317
318			if (ipf_sync_debug > 8)
319				printf("[%d] Read v:%d p:%d cmd:%d table:%d rev:%d len:%d magic:%x\n",
320					sh.sm_num, sh.sm_v, sh.sm_p, sh.sm_cmd,
321					sh.sm_table, sh.sm_rev, sh.sm_len,
322					sh.sm_magic);
323
324			if (sh.sm_magic != SYNHDRMAGIC) {
325				if (ipf_sync_debug > 2)
326					printf("uiomove(header) invalud %s\n",
327						"magic");
328				return EINVAL;
329			}
330
331			if (sh.sm_v != 4 && sh.sm_v != 6) {
332				if (ipf_sync_debug > 2)
333					printf("uiomove(header) invalid %s\n",
334						"protocol");
335				return EINVAL;
336			}
337
338			if (sh.sm_cmd > SMC_MAXCMD) {
339				if (ipf_sync_debug > 2)
340					printf("uiomove(header) invalid %s\n",
341						"command");
342				return EINVAL;
343			}
344
345
346			if (sh.sm_table > SMC_MAXTBL) {
347				if (ipf_sync_debug > 2)
348					printf("uiomove(header) invalid %s\n",
349						"table");
350				return EINVAL;
351			}
352
353		} else {
354			/* unsufficient data, wait until next call */
355			if (ipf_sync_debug > 2)
356				printf("uiomove(header) insufficient data");
357			return EAGAIN;
358	 	}
359
360
361		/*
362		 * We have a header, so try to read the amount of data
363		 * needed for the request
364		 */
365
366		/* not supported */
367		if (sh.sm_len == 0) {
368			if (ipf_sync_debug > 2)
369				printf("uiomove(data zero length %s\n",
370					"not supported");
371			return EINVAL;
372		}
373
374		if (uio->uio_resid >= sh.sm_len) {
375
376			err = UIOMOVE(data, sh.sm_len, UIO_WRITE, uio);
377
378			if (err) {
379				if (ipf_sync_debug > 2)
380					printf("uiomove(data) failed: %d\n",
381						err);
382				return err;
383			}
384
385			if (ipf_sync_debug > 7)
386				printf("uiomove(data) %d bytes read\n",
387					sh.sm_len);
388
389			if (sh.sm_table == SMC_STATE)
390				err = ipfsync_state(&sh, data);
391			else if (sh.sm_table == SMC_NAT)
392				err = ipfsync_nat(&sh, data);
393			if (ipf_sync_debug > 7)
394				printf("[%d] Finished with error %d\n",
395					sh.sm_num, err);
396
397		} else {
398			/* insufficient data, wait until next call */
399			if (ipf_sync_debug > 2)
400				printf("uiomove(data) %s %d bytes, got %d\n",
401					"insufficient data, need",
402					sh.sm_len, uio->uio_resid);
403			return EAGAIN;
404		}
405	}
406
407	/* no more data */
408	return 0;
409}
410
411
412/* ------------------------------------------------------------------------ */
413/* Function:    ipfsync_read                                                */
414/* Returns:     int    - 0 == success, else error value.                    */
415/* Parameters:  uio(O) - pointer to information about where to store data   */
416/*                                                                          */
417/* This function is called when a user program wants to read some data      */
418/* for pending state/NAT updates.  If no data is available, the caller is   */
419/* put to sleep, pending a wakeup from the "lower half" of this code.       */
420/* ------------------------------------------------------------------------ */
421int ipfsync_read(uio)
422struct uio *uio;
423{
424	syncupdent_t *su;
425	synclogent_t *sl;
426	int err = 0;
427
428	if ((uio->uio_resid & 3) || (uio->uio_resid < 8))
429		return EINVAL;
430
431#  if (BSD >= 199306) || defined(__FreeBSD__) || defined(__osf__)
432	uio->uio_rw = UIO_READ;
433#  endif
434
435	MUTEX_ENTER(&ipsl_mutex);
436	while ((sl_tail == sl_idx) && (su_tail == su_idx)) {
437#  if SOLARIS && defined(_KERNEL)
438		if (!cv_wait_sig(&ipslwait, &ipsl_mutex)) {
439			MUTEX_EXIT(&ipsl_mutex);
440			return EINTR;
441		}
442#  else
443#   ifdef __hpux
444		{
445		lock_t *l;
446
447		l = get_sleep_lock(&sl_tail);
448		err = sleep(&sl_tail, PZERO+1);
449		if (err) {
450			MUTEX_EXIT(&ipsl_mutex);
451			return EINTR;
452		}
453		spinunlock(l);
454		}
455#   else /* __hpux */
456#    ifdef __osf__
457		err = mpsleep(&sl_tail, PSUSP|PCATCH,  "ipl sleep", 0,
458			      &ipsl_mutex, MS_LOCK_SIMPLE);
459		if (err)
460			return EINTR;
461#    else
462		MUTEX_EXIT(&ipsl_mutex);
463		err = SLEEP(&sl_tail, "ipl sleep");
464		if (err)
465			return EINTR;
466		MUTEX_ENTER(&ipsl_mutex);
467#    endif /* __osf__ */
468#   endif /* __hpux */
469#  endif /* SOLARIS */
470	}
471	MUTEX_EXIT(&ipsl_mutex);
472
473	READ_ENTER(&ipf_syncstate);
474	while ((sl_tail < sl_idx)  && (uio->uio_resid > sizeof(*sl))) {
475		sl = synclog + sl_tail++;
476		err = UIOMOVE(sl, sizeof(*sl), UIO_READ, uio);
477		if (err != 0)
478			break;
479	}
480
481	while ((su_tail < su_idx)  && (uio->uio_resid > sizeof(*su))) {
482		su = syncupd + su_tail;
483		su_tail++;
484		err = UIOMOVE(su, sizeof(*su), UIO_READ, uio);
485		if (err != 0)
486			break;
487		if (su->sup_hdr.sm_sl != NULL)
488			su->sup_hdr.sm_sl->sl_idx = -1;
489	}
490
491	MUTEX_ENTER(&ipf_syncadd);
492	if (su_tail == su_idx)
493		su_tail = su_idx = 0;
494	if (sl_tail == sl_idx)
495		sl_tail = sl_idx = 0;
496	MUTEX_EXIT(&ipf_syncadd);
497	RWLOCK_EXIT(&ipf_syncstate);
498	return err;
499}
500
501
502/* ------------------------------------------------------------------------ */
503/* Function:    ipfsync_state                                               */
504/* Returns:     int    - 0 == success, else error value.                    */
505/* Parameters:  sp(I)  - pointer to sync packet data header                 */
506/*              uio(I) - pointer to user data for further information       */
507/*                                                                          */
508/* Updates the state table according to information passed in the sync      */
509/* header.  As required, more data is fetched from the uio structure but    */
510/* varies depending on the contents of the sync header.  This function can  */
511/* create a new state entry or update one.  Deletion is left to the state   */
512/* structures being timed out correctly.                                    */
513/* ------------------------------------------------------------------------ */
514int ipfsync_state(sp, data)
515synchdr_t *sp;
516void *data;
517{
518	synctcp_update_t su;
519	ipstate_t *is, sn;
520	synclist_t *sl;
521	frentry_t *fr;
522	u_int hv;
523	int err = 0;
524
525	hv = sp->sm_num & (SYNC_STATETABSZ - 1);
526
527	switch (sp->sm_cmd)
528	{
529	case SMC_CREATE :
530
531		bcopy(data, &sn, sizeof(sn));
532		KMALLOC(is, ipstate_t *);
533		if (is == NULL) {
534			err = ENOMEM;
535			break;
536		}
537
538		KMALLOC(sl, synclist_t *);
539		if (sl == NULL) {
540			err = ENOMEM;
541			KFREE(is);
542			break;
543		}
544
545		bzero((char *)is, offsetof(ipstate_t, is_die));
546		bcopy((char *)&sn.is_die, (char *)&is->is_die,
547		      sizeof(*is) - offsetof(ipstate_t, is_die));
548		ipfsync_storder(0, is);
549
550		/*
551		 * We need to find the same rule on the slave as was used on
552		 * the master to create this state entry.
553		 */
554		READ_ENTER(&ipf_mutex);
555		fr = fr_getrulen(IPL_LOGIPF, sn.is_group, sn.is_rulen);
556		if (fr != NULL) {
557			MUTEX_ENTER(&fr->fr_lock);
558			fr->fr_ref++;
559			fr->fr_statecnt++;
560			MUTEX_EXIT(&fr->fr_lock);
561		}
562		RWLOCK_EXIT(&ipf_mutex);
563
564		if (ipf_sync_debug > 4)
565			printf("[%d] Filter rules = %p\n", sp->sm_num, fr);
566
567		is->is_rule = fr;
568		is->is_sync = sl;
569
570		sl->sl_idx = -1;
571		sl->sl_ips = is;
572		bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
573
574		WRITE_ENTER(&ipf_syncstate);
575		WRITE_ENTER(&ipf_state);
576
577		sl->sl_pnext = syncstatetab + hv;
578		sl->sl_next = syncstatetab[hv];
579		if (syncstatetab[hv] != NULL)
580			syncstatetab[hv]->sl_pnext = &sl->sl_next;
581		syncstatetab[hv] = sl;
582		MUTEX_DOWNGRADE(&ipf_syncstate);
583		fr_stinsert(is, sp->sm_rev);
584		/*
585		 * Do not initialise the interface pointers for the state
586		 * entry as the full complement of interface names may not
587		 * be present.
588		 *
589		 * Put this state entry on its timeout queue.
590		 */
591		/*fr_setstatequeue(is, sp->sm_rev);*/
592		break;
593
594	case SMC_UPDATE :
595		bcopy(data, &su, sizeof(su));
596
597		if (ipf_sync_debug > 4)
598			printf("[%d] Update age %lu state %d/%d \n",
599				sp->sm_num, su.stu_age, su.stu_state[0],
600				su.stu_state[1]);
601
602		READ_ENTER(&ipf_syncstate);
603		for (sl = syncstatetab[hv]; (sl != NULL); sl = sl->sl_next)
604			if (sl->sl_hdr.sm_num == sp->sm_num)
605				break;
606		if (sl == NULL) {
607			if (ipf_sync_debug > 1)
608				printf("[%d] State not found - can't update\n",
609					sp->sm_num);
610			RWLOCK_EXIT(&ipf_syncstate);
611			err = ENOENT;
612			break;
613		}
614
615		READ_ENTER(&ipf_state);
616
617		if (ipf_sync_debug > 6)
618			printf("[%d] Data from state v:%d p:%d cmd:%d table:%d rev:%d\n",
619				sp->sm_num, sl->sl_hdr.sm_v, sl->sl_hdr.sm_p,
620				sl->sl_hdr.sm_cmd, sl->sl_hdr.sm_table,
621				sl->sl_hdr.sm_rev);
622
623		is = sl->sl_ips;
624
625		MUTEX_ENTER(&is->is_lock);
626		switch (sp->sm_p)
627		{
628		case IPPROTO_TCP :
629			/* XXX FV --- shouldn't we do ntohl/htonl???? XXX */
630			is->is_send = su.stu_data[0].td_end;
631			is->is_maxsend = su.stu_data[0].td_maxend;
632			is->is_maxswin = su.stu_data[0].td_maxwin;
633			is->is_state[0] = su.stu_state[0];
634			is->is_dend = su.stu_data[1].td_end;
635			is->is_maxdend = su.stu_data[1].td_maxend;
636			is->is_maxdwin = su.stu_data[1].td_maxwin;
637			is->is_state[1] = su.stu_state[1];
638			break;
639		default :
640			break;
641		}
642
643		if (ipf_sync_debug > 6)
644			printf("[%d] Setting timers for state\n", sp->sm_num);
645
646		fr_setstatequeue(is, sp->sm_rev);
647
648		MUTEX_EXIT(&is->is_lock);
649		break;
650
651	default :
652		err = EINVAL;
653		break;
654	}
655
656	if (err == 0) {
657		RWLOCK_EXIT(&ipf_state);
658		RWLOCK_EXIT(&ipf_syncstate);
659	}
660
661	if (ipf_sync_debug > 6)
662		printf("[%d] Update completed with error %d\n",
663			sp->sm_num, err);
664
665	return err;
666}
667# endif /* _KERNEL */
668
669
670/* ------------------------------------------------------------------------ */
671/* Function:    ipfsync_del                                                 */
672/* Returns:     Nil                                                         */
673/* Parameters:  sl(I) - pointer to synclist object to delete                */
674/*                                                                          */
675/* Deletes an object from the synclist table and free's its memory.         */
676/* ------------------------------------------------------------------------ */
677void ipfsync_del(sl)
678synclist_t *sl;
679{
680	WRITE_ENTER(&ipf_syncstate);
681	*sl->sl_pnext = sl->sl_next;
682	if (sl->sl_next != NULL)
683		sl->sl_next->sl_pnext = sl->sl_pnext;
684	if (sl->sl_idx != -1)
685		syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
686	RWLOCK_EXIT(&ipf_syncstate);
687	KFREE(sl);
688}
689
690
691/* ------------------------------------------------------------------------ */
692/* Function:    ipfsync_nat                                                 */
693/* Returns:     int    - 0 == success, else error value.                    */
694/* Parameters:  sp(I)  - pointer to sync packet data header                 */
695/*              uio(I) - pointer to user data for further information       */
696/*                                                                          */
697/* Updates the NAT  table according to information passed in the sync       */
698/* header.  As required, more data is fetched from the uio structure but    */
699/* varies depending on the contents of the sync header.  This function can  */
700/* create a new NAT entry or update one.  Deletion is left to the NAT       */
701/* structures being timed out correctly.                                    */
702/* ------------------------------------------------------------------------ */
703int ipfsync_nat(sp, data)
704synchdr_t *sp;
705void *data;
706{
707	syncupdent_t su;
708	nat_t *n, *nat;
709	synclist_t *sl;
710	u_int hv = 0;
711	int err;
712
713	READ_ENTER(&ipf_syncstate);
714
715	switch (sp->sm_cmd)
716	{
717	case SMC_CREATE :
718		KMALLOC(n, nat_t *);
719		if (n == NULL) {
720			err = ENOMEM;
721			break;
722		}
723
724		KMALLOC(sl, synclist_t *);
725		if (sl == NULL) {
726			err = ENOMEM;
727			KFREE(n);
728			break;
729		}
730
731		nat = (nat_t *)data;
732		bzero((char *)n, offsetof(nat_t, nat_age));
733		bcopy((char *)&nat->nat_age, (char *)&n->nat_age,
734		      sizeof(*n) - offsetof(nat_t, nat_age));
735		ipfsync_natorder(0, n);
736		n->nat_sync = sl;
737
738		sl->sl_idx = -1;
739		sl->sl_ipn = n;
740		sl->sl_num = ntohl(sp->sm_num);
741
742		WRITE_ENTER(&ipf_nat);
743		sl->sl_pnext = syncstatetab + hv;
744		sl->sl_next = syncstatetab[hv];
745		if (syncstatetab[hv] != NULL)
746			syncstatetab[hv]->sl_pnext = &sl->sl_next;
747		syncstatetab[hv] = sl;
748		nat_insert(n, sl->sl_rev);
749		RWLOCK_EXIT(&ipf_nat);
750		break;
751
752	case SMC_UPDATE :
753		bcopy(data, &su, sizeof(su));
754
755		READ_ENTER(&ipf_syncstate);
756		for (sl = syncstatetab[hv]; (sl != NULL); sl = sl->sl_next)
757			if (sl->sl_hdr.sm_num == sp->sm_num)
758				break;
759		if (sl == NULL) {
760			err = ENOENT;
761			break;
762		}
763
764		READ_ENTER(&ipf_nat);
765
766		nat = sl->sl_ipn;
767
768		MUTEX_ENTER(&nat->nat_lock);
769		fr_setnatqueue(nat, sl->sl_rev);
770		MUTEX_EXIT(&nat->nat_lock);
771
772		RWLOCK_EXIT(&ipf_nat);
773
774		break;
775
776	default :
777		err = EINVAL;
778		break;
779	}
780
781	RWLOCK_EXIT(&ipf_syncstate);
782	return 0;
783}
784
785
786/* ------------------------------------------------------------------------ */
787/* Function:    ipfsync_new                                                 */
788/* Returns:     synclist_t* - NULL == failure, else pointer to new synclist */
789/*                            data structure.                               */
790/* Parameters:  tab(I) - type of synclist_t to create                       */
791/*              fin(I) - pointer to packet information                      */
792/*              ptr(I) - pointer to owning object                           */
793/*                                                                          */
794/* Creates a new sync table entry and notifies any sleepers that it's there */
795/* waiting to be processed.                                                 */
796/* ------------------------------------------------------------------------ */
797synclist_t *ipfsync_new(tab, fin, ptr)
798int tab;
799fr_info_t *fin;
800void *ptr;
801{
802	synclist_t *sl, *ss;
803	synclogent_t *sle;
804	u_int hv, sz;
805
806	if (sl_idx == SYNCLOG_SZ)
807		return NULL;
808	KMALLOC(sl, synclist_t *);
809	if (sl == NULL)
810		return NULL;
811
812	MUTEX_ENTER(&ipf_syncadd);
813	/*
814	 * Get a unique number for this synclist_t.  The number is only meant
815	 * to be unique for the lifetime of the structure and may be reused
816	 * later.
817	 */
818	ipf_syncnum++;
819	if (ipf_syncnum == 0) {
820		ipf_syncnum = 1;
821		ipf_syncwrap = 1;
822	}
823
824	hv = ipf_syncnum & (SYNC_STATETABSZ - 1);
825	while (ipf_syncwrap != 0) {
826		for (ss = syncstatetab[hv]; ss; ss = ss->sl_next)
827			if (ss->sl_hdr.sm_num == ipf_syncnum)
828				break;
829		if (ss == NULL)
830			break;
831		ipf_syncnum++;
832		hv = ipf_syncnum & (SYNC_STATETABSZ - 1);
833	}
834	/*
835	 * Use the synch number of the object as the hash key.  Should end up
836	 * with relatively even distribution over time.
837	 * XXX - an attacker could lunch an DoS attack, of sorts, if they are
838	 * the only one causing new table entries by only keeping open every
839	 * nth connection they make, where n is a value in the interval
840	 * [0, SYNC_STATETABSZ-1].
841	 */
842	sl->sl_pnext = syncstatetab + hv;
843	sl->sl_next = syncstatetab[hv];
844	syncstatetab[hv] = sl;
845	sl->sl_num = ipf_syncnum;
846	MUTEX_EXIT(&ipf_syncadd);
847
848	sl->sl_magic = htonl(SYNHDRMAGIC);
849	sl->sl_v = fin->fin_v;
850	sl->sl_p = fin->fin_p;
851	sl->sl_cmd = SMC_CREATE;
852	sl->sl_idx = -1;
853	sl->sl_table = tab;
854	sl->sl_rev = fin->fin_rev;
855	if (tab == SMC_STATE) {
856		sl->sl_ips = ptr;
857		sz = sizeof(*sl->sl_ips);
858	} else if (tab == SMC_NAT) {
859		sl->sl_ipn = ptr;
860		sz = sizeof(*sl->sl_ipn);
861	} else {
862		ptr = NULL;
863		sz = 0;
864	}
865	sl->sl_len = sz;
866
867	/*
868	 * Create the log entry to be read by a user daemon.  When it has been
869	 * finished and put on the queue, send a signal to wakeup any waiters.
870	 */
871	MUTEX_ENTER(&ipf_syncadd);
872	sle = synclog + sl_idx++;
873	bcopy((char *)&sl->sl_hdr, (char *)&sle->sle_hdr,
874	      sizeof(sle->sle_hdr));
875	sle->sle_hdr.sm_num = htonl(sle->sle_hdr.sm_num);
876	sle->sle_hdr.sm_len = htonl(sle->sle_hdr.sm_len);
877	if (ptr != NULL) {
878		bcopy((char *)ptr, (char *)&sle->sle_un, sz);
879		if (tab == SMC_STATE) {
880			ipfsync_storder(1, &sle->sle_un.sleu_ips);
881		} else if (tab == SMC_NAT) {
882			ipfsync_natorder(1, &sle->sle_un.sleu_ipn);
883		}
884	}
885	MUTEX_EXIT(&ipf_syncadd);
886
887	MUTEX_ENTER(&ipsl_mutex);
888# if SOLARIS
889#  ifdef _KERNEL
890	cv_signal(&ipslwait);
891#  endif
892	MUTEX_EXIT(&ipsl_mutex);
893# else
894	MUTEX_EXIT(&ipsl_mutex);
895#  ifdef _KERNEL
896	wakeup(&sl_tail);
897#  endif
898# endif
899	return sl;
900}
901
902
903/* ------------------------------------------------------------------------ */
904/* Function:    ipfsync_update                                              */
905/* Returns:     Nil                                                         */
906/* Parameters:  tab(I) - type of synclist_t to create                       */
907/*              fin(I) - pointer to packet information                      */
908/*              sl(I)  - pointer to synchronisation object                  */
909/*                                                                          */
910/* For outbound packets, only, create an sync update record for the user    */
911/* process to read.                                                         */
912/* ------------------------------------------------------------------------ */
913void ipfsync_update(tab, fin, sl)
914int tab;
915fr_info_t *fin;
916synclist_t *sl;
917{
918	synctcp_update_t *st;
919	syncupdent_t *slu;
920	ipstate_t *ips;
921	nat_t *nat;
922
923	if (fin->fin_out == 0 || sl == NULL)
924		return;
925
926	WRITE_ENTER(&ipf_syncstate);
927	MUTEX_ENTER(&ipf_syncadd);
928	if (sl->sl_idx == -1) {
929		slu = syncupd + su_idx;
930		sl->sl_idx = su_idx++;
931		bcopy((char *)&sl->sl_hdr, (char *)&slu->sup_hdr,
932		      sizeof(slu->sup_hdr));
933		slu->sup_hdr.sm_magic = htonl(SYNHDRMAGIC);
934		slu->sup_hdr.sm_sl = sl;
935		slu->sup_hdr.sm_cmd = SMC_UPDATE;
936		slu->sup_hdr.sm_table = tab;
937		slu->sup_hdr.sm_num = htonl(sl->sl_num);
938		slu->sup_hdr.sm_len = htonl(sizeof(struct synctcp_update));
939		slu->sup_hdr.sm_rev = fin->fin_rev;
940# if 0
941		if (fin->fin_p == IPPROTO_TCP) {
942			st->stu_len[0] = 0;
943			st->stu_len[1] = 0;
944		}
945# endif
946	} else
947		slu = syncupd + sl->sl_idx;
948	MUTEX_EXIT(&ipf_syncadd);
949	MUTEX_DOWNGRADE(&ipf_syncstate);
950
951	/*
952	 * Only TCP has complex timeouts, others just use default timeouts.
953	 * For TCP, we only need to track the connection state and window.
954	 */
955	if (fin->fin_p == IPPROTO_TCP) {
956		st = &slu->sup_tcp;
957		if (tab == SMC_STATE) {
958			ips = sl->sl_ips;
959			st->stu_age = htonl(ips->is_die);
960			st->stu_data[0].td_end = ips->is_send;
961			st->stu_data[0].td_maxend = ips->is_maxsend;
962			st->stu_data[0].td_maxwin = ips->is_maxswin;
963			st->stu_state[0] = ips->is_state[0];
964			st->stu_data[1].td_end = ips->is_dend;
965			st->stu_data[1].td_maxend = ips->is_maxdend;
966			st->stu_data[1].td_maxwin = ips->is_maxdwin;
967			st->stu_state[1] = ips->is_state[1];
968		} else if (tab == SMC_NAT) {
969			nat = sl->sl_ipn;
970			st->stu_age = htonl(nat->nat_age);
971		}
972	}
973	RWLOCK_EXIT(&ipf_syncstate);
974
975	MUTEX_ENTER(&ipsl_mutex);
976# if SOLARIS
977#  ifdef _KERNEL
978	cv_signal(&ipslwait);
979#  endif
980	MUTEX_EXIT(&ipsl_mutex);
981# else
982	MUTEX_EXIT(&ipsl_mutex);
983#  ifdef _KERNEL
984	wakeup(&sl_tail);
985#  endif
986# endif
987}
988
989
990/* ------------------------------------------------------------------------ */
991/* Function:    fr_sync_ioctl                                               */
992/* Returns:     int - 0 == success, != 0 == failure                         */
993/* Parameters:  data(I) - pointer to ioctl data                             */
994/*              cmd(I)  - ioctl command integer                             */
995/*              mode(I) - file mode bits used with open                     */
996/*                                                                          */
997/* This function currently does not handle any ioctls and so just returns   */
998/* EINVAL on all occasions.                                                 */
999/* ------------------------------------------------------------------------ */
1000int fr_sync_ioctl(data, cmd, mode, uid, ctx)
1001caddr_t data;
1002ioctlcmd_t cmd;
1003int mode, uid;
1004void *ctx;
1005{
1006	return EINVAL;
1007}
1008
1009
1010int ipfsync_canread()
1011{
1012	return !((sl_tail == sl_idx) && (su_tail == su_idx));
1013}
1014
1015
/* ------------------------------------------------------------------------ */
/* Function:    ipfsync_canwrite                                            */
/* Returns:     int - always 1                                              */
/* Parameters:  Nil                                                         */
/* ------------------------------------------------------------------------ */
int ipfsync_canwrite()
{
	int writable = 1;	/* writes are never reported as blocked */

	return writable;
}
1020#endif /* IPFILTER_SYNC */
1021