1/*
2 * Copyright (C) 2004-2009  Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2002  Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/* $Id: socket.h,v 1.85.58.3 2009/01/29 22:40:35 jinmei Exp $ */
19
20#ifndef ISC_SOCKET_H
21#define ISC_SOCKET_H 1
22
23/*****
24 ***** Module Info
25 *****/
26
27/*! \file isc/socket.h
28 * \brief Provides TCP and UDP sockets for network I/O.  The sockets are event
29 * sources in the task system.
30 *
31 * When I/O completes, a completion event for the socket is posted to the
32 * event queue of the task which requested the I/O.
33 *
34 * \li MP:
35 *	The module ensures appropriate synchronization of data structures it
36 *	creates and manipulates.
37 *	Clients of this module must not be holding a socket's task's lock when
38 *	making a call that affects that socket.  Failure to follow this rule
39 *	can result in deadlock.
40 *	The caller must ensure that isc_socketmgr_destroy() is called only
41 *	once for a given manager.
42 *
43 * \li Reliability:
44 *	No anticipated impact.
45 *
46 * \li Resources:
47 *	TBS
48 *
49 * \li Security:
50 *	No anticipated impact.
51 *
52 * \li Standards:
53 *	None.
54 */
55
56/***
57 *** Imports
58 ***/
59
60#include <isc/lang.h>
61#include <isc/types.h>
62#include <isc/event.h>
63#include <isc/eventclass.h>
64#include <isc/time.h>
65#include <isc/region.h>
66#include <isc/sockaddr.h>
67#include <isc/xml.h>
68
69ISC_LANG_BEGINDECLS
70
71/***
72 *** Constants
73 ***/
74
75/*%
 * Maximum number of buffers in a scatter/gather read/write.  The operating
 * system in use must support at least this number (plus one on some systems).
78 */
79#define ISC_SOCKET_MAXSCATTERGATHER	8
80
81/*%
 * In isc_socket_bind() set socket option SO_REUSEADDR prior to calling
 * bind() if a non-zero port is specified (AF_INET and AF_INET6).
84 */
85#define ISC_SOCKET_REUSEADDRESS		0x01U
86
87/*%
88 * Statistics counters.  Used as isc_statscounter_t values.
89 */
enum {
	/*
	 * Counters are grouped by event.  The first counter of every group
	 * is pinned to an explicit value and the remaining members of the
	 * group follow consecutively, so the numbering is stable.
	 */

	/* open: 0-4 */
	isc_sockstatscounter_udp4open = 0,
	isc_sockstatscounter_udp6open,
	isc_sockstatscounter_tcp4open,
	isc_sockstatscounter_tcp6open,
	isc_sockstatscounter_unixopen,

	/* openfail: 5-9 */
	isc_sockstatscounter_udp4openfail = 5,
	isc_sockstatscounter_udp6openfail,
	isc_sockstatscounter_tcp4openfail,
	isc_sockstatscounter_tcp6openfail,
	isc_sockstatscounter_unixopenfail,

	/* close: 10-15 */
	isc_sockstatscounter_udp4close = 10,
	isc_sockstatscounter_udp6close,
	isc_sockstatscounter_tcp4close,
	isc_sockstatscounter_tcp6close,
	isc_sockstatscounter_unixclose,
	isc_sockstatscounter_fdwatchclose,

	/* bindfail: 16-21 */
	isc_sockstatscounter_udp4bindfail = 16,
	isc_sockstatscounter_udp6bindfail,
	isc_sockstatscounter_tcp4bindfail,
	isc_sockstatscounter_tcp6bindfail,
	isc_sockstatscounter_unixbindfail,
	isc_sockstatscounter_fdwatchbindfail,

	/* connect: 22-27 */
	isc_sockstatscounter_udp4connect = 22,
	isc_sockstatscounter_udp6connect,
	isc_sockstatscounter_tcp4connect,
	isc_sockstatscounter_tcp6connect,
	isc_sockstatscounter_unixconnect,
	isc_sockstatscounter_fdwatchconnect,

	/* connectfail: 28-33 */
	isc_sockstatscounter_udp4connectfail = 28,
	isc_sockstatscounter_udp6connectfail,
	isc_sockstatscounter_tcp4connectfail,
	isc_sockstatscounter_tcp6connectfail,
	isc_sockstatscounter_unixconnectfail,
	isc_sockstatscounter_fdwatchconnectfail,

	/* accept: 34-36 */
	isc_sockstatscounter_tcp4accept = 34,
	isc_sockstatscounter_tcp6accept,
	isc_sockstatscounter_unixaccept,

	/* acceptfail: 37-39 */
	isc_sockstatscounter_tcp4acceptfail = 37,
	isc_sockstatscounter_tcp6acceptfail,
	isc_sockstatscounter_unixacceptfail,

	/* sendfail: 40-45 */
	isc_sockstatscounter_udp4sendfail = 40,
	isc_sockstatscounter_udp6sendfail,
	isc_sockstatscounter_tcp4sendfail,
	isc_sockstatscounter_tcp6sendfail,
	isc_sockstatscounter_unixsendfail,
	isc_sockstatscounter_fdwatchsendfail,

	/* recvfail: 46-51 */
	isc_sockstatscounter_udp4recvfail = 46,
	isc_sockstatscounter_udp6recvfail,
	isc_sockstatscounter_tcp4recvfail,
	isc_sockstatscounter_tcp6recvfail,
	isc_sockstatscounter_unixrecvfail,
	isc_sockstatscounter_fdwatchrecvfail,

	/* Number of counters defined above. */
	isc_sockstatscounter_max = 52
};
155
156/***
157 *** Types
158 ***/
159
/*%
 * Event structure for socket send and receive operations.  It is passed
 * directly to isc_socket_recv2() and isc_socket_sendto2(), and the vectored
 * send/receive calls return their buffers in the done event's 'bufferlist'
 * member.
 */
struct isc_socketevent {
	ISC_EVENT_COMMON(isc_socketevent_t);
	isc_result_t		result;		/*%< OK, EOF, whatever else */
	unsigned int		minimum;	/*%< minimum i/o for event */
	unsigned int		n;		/*%< bytes read or written */
	unsigned int		offset;		/*%< offset into buffer list */
	isc_region_t		region;		/*%< for single-buffer i/o */
	isc_bufferlist_t	bufferlist;	/*%< list of buffers */
	isc_sockaddr_t		address;	/*%< source address */
	isc_time_t		timestamp;	/*%< timestamp of packet recv */
	struct in6_pktinfo	pktinfo;	/*%< ipv6 pktinfo */
	isc_uint32_t		attributes;	/*%< ISC_SOCKEVENTATTR_* flags */
	isc_eventdestructor_t   destroy;	/*%< original destructor */
};
174
/*%
 * Event delivered as ISC_SOCKEVENT_NEWCONN to a task that queued an accept
 * with isc_socket_accept(); the new socket is carried in 'newsocket'.
 */
typedef struct isc_socket_newconnev isc_socket_newconnev_t;
struct isc_socket_newconnev {
	ISC_EVENT_COMMON(isc_socket_newconnev_t);
	isc_socket_t *		newsocket;	/*%< the accepted socket */
	isc_result_t		result;		/*%< OK, EOF, whatever else */
	isc_sockaddr_t		address;	/*%< source address */
};
182
/*%
 * Connection event.  NOTE(review): presumably this is the structure of the
 * CONNECT event that isc_socket_connect() posts when the connection
 * succeeds or fails, with the outcome in 'result' -- confirm against the
 * implementation.
 */
typedef struct isc_socket_connev isc_socket_connev_t;
struct isc_socket_connev {
	ISC_EVENT_COMMON(isc_socket_connev_t);
	isc_result_t		result;		/*%< OK, EOF, whatever else */
};
188
189/*@{*/
190/*!
191 * _ATTACHED:	Internal use only.
192 * _TRUNC:	Packet was truncated on receive.
193 * _CTRUNC:	Packet control information was truncated.  This can
194 *		indicate that the packet is not complete, even though
195 *		all the data is valid.
196 * _TIMESTAMP:	The timestamp member is valid.
197 * _PKTINFO:	The pktinfo member is valid.
198 * _MULTICAST:	The UDP packet was received via a multicast transmission.
199 */
200#define ISC_SOCKEVENTATTR_ATTACHED		0x80000000U /* internal */
201#define ISC_SOCKEVENTATTR_TRUNC			0x00800000U /* public */
202#define ISC_SOCKEVENTATTR_CTRUNC		0x00400000U /* public */
203#define ISC_SOCKEVENTATTR_TIMESTAMP		0x00200000U /* public */
204#define ISC_SOCKEVENTATTR_PKTINFO		0x00100000U /* public */
205#define ISC_SOCKEVENTATTR_MULTICAST		0x00080000U /* public */
206/*@}*/
207
/*%
 * Public socket event types, numbered from ISC_EVENTCLASS_SOCKET.
 * ISC_SOCKEVENT_NEWCONN is delivered for isc_socket_accept() requests and a
 * CONNECT event is posted for isc_socket_connect() requests.
 * NOTE(review): RECVDONE/SENDDONE are presumably the completion events of
 * the receive and send calls, and ANYEVENT a wildcard -- confirm.
 */
#define ISC_SOCKEVENT_ANYEVENT  (0)
#define ISC_SOCKEVENT_RECVDONE	(ISC_EVENTCLASS_SOCKET + 1)
#define ISC_SOCKEVENT_SENDDONE	(ISC_EVENTCLASS_SOCKET + 2)
#define ISC_SOCKEVENT_NEWCONN	(ISC_EVENTCLASS_SOCKET + 3)
#define ISC_SOCKEVENT_CONNECT	(ISC_EVENTCLASS_SOCKET + 4)
213
214/*
215 * Internal events.
216 */
217#define ISC_SOCKEVENT_INTR	(ISC_EVENTCLASS_SOCKET + 256)
218#define ISC_SOCKEVENT_INTW	(ISC_EVENTCLASS_SOCKET + 257)
219
/*%
 * Kinds of socket handled by this module.  Numbering starts at 1 and the
 * members follow consecutively.
 */
typedef enum {
	isc_sockettype_udp = 1,		/*%< UDP socket (1) */
	isc_sockettype_tcp,		/*%< TCP socket (2) */
	isc_sockettype_unix,		/*%< UNIX domain socket (3) */
	isc_sockettype_fdwatch		/*%< file descriptor watch socket (4) */
} isc_sockettype_t;
226
227/*@{*/
228/*!
229 * How a socket should be shutdown in isc_socket_shutdown() calls.
230 */
#define ISC_SOCKSHUT_RECV	0x00000001	/*%< close read side */
#define ISC_SOCKSHUT_SEND	0x00000002	/*%< close write side */
/*% Close both sides: the union of the two flags above. */
#define ISC_SOCKSHUT_ALL	(ISC_SOCKSHUT_RECV | ISC_SOCKSHUT_SEND)
234/*@}*/
235
236/*@{*/
237/*!
238 * What I/O events to cancel in isc_socket_cancel() calls.
239 */
#define ISC_SOCKCANCEL_RECV	0x00000001	/*%< cancel recv */
#define ISC_SOCKCANCEL_SEND	0x00000002	/*%< cancel send */
#define ISC_SOCKCANCEL_ACCEPT	0x00000004	/*%< cancel accept */
#define ISC_SOCKCANCEL_CONNECT	0x00000008	/*%< cancel connect */
/*% Cancel everything: the union of the four flags above. */
#define ISC_SOCKCANCEL_ALL	(ISC_SOCKCANCEL_RECV | ISC_SOCKCANCEL_SEND | \
				 ISC_SOCKCANCEL_ACCEPT | ISC_SOCKCANCEL_CONNECT)
245/*@}*/
246
247/*@{*/
248/*!
249 * Flags for isc_socket_send() and isc_socket_recv() calls.
250 */
251#define ISC_SOCKFLAG_IMMEDIATE	0x00000001	/*%< send event only if needed */
252#define ISC_SOCKFLAG_NORETRY	0x00000002	/*%< drop failed UDP sends */
253/*@}*/
254
255/*@{*/
256/*!
257 * Flags for fdwatchcreate.
258 */
259#define ISC_SOCKFDWATCH_READ	0x00000001	/*%< watch for readable */
260#define ISC_SOCKFDWATCH_WRITE	0x00000002	/*%< watch for writable */
261/*@}*/
262
263/***
264 *** Socket and Socket Manager Functions
265 ***
266 *** Note: all Ensures conditions apply only if the result is success for
267 *** those functions which return an isc_result.
268 ***/
269
isc_result_t
isc_socket_fdwatchcreate(isc_socketmgr_t *manager,
			 int fd,
			 int flags,
			 isc_sockfdwatch_t callback,
			 void *cbarg,
			 isc_task_t *task,
			 isc_socket_t **socketp);
/*%<
 * Create a new file descriptor watch socket managed by 'manager'.
 *
 * Note:
 *
 *\li	'fd' is the already-opened file descriptor.
 *\li	'flags' is a bitmask of ISC_SOCKFDWATCH_READ and/or
 *	ISC_SOCKFDWATCH_WRITE, selecting whether to watch for 'fd' becoming
 *	readable, writable, or both.
 *\li	This function is not available on Windows.
 *\li	The callback function is called "in-line" - this means the function
 *	needs to return as fast as possible, as all other I/O will be suspended
 *	until the callback completes.
 *
 * Requires:
 *
 *\li	'manager' is a valid manager
 *
 *\li	'socketp' is a valid pointer, and *socketp == NULL
 *
 *\li	'fd' is an open file descriptor.
 *
 * Ensures:
 *
 *\li	'*socketp' is attached to the newly created fdwatch socket
 *
 * Returns:
 *
 *\li	#ISC_R_SUCCESS
 *\li	#ISC_R_NOMEMORY
 *\li	#ISC_R_NORESOURCES
 *\li	#ISC_R_UNEXPECTED
 */
308
309isc_result_t
310isc_socket_create(isc_socketmgr_t *manager,
311		  int pf,
312		  isc_sockettype_t type,
313		  isc_socket_t **socketp);
314/*%<
315 * Create a new 'type' socket managed by 'manager'.
316 *
317 * For isc_sockettype_fdwatch sockets you should use isc_socket_fdwatchcreate()
318 * rather than isc_socket_create().
319 *
320 * Note:
321 *
322 *\li	'pf' is the desired protocol family, e.g. PF_INET or PF_INET6.
323 *
324 * Requires:
325 *
326 *\li	'manager' is a valid manager
327 *
328 *\li	'socketp' is a valid pointer, and *socketp == NULL
329 *
330 *\li	'type' is not isc_sockettype_fdwatch
331 *
332 * Ensures:
333 *
 *\li	'*socketp' is attached to the newly created socket
335 *
336 * Returns:
337 *
338 *\li	#ISC_R_SUCCESS
339 *\li	#ISC_R_NOMEMORY
340 *\li	#ISC_R_NORESOURCES
341 *\li	#ISC_R_UNEXPECTED
342 */
343
344void
345isc_socket_cancel(isc_socket_t *sock, isc_task_t *task,
346		  unsigned int how);
347/*%<
348 * Cancel pending I/O of the type specified by "how".
349 *
350 * Note: if "task" is NULL, then the cancel applies to all tasks using the
351 * socket.
352 *
353 * Requires:
354 *
 * \li	"sock" is a valid socket
356 *
357 * \li	"task" is NULL or a valid task
358 *
359 * "how" is a bitmask describing the type of cancelation to perform.
360 * The type ISC_SOCKCANCEL_ALL will cancel all pending I/O on this
361 * socket.
362 *
363 * \li ISC_SOCKCANCEL_RECV:
364 *	Cancel pending isc_socket_recv() calls.
365 *
366 * \li ISC_SOCKCANCEL_SEND:
367 *	Cancel pending isc_socket_send() and isc_socket_sendto() calls.
368 *
369 * \li ISC_SOCKCANCEL_ACCEPT:
370 *	Cancel pending isc_socket_accept() calls.
371 *
372 * \li ISC_SOCKCANCEL_CONNECT:
373 *	Cancel pending isc_socket_connect() call.
374 */
375
void
isc_socket_shutdown(isc_socket_t *sock, unsigned int how);
/*%<
 * Shutdown 'sock' according to 'how', which is one of the ISC_SOCKSHUT_*
 * values.
 *
 * Requires:
 *
 * \li	'sock' is a valid socket.
 *
 * \li	If 'how' is 'ISC_SOCKSHUT_RECV' or 'ISC_SOCKSHUT_ALL' then
 *
 *		The read queue must be empty.
 *
 *		No further read requests may be made.
 *
 * \li	If 'how' is 'ISC_SOCKSHUT_SEND' or 'ISC_SOCKSHUT_ALL' then
 *
 *		The write queue must be empty.
 *
 *		No further write requests may be made.
 */
399
void
isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp);
/*%<
 * Attach *socketp to 'sock'.
 *
 * Requires:
 *
 * \li	'sock' is a valid socket.
 *
 * \li	'socketp' is a valid pointer, and *socketp == NULL.
 *
 * Ensures:
 *
 * \li	*socketp is attached to 'sock'.
 */
415
416void
417isc_socket_detach(isc_socket_t **socketp);
418/*%<
419 * Detach *socketp from its socket.
420 *
421 * Requires:
422 *
423 * \li	'socketp' points to a valid socket.
424 *
425 * \li	If '*socketp' is the last reference to the socket,
426 *	then:
427 *
428 *		There must be no pending I/O requests.
429 *
430 * Ensures:
431 *
432 * \li	*socketp is NULL.
433 *
434 * \li	If '*socketp' is the last reference to the socket,
435 *	then:
436 *
437 *		The socket will be shutdown (both reading and writing)
438 *		for all tasks.
439 *
440 *		All resources used by the socket have been freed
441 */
442
isc_result_t
isc_socket_open(isc_socket_t *sock);
/*%<
 * Open a new socket file descriptor of the given socket structure.  It simply
 * opens a new descriptor; all of the other parameters including the socket
 * type are inherited from the existing socket.  This function is provided to
 * avoid overhead of destroying and creating sockets when many short-lived
 * sockets are frequently opened and closed.  When the efficiency is not an
 * issue, it should be safer to detach the unused socket and re-create a new
 * one.  This optimization may not be available for some systems, in which
 * case this function will return ISC_R_NOTIMPLEMENTED and must not be used.
 *
 * isc_socket_open() should not be called on sockets created by
 * isc_socket_fdwatchcreate().
 *
 * Requires:
 *
 * \li	There must be no other reference to this socket.
 *
 * \li	'sock' is a valid socket that was previously closed by
 *	isc_socket_close().
 *
 * \li	'sock->type' is not isc_sockettype_fdwatch
 *
 * Returns:
 *
 * \li	The same results as isc_socket_create().
 * \li	ISC_R_NOTIMPLEMENTED
 */
470
471isc_result_t
472isc_socket_close(isc_socket_t *sock);
473/*%<
 * Close a socket file descriptor of the given socket structure.  This function
 * is provided as an alternative to destroying an unused socket when the
 * overhead of destroying and re-creating sockets can be significant, and is
 * expected to be used with isc_socket_open().  This optimization may not be
 * available for some systems, in which case this function will return
 * ISC_R_NOTIMPLEMENTED and must not be used.
480 *
481 * isc_socket_close() should not be called on sockets created by
482 * isc_socket_fdwatchcreate().
483 *
484 * Requires:
485 *
486 * \li	The socket must have a valid descriptor.
487 *
488 * \li	There must be no other reference to this socket.
489 *
490 * \li	There must be no pending I/O requests.
491 *
492 * \li  'sock->type' is not isc_sockettype_fdwatch
493 *
494 * Returns:
495 * \li	#ISC_R_NOTIMPLEMENTED
496 */
497
isc_result_t
isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *addressp,
		unsigned int options);
/*%<
 * Bind 'sock' to '*addressp'.
 *
 * Notes:
 *
 * \li	If 'options' includes ISC_SOCKET_REUSEADDRESS, the SO_REUSEADDR
 *	socket option is set before bind() is called when a non-zero port
 *	is specified (AF_INET and AF_INET6).
 *
 * Requires:
 *
 * \li	'sock' is a valid socket
 *
 * \li	'addressp' points to a valid isc_sockaddr.
 *
 * Returns:
 *
 * \li	ISC_R_SUCCESS
 * \li	ISC_R_NOPERM
 * \li	ISC_R_ADDRNOTAVAIL
 * \li	ISC_R_ADDRINUSE
 * \li	ISC_R_BOUND
 * \li	ISC_R_UNEXPECTED
 */
519
520isc_result_t
521isc_socket_filter(isc_socket_t *sock, const char *filter);
522/*%<
 * Inform the kernel that it should perform accept filtering.
 * If filter is NULL the current filter will be removed.
525 */
526
527isc_result_t
528isc_socket_listen(isc_socket_t *sock, unsigned int backlog);
529/*%<
530 * Set listen mode on the socket.  After this call, the only function that
531 * can be used (other than attach and detach) is isc_socket_accept().
532 *
533 * Notes:
534 *
535 * \li	'backlog' is as in the UNIX system call listen() and may be
536 *	ignored by non-UNIX implementations.
537 *
538 * \li	If 'backlog' is zero, a reasonable system default is used, usually
539 *	SOMAXCONN.
540 *
541 * Requires:
542 *
 * \li	'sock' is a valid, bound TCP socket or a valid, bound UNIX socket.
544 *
545 * Returns:
546 *
547 * \li	ISC_R_SUCCESS
548 * \li	ISC_R_UNEXPECTED
549 */
550
551isc_result_t
552isc_socket_accept(isc_socket_t *sock,
553		  isc_task_t *task, isc_taskaction_t action, const void *arg);
554/*%<
555 * Queue accept event.  When a new connection is received, the task will
556 * get an ISC_SOCKEVENT_NEWCONN event with the sender set to the listen
557 * socket.  The new socket structure is sent inside the isc_socket_newconnev_t
558 * event type, and is attached to the task 'task'.
559 *
560 * REQUIRES:
 * \li	'sock' is a valid TCP socket that isc_socket_listen() was called
 *	on.
563 *
564 * \li	'task' is a valid task
565 *
566 * \li	'action' is a valid action
567 *
568 * RETURNS:
569 * \li	ISC_R_SUCCESS
570 * \li	ISC_R_NOMEMORY
571 * \li	ISC_R_UNEXPECTED
572 */
573
isc_result_t
isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addressp,
		   isc_task_t *task, isc_taskaction_t action,
		   const void *arg);
/*%<
 * Connect 'sock' to the peer with address '*addressp'.  When the connection
 * succeeds, or when an error occurs, a CONNECT event with action 'action'
 * and arg 'arg' will be posted to the event queue for 'task'.
 *
 * Requires:
 *
 * \li	'sock' is a valid TCP socket
 *
 * \li	'addressp' points to a valid isc_sockaddr
 *
 * \li	'task' is a valid task
 *
 * \li	'action' is a valid action
 *
 * Returns:
 *
 * \li	ISC_R_SUCCESS
 * \li	ISC_R_NOMEMORY
 * \li	ISC_R_UNEXPECTED
 *
 * Posted event's result code:
 *
 * \li	ISC_R_SUCCESS
 * \li	ISC_R_TIMEDOUT
 * \li	ISC_R_CONNREFUSED
 * \li	ISC_R_NETUNREACH
 * \li	ISC_R_UNEXPECTED
 */
607
isc_result_t
isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp);
/*%<
 * Get the name of the peer connected to 'sock'.
 *
 * NOTE(review): the result is presumably stored in '*addressp'; this header
 * does not state it explicitly -- confirm against the implementation.
 *
 * Requires:
 *
 * \li	'sock' is a valid TCP socket.
 *
 * Returns:
 *
 * \li	ISC_R_SUCCESS
 * \li	ISC_R_TOOSMALL
 * \li	ISC_R_UNEXPECTED
 */
623
isc_result_t
isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp);
/*%<
 * Get the name of 'sock'.
 *
 * NOTE(review): the result is presumably stored in '*addressp'; this header
 * does not state it explicitly -- confirm against the implementation.
 *
 * Requires:
 *
 * \li	'sock' is a valid socket.
 *
 * Returns:
 *
 * \li	ISC_R_SUCCESS
 * \li	ISC_R_TOOSMALL
 * \li	ISC_R_UNEXPECTED
 */
639
640/*@{*/
641isc_result_t
642isc_socket_recv(isc_socket_t *sock, isc_region_t *region,
643		unsigned int minimum,
644		isc_task_t *task, isc_taskaction_t action, const void *arg);
645isc_result_t
646isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
647		 unsigned int minimum,
648		 isc_task_t *task, isc_taskaction_t action, const void *arg);
649
650isc_result_t
651isc_socket_recv2(isc_socket_t *sock, isc_region_t *region,
652		 unsigned int minimum, isc_task_t *task,
653		 isc_socketevent_t *event, unsigned int flags);
654
655/*!
 * Receive from 'sock', storing the results in 'region'.
657 *
658 * Notes:
659 *
660 *\li	Let 'length' refer to the length of 'region' or to the sum of all
661 *	available regions in the list of buffers '*buflist'.
662 *
 *\li	If 'minimum' is non-zero and at least that many bytes are read,
 *	the completion event will be posted to the task 'task'.  If 'minimum'
 *	is zero, the exact number of bytes requested in the region must
 *	be read for an event to be posted.  This only makes sense for TCP
 *	connections, and is always set to 1 byte for UDP.
668 *
669 *\li	The read will complete when the desired number of bytes have been
670 *	read, if end-of-input occurs, or if an error occurs.  A read done
671 *	event with the given 'action' and 'arg' will be posted to the
672 *	event queue of 'task'.
673 *
674 *\li	The caller may not modify 'region', the buffers which are passed
675 *	into this function, or any data they refer to until the completion
676 *	event is received.
677 *
678 *\li	For isc_socket_recvv():
679 *	On successful completion, '*buflist' will be empty, and the list of
680 *	all buffers will be returned in the done event's 'bufferlist'
681 *	member.  On error return, '*buflist' will be unchanged.
682 *
683 *\li	For isc_socket_recv2():
684 *	'event' is not NULL, and the non-socket specific fields are
685 *	expected to be initialized.
686 *
687 *\li	For isc_socket_recv2():
688 *	The only defined value for 'flags' is ISC_SOCKFLAG_IMMEDIATE.  If
689 *	set and the operation completes, the return value will be
690 *	ISC_R_SUCCESS and the event will be filled in and not sent.  If the
691 *	operation does not complete, the return value will be
692 *	ISC_R_INPROGRESS and the event will be sent when the operation
693 *	completes.
694 *
695 * Requires:
696 *
 *\li	'sock' is a valid, bound socket.
698 *
699 *\li	For isc_socket_recv():
700 *	'region' is a valid region
701 *
702 *\li	For isc_socket_recvv():
 *	'buflist' is non-NULL, and '*buflist' contains at least one buffer.
704 *
705 *\li	'task' is a valid task
706 *
707 *\li	For isc_socket_recv() and isc_socket_recvv():
708 *	action != NULL and is a valid action
709 *
710 *\li	For isc_socket_recv2():
711 *	event != NULL
712 *
713 * Returns:
714 *
715 *\li	#ISC_R_SUCCESS
716 *\li	#ISC_R_INPROGRESS
717 *\li	#ISC_R_NOMEMORY
718 *\li	#ISC_R_UNEXPECTED
719 *
720 * Event results:
721 *
722 *\li	#ISC_R_SUCCESS
723 *\li	#ISC_R_UNEXPECTED
724 *\li	XXX needs other net-type errors
725 */
726/*@}*/
727
728/*@{*/
729isc_result_t
730isc_socket_send(isc_socket_t *sock, isc_region_t *region,
731		isc_task_t *task, isc_taskaction_t action, const void *arg);
732isc_result_t
733isc_socket_sendto(isc_socket_t *sock, isc_region_t *region,
734		  isc_task_t *task, isc_taskaction_t action, const void *arg,
735		  isc_sockaddr_t *address, struct in6_pktinfo *pktinfo);
736isc_result_t
737isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
738		 isc_task_t *task, isc_taskaction_t action, const void *arg);
739isc_result_t
740isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
741		   isc_task_t *task, isc_taskaction_t action, const void *arg,
742		   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo);
743isc_result_t
744isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region,
745		   isc_task_t *task,
746		   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
747		   isc_socketevent_t *event, unsigned int flags);
748
749/*!
750 * Send the contents of 'region' to the socket's peer.
751 *
752 * Notes:
753 *
754 *\li	Shutting down the requestor's task *may* result in any
755 *	still pending writes being dropped or completed, depending on the
756 *	underlying OS implementation.
757 *
758 *\li	If 'action' is NULL, then no completion event will be posted.
759 *
760 *\li	The caller may not modify 'region', the buffers which are passed
761 *	into this function, or any data they refer to until the completion
762 *	event is received.
763 *
764 *\li	For isc_socket_sendv() and isc_socket_sendtov():
765 *	On successful completion, '*buflist' will be empty, and the list of
766 *	all buffers will be returned in the done event's 'bufferlist'
767 *	member.  On error return, '*buflist' will be unchanged.
768 *
769 *\li	For isc_socket_sendto2():
770 *	'event' is not NULL, and the non-socket specific fields are
771 *	expected to be initialized.
772 *
773 *\li	For isc_socket_sendto2():
774 *	The only defined values for 'flags' are ISC_SOCKFLAG_IMMEDIATE
775 *	and ISC_SOCKFLAG_NORETRY.
776 *
777 *\li	If ISC_SOCKFLAG_IMMEDIATE is set and the operation completes, the
778 *	return value will be ISC_R_SUCCESS and the event will be filled
779 *	in and not sent.  If the operation does not complete, the return
780 *	value will be ISC_R_INPROGRESS and the event will be sent when
781 *	the operation completes.
782 *
783 *\li	ISC_SOCKFLAG_NORETRY can only be set for UDP sockets.  If set
784 *	and the send operation fails due to a transient error, the send
785 *	will not be retried and the error will be indicated in the event.
786 *	Using this option along with ISC_SOCKFLAG_IMMEDIATE allows the caller
787 *	to specify a region that is allocated on the stack.
788 *
789 * Requires:
790 *
 *\li	'sock' is a valid, bound socket.
792 *
793 *\li	For isc_socket_send():
794 *	'region' is a valid region
795 *
796 *\li	For isc_socket_sendv() and isc_socket_sendtov():
 *	'buflist' is non-NULL, and '*buflist' contains at least one buffer.
798 *
799 *\li	'task' is a valid task
800 *
801 *\li	For isc_socket_sendv(), isc_socket_sendtov(), isc_socket_send(), and
802 *	isc_socket_sendto():
803 *	action == NULL or is a valid action
804 *
805 *\li	For isc_socket_sendto2():
806 *	event != NULL
807 *
808 * Returns:
809 *
810 *\li	#ISC_R_SUCCESS
811 *\li	#ISC_R_INPROGRESS
812 *\li	#ISC_R_NOMEMORY
813 *\li	#ISC_R_UNEXPECTED
814 *
815 * Event results:
816 *
817 *\li	#ISC_R_SUCCESS
818 *\li	#ISC_R_UNEXPECTED
819 *\li	XXX needs other net-type errors
820 */
821/*@}*/
822
823isc_result_t
824isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp);
825
826isc_result_t
827isc_socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
828		      unsigned int maxsocks);
829/*%<
830 * Create a socket manager.  If "maxsocks" is non-zero, it specifies the
831 * maximum number of sockets that the created manager should handle.
 * isc_socketmgr_create() is equivalent to isc_socketmgr_create2() with
 * "maxsocks" being zero.
834 *
835 * Notes:
836 *
837 *\li	All memory will be allocated in memory context 'mctx'.
838 *
839 * Requires:
840 *
841 *\li	'mctx' is a valid memory context.
842 *
843 *\li	'managerp' points to a NULL isc_socketmgr_t.
844 *
845 * Ensures:
846 *
847 *\li	'*managerp' is a valid isc_socketmgr_t.
848 *
849 * Returns:
850 *
851 *\li	#ISC_R_SUCCESS
852 *\li	#ISC_R_NOMEMORY
853 *\li	#ISC_R_UNEXPECTED
854 *\li	#ISC_R_NOTIMPLEMENTED
855 */
856
857isc_result_t
858isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp);
859/*%<
860 * Returns in "*nsockp" the maximum number of sockets this manager may open.
861 *
862 * Requires:
863 *
864 *\li	'*manager' is a valid isc_socketmgr_t.
865 *\li	'nsockp' is not NULL.
866 *
867 * Returns:
868 *
869 *\li	#ISC_R_SUCCESS
870 *\li	#ISC_R_NOTIMPLEMENTED
871 */
872
873void
874isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats);
875/*%<
876 * Set a general socket statistics counter set 'stats' for 'manager'.
877 *
878 * Requires:
879 * \li	'manager' is valid, hasn't opened any socket, and doesn't have
880 *	stats already set.
881 *
882 *\li	stats is a valid statistics supporting socket statistics counters
883 *	(see above).
884 */
885
886void
887isc_socketmgr_destroy(isc_socketmgr_t **managerp);
888/*%<
889 * Destroy a socket manager.
890 *
891 * Notes:
892 *
893 *\li	This routine blocks until there are no sockets left in the manager,
894 *	so if the caller holds any socket references using the manager, it
895 *	must detach them before calling isc_socketmgr_destroy() or it will
896 *	block forever.
897 *
898 * Requires:
899 *
900 *\li	'*managerp' is a valid isc_socketmgr_t.
901 *
902 *\li	All sockets managed by this manager are fully detached.
903 *
904 * Ensures:
905 *
906 *\li	*managerp == NULL
907 *
908 *\li	All resources used by the manager have been freed.
909 */
910
911isc_sockettype_t
912isc_socket_gettype(isc_socket_t *sock);
913/*%<
914 * Returns the socket type for "sock."
915 *
916 * Requires:
917 *
918 *\li	"sock" is a valid socket.
919 */
920
/*@{*/
isc_boolean_t
isc_socket_isbound(isc_socket_t *sock);
/*%<
 * NOTE(review): undocumented in this header.  Presumably reports whether
 * 'sock' has been bound to an address (see isc_socket_bind()) -- confirm
 * against the implementation.
 */

void
isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes);
/*%<
 * If the socket is an IPv6 socket set/clear the IPV6_V6ONLY socket
 * option if the host OS supports this option.
 *
 * Requires:
 *\li	'sock' is a valid socket.
 */
/*@}*/
935
936void
937isc_socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active);
938
939/*%<
940 * Cleanup UNIX domain sockets in the file-system.  If 'active' is true
941 * then just unlink the socket.  If 'active' is false try to determine
942 * if there is a listener of the socket or not.  If no listener is found
943 * then unlink socket.
944 *
 * Prior to unlinking, the path is tested to see if it is a socket.
946 *
947 * Note: there are a number of race conditions which cannot be avoided
948 *       both in the filesystem and any application using UNIX domain
949 *	 sockets (e.g. socket is tested between bind() and listen(),
950 *	 the socket is deleted and replaced in the file-system between
951 *	 stat() and unlink()).
952 */
953
954isc_result_t
955isc_socket_permunix(isc_sockaddr_t *sockaddr, isc_uint32_t perm,
956		    isc_uint32_t owner, isc_uint32_t group);
957/*%<
958 * Set ownership and file permissions on the UNIX domain socket.
959 *
960 * Note: On Solaris and SunOS this secures the directory containing
961 *       the socket as Solaris and SunOS do not honour the filesystem
962 *	 permissions on the socket.
963 *
964 * Requires:
965 * \li	'sockaddr' to be a valid UNIX domain sockaddr.
966 *
967 * Returns:
968 * \li	#ISC_R_SUCCESS
969 * \li	#ISC_R_FAILURE
970 */
971
972void isc_socket_setname(isc_socket_t *socket, const char *name, void *tag);
973/*%<
974 * Set the name and optional tag for a socket.  This allows tracking of the
975 * owner or purpose for this socket, and is useful for tracing and statistics
976 * reporting.
977 */
978
979const char *isc_socket_getname(isc_socket_t *socket);
980/*%<
981 * Get the name associated with a socket, if any.
982 */
983
984void *isc_socket_gettag(isc_socket_t *socket);
985/*%<
986 * Get the tag associated with a socket, if any.
987 */
988
989void
990isc__socketmgr_setreserved(isc_socketmgr_t *mgr, isc_uint32_t);
991/*%<
992 * Temporary.  For use by named only.
993 */
994
995#ifdef HAVE_LIBXML2
996
997void
998isc_socketmgr_renderxml(isc_socketmgr_t *mgr, xmlTextWriterPtr writer);
999/*%<
1000 * Render internal statistics and other state into the XML document.
1001 */
1002
1003#endif /* HAVE_LIBXML2 */
1004
1005ISC_LANG_ENDDECLS
1006
1007#endif /* ISC_SOCKET_H */
1008