socket.h revision 135446
1/*
2 * Copyright (C) 2004  Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2002  Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/* $Id: socket.h,v 1.54.12.4 2004/03/08 09:04:53 marka Exp $ */
19
20#ifndef ISC_SOCKET_H
21#define ISC_SOCKET_H 1
22
23/*****
24 ***** Module Info
25 *****/
26
27/*
28 * Sockets
29 *
30 * Provides TCP and UDP sockets for network I/O.  The sockets are event
31 * sources in the task system.
32 *
33 * When I/O completes, a completion event for the socket is posted to the
34 * event queue of the task which requested the I/O.
35 *
36 * MP:
37 *	The module ensures appropriate synchronization of data structures it
38 *	creates and manipulates.
39 *
40 *	Clients of this module must not be holding a socket's task's lock when
41 *	making a call that affects that socket.  Failure to follow this rule
42 *	can result in deadlock.
43 *
44 *	The caller must ensure that isc_socketmgr_destroy() is called only
45 *	once for a given manager.
46 *
47 * Reliability:
48 *	No anticipated impact.
49 *
50 * Resources:
51 *	<TBS>
52 *
53 * Security:
54 *	No anticipated impact.
55 *
56 * Standards:
57 *	None.
58 */
59
60/***
61 *** Imports
62 ***/
63
64#include <isc/lang.h>
65#include <isc/types.h>
66#include <isc/event.h>
67#include <isc/eventclass.h>
68#include <isc/time.h>
69#include <isc/region.h>
70#include <isc/sockaddr.h>
71
72ISC_LANG_BEGINDECLS
73
74/***
75 *** Constants
76 ***/
77
78/*
79 * Maximum number of buffers in a scatter/gather read/write.  The operating
80 * system in use must support at least this number (plus one on some systems).
81 */
82#define ISC_SOCKET_MAXSCATTERGATHER	8
83
84/***
85 *** Types
86 ***/
87
/*
 * Event used for send and receive operations.  It is the type of the
 * 'event' argument to isc_socket_recv2() and isc_socket_sendto2(), and the
 * done event of the vectored calls hands the buffers back in 'bufferlist'.
 */
88struct isc_socketevent {
89	ISC_EVENT_COMMON(isc_socketevent_t);	/* NOTE(review): macro defined elsewhere, presumably <isc/event.h> */
90	isc_result_t		result;		/* OK, EOF, whatever else */
91	unsigned int		minimum;	/* minimum i/o for event */
92	unsigned int		n;		/* bytes read or written */
93	unsigned int		offset;		/* offset into buffer list */
94	isc_region_t		region;		/* for single-buffer i/o */
95	isc_bufferlist_t	bufferlist;	/* list of buffers */
96	isc_sockaddr_t		address;	/* source address */
97	isc_time_t		timestamp;	/* timestamp of packet recv; valid if _TIMESTAMP is set */
98	struct in6_pktinfo	pktinfo;	/* ipv6 pktinfo; valid if _PKTINFO is set */
99	isc_uint32_t		attributes;	/* ISC_SOCKEVENTATTR_* bits, see below */
100};
101
/*
 * Event type carrying a newly accepted connection: per the
 * isc_socket_accept() description, the new socket is sent inside this
 * event with an ISC_SOCKEVENT_NEWCONN event.
 */
102typedef struct isc_socket_newconnev isc_socket_newconnev_t;
103struct isc_socket_newconnev {
104	ISC_EVENT_COMMON(isc_socket_newconnev_t);	/* NOTE(review): macro defined elsewhere, presumably <isc/event.h> */
105	isc_socket_t *		newsocket;	/* the new socket for the accepted connection */
106	isc_result_t		result;		/* OK, EOF, whatever else */
107	isc_sockaddr_t		address;	/* source address */
108};
109
/*
 * NOTE(review): presumably the type of the CONNECT event posted by
 * isc_socket_connect(); this header does not state the link explicitly,
 * so confirm against the implementation.
 */
110typedef struct isc_socket_connev isc_socket_connev_t;
111struct isc_socket_connev {
112	ISC_EVENT_COMMON(isc_socket_connev_t);	/* NOTE(review): macro defined elsewhere, presumably <isc/event.h> */
113	isc_result_t		result;		/* OK, EOF, whatever else */
114};
115
116/*
117 * _ATTACHED:	Internal use only.
118 * _TRUNC:	Packet was truncated on receive.
119 * _CTRUNC:	Packet control information was truncated.  This can
120 *		indicate that the packet is not complete, even though
121 *		all the data is valid.
122 * _TIMESTAMP:	The timestamp member is valid.
123 * _PKTINFO:	The pktinfo member is valid.
124 * _MULTICAST:	The UDP packet was received via a multicast transmission.
125 */
126#define ISC_SOCKEVENTATTR_ATTACHED		0x80000000U /* internal */
127#define ISC_SOCKEVENTATTR_TRUNC			0x00800000U /* public */
128#define ISC_SOCKEVENTATTR_CTRUNC		0x00400000U /* public */
129#define ISC_SOCKEVENTATTR_TIMESTAMP		0x00200000U /* public */
130#define ISC_SOCKEVENTATTR_PKTINFO		0x00100000U /* public */
131#define ISC_SOCKEVENTATTR_MULTICAST		0x00080000U /* public */
132
/*
 * Socket event types.  All but ISC_SOCKEVENT_ANYEVENT are offsets from
 * ISC_EVENTCLASS_SOCKET.
 */
133#define ISC_SOCKEVENT_ANYEVENT  (0)	/* NOTE(review): use not shown in this header */
134#define ISC_SOCKEVENT_RECVDONE	(ISC_EVENTCLASS_SOCKET + 1)	/* presumably the read done event -- confirm */
135#define ISC_SOCKEVENT_SENDDONE	(ISC_EVENTCLASS_SOCKET + 2)	/* presumably the send done event -- confirm */
136#define ISC_SOCKEVENT_NEWCONN	(ISC_EVENTCLASS_SOCKET + 3)	/* new connection, see isc_socket_accept() */
137#define ISC_SOCKEVENT_CONNECT	(ISC_EVENTCLASS_SOCKET + 4)	/* connect finished, see isc_socket_connect() */
138
139/*
140 * Internal events.
141 */
142#define ISC_SOCKEVENT_INTR	(ISC_EVENTCLASS_SOCKET + 256)
143#define ISC_SOCKEVENT_INTW	(ISC_EVENTCLASS_SOCKET + 257)
144
/*
 * Socket kinds: the 'type' argument to isc_socket_create() and the value
 * returned by isc_socket_gettype().
 */
145typedef enum {
146	isc_sockettype_udp = 1,	/* UDP socket */
147	isc_sockettype_tcp = 2	/* TCP socket */
148} isc_sockettype_t;
149
150/*
151 * How a socket should be shutdown in isc_socket_shutdown() calls.
152 */
153#define ISC_SOCKSHUT_RECV	0x00000001	/* close read side */
154#define ISC_SOCKSHUT_SEND	0x00000002	/* close write side */
155#define ISC_SOCKSHUT_ALL	0x00000003	/* close them all */
156
157/*
158 * What I/O events to cancel in isc_socket_cancel() calls.
159 */
160#define ISC_SOCKCANCEL_RECV	0x00000001	/* cancel recv */
161#define ISC_SOCKCANCEL_SEND	0x00000002	/* cancel send */
162#define ISC_SOCKCANCEL_ACCEPT	0x00000004	/* cancel accept */
163#define ISC_SOCKCANCEL_CONNECT	0x00000008	/* cancel connect */
164#define ISC_SOCKCANCEL_ALL	0x0000000f	/* cancel everything */
165
166/*
167 * Flags for isc_socket_sendto2() and isc_socket_recv2() calls.
168 */
169#define ISC_SOCKFLAG_IMMEDIATE	0x00000001	/* send event only if needed */
170#define ISC_SOCKFLAG_NORETRY	0x00000002	/* drop failed UDP sends */
171
172/***
173 *** Socket and Socket Manager Functions
174 ***
175 *** Note: all Ensures conditions apply only if the result is success for
176 *** those functions which return an isc_result.
177 ***/
178
179isc_result_t
180isc_socket_create(isc_socketmgr_t *manager,
181		  int pf,
182		  isc_sockettype_t type,
183		  isc_socket_t **socketp);
184/*
185 * Create a new 'type' socket managed by 'manager'.
186 *
187 * Note:
188 *
189 *	'pf' is the desired protocol family, e.g. PF_INET or PF_INET6.
190 *
191 * Requires:
192 *
193 *	'manager' is a valid manager
194 *
195 *	'socketp' is a valid pointer, and *socketp == NULL
196 *
197 * Ensures:
198 *
199 *	'*socketp' is attached to the newly created socket
200 *
201 * Returns:
202 *
203 *	ISC_R_SUCCESS
204 *	ISC_R_NOMEMORY
205 *	ISC_R_NORESOURCES
206 *	ISC_R_UNEXPECTED
207 */
208
209void
210isc_socket_cancel(isc_socket_t *sock, isc_task_t *task,
211		  unsigned int how);
212/*
213 * Cancel pending I/O of the type specified by "how".
214 *
215 * Note: if "task" is NULL, then the cancel applies to all tasks using the
216 * socket.
217 *
218 * Requires:
219 *
220 *	"sock" is a valid socket
221 *
222 *	"task" is NULL or a valid task
223 *
224 * "how" is a bitmask describing the type of cancelation to perform.
225 * The type ISC_SOCKCANCEL_ALL will cancel all pending I/O on this
226 * socket.
227 *
228 * ISC_SOCKCANCEL_RECV:
229 *	Cancel pending isc_socket_recv() calls.
230 *
231 * ISC_SOCKCANCEL_SEND:
232 *	Cancel pending isc_socket_send() and isc_socket_sendto() calls.
233 *
234 * ISC_SOCKCANCEL_ACCEPT:
235 *	Cancel pending isc_socket_accept() calls.
236 *
237 * ISC_SOCKCANCEL_CONNECT:
238 *	Cancel pending isc_socket_connect() call.
239 */
240
241void
242isc_socket_shutdown(isc_socket_t *sock, unsigned int how);
243/*
244 * Shut down 'sock' according to 'how'.
245 *
246 * Requires:
247 *
248 *	'sock' is a valid socket.
249 *
252 *	If 'how' is 'ISC_SOCKSHUT_RECV' or 'ISC_SOCKSHUT_ALL' then
253 *
254 *		The read queue must be empty.
255 *
256 *		No further read requests may be made.
257 *
258 *	If 'how' is 'ISC_SOCKSHUT_SEND' or 'ISC_SOCKSHUT_ALL' then
259 *
260 *		The write queue must be empty.
261 *
262 *		No further write requests may be made.
263 */
264
265void
266isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp);
267/*
268 * Attach *socketp to 'sock'.
269 *
270 * Requires:
271 *
272 *	'sock' is a valid socket.
273 *
274 *	'socketp' points to a NULL socket.
275 *
276 * Ensures:
277 *
278 *	*socketp is attached to 'sock'.
279 */
280
281void
282isc_socket_detach(isc_socket_t **socketp);
283/*
284 * Detach *socketp from its socket.
285 *
286 * Requires:
287 *
288 *	'socketp' points to a valid socket.
289 *
290 *	If '*socketp' is the last reference to the socket,
291 *	then:
292 *
293 *		There must be no pending I/O requests.
294 *
295 * Ensures:
296 *
297 *	*socketp is NULL.
298 *
299 *	If '*socketp' is the last reference to the socket,
300 *	then:
301 *
302 *		The socket will be shutdown (both reading and writing)
303 *		for all tasks.
304 *
305 *		All resources used by the socket have been freed
306 */
307
308isc_result_t
309isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *addressp);
310/*
311 * Bind 'sock' to '*addressp'.
312 *
313 * Requires:
314 *
315 *	'sock' is a valid socket
316 *
317 *	'addressp' points to a valid isc_sockaddr.
318 *
319 * Returns:
320 *
321 *	ISC_R_SUCCESS
322 *	ISC_R_NOPERM
323 *	ISC_R_ADDRNOTAVAIL
324 *	ISC_R_ADDRINUSE
325 *	ISC_R_BOUND
326 *	ISC_R_UNEXPECTED
327 */
328
329isc_result_t
330isc_socket_filter(isc_socket_t *sock, const char *filter);
331/*
332 * Inform the kernel that it should perform accept filtering.
333 * If filter is NULL the current filter will be removed.
334 */
335
336isc_result_t
337isc_socket_listen(isc_socket_t *sock, unsigned int backlog);
338/*
339 * Set listen mode on the socket.  After this call, the only function that
340 * can be used (other than attach and detach) is isc_socket_accept().
341 *
342 * Notes:
343 *
344 *	'backlog' is as in the UNIX system call listen() and may be
345 *	ignored by non-UNIX implementations.
346 *
347 *	If 'backlog' is zero, a reasonable system default is used, usually
348 *	SOMAXCONN.
349 *
350 * Requires:
351 *
352 *	'sock' is a valid, bound TCP socket.
353 *
354 * Returns:
355 *
356 *	ISC_R_SUCCESS
357 *	ISC_R_UNEXPECTED
358 */
359
360isc_result_t
361isc_socket_accept(isc_socket_t *sock,
362		  isc_task_t *task, isc_taskaction_t action, const void *arg);
363/*
364 * Queue accept event.  When a new connection is received, the task will
365 * get an ISC_SOCKEVENT_NEWCONN event with the sender set to the listen
366 * socket.  The new socket structure is sent inside the isc_socket_newconnev_t
367 * event type, and is attached to the task 'task'.
368 *
369 * REQUIRES:
370 *	'sock' is a valid TCP socket that isc_socket_listen() was called
371 *	on.
372 *
373 *	'task' is a valid task
374 *
375 *	'action' is a valid action
376 *
377 * RETURNS:
378 *	ISC_R_SUCCESS
379 *	ISC_R_NOMEMORY
380 *	ISC_R_UNEXPECTED
381 */
382
383isc_result_t
384isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addressp,
385		   isc_task_t *task, isc_taskaction_t action,
386		   const void *arg);
387/*
388 * Connect 'sock' to peer with address '*addressp'.  When the connection
389 * succeeds, or when an error occurs, a CONNECT event with action 'action'
390 * and arg 'arg' will be posted to the event queue for 'task'.
391 *
392 * Requires:
393 *
394 *	'sock' is a valid TCP socket
395 *
396 *	'addressp' points to a valid isc_sockaddr
397 *
398 *	'task' is a valid task
399 *
400 *	'action' is a valid action
401 *
402 * Returns:
403 *
404 *	ISC_R_SUCCESS
405 *	ISC_R_NOMEMORY
406 *	ISC_R_UNEXPECTED
407 *
408 * Posted event's result code:
409 *
410 *	ISC_R_SUCCESS
411 *	ISC_R_TIMEDOUT
412 *	ISC_R_CONNREFUSED
413 *	ISC_R_NETUNREACH
414 *	ISC_R_UNEXPECTED
415 */
416
417isc_result_t
418isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp);
419/*
420 * Get the name of the peer connected to 'sock'.
421 *
422 * Requires:
423 *
424 *	'sock' is a valid TCP socket.
425 *
426 * Returns:
427 *
428 *	ISC_R_SUCCESS
429 *	ISC_R_TOOSMALL
430 *	ISC_R_UNEXPECTED
431 */
432
433isc_result_t
434isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp);
435/*
436 * Get the name of 'sock'.
437 *
438 * Requires:
439 *
440 *	'sock' is a valid socket.
441 *
442 * Returns:
443 *
444 *	ISC_R_SUCCESS
445 *	ISC_R_TOOSMALL
446 *	ISC_R_UNEXPECTED
447 */
448
449isc_result_t
450isc_socket_recv(isc_socket_t *sock, isc_region_t *region,
451		unsigned int minimum,
452		isc_task_t *task, isc_taskaction_t action, const void *arg);
453isc_result_t
454isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
455		 unsigned int minimum,
456		 isc_task_t *task, isc_taskaction_t action, const void *arg);
457
458isc_result_t
459isc_socket_recv2(isc_socket_t *sock, isc_region_t *region,
460		 unsigned int minimum, isc_task_t *task,
461		 isc_socketevent_t *event, unsigned int flags);
462
463/*
464 * Receive from 'sock', storing the results in 'region' or '*buflist'.
465 *
466 * Notes:
467 *
468 *	Let 'length' refer to the length of 'region' or to the sum of all
469 *	available regions in the list of buffers '*buflist'.
470 *
471 *	If 'minimum' is non-zero and at least that many bytes are read,
472 *	the completion event will be posted to the task 'task.'  If minimum
473 *	is zero, the exact number of bytes requested in the region must
474 * 	be read for an event to be posted.  This only makes sense for TCP
475 *	connections, and is always set to 1 byte for UDP.
476 *
477 *	The read will complete when the desired number of bytes have been
478 *	read, if end-of-input occurs, or if an error occurs.  A read done
479 *	event with the given 'action' and 'arg' will be posted to the
480 *	event queue of 'task'.
481 *
482 *	The caller may not modify 'region', the buffers which are passed
483 *	into this function, or any data they refer to until the completion
484 *	event is received.
485 *
486 *	For isc_socket_recvv():
487 *	On successful completion, '*buflist' will be empty, and the list of
488 *	all buffers will be returned in the done event's 'bufferlist'
489 *	member.  On error return, '*buflist' will be unchanged.
490 *
491 *	For isc_socket_recv2():
492 *	'event' is not NULL, and the non-socket specific fields are
493 *	expected to be initialized.
494 *
495 *	For isc_socket_recv2():
496 *	The only defined value for 'flags' is ISC_SOCKFLAG_IMMEDIATE.  If
497 *	set and the operation completes, the return value will be
498 *	ISC_R_SUCCESS and the event will be filled in and not sent.  If the
499 *	operation does not complete, the return value will be
500 *	ISC_R_INPROGRESS and the event will be sent when the operation
501 *	completes.
502 *
503 * Requires:
504 *
505 *	'sock' is a valid, bound socket.
506 *
507 *	For isc_socket_recv():
508 *	'region' is a valid region
509 *
510 *	For isc_socket_recvv():
511 *	'buflist' is non-NULL, and '*buflist' contains at least one buffer.
512 *
513 *	'task' is a valid task
514 *
515 *	For isc_socket_recv() and isc_socket_recvv():
516 *	action != NULL and is a valid action
517 *
518 *	For isc_socket_recv2():
519 *	event != NULL
520 *
521 * Returns:
522 *
523 *	ISC_R_SUCCESS
524 *	ISC_R_INPROGRESS
525 *	ISC_R_NOMEMORY
526 *	ISC_R_UNEXPECTED
527 *
528 * Event results:
529 *
530 *	ISC_R_SUCCESS
531 *	ISC_R_UNEXPECTED
532 *	XXX needs other net-type errors
533 */
534
535isc_result_t
536isc_socket_send(isc_socket_t *sock, isc_region_t *region,
537		isc_task_t *task, isc_taskaction_t action, const void *arg);
538isc_result_t
539isc_socket_sendto(isc_socket_t *sock, isc_region_t *region,
540		  isc_task_t *task, isc_taskaction_t action, const void *arg,
541		  isc_sockaddr_t *address, struct in6_pktinfo *pktinfo);
542isc_result_t
543isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
544		 isc_task_t *task, isc_taskaction_t action, const void *arg);
545isc_result_t
546isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
547		   isc_task_t *task, isc_taskaction_t action, const void *arg,
548		   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo);
549isc_result_t
550isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region,
551		   isc_task_t *task,
552		   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
553		   isc_socketevent_t *event, unsigned int flags);
554
555/*
556 * Send the contents of 'region' to the socket's peer.
557 *
558 * Notes:
559 *
560 *	Shutting down the requestor's task *may* result in any
561 *	still pending writes being dropped or completed, depending on the
562 *	underlying OS implementation.
563 *
564 *	If 'action' is NULL, then no completion event will be posted.
565 *
566 *	The caller may not modify 'region', the buffers which are passed
567 *	into this function, or any data they refer to until the completion
568 *	event is received.
569 *
570 *	For isc_socket_sendv() and isc_socket_sendtov():
571 *	On successful completion, '*buflist' will be empty, and the list of
572 *	all buffers will be returned in the done event's 'bufferlist'
573 *	member.  On error return, '*buflist' will be unchanged.
574 *
575 *	For isc_socket_sendto2():
576 *	'event' is not NULL, and the non-socket specific fields are
577 *	expected to be initialized.
578 *
579 *	For isc_socket_sendto2():
580 *	The only defined values for 'flags' are ISC_SOCKFLAG_IMMEDIATE
581 *	and ISC_SOCKFLAG_NORETRY.
582 *
583 *	If ISC_SOCKFLAG_IMMEDIATE is set and the operation completes, the
584 *	return value will be ISC_R_SUCCESS and the event will be filled
585 *	in and not sent.  If the operation does not complete, the return
586 *	value will be ISC_R_INPROGRESS and the event will be sent when
587 *	the operation completes.
588 *
589 *	ISC_SOCKFLAG_NORETRY can only be set for UDP sockets.  If set
590 *	and the send operation fails due to a transient error, the send
591 *	will not be retried and the error will be indicated in the event.
592 *	Using this option along with ISC_SOCKFLAG_IMMEDIATE allows the caller
593 *	to specify a region that is allocated on the stack.
594 *
595 * Requires:
596 *
597 *	'sock' is a valid, bound socket.
598 *
599 *	For isc_socket_send():
600 *	'region' is a valid region
601 *
602 *	For isc_socket_sendv() and isc_socket_sendtov():
603 *	'buflist' is non-NULL, and '*buflist' contains at least one buffer.
604 *
605 *	'task' is a valid task
606 *
607 *	For isc_socket_sendv(), isc_socket_sendtov(), isc_socket_send(), and
608 *	isc_socket_sendto():
609 *	action == NULL or is a valid action
610 *
611 *	For isc_socket_sendto2():
612 *	event != NULL
613 *
614 * Returns:
615 *
616 *	ISC_R_SUCCESS
617 *	ISC_R_INPROGRESS
618 *	ISC_R_NOMEMORY
619 *	ISC_R_UNEXPECTED
620 *
621 * Event results:
622 *
623 *	ISC_R_SUCCESS
624 *	ISC_R_UNEXPECTED
625 *	XXX needs other net-type errors
626 */
627
628isc_result_t
629isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp);
630/*
631 * Create a socket manager.
632 *
633 * Notes:
634 *
635 *	All memory will be allocated in memory context 'mctx'.
636 *
637 * Requires:
638 *
639 *	'mctx' is a valid memory context.
640 *
641 *	'managerp' points to a NULL isc_socketmgr_t.
642 *
643 * Ensures:
644 *
645 *	'*managerp' is a valid isc_socketmgr_t.
646 *
647 * Returns:
648 *
649 *	ISC_R_SUCCESS
650 *	ISC_R_NOMEMORY
651 *	ISC_R_UNEXPECTED
652 */
653
654void
655isc_socketmgr_destroy(isc_socketmgr_t **managerp);
656/*
657 * Destroy a socket manager.
658 *
659 * Notes:
660 *
661 *	This routine blocks until there are no sockets left in the manager,
662 *	so if the caller holds any socket references using the manager, it
663 *	must detach them before calling isc_socketmgr_destroy() or it will
664 *	block forever.
665 *
666 * Requires:
667 *
668 *	'*managerp' is a valid isc_socketmgr_t.
669 *
670 *	All sockets managed by this manager are fully detached.
671 *
672 * Ensures:
673 *
674 *	*managerp == NULL
675 *
676 *	All resources used by the manager have been freed.
677 */
678
679isc_sockettype_t
680isc_socket_gettype(isc_socket_t *sock);
681/*
682 * Returns the socket type for "sock."
683 *
684 * Requires:
685 *
686 *	"sock" is a valid socket.
687 */
688
689isc_boolean_t
690isc_socket_isbound(isc_socket_t *sock);
/*
 * NOTE(review): undocumented in this header.  Presumably reports whether
 * 'sock' has been bound (cf. isc_socket_bind()) -- confirm against the
 * implementation, including whether 'sock' must be a valid socket.
 */
691
692void
693isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes);
694/*
695 * If the socket is an IPv6 socket set/clear the IPV6_V6ONLY socket
696 * option if the host OS supports this option.
697 *
698 * Requires:
699 *	'sock' is a valid socket.
700 */
701
702ISC_LANG_ENDDECLS
703
704#endif /* ISC_SOCKET_H */
705