/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */
16
/* $Id: socket.h,v 1.8 2022/12/26 19:24:11 jmc Exp $ */
18
19#ifndef ISC_SOCKET_H
20#define ISC_SOCKET_H 1
21
/*****
 ***** Module Info
 *****/

/*! \file isc/socket.h
 * \brief Provides TCP and UDP sockets for network I/O.  The sockets are event
 * sources in the task system.
 *
 * When I/O completes, a completion event for the socket is posted to the
 * event queue of the task which requested the I/O.
 *
 * \li MP:
 *	The module ensures appropriate synchronization of data structures it
 *	creates and manipulates.
 *	Clients of this module must not be holding a socket's task's lock when
 *	making a call that affects that socket.  Failure to follow this rule
 *	can result in deadlock.
 *	The caller must ensure that isc_socketmgr_destroy() is called only
 *	once for a given manager.
 *
 * \li Reliability:
 *	No anticipated impact.
 *
 * \li Resources:
 *	TBS
 *
 * \li Security:
 *	No anticipated impact.
 *
 * \li Standards:
 *	None.
 */
54
55/***
56 *** Imports
57 ***/
58
59#include <time.h>
60
61#include <isc/event.h>
62#include <isc/eventclass.h>
63
64#include <isc/region.h>
65#include <isc/sockaddr.h>
66#include <isc/types.h>
67
/***
 *** Constants
 ***/

/*%
 * Maximum number of buffers in a scatter/gather read/write.  The operating
 * system in use must support at least this number (plus one on some
 * systems).
 */
#define ISC_SOCKET_MAXSCATTERGATHER	8
77
/*%
 * Option flag for isc_socket_bind(): set socket option SO_REUSEADDR prior
 * to calling bind() if a non-zero port is specified (AF_INET and AF_INET6).
 */
#define ISC_SOCKET_REUSEADDRESS		0x01U
83
84/***
85 *** Types
86 ***/
87
88struct isc_socketevent {
89	ISC_EVENT_COMMON(isc_socketevent_t);
90	isc_result_t		result;		/*%< OK, EOF, whatever else */
91	unsigned int		minimum;	/*%< minimum i/o for event */
92	unsigned int		n;		/*%< bytes read or written */
93	unsigned int		offset;		/*%< offset into buffer list */
94	isc_region_t		region;		/*%< for single-buffer i/o */
95	isc_bufferlist_t	bufferlist;	/*%< list of buffers */
96	struct sockaddr_storage		address;	/*%< source address */
97	struct timespec		timestamp;	/*%< timestamp of packet recv */
98	struct in6_pktinfo	pktinfo;	/*%< ipv6 pktinfo */
99	uint32_t		attributes;	/*%< see below */
100	isc_eventdestructor_t   destroy;	/*%< original destructor */
101	unsigned int		dscp;		/*%< UDP dscp value */
102};
103
104typedef struct isc_socket_newconnev isc_socket_newconnev_t;
105struct isc_socket_newconnev {
106	ISC_EVENT_COMMON(isc_socket_newconnev_t);
107	isc_socket_t *		newsocket;
108	isc_result_t		result;		/*%< OK, EOF, whatever else */
109	struct sockaddr_storage		address;	/*%< source address */
110};
111
112typedef struct isc_socket_connev isc_socket_connev_t;
113struct isc_socket_connev {
114	ISC_EVENT_COMMON(isc_socket_connev_t);
115	isc_result_t		result;		/*%< OK, EOF, whatever else */
116};
117
/*@{*/
/*!
 * Bits for the 'attributes' member of struct isc_socketevent.
 *
 * _ATTACHED:	Internal use only.
 * _TRUNC:	Packet was truncated on receive.
 * _CTRUNC:	Packet control information was truncated.  This can
 *		indicate that the packet is not complete, even though
 *		all the data is valid.
 * _TIMESTAMP:	The timestamp member is valid.
 * _PKTINFO:	The pktinfo member is valid.
 * _MULTICAST:	The UDP packet was received via a multicast transmission.
 * _DSCP:	The UDP DSCP value is valid.
 * _USEMINMTU:	Set the per packet IPV6_USE_MIN_MTU flag.
 */
#define ISC_SOCKEVENTATTR_ATTACHED		0x80000000U /* internal */
#define ISC_SOCKEVENTATTR_TRUNC			0x00800000U /* public */
#define ISC_SOCKEVENTATTR_CTRUNC		0x00400000U /* public */
#define ISC_SOCKEVENTATTR_TIMESTAMP		0x00200000U /* public */
#define ISC_SOCKEVENTATTR_PKTINFO		0x00100000U /* public */
#define ISC_SOCKEVENTATTR_MULTICAST		0x00080000U /* public */
#define ISC_SOCKEVENTATTR_DSCP			0x00040000U /* public */
#define ISC_SOCKEVENTATTR_USEMINMTU		0x00020000U /* public */
/*@}*/
140
/*
 * Public socket event types, numbered from ISC_EVENTCLASS_SOCKET
 * (from <isc/eventclass.h>).  ISC_SOCKEVENT_ANYEVENT is 0 and is not
 * offset from the event class.
 */
#define ISC_SOCKEVENT_ANYEVENT  (0)
#define ISC_SOCKEVENT_RECVDONE	(ISC_EVENTCLASS_SOCKET + 1)
#define ISC_SOCKEVENT_SENDDONE	(ISC_EVENTCLASS_SOCKET + 2)
#define ISC_SOCKEVENT_NEWCONN	(ISC_EVENTCLASS_SOCKET + 3)
#define ISC_SOCKEVENT_CONNECT	(ISC_EVENTCLASS_SOCKET + 4)
146
/*
 * Internal events.  Numbered from ISC_EVENTCLASS_SOCKET + 256, well clear
 * of the public event types above.
 */
#define ISC_SOCKEVENT_INTR	(ISC_EVENTCLASS_SOCKET + 256)
#define ISC_SOCKEVENT_INTW	(ISC_EVENTCLASS_SOCKET + 257)
152
/*%
 * Kind of socket requested from isc_socket_create().
 */
typedef enum {
	isc_sockettype_udp = 1,
	isc_sockettype_tcp = 2,
} isc_sockettype_t;
157
/*@{*/
/*!
 * How a socket should be shutdown in isc_socket_shutdown() calls.
 *
 * NOTE(review): isc_socket_shutdown() is not declared in this header.
 */
#define ISC_SOCKSHUT_RECV	0x00000001	/*%< close read side */
#define ISC_SOCKSHUT_SEND	0x00000002	/*%< close write side */
#define ISC_SOCKSHUT_ALL	0x00000003	/*%< close them all */
/*@}*/
166
/*@{*/
/*!
 * What I/O events to cancel in isc_socket_cancel() calls.  The values are
 * distinct bits and may be OR'ed together; ISC_SOCKCANCEL_ALL is the union
 * of the other four.
 */
#define ISC_SOCKCANCEL_RECV	0x00000001	/*%< cancel recv */
#define ISC_SOCKCANCEL_SEND	0x00000002	/*%< cancel send */
#define ISC_SOCKCANCEL_ACCEPT	0x00000004	/*%< cancel accept */
#define ISC_SOCKCANCEL_CONNECT	0x00000008	/*%< cancel connect */
#define ISC_SOCKCANCEL_ALL	0x0000000f	/*%< cancel everything */
/*@}*/
177
/*@{*/
/*!
 * Flags for isc_socket_send() and isc_socket_recv() calls.
 *
 * NOTE(review): neither of those functions is declared in this header;
 * the only prototype here with a 'flags' argument is
 * isc_socket_sendtov2() -- presumably these flags apply to it; confirm.
 */
#define ISC_SOCKFLAG_IMMEDIATE	0x00000001	/*%< send event only if needed */
#define ISC_SOCKFLAG_NORETRY	0x00000002	/*%< drop failed UDP sends */
/*@}*/
185
/*@{*/
/*!
 * Flags for fdwatchcreate.
 *
 * NOTE(review): no fdwatch creation function is declared in this header;
 * these flags may be unused -- confirm before relying on them.
 */
#define ISC_SOCKFDWATCH_READ	0x00000001	/*%< watch for readable */
#define ISC_SOCKFDWATCH_WRITE	0x00000002	/*%< watch for writable */
/*@}*/
193
194/***
195 *** Socket and Socket Manager Functions
196 ***
197 *** Note: all Ensures conditions apply only if the result is success for
198 *** those functions which return an isc_result.
199 ***/
200
201isc_result_t
202isc_socket_create(isc_socketmgr_t *manager,
203		  int pf,
204		  isc_sockettype_t type,
205		  isc_socket_t **socketp);
206/*%<
207 * Create a new 'type' socket managed by 'manager'.
208 *
209 * For isc_sockettype_fdwatch sockets you should use isc_socket_fdwatchcreate()
210 * rather than isc_socket_create().
211 *
212 * Note:
213 *
214 *\li	'pf' is the desired protocol family, e.g. PF_INET or PF_INET6.
215 *
216 * Requires:
217 *
218 *\li	'manager' is a valid manager
219 *
220 *\li	'socketp' is a valid pointer, and *socketp == NULL
221 *
222 *\li	'type' is not isc_sockettype_fdwatch
223 *
224 * Ensures:
225 *
226 *	'*socketp' is attached to the newly created socket
227 *
228 * Returns:
229 *
230 *\li	#ISC_R_SUCCESS
231 *\li	#ISC_R_NOMEMORY
232 *\li	#ISC_R_NORESOURCES
233 *\li	#ISC_R_UNEXPECTED
234 */
235
236void
237isc_socket_cancel(isc_socket_t *sock, isc_task_t *task,
238		  unsigned int how);
239/*%<
240 * Cancel pending I/O of the type specified by "how".
241 *
242 * Note: if "task" is NULL, then the cancel applies to all tasks using the
243 * socket.
244 *
245 * Requires:
246 *
247 * \li	"socket" is a valid socket
248 *
249 * \li	"task" is NULL or a valid task
250 *
251 * "how" is a bitmask describing the type of cancellation to perform.
252 * The type ISC_SOCKCANCEL_ALL will cancel all pending I/O on this
253 * socket.
254 *
255 * \li ISC_SOCKCANCEL_RECV:
256 *	Cancel pending isc_socket_recv() calls.
257 *
258 * \li ISC_SOCKCANCEL_SEND:
259 *	Cancel pending isc_socket_send() and isc_socket_sendto() calls.
260 *
261 * \li ISC_SOCKCANCEL_ACCEPT:
262 *	Cancel pending isc_socket_accept() calls.
263 *
264 * \li ISC_SOCKCANCEL_CONNECT:
265 *	Cancel pending isc_socket_connect() call.
266 */
267
268void
269isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp);
270/*%<
271 * Attach *socketp to socket.
272 *
273 * Requires:
274 *
275 * \li	'socket' is a valid socket.
276 *
277 * \li	'socketp' points to a NULL socket.
278 *
279 * Ensures:
280 *
281 * \li	*socketp is attached to socket.
282 */
283
284void
285isc_socket_detach(isc_socket_t **socketp);
286/*%<
287 * Detach *socketp from its socket.
288 *
289 * Requires:
290 *
291 * \li	'socketp' points to a valid socket.
292 *
293 * \li	If '*socketp' is the last reference to the socket,
294 *	then:
295 *
296 *		There must be no pending I/O requests.
297 *
298 * Ensures:
299 *
300 * \li	*socketp is NULL.
301 *
302 * \li	If '*socketp' is the last reference to the socket,
303 *	then:
304 *
305 *		The socket will be shutdown (both reading and writing)
306 *		for all tasks.
307 *
308 *		All resources used by the socket have been freed
309 */
310
311isc_result_t
312isc_socket_bind(isc_socket_t *sock, struct sockaddr_storage *addressp,
313		unsigned int options);
314/*%<
315 * Bind 'socket' to '*addressp'.
316 *
317 * Requires:
318 *
319 * \li	'socket' is a valid socket
320 *
321 * \li	'addressp' points to a valid isc_sockaddr.
322 *
323 * Returns:
324 *
325 * \li	ISC_R_SUCCESS
326 * \li	ISC_R_NOPERM
327 * \li	ISC_R_ADDRNOTAVAIL
328 * \li	ISC_R_ADDRINUSE
329 * \li	ISC_R_BOUND
330 * \li	ISC_R_UNEXPECTED
331 */
332
333isc_result_t
334isc_socket_connect(isc_socket_t *sock, struct sockaddr_storage *addressp,
335		   isc_task_t *task, isc_taskaction_t action,
336		   void *arg);
337/*%<
338 * Connect 'socket' to peer with address *saddr.  When the connection
339 * succeeds, or when an error occurs, a CONNECT event with action 'action'
340 * and arg 'arg' will be posted to the event queue for 'task'.
341 *
342 * Requires:
343 *
344 * \li	'socket' is a valid TCP socket
345 *
346 * \li	'addressp' points to a valid isc_sockaddr
347 *
348 * \li	'task' is a valid task
349 *
350 * \li	'action' is a valid action
351 *
352 * Returns:
353 *
354 * \li	ISC_R_SUCCESS
355 * \li	ISC_R_NOMEMORY
356 * \li	ISC_R_UNEXPECTED
357 *
358 * Posted event's result code:
359 *
360 * \li	ISC_R_SUCCESS
361 * \li	ISC_R_TIMEDOUT
362 * \li	ISC_R_CONNREFUSED
363 * \li	ISC_R_NETUNREACH
364 * \li	ISC_R_UNEXPECTED
365 */
366
367/*@{*/
368isc_result_t
369isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
370		 unsigned int minimum,
371		 isc_task_t *task, isc_taskaction_t action, void *arg);
372/*!
373 * Receive from 'socket', storing the results in region.
374 *
375 * Notes:
376 *
377 *\li	Let 'length' refer to the length of 'region' or to the sum of all
378 *	available regions in the list of buffers '*buflist'.
379 *
380 *\li	If 'minimum' is non-zero and at least that many bytes are read,
381 *	the completion event will be posted to the task 'task.'  If minimum
382 *	is zero, the exact number of bytes requested in the region must
383 * 	be read for an event to be posted.  This only makes sense for TCP
384 *	connections, and is always set to 1 byte for UDP.
385 *
386 *\li	The read will complete when the desired number of bytes have been
387 *	read, if end-of-input occurs, or if an error occurs.  A read done
388 *	event with the given 'action' and 'arg' will be posted to the
389 *	event queue of 'task'.
390 *
391 *\li	The caller may not modify 'region', the buffers which are passed
392 *	into this function, or any data they refer to until the completion
393 *	event is received.
394 *
395 *\li	For isc_socket_recvv():
396 *	On successful completion, '*buflist' will be empty, and the list of
397 *	all buffers will be returned in the done event's 'bufferlist'
398 *	member.  On error return, '*buflist' will be unchanged.
399 *
400 *\li	For isc_socket_recv2():
401 *	'event' is not NULL, and the non-socket specific fields are
402 *	expected to be initialized.
403 *
404 *\li	For isc_socket_recv2():
405 *	The only defined value for 'flags' is ISC_SOCKFLAG_IMMEDIATE.  If
406 *	set and the operation completes, the return value will be
407 *	ISC_R_SUCCESS and the event will be filled in and not sent.  If the
408 *	operation does not complete, the return value will be
409 *	ISC_R_INPROGRESS and the event will be sent when the operation
410 *	completes.
411 *
412 * Requires:
413 *
414 *\li	'socket' is a valid, bound socket.
415 *
416 *\li	For isc_socket_recv():
417 *	'region' is a valid region
418 *
419 *\li	For isc_socket_recvv():
420 *	'buflist' is non-NULL, and '*buflist' contain at least one buffer.
421 *
422 *\li	'task' is a valid task
423 *
424 *\li	For isc_socket_recv() and isc_socket_recvv():
425 *	action != NULL and is a valid action
426 *
427 *\li	For isc_socket_recv2():
428 *	event != NULL
429 *
430 * Returns:
431 *
432 *\li	#ISC_R_SUCCESS
433 *\li	#ISC_R_INPROGRESS
434 *\li	#ISC_R_NOMEMORY
435 *\li	#ISC_R_UNEXPECTED
436 *
437 * Event results:
438 *
439 *\li	#ISC_R_SUCCESS
440 *\li	#ISC_R_UNEXPECTED
441 *\li	XXX needs other net-type errors
442 */
443/*@}*/
444
445/*@{*/
446isc_result_t
447isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
448		 isc_task_t *task, isc_taskaction_t action, void *arg);
449isc_result_t
450isc_socket_sendtov2(isc_socket_t *sock, isc_bufferlist_t *buflist,
451		    isc_task_t *task, isc_taskaction_t action, void *arg,
452		    struct sockaddr_storage *address, struct in6_pktinfo *pktinfo,
453		    unsigned int flags);
454/*!
455 * Send the contents of 'region' to the socket's peer.
456 *
457 * Notes:
458 *
459 *\li	Shutting down the requestor's task *may* result in any
460 *	still pending writes being dropped or completed, depending on the
461 *	underlying OS implementation.
462 *
463 *\li	If 'action' is NULL, then no completion event will be posted.
464 *
465 *\li	The caller may not modify 'region', the buffers which are passed
466 *	into this function, or any data they refer to until the completion
467 *	event is received.
468 *
469 *\li	For isc_socket_sendv() and isc_socket_sendtov():
470 *	On successful completion, '*buflist' will be empty, and the list of
471 *	all buffers will be returned in the done event's 'bufferlist'
472 *	member.  On error return, '*buflist' will be unchanged.
473 *
474 *\li	For isc_socket_sendto2():
475 *	'event' is not NULL, and the non-socket specific fields are
476 *	expected to be initialized.
477 *
478 *\li	For isc_socket_sendto2():
479 *	The only defined values for 'flags' are ISC_SOCKFLAG_IMMEDIATE
480 *	and ISC_SOCKFLAG_NORETRY.
481 *
482 *\li	If ISC_SOCKFLAG_IMMEDIATE is set and the operation completes, the
483 *	return value will be ISC_R_SUCCESS and the event will be filled
484 *	in and not sent.  If the operation does not complete, the return
485 *	value will be ISC_R_INPROGRESS and the event will be sent when
486 *	the operation completes.
487 *
488 *\li	ISC_SOCKFLAG_NORETRY can only be set for UDP sockets.  If set
489 *	and the send operation fails due to a transient error, the send
490 *	will not be retried and the error will be indicated in the event.
491 *	Using this option along with ISC_SOCKFLAG_IMMEDIATE allows the caller
492 *	to specify a region that is allocated on the stack.
493 *
494 * Requires:
495 *
496 *\li	'socket' is a valid, bound socket.
497 *
498 *\li	For isc_socket_send():
499 *	'region' is a valid region
500 *
501 *\li	For isc_socket_sendv() and isc_socket_sendtov():
502 *	'buflist' is non-NULL, and '*buflist' contain at least one buffer.
503 *
504 *\li	'task' is a valid task
505 *
506 *\li	For isc_socket_sendv(), isc_socket_sendtov(), isc_socket_send(), and
507 *	isc_socket_sendto():
508 *	action == NULL or is a valid action
509 *
510 *\li	For isc_socket_sendto2():
511 *	event != NULL
512 *
513 * Returns:
514 *
515 *\li	#ISC_R_SUCCESS
516 *\li	#ISC_R_INPROGRESS
517 *\li	#ISC_R_NOMEMORY
518 *\li	#ISC_R_UNEXPECTED
519 *
520 * Event results:
521 *
522 *\li	#ISC_R_SUCCESS
523 *\li	#ISC_R_UNEXPECTED
524 *\li	XXX needs other net-type errors
525 */
526/*@}*/
527
528isc_result_t
529isc_socketmgr_create(isc_socketmgr_t **managerp);
530
531/*%<
532 * Create a socket manager.  If "maxsocks" is non-zero, it specifies the
533 * maximum number of sockets that the created manager should handle.
534 *
535 * Notes:
536 *
537 * Requires:
538 *
539 *\li	'managerp' points to a NULL isc_socketmgr_t.
540 *
541 * Ensures:
542 *
543 *\li	'*managerp' is a valid isc_socketmgr_t.
544 *
545 * Returns:
546 *
547 *\li	#ISC_R_SUCCESS
548 *\li	#ISC_R_NOMEMORY
549 *\li	#ISC_R_UNEXPECTED
550 *\li	#ISC_R_NOTIMPLEMENTED
551 */
552
553void
554isc_socketmgr_destroy(isc_socketmgr_t **managerp);
555/*%<
556 * Destroy a socket manager.
557 *
558 * Notes:
559 *
560 *\li	This routine blocks until there are no sockets left in the manager,
561 *	so if the caller holds any socket references using the manager, it
562 *	must detach them before calling isc_socketmgr_destroy() or it will
563 *	block forever.
564 *
565 * Requires:
566 *
567 *\li	'*managerp' is a valid isc_socketmgr_t.
568 *
569 *\li	All sockets managed by this manager are fully detached.
570 *
571 * Ensures:
572 *
573 *\li	*managerp == NULL
574 *
575 *\li	All resources used by the manager have been freed.
576 */
577
578#endif /* ISC_SOCKET_H */
579