1/*	$NetBSD: netmgr-int.h,v 1.1 2024/02/18 20:57:55 christos Exp $	*/
2
3/*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16#pragma once
17
18#include <unistd.h>
19#include <uv.h>
20
21#include <openssl/err.h>
22#include <openssl/ssl.h>
23
24#include <isc/astack.h>
25#include <isc/atomic.h>
26#include <isc/barrier.h>
27#include <isc/buffer.h>
28#include <isc/condition.h>
29#include <isc/magic.h>
30#include <isc/mem.h>
31#include <isc/netmgr.h>
32#include <isc/quota.h>
33#include <isc/random.h>
34#include <isc/refcount.h>
35#include <isc/region.h>
36#include <isc/result.h>
37#include <isc/rwlock.h>
38#include <isc/sockaddr.h>
39#include <isc/stats.h>
40#include <isc/thread.h>
41#include <isc/util.h>
42
43#include "uv-compat.h"
44
/* Sentinel thread id: socket/handle not yet bound to a netmgr thread. */
#define ISC_NETMGR_TID_UNKNOWN -1

/* Must be different from ISC_NETMGR_TID_UNKNOWN */
#define ISC_NETMGR_NON_INTERLOCKED -2
49
50/*
51 * Receive buffers
52 */
53#if HAVE_DECL_UV_UDP_MMSG_CHUNK
54/*
55 * The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source,
56 * libuv will not receive more that 20 datagrams in a single recvmmsg call.
57 */
58#define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX)
59#else
60/*
61 * A single DNS message size
62 */
63#define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX
64#endif
65
66/*
67 * The TCP receive buffer can fit one maximum sized DNS message plus its size,
68 * the receive buffer here affects TCP, DoT and DoH.
69 */
70#define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
71
72/* Pick the larger buffer */
73#define ISC_NETMGR_RECVBUF_SIZE                                     \
74	(ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \
75		 ? ISC_NETMGR_UDP_RECVBUF_SIZE                      \
76		 : ISC_NETMGR_TCP_RECVBUF_SIZE)
77
78/*
79 * Send buffer
80 */
81#define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
82
83/*%
84 * Regular TCP buffer size.
85 */
86#define NM_REG_BUF 4096
87
88/*%
89 * Larger buffer for when the regular one isn't enough; this will
90 * hold two full DNS packets with lengths.  netmgr receives 64k at
91 * most in TCPDNS connections, so there's no risk of overrun
92 * when using a buffer this size.
93 */
94#define NM_BIG_BUF ISC_NETMGR_TCP_RECVBUF_SIZE * 2
95
96/*
97 * Define NETMGR_TRACE to activate tracing of handles and sockets.
98 * This will impair performance but enables us to quickly determine,
99 * if netmgr resources haven't been cleaned up on shutdown, which ones
100 * are still in use.
101 */
102#ifdef NETMGR_TRACE
103#define TRACE_SIZE 8
104
105void
106isc__nm_dump_active(isc_nm_t *nm);
107
108#if defined(__linux__)
109#include <syscall.h>
110#define gettid() (uint32_t) syscall(SYS_gettid)
111#elif defined(_WIN32)
112#define gettid() (uint32_t) GetCurrentThreadId()
113#else
114#define gettid() (uint32_t) pthread_self()
115#endif
116
#ifdef NETMGR_TRACE_VERBOSE
#define NETMGR_TRACE_LOG(format, ...)                                \
	fprintf(stderr, "%" PRIu32 ":%d:%s:%u:%s:" format, gettid(), \
		isc_nm_tid(), file, line, func, __VA_ARGS__)
#else
/*
 * No-op variant: consume file/line/func so they don't trigger unused
 * warnings.  Wrapped in do { ... } while (0) so the macro expands to a
 * single statement; the previous three bare statements would misbehave
 * in an unbraced if/else call site (only the first (void) cast was
 * guarded by the condition).
 */
#define NETMGR_TRACE_LOG(format, ...) \
	do {                          \
		(void)file;           \
		(void)line;           \
		(void)func;           \
	} while (0)
#endif
127
128#define FLARG_PASS , file, line, func
129#define FLARG                                              \
130	, const char *file __attribute__((unused)),        \
131		unsigned int line __attribute__((unused)), \
132		const char *func __attribute__((unused))
133#define FLARG_IEVENT(ievent)              \
134	const char *file = ievent->file;  \
135	unsigned int line = ievent->line; \
136	const char *func = ievent->func;
137#define FLARG_IEVENT_PASS(ievent) \
138	ievent->file = file;      \
139	ievent->line = line;      \
140	ievent->func = func;
141#define isc__nm_uvreq_get(req, sock) \
142	isc___nm_uvreq_get(req, sock, __FILE__, __LINE__, __func__)
143#define isc__nm_uvreq_put(req, sock) \
144	isc___nm_uvreq_put(req, sock, __FILE__, __LINE__, __func__)
145#define isc__nmsocket_init(sock, mgr, type, iface)                      \
146	isc___nmsocket_init(sock, mgr, type, iface, __FILE__, __LINE__, \
147			    __func__)
148#define isc__nmsocket_put(sockp) \
149	isc___nmsocket_put(sockp, __FILE__, __LINE__, __func__)
150#define isc__nmsocket_attach(sock, target) \
151	isc___nmsocket_attach(sock, target, __FILE__, __LINE__, __func__)
152#define isc__nmsocket_detach(socketp) \
153	isc___nmsocket_detach(socketp, __FILE__, __LINE__, __func__)
154#define isc__nmsocket_close(socketp) \
155	isc___nmsocket_close(socketp, __FILE__, __LINE__, __func__)
156#define isc__nmhandle_get(sock, peer, local) \
157	isc___nmhandle_get(sock, peer, local, __FILE__, __LINE__, __func__)
158#define isc__nmsocket_prep_destroy(sock) \
159	isc___nmsocket_prep_destroy(sock, __FILE__, __LINE__, __func__)
160#else
161#define NETMGR_TRACE_LOG(format, ...)
162
163#define FLARG_PASS
164#define FLARG
165#define FLARG_IEVENT(ievent)
166#define FLARG_IEVENT_PASS(ievent)
167#define isc__nm_uvreq_get(req, sock) isc___nm_uvreq_get(req, sock)
168#define isc__nm_uvreq_put(req, sock) isc___nm_uvreq_put(req, sock)
169#define isc__nmsocket_init(sock, mgr, type, iface) \
170	isc___nmsocket_init(sock, mgr, type, iface)
171#define isc__nmsocket_put(sockp)	   isc___nmsocket_put(sockp)
172#define isc__nmsocket_attach(sock, target) isc___nmsocket_attach(sock, target)
173#define isc__nmsocket_detach(socketp)	   isc___nmsocket_detach(socketp)
174#define isc__nmsocket_close(socketp)	   isc___nmsocket_close(socketp)
175#define isc__nmhandle_get(sock, peer, local) \
176	isc___nmhandle_get(sock, peer, local)
177#define isc__nmsocket_prep_destroy(sock) isc___nmsocket_prep_destroy(sock)
178#endif
179
180/*
181 * Queue types in the order of processing priority.
182 */
183typedef enum {
184	NETIEVENT_PRIORITY = 0,
185	NETIEVENT_PRIVILEGED = 1,
186	NETIEVENT_TASK = 2,
187	NETIEVENT_NORMAL = 3,
188	NETIEVENT_MAX = 4,
189} netievent_type_t;
190
191typedef struct isc__nm_uvreq isc__nm_uvreq_t;
192typedef struct isc__netievent isc__netievent_t;
193
194typedef ISC_LIST(isc__netievent_t) isc__netievent_list_t;
195
196typedef struct ievent {
197	isc_mutex_t lock;
198	isc_condition_t cond;
199	isc__netievent_list_t list;
200} ievent_t;
201
202/*
203 * Single network event loop worker.
204 */
205typedef struct isc__networker {
206	isc_nm_t *mgr;
207	int id;		  /* thread id */
208	uv_loop_t loop;	  /* libuv loop structure */
209	uv_async_t async; /* async channel to send
210			   * data to this networker */
211	bool paused;
212	bool finished;
213	isc_thread_t thread;
214	ievent_t ievents[NETIEVENT_MAX];
215
216	isc_refcount_t references;
217	atomic_int_fast64_t pktcount;
218	char *recvbuf;
219	char *sendbuf;
220	bool recvbuf_inuse;
221} isc__networker_t;
222
223/*
224 * A general handle for a connection bound to a networker.  For UDP
225 * connections we have peer address here, so both TCP and UDP can be
226 * handled with a simple send-like function
227 */
228#define NMHANDLE_MAGIC ISC_MAGIC('N', 'M', 'H', 'D')
229#define VALID_NMHANDLE(t)                      \
230	(ISC_MAGIC_VALID(t, NMHANDLE_MAGIC) && \
231	 atomic_load(&(t)->references) > 0)
232
233typedef void (*isc__nm_closecb)(isc_nmhandle_t *);
234
235struct isc_nmhandle {
236	int magic;
237	isc_refcount_t references;
238
239	/*
240	 * The socket is not 'attached' in the traditional
241	 * reference-counting sense. Instead, we keep all handles in an
242	 * array in the socket object.  This way, we don't have circular
243	 * dependencies and we can close all handles when we're destroying
244	 * the socket.
245	 */
246	isc_nmsocket_t *sock;
247
248	isc_sockaddr_t peer;
249	isc_sockaddr_t local;
250	isc_nm_opaquecb_t doreset; /* reset extra callback, external */
251	isc_nm_opaquecb_t dofree;  /* free extra callback, external */
252#ifdef NETMGR_TRACE
253	void *backtrace[TRACE_SIZE];
254	int backtrace_size;
255	LINK(isc_nmhandle_t) active_link;
256#endif
257	void *opaque;
258	char extra[];
259};
260
261typedef enum isc__netievent_type {
262	netievent_udpconnect,
263	netievent_udpclose,
264	netievent_udpsend,
265	netievent_udpread,
266	netievent_udpcancel,
267
268	netievent_tcpconnect,
269	netievent_tcpclose,
270	netievent_tcpsend,
271	netievent_tcpstartread,
272	netievent_tcppauseread,
273	netievent_tcpaccept,
274	netievent_tcpcancel,
275
276	netievent_tcpdnsaccept,
277	netievent_tcpdnsconnect,
278	netievent_tcpdnsclose,
279	netievent_tcpdnssend,
280	netievent_tcpdnsread,
281	netievent_tcpdnscancel,
282
283	netievent_shutdown,
284	netievent_stop,
285	netievent_pause,
286
287	netievent_connectcb,
288	netievent_readcb,
289	netievent_sendcb,
290
291	netievent_task,
292	netievent_privilegedtask,
293
294	/*
295	 * event type values higher than this will be treated
296	 * as high-priority events, which can be processed
297	 * while the netmgr is pausing or paused.
298	 */
299	netievent_prio = 0xff,
300
301	netievent_udplisten,
302	netievent_udpstop,
303	netievent_tcplisten,
304	netievent_tcpstop,
305	netievent_tcpdnslisten,
306	netievent_tcpdnsstop,
307
308	netievent_resume,
309	netievent_detach,
310	netievent_close,
311} isc__netievent_type;
312
/*
 * Union of the callback signatures a uvreq can carry; which member is
 * valid depends on the operation the request was created for.
 */
typedef union {
	isc_nm_recv_cb_t recv;
	isc_nm_cb_t send;
	isc_nm_cb_t connect;
	isc_nm_accept_cb_t accept;
} isc__nm_cb_t;

/*
 * Wrapper around uv_req_t with 'our' fields in it.  req->data should
 * always point to its parent.  Note that we always allocate more than
 * sizeof(struct) because we make room for different req types;
 */
#define UVREQ_MAGIC    ISC_MAGIC('N', 'M', 'U', 'R')
#define VALID_UVREQ(t) ISC_MAGIC_VALID(t, UVREQ_MAGIC)
327
struct isc__nm_uvreq {
	int magic;	       /* UVREQ_MAGIC */
	isc_nmsocket_t *sock;  /* socket this request operates on */
	isc_nmhandle_t *handle; /* handle associated with this request */
	char tcplen[2];	       /* The TCP DNS message length */
	uv_buf_t uvbuf;	       /* translated isc_region_t, to be
				* sent or received */
	isc_sockaddr_t local;  /* local address */
	isc_sockaddr_t peer;   /* peer address */
	isc__nm_cb_t cb;       /* callback */
	void *cbarg;	       /* callback argument */
	isc_nm_timer_t *timer; /* TCP write timer */

	/*
	 * Room for whichever libuv request/handle type the operation
	 * needs; only one member is in use at a time.
	 */
	union {
		uv_handle_t handle;
		uv_req_t req;
		uv_getaddrinfo_t getaddrinfo;
		uv_getnameinfo_t getnameinfo;
		uv_shutdown_t shutdown;
		uv_write_t write;
		uv_connect_t connect;
		uv_udp_send_t udp_send;
		uv_fs_t fs;
		uv_work_t work;
	} uv_req;
	ISC_LINK(isc__nm_uvreq_t) link; /* list link */
};
355
/*
 * Reference-counted wrapper around a libuv timer, used as the TCP
 * write timer (see isc__nm_uvreq.timer).  NOTE(review): cb/cbarg are
 * presumably invoked on expiry with the associated handle -- confirm
 * in the timer implementation.
 */
struct isc_nm_timer {
	isc_refcount_t references;
	uv_timer_t timer;
	isc_nmhandle_t *handle;
	isc_nm_timer_cb cb;
	void *cbarg;
};
363
void *
isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type);
/*%<
 * Allocate an ievent and set the type.
 */
void
isc__nm_put_netievent(isc_nm_t *mgr, void *ievent);
/*%<
 * Release an ievent obtained from isc__nm_get_netievent().
 */
371
372/*
373 * The macros here are used to simulate the "inheritance" in C, there's the base
374 * netievent structure that contains just its own type and socket, and there are
375 * extended netievent types that also have handles or requests or other data.
376 *
377 * The macros here ensure that:
378 *
379 *   1. every netievent type has matching definition, declaration and
380 *      implementation
381 *
382 *   2. we handle all the netievent types of same subclass the same, e.g. if the
383 *      extended netievent contains handle, we always attach to the handle in
384 *      the ctor and detach from the handle in dtor.
385 *
386 * There are three macros here for each netievent subclass:
387 *
388 *   1. NETIEVENT_*_TYPE(type) creates the typedef for each type; used below in
389 *   this header
390 *
391 *   2. NETIEVENT_*_DECL(type) generates the declaration of the get and put
392 *      functions (isc__nm_get_netievent_* and isc__nm_put_netievent_*); used
393 *      below in this header
394 *
395 *   3. NETIEVENT_*_DEF(type) generates the definition of the functions; used
396 *   either in netmgr.c or matching protocol file (e.g. udp.c, tcp.c, etc.)
397 */
398
399#define NETIEVENT__SOCKET                \
400	isc__netievent_type type;        \
401	ISC_LINK(isc__netievent_t) link; \
402	isc_nmsocket_t *sock;            \
403	const char *file;                \
404	unsigned int line;               \
405	const char *func
406
407typedef struct isc__netievent__socket {
408	NETIEVENT__SOCKET;
409} isc__netievent__socket_t;
410
411#define NETIEVENT_SOCKET_TYPE(type) \
412	typedef isc__netievent__socket_t isc__netievent_##type##_t
413
414#define NETIEVENT_SOCKET_DECL(type)                              \
415	isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
416		isc_nm_t *nm, isc_nmsocket_t *sock);             \
417	void isc__nm_put_netievent_##type(isc_nm_t *nm,          \
418					  isc__netievent_##type##_t *ievent)
419
420#define NETIEVENT_SOCKET_DEF(type)                                             \
421	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
422		isc_nm_t *nm, isc_nmsocket_t *sock) {                          \
423		isc__netievent_##type##_t *ievent =                            \
424			isc__nm_get_netievent(nm, netievent_##type);           \
425		isc__nmsocket_attach(sock, &ievent->sock);                     \
426                                                                               \
427		return (ievent);                                               \
428	}                                                                      \
429                                                                               \
430	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
431					  isc__netievent_##type##_t *ievent) { \
432		isc__nmsocket_detach(&ievent->sock);                           \
433		isc__nm_put_netievent(nm, ievent);                             \
434	}
435
436typedef struct isc__netievent__socket_req {
437	NETIEVENT__SOCKET;
438	isc__nm_uvreq_t *req;
439} isc__netievent__socket_req_t;
440
441#define NETIEVENT_SOCKET_REQ_TYPE(type) \
442	typedef isc__netievent__socket_req_t isc__netievent_##type##_t
443
444#define NETIEVENT_SOCKET_REQ_DECL(type)                                    \
445	isc__netievent_##type##_t *isc__nm_get_netievent_##type(           \
446		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req); \
447	void isc__nm_put_netievent_##type(isc_nm_t *nm,                    \
448					  isc__netievent_##type##_t *ievent)
449
450#define NETIEVENT_SOCKET_REQ_DEF(type)                                         \
451	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
452		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {    \
453		isc__netievent_##type##_t *ievent =                            \
454			isc__nm_get_netievent(nm, netievent_##type);           \
455		isc__nmsocket_attach(sock, &ievent->sock);                     \
456		ievent->req = req;                                             \
457                                                                               \
458		return (ievent);                                               \
459	}                                                                      \
460                                                                               \
461	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
462					  isc__netievent_##type##_t *ievent) { \
463		isc__nmsocket_detach(&ievent->sock);                           \
464		isc__nm_put_netievent(nm, ievent);                             \
465	}
466
467typedef struct isc__netievent__socket_req_result {
468	NETIEVENT__SOCKET;
469	isc__nm_uvreq_t *req;
470	isc_result_t result;
471} isc__netievent__socket_req_result_t;
472
473#define NETIEVENT_SOCKET_REQ_RESULT_TYPE(type) \
474	typedef isc__netievent__socket_req_result_t isc__netievent_##type##_t
475
476#define NETIEVENT_SOCKET_REQ_RESULT_DECL(type)                            \
477	isc__netievent_##type##_t *isc__nm_get_netievent_##type(          \
478		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \
479		isc_result_t result);                                     \
480	void isc__nm_put_netievent_##type(isc_nm_t *nm,                   \
481					  isc__netievent_##type##_t *ievent)
482
483#define NETIEVENT_SOCKET_REQ_RESULT_DEF(type)                                  \
484	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
485		isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req,      \
486		isc_result_t result) {                                         \
487		isc__netievent_##type##_t *ievent =                            \
488			isc__nm_get_netievent(nm, netievent_##type);           \
489		isc__nmsocket_attach(sock, &ievent->sock);                     \
490		ievent->req = req;                                             \
491		ievent->result = result;                                       \
492                                                                               \
493		return (ievent);                                               \
494	}                                                                      \
495                                                                               \
496	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
497					  isc__netievent_##type##_t *ievent) { \
498		isc__nmsocket_detach(&ievent->sock);                           \
499		isc__nm_put_netievent(nm, ievent);                             \
500	}
501
502typedef struct isc__netievent__socket_handle {
503	NETIEVENT__SOCKET;
504	isc_nmhandle_t *handle;
505} isc__netievent__socket_handle_t;
506
507#define NETIEVENT_SOCKET_HANDLE_TYPE(type) \
508	typedef isc__netievent__socket_handle_t isc__netievent_##type##_t
509
510#define NETIEVENT_SOCKET_HANDLE_DECL(type)                                   \
511	isc__netievent_##type##_t *isc__nm_get_netievent_##type(             \
512		isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle); \
513	void isc__nm_put_netievent_##type(isc_nm_t *nm,                      \
514					  isc__netievent_##type##_t *ievent)
515
516#define NETIEVENT_SOCKET_HANDLE_DEF(type)                                      \
517	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
518		isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle) {  \
519		isc__netievent_##type##_t *ievent =                            \
520			isc__nm_get_netievent(nm, netievent_##type);           \
521		isc__nmsocket_attach(sock, &ievent->sock);                     \
522		isc_nmhandle_attach(handle, &ievent->handle);                  \
523                                                                               \
524		return (ievent);                                               \
525	}                                                                      \
526                                                                               \
527	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
528					  isc__netievent_##type##_t *ievent) { \
529		isc__nmsocket_detach(&ievent->sock);                           \
530		isc_nmhandle_detach(&ievent->handle);                          \
531		isc__nm_put_netievent(nm, ievent);                             \
532	}
533
534typedef struct isc__netievent__socket_quota {
535	NETIEVENT__SOCKET;
536	isc_quota_t *quota;
537} isc__netievent__socket_quota_t;
538
539#define NETIEVENT_SOCKET_QUOTA_TYPE(type) \
540	typedef isc__netievent__socket_quota_t isc__netievent_##type##_t
541
542#define NETIEVENT_SOCKET_QUOTA_DECL(type)                                \
543	isc__netievent_##type##_t *isc__nm_get_netievent_##type(         \
544		isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota); \
545	void isc__nm_put_netievent_##type(isc_nm_t *nm,                  \
546					  isc__netievent_##type##_t *ievent)
547
548#define NETIEVENT_SOCKET_QUOTA_DEF(type)                                       \
549	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
550		isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota) {      \
551		isc__netievent_##type##_t *ievent =                            \
552			isc__nm_get_netievent(nm, netievent_##type);           \
553		isc__nmsocket_attach(sock, &ievent->sock);                     \
554		ievent->quota = quota;                                         \
555                                                                               \
556		return (ievent);                                               \
557	}                                                                      \
558                                                                               \
559	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
560					  isc__netievent_##type##_t *ievent) { \
561		isc__nmsocket_detach(&ievent->sock);                           \
562		isc__nm_put_netievent(nm, ievent);                             \
563	}
564
565typedef struct isc__netievent__task {
566	isc__netievent_type type;
567	ISC_LINK(isc__netievent_t) link;
568	isc_task_t *task;
569} isc__netievent__task_t;
570
571#define NETIEVENT_TASK_TYPE(type) \
572	typedef isc__netievent__task_t isc__netievent_##type##_t;
573
574#define NETIEVENT_TASK_DECL(type)                                \
575	isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
576		isc_nm_t *nm, isc_task_t *task);                 \
577	void isc__nm_put_netievent_##type(isc_nm_t *nm,          \
578					  isc__netievent_##type##_t *ievent);
579
580#define NETIEVENT_TASK_DEF(type)                                               \
581	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
582		isc_nm_t *nm, isc_task_t *task) {                              \
583		isc__netievent_##type##_t *ievent =                            \
584			isc__nm_get_netievent(nm, netievent_##type);           \
585		ievent->task = task;                                           \
586                                                                               \
587		return (ievent);                                               \
588	}                                                                      \
589                                                                               \
590	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
591					  isc__netievent_##type##_t *ievent) { \
592		ievent->task = NULL;                                           \
593		isc__nm_put_netievent(nm, ievent);                             \
594	}
595
596typedef struct isc__netievent_udpsend {
597	NETIEVENT__SOCKET;
598	isc_sockaddr_t peer;
599	isc__nm_uvreq_t *req;
600} isc__netievent_udpsend_t;
601
602struct isc__netievent {
603	isc__netievent_type type;
604	ISC_LINK(isc__netievent_t) link;
605};
606
607#define NETIEVENT_TYPE(type) typedef isc__netievent_t isc__netievent_##type##_t
608
609#define NETIEVENT_DECL(type)                                                   \
610	isc__netievent_##type##_t *isc__nm_get_netievent_##type(isc_nm_t *nm); \
611	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
612					  isc__netievent_##type##_t *ievent)
613
614#define NETIEVENT_DEF(type)                                                    \
615	isc__netievent_##type##_t *isc__nm_get_netievent_##type(               \
616		isc_nm_t *nm) {                                                \
617		isc__netievent_##type##_t *ievent =                            \
618			isc__nm_get_netievent(nm, netievent_##type);           \
619                                                                               \
620		return (ievent);                                               \
621	}                                                                      \
622                                                                               \
623	void isc__nm_put_netievent_##type(isc_nm_t *nm,                        \
624					  isc__netievent_##type##_t *ievent) { \
625		isc__nm_put_netievent(nm, ievent);                             \
626	}
627
/*
 * Union big enough to hold any of the netievent subtypes defined above.
 * NOTE(review): presumably used to size netievent allocations so every
 * event type can share one allocation size -- confirm in netmgr.c.
 */
typedef union {
	isc__netievent_t ni;
	isc__netievent__socket_t nis;
	isc__netievent__socket_req_t nisr;
	isc__netievent_udpsend_t nius;
	isc__netievent__socket_quota_t nisq;
} isc__netievent_storage_t;
635
636/*
637 * Work item for a uv_work threadpool.
638 */
639typedef struct isc__nm_work {
640	isc_nm_t *netmgr;
641	uv_work_t req;
642	isc_nm_workcb_t cb;
643	isc_nm_after_workcb_t after_cb;
644	void *data;
645} isc__nm_work_t;
646
647/*
648 * Network manager
649 */
650#define NM_MAGIC    ISC_MAGIC('N', 'E', 'T', 'M')
651#define VALID_NM(t) ISC_MAGIC_VALID(t, NM_MAGIC)
652
653struct isc_nm {
654	int magic;
655	isc_refcount_t references;
656	isc_mem_t *mctx;
657	int nworkers;
658	int nlisteners;
659	isc_mutex_t lock;
660	isc_condition_t wkstatecond;
661	isc_condition_t wkpausecond;
662	isc__networker_t *workers;
663
664	isc_stats_t *stats;
665
666	uint_fast32_t workers_running;
667	atomic_uint_fast32_t workers_paused;
668	atomic_uint_fast32_t maxudp;
669
670	bool load_balance_sockets;
671
672	atomic_bool paused;
673
674	/*
675	 * Active connections are being closed and new connections are
676	 * no longer allowed.
677	 */
678	atomic_bool closing;
679
680	/*
681	 * A worker is actively waiting for other workers, for example to
682	 * stop listening; that means no other thread can do the same thing
683	 * or pause, or we'll deadlock. We have to either re-enqueue our
684	 * event or wait for the other one to finish if we want to pause.
685	 */
686	atomic_int interlocked;
687
688	/*
689	 * Timeout values for TCP connections, corresponding to
	 * tcp-initial-timeout, tcp-idle-timeout, tcp-keepalive-timeout,
691	 * and tcp-advertised-timeout. Note that these are stored in
692	 * milliseconds so they can be used directly with the libuv timer,
693	 * but they are configured in tenths of seconds.
694	 */
695	atomic_uint_fast32_t init;
696	atomic_uint_fast32_t idle;
697	atomic_uint_fast32_t keepalive;
698	atomic_uint_fast32_t advertised;
699
700	isc_barrier_t pausing;
701	isc_barrier_t resuming;
702
703#ifdef NETMGR_TRACE
704	ISC_LIST(isc_nmsocket_t) active_sockets;
705#endif
706};
707
/*
 * Socket types: the plain socket types carry a single channel or
 * connection; the *listener types are parent sockets aggregating
 * per-thread child sockets (see isc_nmsocket's parent/children).
 */
typedef enum isc_nmsocket_type {
	isc_nm_udpsocket,
	isc_nm_udplistener, /* Aggregate of nm_udpsocks */
	isc_nm_tcpsocket,
	isc_nm_tcplistener,
	isc_nm_tcpdnslistener,
	isc_nm_tcpdnssocket,
} isc_nmsocket_type;
716
717/*%
718 * A universal structure for either a single socket or a group of
719 * dup'd/SO_REUSE_PORT-using sockets listening on the same interface.
720 */
721#define NMSOCK_MAGIC	ISC_MAGIC('N', 'M', 'S', 'K')
722#define VALID_NMSOCK(t) ISC_MAGIC_VALID(t, NMSOCK_MAGIC)
723
724/*%
725 * Index into socket stat counter arrays.
726 */
727enum {
728	STATID_OPEN = 0,
729	STATID_OPENFAIL = 1,
730	STATID_CLOSE = 2,
731	STATID_BINDFAIL = 3,
732	STATID_CONNECTFAIL = 4,
733	STATID_CONNECT = 5,
734	STATID_ACCEPTFAIL = 6,
735	STATID_ACCEPT = 7,
736	STATID_SENDFAIL = 8,
737	STATID_RECVFAIL = 9,
738	STATID_ACTIVE = 10
739};
740
741typedef void (*isc_nm_closehandlecb_t)(void *arg);
742/*%<
743 * Opaque callback function, used for isc_nmhandle 'reset' and 'free'
744 * callbacks.
745 */
746
747struct isc_nmsocket {
748	/*% Unlocked, RO */
749	int magic;
750	int tid;
751	isc_nmsocket_type type;
752	isc_nm_t *mgr;
753
754	/*% Parent socket for multithreaded listeners */
755	isc_nmsocket_t *parent;
756	/*% Listener socket this connection was accepted on */
757	isc_nmsocket_t *listener;
758	/*% Self socket */
759	isc_nmsocket_t *self;
760
761	isc_barrier_t startlistening;
762	isc_barrier_t stoplistening;
763
764	/*%
765	 * quota is the TCP client, attached when a TCP connection
766	 * is established. pquota is a non-attached pointer to the
767	 * TCP client quota, stored in listening sockets but only
768	 * attached in connected sockets.
769	 */
770	isc_quota_t *quota;
771	isc_quota_t *pquota;
772	isc_quota_cb_t quotacb;
773
774	/*%
775	 * Socket statistics
776	 */
777	const isc_statscounter_t *statsindex;
778
779	/*%
780	 * TCP read/connect timeout timers.
781	 */
782	uv_timer_t read_timer;
783	uint64_t read_timeout;
784	uint64_t connect_timeout;
785
786	/*%
787	 * TCP write timeout timer.
788	 */
789	uint64_t write_timeout;
790
791	/*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */
792	isc_nmsocket_t *outer;
793
794	/*% server socket for connections */
795	isc_nmsocket_t *server;
796
797	/*% Child sockets for multi-socket setups */
798	isc_nmsocket_t *children;
799	uint_fast32_t nchildren;
800	isc_sockaddr_t iface;
801	isc_nmhandle_t *statichandle;
802	isc_nmhandle_t *outerhandle;
803
804	/*% Extra data allocated at the end of each isc_nmhandle_t */
805	size_t extrahandlesize;
806
807	/*% TCP backlog */
808	int backlog;
809
810	/*% libuv data */
811	uv_os_sock_t fd;
812	union uv_any_handle uv_handle;
813
814	/*% Peer address */
815	isc_sockaddr_t peer;
816
817	/* Atomic */
818	/*% Number of running (e.g. listening) child sockets */
819	atomic_uint_fast32_t rchildren;
820
821	/*%
822	 * Socket is active if it's listening, working, etc. If it's
823	 * closing, then it doesn't make a sense, for example, to
824	 * push handles or reqs for reuse.
825	 */
826	atomic_bool active;
827	atomic_bool destroying;
828
829	/*%
830	 * Socket is closed if it's not active and all the possible
831	 * callbacks were fired, there are no active handles, etc.
832	 * If active==false but closed==false, that means the socket
833	 * is closing.
834	 */
835	atomic_bool closing;
836	atomic_bool closed;
837	atomic_bool listening;
838	atomic_bool connecting;
839	atomic_bool connected;
840	bool accepting;
841	bool reading;
842	atomic_bool timedout;
843	isc_refcount_t references;
844
845	/*%
846	 * Established an outgoing connection, as client not server.
847	 */
848	atomic_bool client;
849
850	/*%
851	 * TCPDNS socket has been set not to pipeline.
852	 */
853	atomic_bool sequential;
854
855	/*%
856	 * The socket is processing read callback, this is guard to not read
857	 * data before the readcb is back.
858	 */
859	bool processing;
860
861	/*%
862	 * A TCP socket has had isc_nm_pauseread() called.
863	 */
864	atomic_bool readpaused;
865
866	/*%
867	 * A TCP or TCPDNS socket has been set to use the keepalive
868	 * timeout instead of the default idle timeout.
869	 */
870	atomic_bool keepalive;
871
872	/*%
873	 * 'spare' handles for that can be reused to avoid allocations,
874	 * for UDP.
875	 */
876	isc_astack_t *inactivehandles;
877	isc_astack_t *inactivereqs;
878
879	/*%
880	 * Used to wait for TCP listening events to complete, and
881	 * for the number of running children to reach zero during
882	 * shutdown.
883	 *
884	 * We use two condition variables to prevent the race where the netmgr
885	 * threads would be able to finish and destroy the socket before it's
886	 * unlocked by the isc_nm_listen<proto>() function.  So, the flow is as
887	 * follows:
888	 *
	 *   1. parent thread creates all children sockets and passes them to
	 *      netthreads, looks at the signaling variable and WAIT(cond) until
	 *      the children are done initializing
892	 *
893	 *   2. the events get picked by netthreads, calls the libuv API (and
894	 *      either succeeds or fails) and WAIT(scond) until all other
895	 *      children sockets in netthreads are initialized and the listening
896	 *      socket lock is unlocked
897	 *
	 *   3. the control is given back to the parent thread which now either
	 *      returns success or shuts down the listener if an error has
	 *      occurred in the child netthread
901	 *
902	 * NOTE: The other approach would be doing an extra attach to the parent
903	 * listening socket, and then detach it in the parent thread, but that
904	 * breaks the promise that once the libuv socket is initialized on the
905	 * nmsocket, the nmsocket needs to be handled only by matching
	 * netthread, so in fact that would add complexity in a way that
	 * isc__nmsocket_detach would have to be converted to use an
	 * asynchronous netievent.
909	 */
910	isc_mutex_t lock;
911	isc_condition_t cond;
912	isc_condition_t scond;
913
914	/*%
915	 * Used to pass a result back from listen or connect events.
916	 */
917	isc_result_t result;
918
919	/*%
920	 * Current number of active handles.
921	 */
922	atomic_int_fast32_t ah;
923
924	/*% Buffer for TCPDNS processing */
925	size_t buf_size;
926	size_t buf_len;
927	unsigned char *buf;
928
929	/*%
930	 * This function will be called with handle->sock
931	 * as the argument whenever a handle's references drop
932	 * to zero, after its reset callback has been called.
933	 */
934	isc_nm_closehandlecb_t closehandle_cb;
935
936	isc_nmhandle_t *recv_handle;
937	isc_nm_recv_cb_t recv_cb;
938	void *recv_cbarg;
939	bool recv_read;
940
941	isc_nm_cb_t connect_cb;
942	void *connect_cbarg;
943
944	isc_nm_accept_cb_t accept_cb;
945	void *accept_cbarg;
946
947	atomic_int_fast32_t active_child_connections;
948
949#ifdef NETMGR_TRACE
950	void *backtrace[TRACE_SIZE];
951	int backtrace_size;
952	LINK(isc_nmsocket_t) active_link;
953	ISC_LIST(isc_nmhandle_t) active_handles;
954#endif
955};
956
957bool
958isc__nm_in_netthread(void);
959/*%
960 * Returns 'true' if we're in the network thread.
961 */
962
963void
964isc__nm_maybe_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
965/*%<
966 * If the caller is already in the matching nmthread, process the netievent
967 * directly, if not enqueue using isc__nm_enqueue_ievent().
968 */
969
970void
971isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
972/*%<
 * Enqueue an ievent onto a specific worker queue. (This is the only
 * safe way to use an isc__networker_t from another thread.)
975 */
976
977void
978isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf);
979/*%<
980 * Free a buffer allocated for a receive operation.
981 *
 * Note that as currently implemented, this doesn't actually
 * free anything; it just marks the isc__networker's UDP receive
 * buffer as "not in use".
985 */
986
987isc_nmhandle_t *
988isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
989		   isc_sockaddr_t *local FLARG);
990/*%<
991 * Get a handle for the socket 'sock', allocating a new one
992 * if there isn't one available in 'sock->inactivehandles'.
993 *
994 * If 'peer' is not NULL, set the handle's peer address to 'peer',
995 * otherwise set it to 'sock->peer'.
996 *
997 * If 'local' is not NULL, set the handle's local address to 'local',
998 * otherwise set it to 'sock->iface->addr'.
999 *
1000 * 'sock' will be attached to 'handle->sock'. The caller may need
1001 * to detach the socket afterward.
1002 */
1003
1004isc__nm_uvreq_t *
1005isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG);
1006/*%<
1007 * Get a UV request structure for the socket 'sock', allocating a
1008 * new one if there isn't one available in 'sock->inactivereqs'.
1009 */
1010
1011void
1012isc___nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock FLARG);
1013/*%<
1014 * Completes the use of a UV request structure, setting '*req' to NULL.
1015 *
1016 * The UV request is pushed onto the 'sock->inactivereqs' stack or,
1017 * if that doesn't work, freed.
1018 */
1019
1020void
1021isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
1022		    isc_sockaddr_t *iface FLARG);
1023/*%<
1024 * Initialize socket 'sock', attach it to 'mgr', and set it to type 'type'
1025 * and its interface to 'iface'.
1026 */
1027
1028void
1029isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG);
1030/*%<
1031 * Attach to a socket, increasing refcount
1032 */
1033
1034void
1035isc___nmsocket_detach(isc_nmsocket_t **socketp FLARG);
1036/*%<
1037 * Detach from socket, decreasing refcount and possibly destroying the
1038 * socket if it's no longer referenced.
1039 */
1040
1041void
1042isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG);
1043/*%<
 * Mark 'sock' as inactive, close it if necessary, and destroy it
1045 * if there are no remaining references or active handles.
1046 */
1047
1048void
1049isc__nmsocket_shutdown(isc_nmsocket_t *sock);
1050/*%<
1051 * Initiate the socket shutdown which actively calls the active
1052 * callbacks.
1053 */
1054
1055bool
1056isc__nmsocket_active(isc_nmsocket_t *sock);
1057/*%<
1058 * Determine whether 'sock' is active by checking 'sock->active'
1059 * or, for child sockets, 'sock->parent->active'.
1060 */
1061
1062bool
1063isc__nmsocket_deactivate(isc_nmsocket_t *sock);
1064/*%<
1065 * @brief Deactivate active socket
1066 *
 * Atomically deactivate the socket by setting @p sock->active or, for child
1068 * sockets, @p sock->parent->active to @c false
1069 *
1070 * @param[in] sock - valid nmsocket
1071 * @return @c false if the socket was already inactive, @c true otherwise
1072 */
1073
1074void
1075isc__nmsocket_clearcb(isc_nmsocket_t *sock);
1076/*%<
1077 * Clear the recv and accept callbacks in 'sock'.
1078 */
1079
1080void
1081isc__nmsocket_timer_stop(isc_nmsocket_t *sock);
1082void
1083isc__nmsocket_timer_start(isc_nmsocket_t *sock);
1084void
1085isc__nmsocket_timer_restart(isc_nmsocket_t *sock);
1086bool
1087isc__nmsocket_timer_running(isc_nmsocket_t *sock);
1088/*%<
1089 * Start/stop/restart/check the timeout on the socket
1090 */
1091
1092void
1093isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1094		  isc_result_t eresult, bool async);
1095
1096void
1097isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0);
1098/*%<
1099 * Issue a connect callback on the socket, used to call the callback
1100 */
1101
1102void
1103isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1104	       isc_result_t eresult);
1105void
1106isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0);
1107
1108/*%<
1109 * Issue a read callback on the socket, used to call the callback
1110 * on failed conditions when the event can't be scheduled on the uv loop.
1111 *
1112 */
1113
1114void
1115isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
1116	       isc_result_t eresult, bool async);
1117void
1118isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0);
1119/*%<
1120 * Issue a write callback on the socket, used to call the callback
1121 * on failed conditions when the event can't be scheduled on the uv loop.
1122 */
1123
1124void
1125isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0);
1126/*%<
1127 * Walk through all uv handles, get the underlying sockets and issue
1128 * close on them.
1129 */
1130
1131void
1132isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region,
1133		 isc_nm_cb_t cb, void *cbarg);
1134/*%<
1135 * Back-end implementation of isc_nm_send() for UDP handles.
1136 */
1137
1138void
1139isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
/*%<
1141 * Back-end implementation of isc_nm_read() for UDP handles.
1142 */
1143
1144void
1145isc__nm_udp_close(isc_nmsocket_t *sock);
1146/*%<
1147 * Close a UDP socket.
1148 */
1149
1150void
1151isc__nm_udp_cancelread(isc_nmhandle_t *handle);
1152/*%<
1153 * Stop reading on a connected UDP handle.
1154 */
1155
1156void
1157isc__nm_udp_shutdown(isc_nmsocket_t *sock);
1158/*%<
1159 * Called during the shutdown process to close and clean up connected
1160 * sockets.
1161 */
1162
1163void
1164isc__nm_udp_stoplistening(isc_nmsocket_t *sock);
1165/*%<
1166 * Stop listening on 'sock'.
1167 */
1168
1169void
1170isc__nm_udp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
1171/*%<
1172 * Set or clear the recv timeout for the UDP socket associated with 'handle'.
1173 */
1174
1175void
1176isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0);
1177void
1178isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1179void
1180isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0);
1181void
1182isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0);
1183void
1184isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0);
1185void
1186isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
1187void
1188isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0);
1189/*%<
 * Callback handlers for asynchronous UDP events (listen, connect, stop,
 * send, read, cancel, close).
1191 */
1192
1193void
1194isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
1195		 isc_nm_cb_t cb, void *cbarg);
1196/*%<
1197 * Back-end implementation of isc_nm_send() for TCP handles.
1198 */
1199
1200void
1201isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
/*%<
1203 * Back-end implementation of isc_nm_read() for TCP handles.
1204 */
1205
1206void
1207isc__nm_tcp_close(isc_nmsocket_t *sock);
1208/*%<
1209 * Close a TCP socket.
1210 */
1211void
1212isc__nm_tcp_pauseread(isc_nmhandle_t *handle);
1213/*%<
1214 * Pause reading on this handle, while still remembering the callback.
1215 */
1216
1217void
1218isc__nm_tcp_resumeread(isc_nmhandle_t *handle);
1219/*%<
1220 * Resume reading from socket.
1221 *
1222 */
1223
1224void
1225isc__nm_tcp_shutdown(isc_nmsocket_t *sock);
1226/*%<
1227 * Called during the shutdown process to close and clean up connected
1228 * sockets.
1229 */
1230
1231void
1232isc__nm_tcp_cancelread(isc_nmhandle_t *handle);
1233/*%<
1234 * Stop reading on a connected TCP handle.
1235 */
1236
1237void
1238isc__nm_tcp_stoplistening(isc_nmsocket_t *sock);
1239/*%<
1240 * Stop listening on 'sock'.
1241 */
1242
1243int_fast32_t
1244isc__nm_tcp_listener_nactive(isc_nmsocket_t *sock);
1245/*%<
1246 * Returns the number of active connections for the TCP listener socket.
1247 */
1248
1249void
1250isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
1251/*%<
1252 * Set the read timeout for the TCP socket associated with 'handle'.
1253 */
1254
1255void
1256isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1257void
1258isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0);
1259void
1260isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1261void
1262isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0);
1263void
1264isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0);
1265void
1266isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ev0);
1267void
1268isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ev0);
1269void
1270isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0);
1271void
1272isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0);
1273void
1274isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
1275void
1276isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ev0);
1277/*%<
1278 * Callback handlers for asynchronous TCP events (connect, listen,
1279 * stoplisten, send, read, pause, close).
1280 */
1281
1282void
1283isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1284void
1285isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1286void
1287isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
1288
1289void
1290isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
1291		    isc_nm_cb_t cb, void *cbarg);
1292/*%<
1293 * Back-end implementation of isc_nm_send() for TCPDNS handles.
1294 */
1295
1296void
1297isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock);
1298
1299void
1300isc__nm_tcpdns_close(isc_nmsocket_t *sock);
1301/*%<
1302 * Close a TCPDNS socket.
1303 */
1304
1305void
1306isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock);
1307/*%<
1308 * Stop listening on 'sock'.
1309 */
1310
1311void
1312isc__nm_tcpdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
1313/*%<
1314 * Set the read timeout and reset the timer for the TCPDNS socket
1315 * associated with 'handle', and the TCP socket it wraps around.
1316 */
1317
1318void
1319isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
1320void
1321isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0);
1322void
1323isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
1324void
1325isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0);
1326void
1327isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0);
1328void
1329isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0);
1330void
1331isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0);
1332void
1333isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0);
1334/*%<
1335 * Callback handlers for asynchronous TCPDNS events.
1336 */
1337
1338void
1339isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
/*%<
1341 * Back-end implementation of isc_nm_read() for TCPDNS handles.
1342 */
1343
1344void
1345isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle);
1346/*%<
1347 * Stop reading on a connected TCPDNS handle.
1348 */
1349
1350#define isc__nm_uverr2result(x) \
1351	isc___nm_uverr2result(x, true, __FILE__, __LINE__, __func__)
1352isc_result_t
1353isc___nm_uverr2result(int uverr, bool dolog, const char *file,
1354		      unsigned int line, const char *func);
1355/*%<
1356 * Convert a libuv error value into an isc_result_t.  The
1357 * list of supported error values is not complete; new users
1358 * of this function should add any expected errors that are
1359 * not already there.
1360 */
1361
1362bool
1363isc__nm_acquire_interlocked(isc_nm_t *mgr);
1364/*%<
1365 * Try to acquire interlocked state; return true if successful.
1366 */
1367
1368void
1369isc__nm_drop_interlocked(isc_nm_t *mgr);
1370/*%<
1371 * Drop interlocked state; signal waiters.
1372 */
1373
1374void
1375isc__nm_acquire_interlocked_force(isc_nm_t *mgr);
1376/*%<
1377 * Actively wait for interlocked state.
1378 */
1379
1380void
1381isc__nm_incstats(isc_nm_t *mgr, isc_statscounter_t counterid);
1382/*%<
1383 * Increment socket-related statistics counters.
1384 */
1385
1386void
1387isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid);
1388/*%<
1389 * Decrement socket-related statistics counters.
1390 */
1391
1392isc_result_t
1393isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp);
1394/*%<
1395 * Platform independent socket() version
1396 */
1397
1398void
1399isc__nm_closesocket(uv_os_sock_t sock);
1400/*%<
1401 * Platform independent closesocket() version
1402 */
1403
1404isc_result_t
1405isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family);
1406/*%<
1407 * Set the IP_FREEBIND (or equivalent) socket option on the uv_handle
1408 */
1409
1410isc_result_t
1411isc__nm_socket_reuse(uv_os_sock_t fd);
1412/*%<
1413 * Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd
1414 */
1415
1416isc_result_t
1417isc__nm_socket_reuse_lb(uv_os_sock_t fd);
1418/*%<
1419 * Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd
1420 */
1421
1422isc_result_t
1423isc__nm_socket_incoming_cpu(uv_os_sock_t fd);
1424/*%<
1425 * Set the SO_INCOMING_CPU socket option on the fd if available
1426 */
1427
1428isc_result_t
1429isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family);
1430/*%<
1431 * Disable the Path MTU Discovery, either by disabling IP(V6)_DONTFRAG socket
1432 * option, or setting the IP(V6)_MTU_DISCOVER socket option to IP_PMTUDISC_OMIT
1433 */
1434
1435isc_result_t
1436isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms);
1437/*%<
1438 * Set the connection timeout in milliseconds, on non-Linux platforms,
1439 * the minimum value must be at least 1000 (1 second).
1440 */
1441
1442isc_result_t
1443isc__nm_socket_tcp_nodelay(uv_os_sock_t fd);
1444/*%<
1445 * Disables Nagle's algorithm on a TCP socket (sets TCP_NODELAY).
1446 */
1447
1448/*
1449 * typedef all the netievent types
1450 */
1451
1452NETIEVENT_SOCKET_TYPE(close);
1453NETIEVENT_SOCKET_TYPE(tcpclose);
1454NETIEVENT_SOCKET_TYPE(tcplisten);
1455NETIEVENT_SOCKET_TYPE(tcppauseread);
1456NETIEVENT_SOCKET_TYPE(tcpstop);
1457NETIEVENT_SOCKET_TYPE(udpclose);
1458NETIEVENT_SOCKET_TYPE(udplisten);
1459NETIEVENT_SOCKET_TYPE(udpread);
1460/* NETIEVENT_SOCKET_TYPE(udpsend); */ /* unique type, defined independently */
1461NETIEVENT_SOCKET_TYPE(udpstop);
1462
1463NETIEVENT_SOCKET_TYPE(tcpdnsclose);
1464NETIEVENT_SOCKET_TYPE(tcpdnsread);
1465NETIEVENT_SOCKET_TYPE(tcpdnsstop);
1466NETIEVENT_SOCKET_TYPE(tcpdnslisten);
1467NETIEVENT_SOCKET_REQ_TYPE(tcpdnsconnect);
1468NETIEVENT_SOCKET_REQ_TYPE(tcpdnssend);
1469NETIEVENT_SOCKET_HANDLE_TYPE(tcpdnscancel);
1470NETIEVENT_SOCKET_QUOTA_TYPE(tcpdnsaccept);
1471
1472NETIEVENT_SOCKET_REQ_TYPE(tcpconnect);
1473NETIEVENT_SOCKET_REQ_TYPE(tcpsend);
1474NETIEVENT_SOCKET_TYPE(tcpstartread);
1475NETIEVENT_SOCKET_REQ_TYPE(udpconnect);
1476
1477NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb);
1478NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb);
1479NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb);
1480
1481NETIEVENT_SOCKET_HANDLE_TYPE(detach);
1482NETIEVENT_SOCKET_HANDLE_TYPE(tcpcancel);
1483NETIEVENT_SOCKET_HANDLE_TYPE(udpcancel);
1484
1485NETIEVENT_SOCKET_QUOTA_TYPE(tcpaccept);
1486
1487NETIEVENT_TYPE(pause);
1488NETIEVENT_TYPE(resume);
1489NETIEVENT_TYPE(shutdown);
1490NETIEVENT_TYPE(stop);
1491
1492NETIEVENT_TASK_TYPE(task);
1493NETIEVENT_TASK_TYPE(privilegedtask);
1494
1495/* Now declared the helper functions */
1496
1497NETIEVENT_SOCKET_DECL(close);
1498NETIEVENT_SOCKET_DECL(tcpclose);
1499NETIEVENT_SOCKET_DECL(tcplisten);
1500NETIEVENT_SOCKET_DECL(tcppauseread);
1501NETIEVENT_SOCKET_DECL(tcpstartread);
1502NETIEVENT_SOCKET_DECL(tcpstop);
1503NETIEVENT_SOCKET_DECL(udpclose);
1504NETIEVENT_SOCKET_DECL(udplisten);
1505NETIEVENT_SOCKET_DECL(udpread);
1506NETIEVENT_SOCKET_DECL(udpsend);
1507NETIEVENT_SOCKET_DECL(udpstop);
1508
1509NETIEVENT_SOCKET_DECL(tcpdnsclose);
1510NETIEVENT_SOCKET_DECL(tcpdnsread);
1511NETIEVENT_SOCKET_DECL(tcpdnsstop);
1512NETIEVENT_SOCKET_DECL(tcpdnslisten);
1513NETIEVENT_SOCKET_REQ_DECL(tcpdnsconnect);
1514NETIEVENT_SOCKET_REQ_DECL(tcpdnssend);
1515NETIEVENT_SOCKET_HANDLE_DECL(tcpdnscancel);
1516NETIEVENT_SOCKET_QUOTA_DECL(tcpdnsaccept);
1517
1518NETIEVENT_SOCKET_REQ_DECL(tcpconnect);
1519NETIEVENT_SOCKET_REQ_DECL(tcpsend);
1520NETIEVENT_SOCKET_REQ_DECL(udpconnect);
1521
1522NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb);
1523NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb);
1524NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb);
1525
1526NETIEVENT_SOCKET_HANDLE_DECL(udpcancel);
1527NETIEVENT_SOCKET_HANDLE_DECL(tcpcancel);
1528NETIEVENT_SOCKET_DECL(detach);
1529
1530NETIEVENT_SOCKET_QUOTA_DECL(tcpaccept);
1531
1532NETIEVENT_DECL(pause);
1533NETIEVENT_DECL(resume);
1534NETIEVENT_DECL(shutdown);
1535NETIEVENT_DECL(stop);
1536
1537NETIEVENT_TASK_DECL(task);
1538NETIEVENT_TASK_DECL(privilegedtask);
1539
1540void
1541isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
1542void
1543isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
1544void
1545isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
1546
1547isc_result_t
1548isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock);
1549
1550isc__nm_uvreq_t *
1551isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr);
1552
1553void
1554isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf);
1555
1556void
1557isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
1558		    const struct sockaddr *addr, unsigned flags);
1559void
1560isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
1561void
1562isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
1563
1564isc_result_t
1565isc__nm_start_reading(isc_nmsocket_t *sock);
1566void
1567isc__nm_stop_reading(isc_nmsocket_t *sock);
1568isc_result_t
1569isc__nm_process_sock_buffer(isc_nmsocket_t *sock);
1570void
1571isc__nm_resume_processing(void *arg);
1572bool
1573isc__nmsocket_closing(isc_nmsocket_t *sock);
1574bool
1575isc__nm_closing(isc_nmsocket_t *sock);
1576
1577void
1578isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len);
1579
1580void
1581isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
1582		       isc_result_t eresult);
1583void
1584isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult);
1585void
1586isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
1587			  isc_result_t eresult, bool async);
1588void
1589isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async);
1590
1591void
1592isc__nm_accept_connection_log(isc_result_t result, bool can_log_quota);
1593
1594/*
1595 * Timeout callbacks
1596 */
1597void
1598isc__nmsocket_connecttimeout_cb(uv_timer_t *timer);
1599void
1600isc__nmsocket_readtimeout_cb(uv_timer_t *timer);
1601void
1602isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult);
1603
/*%
 * Maximum number of simultaneous handles in flight supported for a single
 * connected TCPDNS socket. This value was chosen arbitrarily, and may be
 * changed in the future.
 */
#define STREAM_CLIENTS_PER_CONN 23
1611
/*%
 * Abort via isc_error_fatal() when 'ret', the status returned by the
 * libuv call 'func', is nonzero.  The expansion is wrapped in
 * do { ... } while (0) so the macro behaves as a single statement and
 * is safe inside an unbraced if/else; '(ret)' is parenthesized so
 * expression arguments compare correctly.  NOTE: 'ret' is evaluated
 * twice (condition and uv_strerror()), so pass a plain variable.
 */
#define UV_RUNTIME_CHECK(func, ret)                                            \
	do {                                                                   \
		if ((ret) != 0) {                                              \
			isc_error_fatal(__FILE__, __LINE__, "%s failed: %s\n", \
					#func, uv_strerror(ret));              \
		}                                                              \
	} while (0)
1617