/*	$NetBSD: tcpdns.c,v 1.10 2024/02/21 22:52:32 christos Exp $	*/

/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

#include <libgen.h>
#include <unistd.h>
#include <uv.h>

#include <isc/atomic.h>
#include <isc/barrier.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/errno.h>
#include <isc/log.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/quota.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/sockaddr.h>
#include <isc/stdtime.h>
#include <isc/thread.h>
#include <isc/util.h>

#include "netmgr-int.h"
#include "uv-compat.h"

static atomic_uint_fast32_t last_tcpdnsquota_log = 0;

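/*
 * Rate-limit quota log messages: returns true at most once per second,
 * using an atomic exchange of the current timestamp so that concurrent
 * callers race safely.
 */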
static bool
can_log_tcpdns_quota(void) {
	isc_stdtime_t now, last;

	isc_stdtime_get(&now);
	last = atomic_exchange_relaxed(&last_tcpdnsquota_log, now);
	if (now != last) {
		return (true);
	}

	return (false);
}

static isc_result_t
tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);

static void
tcpdns_close_direct(isc_nmsocket_t *sock);

static void
tcpdns_connect_cb(uv_connect_t *uvreq, int status);

static void
tcpdns_connection_cb(uv_stream_t *server, int status);

static void
tcpdns_close_cb(uv_handle_t *uvhandle);

static isc_result_t
accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota);

static void
quota_accept_cb(isc_quota_t *quota, void *sock0);

static void
stop_tcpdns_parent(isc_nmsocket_t *sock);
static void
stop_tcpdns_child(isc_nmsocket_t *sock);

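/*
 * Open the TCP socket, bind it to the local address (if one was given)
 * and start a non-blocking connect to the peer; must run on the socket's
 * worker thread. The caller waiting in isc_nm_tcpdnsconnect() is woken
 * via sock->cond once the result is known.
 */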
static isc_result_t
tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
	isc__networker_t *worker = NULL;
	isc_result_t result = ISC_R_UNSET;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(VALID_UVREQ(req));

	REQUIRE(isc__nm_in_netthread());
	REQUIRE(sock->tid == isc_nm_tid());

	worker = &sock->mgr->workers[sock->tid];

	atomic_store(&sock->connecting, true);

	r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&sock->uv_handle.handle, sock);

	r = uv_timer_init(&worker->loop, &sock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	if (isc__nm_closing(sock)) {
		result = ISC_R_SHUTTINGDOWN;
		goto error;
	}

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r != 0) {
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock, STATID_OPENFAIL);
		goto done;
	}
	isc__nm_incstats(sock, STATID_OPEN);

	if (req->local.length != 0) {
		r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
		/*
		 * In the case of a shared socket, UV_EINVAL will be
		 * returned and needs to be ignored.
		 */
		if (r != 0 && r != UV_EINVAL) {
			isc__nm_incstats(sock, STATID_BINDFAIL);
			goto done;
		}
	}

	isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);

	uv_handle_set_data(&req->uv_req.handle, req);
	r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp,
			   &req->peer.type.sa, tcpdns_connect_cb);
	if (r != 0) {
		isc__nm_incstats(sock, STATID_CONNECTFAIL);
		goto done;
	}

	uv_handle_set_data((uv_handle_t *)&sock->read_timer,
			   &req->uv_req.connect);
	isc__nmsocket_timer_start(sock);

	atomic_store(&sock->connected, true);

done:
	result = isc__nm_uverr2result(r);
error:
	LOCK(&sock->lock);
	sock->result = result;
	SIGNAL(&sock->cond);
	if (!atomic_load(&sock->active)) {
		WAIT(&sock->scond, &sock->lock);
	}
	INSIST(atomic_load(&sock->active));
	UNLOCK(&sock->lock);

	return (result);
}

void
isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsconnect_t *ievent =
		(isc__netievent_tcpdnsconnect_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;
	isc__nm_uvreq_t *req = ievent->req;
	isc_result_t result = ISC_R_SUCCESS;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->parent == NULL);
	REQUIRE(sock->tid == isc_nm_tid());

	result = tcpdns_connect_direct(sock, req);
	if (result != ISC_R_SUCCESS) {
		isc__nmsocket_clearcb(sock);
		isc__nm_connectcb(sock, req, result, true);
		atomic_store(&sock->active, false);
		isc__nm_tcpdns_close(sock);
	}

	/*
	 * The sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&sock);
}

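/*
 * libuv connect callback: verify that the socket is still usable, retry
 * transient EADDRINUSE failures (seen on FreeBSD), and on success record
 * the peer address and invoke the caller's connect callback.
 */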
static void
tcpdns_connect_cb(uv_connect_t *uvreq, int status) {
	isc_result_t result = ISC_R_UNSET;
	isc__nm_uvreq_t *req = NULL;
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
	struct sockaddr_storage ss;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	req = uv_handle_get_data((uv_handle_t *)uvreq);

	REQUIRE(VALID_UVREQ(req));
	REQUIRE(VALID_NMHANDLE(req->handle));

	if (atomic_load(&sock->timedout)) {
		result = ISC_R_TIMEDOUT;
		goto error;
	} else if (isc__nm_closing(sock)) {
		/* Network manager shutting down */
		result = ISC_R_SHUTTINGDOWN;
		goto error;
	} else if (isc__nmsocket_closing(sock)) {
		/* Connection canceled */
		result = ISC_R_CANCELED;
		goto error;
	} else if (status == UV_ETIMEDOUT) {
		/* A timeout status code here indicates a hard error */
		result = ISC_R_TIMEDOUT;
		goto error;
	} else if (status == UV_EADDRINUSE) {
		/*
		 * On FreeBSD the TCP connect() call sometimes results in a
		 * spurious transient EADDRINUSE. Try a few more times before
		 * giving up.
		 */
		if (--req->connect_tries > 0) {
			r = uv_tcp_connect(
				&req->uv_req.connect, &sock->uv_handle.tcp,
				&req->peer.type.sa, tcpdns_connect_cb);
			if (r != 0) {
				result = isc__nm_uverr2result(r);
				goto error;
			}
			return;
		}
		result = isc__nm_uverr2result(status);
		goto error;
	} else if (status != 0) {
		result = isc__nm_uverr2result(status);
		goto error;
	}

	isc__nmsocket_timer_stop(sock);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	isc__nm_incstats(sock, STATID_CONNECT);
	r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto error;
	}

	atomic_store(&sock->connecting, false);

	result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false);

	return;
error:
	isc__nm_failed_connect_cb(sock, req, result, false);
}

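/*
 * Establish a client DNS-over-TCP connection to 'peer'. The connect
 * itself runs on a network thread; this function blocks on sock->cond
 * until the attempt has produced a result, and 'cb' is invoked from the
 * socket's worker thread.
 */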
void
isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
		     isc_nm_cb_t cb, void *cbarg, unsigned int timeout,
		     size_t extrahandlesize) {
	isc_result_t result = ISC_R_SUCCESS;
	isc_nmsocket_t *sock = NULL;
	isc__netievent_tcpdnsconnect_t *ievent = NULL;
	isc__nm_uvreq_t *req = NULL;
	sa_family_t sa_family;

	REQUIRE(VALID_NM(mgr));
	REQUIRE(local != NULL);
	REQUIRE(peer != NULL);

	sa_family = peer->type.sa.sa_family;

	sock = isc_mem_get(mgr->mctx, sizeof(*sock));
	isc__nmsocket_init(sock, mgr, isc_nm_tcpdnssocket, local);

	sock->extrahandlesize = extrahandlesize;
	sock->connect_timeout = timeout;
	sock->result = ISC_R_UNSET;
	atomic_init(&sock->client, true);

	req = isc__nm_uvreq_get(mgr, sock);
	req->cb.connect = cb;
	req->cbarg = cbarg;
	req->peer = *peer;
	req->local = *local;
	req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);

	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock->fd);
	if (result != ISC_R_SUCCESS) {
		if (isc__nm_in_netthread()) {
			sock->tid = isc_nm_tid();
		}
		isc__nmsocket_clearcb(sock);
		isc__nm_connectcb(sock, req, result, true);
		atomic_store(&sock->closed, true);
		isc__nmsocket_detach(&sock);
		return;
	}

	(void)isc__nm_socket_min_mtu(sock->fd, sa_family);
	(void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);

	/* 2 minute timeout */
	result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	ievent = isc__nm_get_netievent_tcpdnsconnect(mgr, sock, req);

	if (isc__nm_in_netthread()) {
		atomic_store(&sock->active, true);
		sock->tid = isc_nm_tid();
		isc__nm_async_tcpdnsconnect(&mgr->workers[sock->tid],
					    (isc__netievent_t *)ievent);
		isc__nm_put_netievent_tcpdnsconnect(mgr, ievent);
	} else {
		atomic_init(&sock->active, false);
		sock->tid = isc_random_uniform(mgr->nlisteners);
		isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
				       (isc__netievent_t *)ievent);
	}

	LOCK(&sock->lock);
	while (sock->result == ISC_R_UNSET) {
		WAIT(&sock->cond, &sock->lock);
	}
	atomic_store(&sock->active, true);
	BROADCAST(&sock->scond);
	UNLOCK(&sock->lock);
}

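/*
 * Create a listening TCP socket with the standard options applied.
 * When load-balanced sockets are enabled, each worker gets its own
 * socket bound to the same address and the kernel distributes incoming
 * connections between them (SO_REUSEPORT-style balancing).
 */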
static uv_os_sock_t
isc__nm_tcpdns_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) {
	isc_result_t result;
	uv_os_sock_t sock;

	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	(void)isc__nm_socket_incoming_cpu(sock);
	(void)isc__nm_socket_v6only(sock, sa_family);

	/* FIXME: set mss */

	result = isc__nm_socket_reuse(sock);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	if (mgr->load_balance_sockets) {
		result = isc__nm_socket_reuse_lb(sock);
		RUNTIME_CHECK(result == ISC_R_SUCCESS);
	}

	return (sock);
}

static void
enqueue_stoplistening(isc_nmsocket_t *sock) {
	isc__netievent_tcpdnsstop_t *ievent =
		isc__nm_get_netievent_tcpdnsstop(sock->mgr, sock);
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);
}

static void
start_tcpdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
		   uv_os_sock_t fd, int tid) {
	isc__netievent_tcpdnslisten_t *ievent = NULL;
	isc_nmsocket_t *csock = &sock->children[tid];

	isc__nmsocket_init(csock, mgr, isc_nm_tcpdnssocket, iface);
	csock->parent = sock;
	csock->accept_cb = sock->accept_cb;
	csock->accept_cbarg = sock->accept_cbarg;
	csock->recv_cb = sock->recv_cb;
	csock->recv_cbarg = sock->recv_cbarg;
	csock->extrahandlesize = sock->extrahandlesize;
	csock->backlog = sock->backlog;
	csock->tid = tid;
	/*
	 * We don't attach to the quota, just assign it, to avoid
	 * increasing the quota unnecessarily.
	 */
	csock->pquota = sock->pquota;
	isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock);

	if (mgr->load_balance_sockets) {
		UNUSED(fd);
		csock->fd = isc__nm_tcpdns_lb_socket(mgr,
						     iface->type.sa.sa_family);
	} else {
		csock->fd = dup(fd);
	}
	REQUIRE(csock->fd >= 0);

	ievent = isc__nm_get_netievent_tcpdnslisten(mgr, csock);
	isc__nm_maybe_enqueue_ievent(&mgr->workers[tid],
				     (isc__netievent_t *)ievent);
}
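
/*
 * Start listening for DNS-over-TCP connections on 'iface'. One child
 * socket is created per network thread; the call blocks until every
 * child has reported the result of its listen attempt.
 */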
isc_result_t
isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface,
		    isc_nm_recv_cb_t recv_cb, void *recv_cbarg,
		    isc_nm_accept_cb_t accept_cb, void *accept_cbarg,
		    size_t extrahandlesize, int backlog, isc_quota_t *quota,
		    isc_nmsocket_t **sockp) {
	isc_result_t result = ISC_R_SUCCESS;
	isc_nmsocket_t *sock = NULL;
	size_t children_size = 0;
	uv_os_sock_t fd = -1;

	REQUIRE(VALID_NM(mgr));

	sock = isc_mem_get(mgr->mctx, sizeof(*sock));
	isc__nmsocket_init(sock, mgr, isc_nm_tcpdnslistener, iface);

	atomic_init(&sock->rchildren, 0);
	sock->nchildren = mgr->nlisteners;
	children_size = sock->nchildren * sizeof(sock->children[0]);
	sock->children = isc_mem_get(mgr->mctx, children_size);
	memset(sock->children, 0, children_size);

	sock->result = ISC_R_UNSET;
	sock->accept_cb = accept_cb;
	sock->accept_cbarg = accept_cbarg;
	sock->recv_cb = recv_cb;
	sock->recv_cbarg = recv_cbarg;
	sock->extrahandlesize = extrahandlesize;
	sock->backlog = backlog;
	sock->pquota = quota;

	sock->tid = 0;
	sock->fd = -1;

	if (!mgr->load_balance_sockets) {
		fd = isc__nm_tcpdns_lb_socket(mgr, iface->type.sa.sa_family);
	}

	isc_barrier_init(&sock->startlistening, sock->nchildren);

	for (size_t i = 0; i < sock->nchildren; i++) {
		if ((int)i == isc_nm_tid()) {
			continue;
		}
		start_tcpdns_child(mgr, iface, sock, fd, i);
	}

	if (isc__nm_in_netthread()) {
		start_tcpdns_child(mgr, iface, sock, fd, isc_nm_tid());
	}

	if (!mgr->load_balance_sockets) {
		isc__nm_closesocket(fd);
	}

	LOCK(&sock->lock);
	while (atomic_load(&sock->rchildren) != sock->nchildren) {
		WAIT(&sock->cond, &sock->lock);
	}
	result = sock->result;
	atomic_store(&sock->active, true);
	UNLOCK(&sock->lock);

	INSIST(result != ISC_R_UNSET);

	if (result == ISC_R_SUCCESS) {
		REQUIRE(atomic_load(&sock->rchildren) == sock->nchildren);
		*sockp = sock;
	} else {
		atomic_store(&sock->active, false);
		enqueue_stoplistening(sock);
		isc_nmsocket_close(&sock);
	}

	return (result);
}

void
isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnslisten_t *ievent =
		(isc__netievent_tcpdnslisten_t *)ev0;
	sa_family_t sa_family;
	int r;
	int flags = 0;
	isc_nmsocket_t *sock = NULL;
	isc_result_t result = ISC_R_UNSET;
	isc_nm_t *mgr = NULL;

	REQUIRE(VALID_NMSOCK(ievent->sock));
	REQUIRE(ievent->sock->tid == isc_nm_tid());
	REQUIRE(VALID_NMSOCK(ievent->sock->parent));

	sock = ievent->sock;
	sa_family = sock->iface.type.sa.sa_family;
	mgr = sock->mgr;

	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->parent != NULL);
	REQUIRE(sock->tid == isc_nm_tid());

	(void)isc__nm_socket_min_mtu(sock->fd, sa_family);
	(void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);

	r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&sock->uv_handle.handle, sock);
	/* This keeps the socket alive after everything else is gone */
	isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });

	r = uv_timer_init(&worker->loop, &sock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);

	LOCK(&sock->parent->lock);

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r < 0) {
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock, STATID_OPENFAIL);
		goto done;
	}
	isc__nm_incstats(sock, STATID_OPEN);

	if (sa_family == AF_INET6) {
		flags = UV_TCP_IPV6ONLY;
	}

	if (mgr->load_balance_sockets) {
		r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
					&sock->iface.type.sa, flags);
		if (r < 0) {
			isc__nm_incstats(sock, STATID_BINDFAIL);
			goto done;
		}
	} else {
		if (sock->parent->fd == -1) {
			r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
						&sock->iface.type.sa, flags);
			if (r < 0) {
				isc__nm_incstats(sock, STATID_BINDFAIL);
				goto done;
			}
			sock->parent->uv_handle.tcp.flags =
				sock->uv_handle.tcp.flags;
			sock->parent->fd = sock->fd;
		} else {
			/* The socket is already bound, just copy the flags */
			sock->uv_handle.tcp.flags =
				sock->parent->uv_handle.tcp.flags;
		}
	}

	isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);

	/*
	 * The callback will run in the same thread that uv_listen() was
	 * called from, so a race with tcpdns_connection_cb() isn't possible.
	 */
	r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog,
		      tcpdns_connection_cb);
	if (r != 0) {
		isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
			      ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
			      "uv_listen failed: %s",
			      isc_result_totext(isc__nm_uverr2result(r)));
		isc__nm_incstats(sock, STATID_BINDFAIL);
		goto done;
	}

	atomic_store(&sock->listening, true);

done:
	result = isc__nm_uverr2result(r);
	if (result != ISC_R_SUCCESS) {
		sock->pquota = NULL;
	}

	atomic_fetch_add(&sock->parent->rchildren, 1);
	if (sock->parent->result == ISC_R_UNSET) {
		sock->parent->result = result;
	}
	SIGNAL(&sock->parent->cond);
	UNLOCK(&sock->parent->lock);

	isc_barrier_wait(&sock->parent->startlistening);
}

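/*
 * libuv connection callback: accept the pending connection, first
 * attaching to the TCP clients quota if one is configured. If the quota
 * is exhausted, the accept is deferred until quota_accept_cb() fires.
 */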
static void
tcpdns_connection_cb(uv_stream_t *server, int status) {
	isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server);
	isc_result_t result;
	isc_quota_t *quota = NULL;

	if (status != 0) {
		result = isc__nm_uverr2result(status);
		goto done;
	}

	REQUIRE(VALID_NMSOCK(ssock));
	REQUIRE(ssock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(ssock)) {
		result = ISC_R_CANCELED;
		goto done;
	}

	if (ssock->pquota != NULL) {
		result = isc_quota_attach_cb(ssock->pquota, &quota,
					     &ssock->quotacb);
		if (result == ISC_R_QUOTA) {
			isc__nm_incstats(ssock, STATID_ACCEPTFAIL);
			goto done;
		}
	}

	result = accept_connection(ssock, quota);
done:
	isc__nm_accept_connection_log(result, can_log_tcpdns_quota());
}

void
isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnslistener);

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true))
	{
		UNREACHABLE();
	}

	if (!isc__nm_in_netthread()) {
		enqueue_stoplistening(sock);
	} else {
		stop_tcpdns_parent(sock);
	}
}

void
isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsstop_t *ievent =
		(isc__netievent_tcpdnsstop_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	if (sock->parent != NULL) {
		stop_tcpdns_child(sock);
		return;
	}

	stop_tcpdns_parent(sock);
}

void
isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(result != ISC_R_SUCCESS);

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);

	if (!sock->recv_read) {
		goto destroy;
	}
	sock->recv_read = false;

	if (sock->recv_cb != NULL) {
		isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
		isc__nmsocket_clearcb(sock);
		isc__nm_readcb(sock, req, result);
	}

destroy:
	isc__nmsocket_prep_destroy(sock);

	/*
	 * We need to detach from the quota after the read callback function
	 * has had a chance to be executed.
	 */
	if (sock->quota != NULL) {
		isc_quota_detach(&sock->quota);
	}
}

void
isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
	REQUIRE(VALID_NMHANDLE(handle));
	REQUIRE(VALID_NMSOCK(handle->sock));

	isc_nmsocket_t *sock = handle->sock;
	isc__netievent_tcpdnsread_t *ievent = NULL;

	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->statichandle == handle);

	sock->recv_cb = cb;
	sock->recv_cbarg = cbarg;
	sock->recv_read = true;
	if (sock->read_timeout == 0) {
		sock->read_timeout =
			(atomic_load(&sock->keepalive)
				 ? atomic_load(&sock->mgr->keepalive)
				 : atomic_load(&sock->mgr->idle));
	}

	ievent = isc__nm_get_netievent_tcpdnsread(sock->mgr, sock);

	/*
	 * This MUST be done asynchronously, no matter which thread we're
	 * in. The callback function for isc_nm_read() often calls
	 * isc_nm_read() again; if we tried to do that synchronously
	 * we'd clash in processbuffer() and grow the stack indefinitely.
	 */
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);

	return;
}

void
isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsread_t *ievent =
		(isc__netievent_tcpdnsread_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;
	isc_result_t result;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(sock)) {
		result = ISC_R_CANCELED;
	} else {
		result = isc__nm_process_sock_buffer(sock);
	}

	if (result != ISC_R_SUCCESS) {
		atomic_store(&sock->reading, true);
		isc__nm_failed_read_cb(sock, result, false);
	}
}

/*
 * Process a single packet from the incoming buffer.
 *
 * Return ISC_R_SUCCESS if a complete message was processed; return
 * ISC_R_NOMORE if there isn't a full message to be processed.
 */
isc_result_t
isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock) {
	size_t len;
	isc__nm_uvreq_t *req = NULL;
	isc_nmhandle_t *handle = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(sock)) {
		return (ISC_R_CANCELED);
	}

	/*
	 * If we don't even have the length yet, we can't do
	 * anything.
	 */
	if (sock->buf_len < 2) {
		return (ISC_R_NOMORE);
	}

	/*
	 * Process the first packet from the buffer, leaving
	 * the rest (if any) for later.
	 */
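	/*
	 * Each DNS message on a TCP stream is preceded by a two-byte,
	 * big-endian length field (RFC 1035, section 4.2.2):
	 *
	 *     [ length (2 octets) | DNS message (length octets) ]
	 */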
	len = ntohs(*(uint16_t *)sock->buf);
	if (len > sock->buf_len - 2) {
		return (ISC_R_NOMORE);
	}

	if (sock->recv_cb == NULL) {
		/*
		 * recv_cb has been cleared - there is
		 * nothing to do
		 */
		return (ISC_R_CANCELED);
	} else if (sock->statichandle == NULL &&
		   atomic_load(&sock->connected) &&
		   !atomic_load(&sock->connecting))
	{
		/*
		 * It seems that some unexpected data (a DNS message) has
		 * arrived while we are wrapping up.
		 */
		return (ISC_R_CANCELED);
	}

	if (sock->client && !sock->recv_read) {
		/*
		 * We are not reading data - stop here.
		 */
		return (ISC_R_CANCELED);
	}

	req = isc__nm_get_read_req(sock, NULL);
	REQUIRE(VALID_UVREQ(req));

	/*
	 * We need to launch isc__nm_resume_processing() after the buffer
	 * has been consumed, thus we must delay detaching the handle.
	 */
	isc_nmhandle_attach(req->handle, &handle);

	/*
	 * The callback will be called synchronously because the
	 * result is ISC_R_SUCCESS, so we don't need to have
	 * the buffer on the heap.
	 */
	req->uvbuf.base = (char *)sock->buf + 2;
	req->uvbuf.len = len;

	/*
	 * If isc__nm_tcpdns_read() was called, it will be satisfied by a
	 * single DNS message in the next call.
	 */
	sock->recv_read = false;

	/*
	 * An assertion failure here means that there's an erroneous
	 * extra nmhandle detach happening in the callback and
	 * isc__nm_resume_processing() is called while we're
	 * processing the buffer.
	 */
	REQUIRE(sock->processing == false);
	sock->processing = true;
	isc__nm_readcb(sock, req, ISC_R_SUCCESS);
	sock->processing = false;

	len += 2;
	sock->buf_len -= len;
	if (sock->buf_len > 0) {
		memmove(sock->buf, sock->buf + len, sock->buf_len);
	}

	isc_nmhandle_detach(&handle);

	return (ISC_R_SUCCESS);
}

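/*
 * libuv read callback: append the newly received bytes to sock->buf and
 * hand the accumulated data to isc__nm_process_sock_buffer(), which
 * dispatches any complete DNS messages found in the buffer.
 */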
void
isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread,
		       const uv_buf_t *buf) {
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream);
	uint8_t *base = NULL;
	size_t len;
	isc_result_t result;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->reading));
	REQUIRE(buf != NULL);

	if (isc__nmsocket_closing(sock)) {
		isc__nm_failed_read_cb(sock, ISC_R_CANCELED, true);
		goto free;
	}

	if (nread < 0) {
		if (nread != UV_EOF) {
			isc__nm_incstats(sock, STATID_RECVFAIL);
		}

		isc__nm_failed_read_cb(sock, isc__nm_uverr2result(nread), true);
		goto free;
	}

	base = (uint8_t *)buf->base;
	len = nread;

	/*
	 * FIXME: We could avoid the memmove here if we knew we had received
	 * a full packet; i.e. we should be smarter, as there are just a few
	 * situations to handle.
	 *
	 * The tcp_alloc_buf should be smarter and point uv_read_start() to
	 * the position where the previous read ended in sock->buf; that way
	 * the data could be read directly into sock->buf.
	 */

	if (sock->buf_len + len > sock->buf_size) {
		isc__nm_alloc_dnsbuf(sock, sock->buf_len + len);
	}
	memmove(sock->buf + sock->buf_len, base, len);
	sock->buf_len += len;

	if (!atomic_load(&sock->client)) {
		sock->read_timeout = atomic_load(&sock->mgr->idle);
	}

	result = isc__nm_process_sock_buffer(sock);
	if (result != ISC_R_SUCCESS) {
		isc__nm_failed_read_cb(sock, result, true);
	}
free:
	if (nread < 0) {
		/*
		 * The buffer may be a null buffer on error.
		 */
		if (buf->base == NULL && buf->len == 0) {
			return;
		}
	}

	isc__nm_free_uvbuf(sock, buf);
}

static void
quota_accept_cb(isc_quota_t *quota, void *sock0) {
	isc_nmsocket_t *sock = (isc_nmsocket_t *)sock0;

	REQUIRE(VALID_NMSOCK(sock));

	/*
	 * Create a tcpdnsaccept event and pass it using the async channel.
	 */

	isc__netievent_tcpdnsaccept_t *ievent =
		isc__nm_get_netievent_tcpdnsaccept(sock->mgr, sock, quota);
	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
				     (isc__netievent_t *)ievent);
}

/*
 * This is called after we get a quota_accept_cb() callback.
 */
void
isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsaccept_t *ievent =
		(isc__netievent_tcpdnsaccept_t *)ev0;
	isc_result_t result;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(ievent->sock));
	REQUIRE(ievent->sock->tid == isc_nm_tid());

	result = accept_connection(ievent->sock, ievent->quota);
	isc__nm_accept_connection_log(result, can_log_tcpdns_quota());
}

static isc_result_t
accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
	isc_nmsocket_t *csock = NULL;
	isc__networker_t *worker = NULL;
	int r;
	isc_result_t result;
	struct sockaddr_storage peer_ss;
	struct sockaddr_storage local_ss;
	isc_sockaddr_t local;
	isc_nmhandle_t *handle = NULL;

	REQUIRE(VALID_NMSOCK(ssock));
	REQUIRE(ssock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(ssock)) {
		if (quota != NULL) {
			isc_quota_detach(&quota);
		}
		return (ISC_R_CANCELED);
	}

	REQUIRE(ssock->accept_cb != NULL);

	csock = isc_mem_get(ssock->mgr->mctx, sizeof(isc_nmsocket_t));
	isc__nmsocket_init(csock, ssock->mgr, isc_nm_tcpdnssocket,
			   &ssock->iface);
	csock->tid = ssock->tid;
	csock->extrahandlesize = ssock->extrahandlesize;
	isc__nmsocket_attach(ssock, &csock->server);
	csock->recv_cb = ssock->recv_cb;
	csock->recv_cbarg = ssock->recv_cbarg;
	csock->quota = quota;
	atomic_init(&csock->accepting, true);

	worker = &csock->mgr->workers[csock->tid];

	r = uv_tcp_init(&worker->loop, &csock->uv_handle.tcp);
	UV_RUNTIME_CHECK(uv_tcp_init, r);
	uv_handle_set_data(&csock->uv_handle.handle, csock);

	r = uv_timer_init(&worker->loop, &csock->read_timer);
	UV_RUNTIME_CHECK(uv_timer_init, r);
	uv_handle_set_data((uv_handle_t *)&csock->read_timer, csock);

	r = uv_accept(&ssock->uv_handle.stream, &csock->uv_handle.stream);
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	r = uv_tcp_getpeername(&csock->uv_handle.tcp,
			       (struct sockaddr *)&peer_ss,
			       &(int){ sizeof(peer_ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&csock->peer,
					   (struct sockaddr *)&peer_ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	r = uv_tcp_getsockname(&csock->uv_handle.tcp,
			       (struct sockaddr *)&local_ss,
			       &(int){ sizeof(local_ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&local,
					   (struct sockaddr *)&local_ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	/*
	 * The handle will be detached either on accept callback failure
	 * or in the read callback.
	 */
	handle = isc__nmhandle_get(csock, NULL, &local);

	result = ssock->accept_cb(handle, ISC_R_SUCCESS, ssock->accept_cbarg);
	if (result != ISC_R_SUCCESS) {
		isc_nmhandle_detach(&handle);
		goto failure;
	}

	atomic_store(&csock->accepting, false);

	isc__nm_incstats(csock, STATID_ACCEPT);

	csock->read_timeout = atomic_load(&csock->mgr->init);

	csock->closehandle_cb = isc__nm_resume_processing;

	/*
	 * We need to keep the handle alive until we fail to read or the
	 * connection is closed by the other side; it will be detached via
	 * prep_destroy() -> tcpdns_close_direct().
	 */
	isc_nmhandle_attach(handle, &csock->recv_handle);
	result = isc__nm_process_sock_buffer(csock);
	if (result != ISC_R_SUCCESS) {
		isc_nmhandle_detach(&csock->recv_handle);
		isc_nmhandle_detach(&handle);
		goto failure;
	}

	/*
	 * The initial timer has been set; update the read timeout for the
	 * next reads.
	 */
	csock->read_timeout = (atomic_load(&csock->keepalive)
				       ? atomic_load(&csock->mgr->keepalive)
				       : atomic_load(&csock->mgr->idle));

	isc_nmhandle_detach(&handle);

	/*
	 * sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&csock);

	return (ISC_R_SUCCESS);

failure:

	atomic_store(&csock->active, false);

	isc__nm_failed_accept_cb(csock, result);

	isc__nmsocket_prep_destroy(csock);

	isc__nmsocket_detach(&csock);

	return (result);
}

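/*
 * Queue 'region' to be sent on the handle's socket, prefixed with the
 * two-byte message length required for DNS over TCP. The write itself
 * happens on the socket's worker thread; 'cb' is invoked when it
 * completes or fails.
 */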
void
isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
		    isc_nm_cb_t cb, void *cbarg) {
	isc__netievent_tcpdnssend_t *ievent = NULL;
	isc__nm_uvreq_t *uvreq = NULL;
	isc_nmsocket_t *sock = NULL;

	REQUIRE(VALID_NMHANDLE(handle));

	sock = handle->sock;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);

	uvreq = isc__nm_uvreq_get(sock->mgr, sock);
	*(uint16_t *)uvreq->tcplen = htons(region->length);
	uvreq->uvbuf.base = (char *)region->base;
	uvreq->uvbuf.len = region->length;

	isc_nmhandle_attach(handle, &uvreq->handle);

	uvreq->cb.send = cb;
	uvreq->cbarg = cbarg;

	ievent = isc__nm_get_netievent_tcpdnssend(sock->mgr, sock, uvreq);
	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
				     (isc__netievent_t *)ievent);

	return;
}

static void
tcpdns_send_cb(uv_write_t *req, int status) {
	isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
	isc_nmsocket_t *sock = NULL;

	REQUIRE(VALID_UVREQ(uvreq));
	REQUIRE(VALID_NMSOCK(uvreq->sock));

	sock = uvreq->sock;

	isc_nm_timer_stop(uvreq->timer);
	isc_nm_timer_detach(&uvreq->timer);

	if (status < 0) {
		isc__nm_incstats(sock, STATID_SENDFAIL);
		isc__nm_failed_send_cb(sock, uvreq,
				       isc__nm_uverr2result(status));
		return;
	}

	isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
}

/*
 * Handle a 'tcpdnssend' async event - send a packet on the socket
 */
void
isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc_result_t result;
	isc__netievent_tcpdnssend_t *ievent =
		(isc__netievent_tcpdnssend_t *)ev0;
	isc_nmsocket_t *sock = NULL;
	isc__nm_uvreq_t *uvreq = NULL;
	int r, nbufs = 2;

	UNUSED(worker);

	REQUIRE(VALID_UVREQ(ievent->req));
	REQUIRE(VALID_NMSOCK(ievent->sock));
	REQUIRE(ievent->sock->type == isc_nm_tcpdnssocket);
	REQUIRE(ievent->sock->tid == isc_nm_tid());

	sock = ievent->sock;
	uvreq = ievent->req;

	if (sock->write_timeout == 0) {
		sock->write_timeout =
			(atomic_load(&sock->keepalive)
				 ? atomic_load(&sock->mgr->keepalive)
				 : atomic_load(&sock->mgr->idle));
	}

	uv_buf_t bufs[2] = { { .base = uvreq->tcplen, .len = 2 },
			     { .base = uvreq->uvbuf.base,
			       .len = uvreq->uvbuf.len } };

	if (isc__nmsocket_closing(sock)) {
		result = ISC_R_CANCELED;
		goto fail;
	}

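	/*
	 * Fast path: try a synchronous, non-blocking write of both the
	 * length prefix and the payload. If only part of the data could
	 * be written (or uv_try_write() is unavailable), fall back to an
	 * asynchronous uv_write() for the remainder.
	 */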
	r = uv_try_write(&sock->uv_handle.stream, bufs, nbufs);

	if (r == (int)(bufs[0].len + bufs[1].len)) {
		/* Wrote everything */
		isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, true);
		return;
	}

	if (r == 1) {
		/* Partial write of DNSMSG length */
		bufs[0].base = uvreq->tcplen + 1;
		bufs[0].len = 1;
	} else if (r > 0) {
		/* Partial write of DNSMSG */
		nbufs = 1;
		bufs[0].base = uvreq->uvbuf.base + (r - 2);
		bufs[0].len = uvreq->uvbuf.len - (r - 2);
	} else if (r == UV_ENOSYS || r == UV_EAGAIN) {
		/* uv_try_write not supported, send asynchronously */
	} else {
		/* error sending data */
		result = isc__nm_uverr2result(r);
		goto fail;
	}

	r = uv_write(&uvreq->uv_req.write, &sock->uv_handle.stream, bufs, nbufs,
		     tcpdns_send_cb);
	if (r < 0) {
		result = isc__nm_uverr2result(r);
		goto fail;
	}

	isc_nm_timer_create(uvreq->handle, isc__nmsocket_writetimeout_cb, uvreq,
			    &uvreq->timer);
	if (sock->write_timeout > 0) {
		isc_nm_timer_start(uvreq->timer, sock->write_timeout);
	}

	return;
fail:
	isc__nm_incstats(sock, STATID_SENDFAIL);
	isc__nm_failed_send_cb(sock, uvreq, result);
}

static void
tcpdns_stop_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	uv_handle_set_data(handle, NULL);

	if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
					    true))
	{
		UNREACHABLE();
	}

	isc__nm_incstats(sock, STATID_CLOSE);

	atomic_store(&sock->listening, false);

	isc__nmsocket_detach(&sock);
}

static void
tcpdns_close_sock(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
					    true))
	{
		UNREACHABLE();
	}

	isc__nm_incstats(sock, STATID_CLOSE);

	if (sock->server != NULL) {
		isc__nmsocket_detach(&sock->server);
	}

	atomic_store(&sock->connected, false);

	isc__nmsocket_prep_destroy(sock);
}

static void
tcpdns_close_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	uv_handle_set_data(handle, NULL);

	tcpdns_close_sock(sock);
}

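/*
 * Called once the read timer handle has been fully closed; only then is
 * it safe to close the TCP handle itself. Listener children are closed
 * via tcpdns_stop_cb(), other sockets via tcpdns_close_cb() (or are
 * finished directly if the TCP handle is already closing).
 */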
static void
read_timer_close_cb(uv_handle_t *timer) {
	isc_nmsocket_t *sock = uv_handle_get_data(timer);
	uv_handle_set_data(timer, NULL);

	REQUIRE(VALID_NMSOCK(sock));

	if (sock->parent) {
		uv_close(&sock->uv_handle.handle, tcpdns_stop_cb);
	} else if (uv_is_closing(&sock->uv_handle.handle)) {
		tcpdns_close_sock(sock);
	} else {
		uv_close(&sock->uv_handle.handle, tcpdns_close_cb);
	}
}

static void
stop_tcpdns_child(isc_nmsocket_t *sock) {
	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->tid == isc_nm_tid());

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true))
	{
		return;
	}

	tcpdns_close_direct(sock);

	atomic_fetch_sub(&sock->parent->rchildren, 1);

	isc_barrier_wait(&sock->parent->stoplistening);
}

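/*
 * Stop a listener and all of its per-thread children. The other
 * workers' children are shut down asynchronously; the stoplistening
 * barrier ensures that all of them have finished before the parent is
 * destroyed.
 */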
static void
stop_tcpdns_parent(isc_nmsocket_t *sock) {
	isc_nmsocket_t *csock = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(sock->type == isc_nm_tcpdnslistener);

	isc_barrier_init(&sock->stoplistening, sock->nchildren);

	for (size_t i = 0; i < sock->nchildren; i++) {
		csock = &sock->children[i];
		REQUIRE(VALID_NMSOCK(csock));

		if ((int)i == isc_nm_tid()) {
			/*
			 * We need to schedule closing the other sockets first
			 */
			continue;
		}

		atomic_store(&csock->active, false);
		enqueue_stoplistening(csock);
	}

	csock = &sock->children[isc_nm_tid()];
	atomic_store(&csock->active, false);
	stop_tcpdns_child(csock);

	atomic_store(&sock->closed, true);
	isc__nmsocket_prep_destroy(sock);
}

static void
tcpdns_close_direct(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	if (sock->quota != NULL) {
		isc_quota_detach(&sock->quota);
	}

	if (sock->recv_handle != NULL) {
		isc_nmhandle_detach(&sock->recv_handle);
	}

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);

	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
	uv_close((uv_handle_t *)&sock->read_timer, read_timer_close_cb);
}

void
isc__nm_tcpdns_close(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(!isc__nmsocket_active(sock));

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true))
	{
		return;
	}

	if (sock->tid == isc_nm_tid()) {
		tcpdns_close_direct(sock);
	} else {
		/*
		 * We need to create an event and pass it using the async
		 * channel.
		 */
		isc__netievent_tcpdnsclose_t *ievent =
			isc__nm_get_netievent_tcpdnsclose(sock->mgr, sock);

		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
				       (isc__netievent_t *)ievent);
	}
}

void
isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsclose_t *ievent =
		(isc__netievent_tcpdnsclose_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	tcpdns_close_direct(sock);
}

static void
tcpdns_close_connect_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	REQUIRE(VALID_NMSOCK(sock));

	REQUIRE(isc__nm_in_netthread());
	REQUIRE(sock->tid == isc_nm_tid());

	isc__nmsocket_prep_destroy(sock);
	isc__nmsocket_detach(&sock);
}

void
isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(sock->type == isc_nm_tcpdnssocket);

	/*
	 * If the socket is active, mark it inactive and
	 * continue. If it isn't active, stop now.
	 */
	if (!isc__nmsocket_deactivate(sock)) {
		return;
	}

	if (atomic_load(&sock->accepting)) {
		return;
	}

	if (atomic_load(&sock->connecting)) {
		isc_nmsocket_t *tsock = NULL;
		isc__nmsocket_attach(sock, &tsock);
		uv_close(&sock->uv_handle.handle, tcpdns_close_connect_cb);
		return;
	}

	if (sock->statichandle != NULL) {
		if (isc__nm_closing(sock)) {
			isc__nm_failed_read_cb(sock, ISC_R_SHUTTINGDOWN, false);
		} else {
			isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
		}
		return;
	}

	/*
	 * Otherwise, we just send the socket to the abyss...
	 */
	if (sock->parent == NULL) {
		isc__nmsocket_prep_destroy(sock);
	}
}

void
isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle) {
	isc_nmsocket_t *sock = NULL;
	isc__netievent_tcpdnscancel_t *ievent = NULL;

	REQUIRE(VALID_NMHANDLE(handle));

	sock = handle->sock;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);

	ievent = isc__nm_get_netievent_tcpdnscancel(sock->mgr, sock, handle);
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);
}

void
isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnscancel_t *ievent =
		(isc__netievent_tcpdnscancel_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	isc__nm_failed_read_cb(sock, ISC_R_EOF, false);
}
