dispatch.c revision 182645
1/*
2 * Copyright (C) 2004-2008  Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003  Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/* $Id: dispatch.c,v 1.116.18.19.12.5 2008/07/23 23:16:43 marka Exp $ */
19
20/*! \file */
21
22#include <config.h>
23
24#include <stdlib.h>
25#include <sys/types.h>
26#include <unistd.h>
27
28#include <isc/entropy.h>
29#include <isc/mem.h>
30#include <isc/mutex.h>
31#include <isc/print.h>
32#include <isc/random.h>
33#include <isc/string.h>
34#include <isc/task.h>
35#include <isc/time.h>
36#include <isc/util.h>
37
38#include <dns/acl.h>
39#include <dns/dispatch.h>
40#include <dns/events.h>
41#include <dns/log.h>
42#include <dns/message.h>
43#include <dns/portlist.h>
44#include <dns/tcpmsg.h>
45#include <dns/types.h>
46
47typedef ISC_LIST(dns_dispentry_t)	dns_displist_t;
48
49typedef struct dns_qid {
50	unsigned int	magic;
51	unsigned int	qid_nbuckets;	/*%< hash table size */
52	unsigned int	qid_increment;	/*%< id increment on collision */
53	isc_mutex_t	lock;
54	dns_displist_t	*qid_table;	/*%< the table itself */
55} dns_qid_t;
56
57/* ARC4 Random generator state */
58typedef struct arc4ctx {
59	isc_uint8_t	i;
60	isc_uint8_t	j;
61	isc_uint8_t	s[256];
62	int		count;
63} arc4ctx_t;
64
65struct dns_dispatchmgr {
66	/* Unlocked. */
67	unsigned int			magic;
68	isc_mem_t		       *mctx;
69	dns_acl_t		       *blackhole;
70	dns_portlist_t		       *portlist;
71
72	/* Locked by "lock". */
73	isc_mutex_t			lock;
74	unsigned int			state;
75	ISC_LIST(dns_dispatch_t)	list;
76
77	/* Locked by arc4_lock. */
78	isc_mutex_t			arc4_lock;
79	arc4ctx_t			arc4ctx;    /*%< ARC4 context for QID */
80
81	/* locked by buffer lock */
82	dns_qid_t			*qid;
83	isc_mutex_t			buffer_lock;
84	unsigned int			buffers;    /*%< allocated buffers */
85	unsigned int			buffersize; /*%< size of each buffer */
86	unsigned int			maxbuffers; /*%< max buffers */
87
88	/* Locked internally. */
89	isc_mutex_t			pool_lock;
90	isc_mempool_t		       *epool;	/*%< memory pool for events */
91	isc_mempool_t		       *rpool;	/*%< memory pool for replies */
92	isc_mempool_t		       *dpool;  /*%< dispatch allocations */
93	isc_mempool_t		       *bpool;	/*%< memory pool for buffers */
94
95	isc_entropy_t		       *entropy; /*%< entropy source */
96};
97
/*% Bit in the manager's 'state' word: the manager is shutting down. */
#define MGR_SHUTTINGDOWN	0x1U
/*% Non-zero iff manager 'l' has the MGR_SHUTTINGDOWN bit set. */
#define MGR_IS_SHUTTINGDOWN(l)	(0 != ((l)->state & MGR_SHUTTINGDOWN))

/*% Non-zero iff dispatch 'd' has DNS_DISPATCHATTR_PRIVATE set. */
#define IS_PRIVATE(d)	(0 != ((d)->attributes & DNS_DISPATCHATTR_PRIVATE))
102
103struct dns_dispentry {
104	unsigned int			magic;
105	dns_dispatch_t		       *disp;
106	dns_messageid_t			id;
107	in_port_t			port;
108	unsigned int			bucket;
109	isc_sockaddr_t			host;
110	isc_task_t		       *task;
111	isc_taskaction_t		action;
112	void			       *arg;
113	isc_boolean_t			item_out;
114	ISC_LIST(dns_dispatchevent_t)	items;
115	ISC_LINK(dns_dispentry_t)	link;
116};
117
118#define INVALID_BUCKET		(0xffffdead)
119
120struct dns_dispatch {
121	/* Unlocked. */
122	unsigned int		magic;		/*%< magic */
123	dns_dispatchmgr_t      *mgr;		/*%< dispatch manager */
124	isc_task_t	       *task;		/*%< internal task */
125	isc_socket_t	       *socket;		/*%< isc socket attached to */
126	isc_sockaddr_t		local;		/*%< local address */
127	in_port_t		localport;	/*%< local UDP port */
128	unsigned int		maxrequests;	/*%< max requests */
129	isc_event_t	       *ctlevent;
130
131	/*% Locked by mgr->lock. */
132	ISC_LINK(dns_dispatch_t) link;
133
134	/* Locked by "lock". */
135	isc_mutex_t		lock;		/*%< locks all below */
136	isc_sockettype_t	socktype;
137	unsigned int		attributes;
138	unsigned int		refcount;	/*%< number of users */
139	dns_dispatchevent_t    *failsafe_ev;	/*%< failsafe cancel event */
140	unsigned int		shutting_down : 1,
141				shutdown_out : 1,
142				connected : 1,
143				tcpmsg_valid : 1,
144				recv_pending : 1; /*%< is a recv() pending? */
145	isc_result_t		shutdown_why;
146	unsigned int		requests;	/*%< how many requests we have */
147	unsigned int		tcpbuffers;	/*%< allocated buffers */
148	dns_tcpmsg_t		tcpmsg;		/*%< for tcp streams */
149	dns_qid_t		*qid;
150};
151
/*
 * Magic numbers and validity checks for the structures defined above.
 */

/*% dns_qid_t */
#define QID_MAGIC		ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e)		ISC_MAGIC_VALID((e), QID_MAGIC)

/*% dns_dispentry_t */
#define RESPONSE_MAGIC		ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e)	ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

/*% dns_dispatch_t */
#define DISPATCH_MAGIC		ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e)	ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

/*% dns_dispatchmgr_t */
#define DNS_DISPATCHMGR_MAGIC	ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)	ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
163
/*%
 * Select the QID table for 'disp': a TCP dispatch uses its own table, any
 * other dispatch uses the manager's table.
 *
 * The whole expansion is parenthesized so the macro can be used as an
 * operand (e.g. DNS_QID(disp)->lock) without the conditional operator
 * capturing the surrounding expression.
 */
#define DNS_QID(disp) (((disp)->socktype == isc_sockettype_tcp) ? \
		       (disp)->qid : (disp)->mgr->qid)
166/*
167 * Statics.
168 */
169static dns_dispentry_t *bucket_search(dns_qid_t *, isc_sockaddr_t *,
170				      dns_messageid_t, in_port_t, unsigned int);
171static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
172static void destroy_disp(isc_task_t *task, isc_event_t *event);
173static void udp_recv(isc_task_t *, isc_event_t *);
174static void tcp_recv(isc_task_t *, isc_event_t *);
175static void startrecv(dns_dispatch_t *);
176static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t,
177			     in_port_t);
178static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
179static void *allocate_udp_buffer(dns_dispatch_t *disp);
180static inline void free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
181static inline dns_dispatchevent_t *allocate_event(dns_dispatch_t *disp);
182static void do_cancel(dns_dispatch_t *disp);
183static dns_dispentry_t *linear_first(dns_qid_t *disp);
184static dns_dispentry_t *linear_next(dns_qid_t *disp,
185				    dns_dispentry_t *resp);
186static void dispatch_free(dns_dispatch_t **dispp);
187static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
188				       isc_socketmgr_t *sockmgr,
189				       isc_taskmgr_t *taskmgr,
190				       isc_sockaddr_t *localaddr,
191				       unsigned int maxrequests,
192				       unsigned int attributes,
193				       dns_dispatch_t **dispp);
194static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
195static void destroy_mgr(dns_dispatchmgr_t **mgrp);
196static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
197				 unsigned int increment, dns_qid_t **qidp);
198static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
199
200#define LVL(x) ISC_LOG_DEBUG(x)
201
202static void
203mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
204     ISC_FORMAT_PRINTF(3, 4);
205
206static void
207mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
208	char msgbuf[2048];
209	va_list ap;
210
211	if (! isc_log_wouldlog(dns_lctx, level))
212		return;
213
214	va_start(ap, fmt);
215	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
216	va_end(ap);
217
218	isc_log_write(dns_lctx,
219		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
220		      level, "dispatchmgr %p: %s", mgr, msgbuf);
221}
222
223static void
224dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
225     ISC_FORMAT_PRINTF(3, 4);
226
227static void
228dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
229	char msgbuf[2048];
230	va_list ap;
231
232	if (! isc_log_wouldlog(dns_lctx, level))
233		return;
234
235	va_start(ap, fmt);
236	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
237	va_end(ap);
238
239	isc_log_write(dns_lctx,
240		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
241		      level, "dispatch %p: %s", disp, msgbuf);
242}
243
244static void
245request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
246	    int level, const char *fmt, ...)
247     ISC_FORMAT_PRINTF(4, 5);
248
249static void
250request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
251	    int level, const char *fmt, ...)
252{
253	char msgbuf[2048];
254	char peerbuf[256];
255	va_list ap;
256
257	if (! isc_log_wouldlog(dns_lctx, level))
258		return;
259
260	va_start(ap, fmt);
261	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
262	va_end(ap);
263
264	if (VALID_RESPONSE(resp)) {
265		isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
266		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
267			      DNS_LOGMODULE_DISPATCH, level,
268			      "dispatch %p response %p %s: %s", disp, resp,
269			      peerbuf, msgbuf);
270	} else {
271		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
272			      DNS_LOGMODULE_DISPATCH, level,
273			      "dispatch %p req/resp %p: %s", disp, resp,
274			      msgbuf);
275	}
276}
277
278/*
279 * ARC4 random number generator derived from OpenBSD.
280 * Only dispatch_arc4random() and dispatch_arc4uniformrandom() are expected
281 * to be called from general dispatch routines; the rest of them are subroutines
282 * for these two.
283 *
284 * The original copyright follows:
285 * Copyright (c) 1996, David Mazieres <dm@uun.org>
286 * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
287 *
288 * Permission to use, copy, modify, and distribute this software for any
289 * purpose with or without fee is hereby granted, provided that the above
290 * copyright notice and this permission notice appear in all copies.
291 *
292 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
293 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
294 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
295 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
296 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
297 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
298 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
299 */
300static void
301dispatch_arc4init(arc4ctx_t *actx) {
302	int n;
303	for (n = 0; n < 256; n++)
304		actx->s[n] = n;
305	actx->i = 0;
306	actx->j = 0;
307	actx->count = 0;
308}
309
310static void
311dispatch_arc4addrandom(arc4ctx_t *actx, unsigned char *dat, int datlen) {
312	int n;
313	isc_uint8_t si;
314
315	actx->i--;
316	for (n = 0; n < 256; n++) {
317		actx->i = (actx->i + 1);
318		si = actx->s[actx->i];
319		actx->j = (actx->j + si + dat[n % datlen]);
320		actx->s[actx->i] = actx->s[actx->j];
321		actx->s[actx->j] = si;
322	}
323	actx->j = actx->i;
324}
325
326static inline isc_uint8_t
327dispatch_arc4get8(arc4ctx_t *actx) {
328	isc_uint8_t si, sj;
329
330	actx->i = (actx->i + 1);
331	si = actx->s[actx->i];
332	actx->j = (actx->j + si);
333	sj = actx->s[actx->j];
334	actx->s[actx->i] = sj;
335	actx->s[actx->j] = si;
336
337	return (actx->s[(si + sj) & 0xff]);
338}
339
340static inline isc_uint16_t
341dispatch_arc4get16(arc4ctx_t *actx) {
342	isc_uint16_t val;
343
344	val = dispatch_arc4get8(actx) << 8;
345	val |= dispatch_arc4get8(actx);
346
347	return (val);
348}
349
350static void
351dispatch_arc4stir(dns_dispatchmgr_t *mgr) {
352	int i;
353	union {
354		unsigned char rnd[128];
355		isc_uint32_t rnd32[32];
356	} rnd;
357	isc_result_t result;
358
359	if (mgr->entropy != NULL) {
360		/*
361		 * We accept any quality of random data to avoid blocking.
362		 */
363		result = isc_entropy_getdata(mgr->entropy, rnd.rnd,
364					     sizeof(rnd), NULL, 0);
365		RUNTIME_CHECK(result == ISC_R_SUCCESS);
366	} else {
367		for (i = 0; i < 32; i++)
368			isc_random_get(&rnd.rnd32[i]);
369	}
370	dispatch_arc4addrandom(&mgr->arc4ctx, rnd.rnd, sizeof(rnd.rnd));
371
372	/*
373	 * Discard early keystream, as per recommendations in:
374	 * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps
375	 */
376	for (i = 0; i < 256; i++)
377		(void)dispatch_arc4get8(&mgr->arc4ctx);
378
379	/*
380	 * Derived from OpenBSD's implementation.  The rationale is not clear,
381	 * but should be conservative enough in safety, and reasonably large
382	 * for efficiency.
383	 */
384	mgr->arc4ctx.count = 1600000;
385}
386
387static isc_uint16_t
388dispatch_arc4random(dns_dispatchmgr_t *mgr) {
389	isc_uint16_t result;
390
391	LOCK(&mgr->arc4_lock);
392	mgr->arc4ctx.count -= sizeof(isc_uint16_t);
393	if (mgr->arc4ctx.count <= 0)
394		dispatch_arc4stir(mgr);
395	result = dispatch_arc4get16(&mgr->arc4ctx);
396	UNLOCK(&mgr->arc4_lock);
397	return (result);
398}
399
400static isc_uint16_t
401dispatch_arc4uniformrandom(dns_dispatchmgr_t *mgr, isc_uint16_t upper_bound) {
402	isc_uint16_t min, r;
403	/* The caller must hold the manager lock. */
404
405	if (upper_bound < 2)
406		return (0);
407
408	/*
409	 * Ensure the range of random numbers [min, 0xffff] be a multiple of
410	 * upper_bound and contain at least a half of the 16 bit range.
411	 */
412
413	if (upper_bound > 0x8000)
414		min = 1 + ~upper_bound; /* 0x8000 - upper_bound */
415	else
416		min = (isc_uint16_t)(0x10000 % (isc_uint32_t)upper_bound);
417
418	/*
419	 * This could theoretically loop forever but each retry has
420	 * p > 0.5 (worst case, usually far better) of selecting a
421	 * number inside the range we need, so it should rarely need
422	 * to re-roll.
423	 */
424	for (;;) {
425		r = dispatch_arc4random(mgr);
426		if (r >= min)
427			break;
428	}
429
430	return (r % upper_bound);
431}
432
433/*
434 * Return a hash of the destination and message id.
435 */
436static isc_uint32_t
437dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
438	 in_port_t port)
439{
440	unsigned int ret;
441
442	ret = isc_sockaddr_hash(dest, ISC_TRUE);
443	ret ^= (id << 16) | port;
444	ret %= qid->qid_nbuckets;
445
446	INSIST(ret < qid->qid_nbuckets);
447
448	return (ret);
449}
450
451/*
452 * Find the first entry in 'qid'.  Returns NULL if there are no entries.
453 */
454static dns_dispentry_t *
455linear_first(dns_qid_t *qid) {
456	dns_dispentry_t *ret;
457	unsigned int bucket;
458
459	bucket = 0;
460
461	while (bucket < qid->qid_nbuckets) {
462		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
463		if (ret != NULL)
464			return (ret);
465		bucket++;
466	}
467
468	return (NULL);
469}
470
471/*
472 * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
473 * no more entries.
474 */
475static dns_dispentry_t *
476linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
477	dns_dispentry_t *ret;
478	unsigned int bucket;
479
480	ret = ISC_LIST_NEXT(resp, link);
481	if (ret != NULL)
482		return (ret);
483
484	bucket = resp->bucket;
485	bucket++;
486	while (bucket < qid->qid_nbuckets) {
487		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
488		if (ret != NULL)
489			return (ret);
490		bucket++;
491	}
492
493	return (NULL);
494}
495
496/*
497 * The dispatch must be locked.
498 */
499static isc_boolean_t
500destroy_disp_ok(dns_dispatch_t *disp)
501{
502	if (disp->refcount != 0)
503		return (ISC_FALSE);
504
505	if (disp->recv_pending != 0)
506		return (ISC_FALSE);
507
508	if (disp->shutting_down == 0)
509		return (ISC_FALSE);
510
511	return (ISC_TRUE);
512}
513
514
515/*
516 * Called when refcount reaches 0 (and safe to destroy).
517 *
518 * The dispatcher must not be locked.
519 * The manager must be locked.
520 */
521static void
522destroy_disp(isc_task_t *task, isc_event_t *event) {
523	dns_dispatch_t *disp;
524	dns_dispatchmgr_t *mgr;
525	isc_boolean_t killmgr;
526
527	INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);
528
529	UNUSED(task);
530
531	disp = event->ev_arg;
532	mgr = disp->mgr;
533
534	LOCK(&mgr->lock);
535	ISC_LIST_UNLINK(mgr->list, disp, link);
536
537	dispatch_log(disp, LVL(90),
538		     "shutting down; detaching from sock %p, task %p",
539		     disp->socket, disp->task);
540
541	isc_socket_detach(&disp->socket);
542	isc_task_detach(&disp->task);
543	isc_event_free(&event);
544
545	dispatch_free(&disp);
546
547	killmgr = destroy_mgr_ok(mgr);
548	UNLOCK(&mgr->lock);
549	if (killmgr)
550		destroy_mgr(&mgr);
551}
552
553
554/*
555 * Find an entry for query ID 'id' and socket address 'dest' in 'qid'.
556 * Return NULL if no such entry exists.
557 */
558static dns_dispentry_t *
559bucket_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id,
560	      in_port_t port, unsigned int bucket)
561{
562	dns_dispentry_t *res;
563
564	REQUIRE(bucket < qid->qid_nbuckets);
565
566	res = ISC_LIST_HEAD(qid->qid_table[bucket]);
567
568	while (res != NULL) {
569		if ((res->id == id) && isc_sockaddr_equal(dest, &res->host) &&
570		    res->port == port) {
571			return (res);
572		}
573		res = ISC_LIST_NEXT(res, link);
574	}
575
576	return (NULL);
577}
578
579static void
580free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
581	INSIST(buf != NULL && len != 0);
582
583
584	switch (disp->socktype) {
585	case isc_sockettype_tcp:
586		INSIST(disp->tcpbuffers > 0);
587		disp->tcpbuffers--;
588		isc_mem_put(disp->mgr->mctx, buf, len);
589		break;
590	case isc_sockettype_udp:
591		LOCK(&disp->mgr->buffer_lock);
592		INSIST(disp->mgr->buffers > 0);
593		INSIST(len == disp->mgr->buffersize);
594		disp->mgr->buffers--;
595		isc_mempool_put(disp->mgr->bpool, buf);
596		UNLOCK(&disp->mgr->buffer_lock);
597		break;
598	default:
599		INSIST(0);
600		break;
601	}
602}
603
604static void *
605allocate_udp_buffer(dns_dispatch_t *disp) {
606	void *temp;
607
608	LOCK(&disp->mgr->buffer_lock);
609	temp = isc_mempool_get(disp->mgr->bpool);
610
611	if (temp != NULL)
612		disp->mgr->buffers++;
613	UNLOCK(&disp->mgr->buffer_lock);
614
615	return (temp);
616}
617
618static inline void
619free_event(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
620	if (disp->failsafe_ev == ev) {
621		INSIST(disp->shutdown_out == 1);
622		disp->shutdown_out = 0;
623
624		return;
625	}
626
627	isc_mempool_put(disp->mgr->epool, ev);
628}
629
630static inline dns_dispatchevent_t *
631allocate_event(dns_dispatch_t *disp) {
632	dns_dispatchevent_t *ev;
633
634	ev = isc_mempool_get(disp->mgr->epool);
635	if (ev == NULL)
636		return (NULL);
637	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
638		       NULL, NULL, NULL, NULL, NULL);
639
640	return (ev);
641}
642
643/*
644 * General flow:
645 *
646 * If I/O result == CANCELED or error, free the buffer.
647 *
648 * If query, free the buffer, restart.
649 *
650 * If response:
651 *	Allocate event, fill in details.
652 *		If cannot allocate, free buffer, restart.
653 *	find target.  If not found, free buffer, restart.
654 *	if event queue is not empty, queue.  else, send.
655 *	restart.
656 */
657static void
658udp_recv(isc_task_t *task, isc_event_t *ev_in) {
659	isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
660	dns_dispatch_t *disp = ev_in->ev_arg;
661	dns_messageid_t id;
662	isc_result_t dres;
663	isc_buffer_t source;
664	unsigned int flags;
665	dns_dispentry_t *resp;
666	dns_dispatchevent_t *rev;
667	unsigned int bucket;
668	isc_boolean_t killit;
669	isc_boolean_t queue_response;
670	dns_dispatchmgr_t *mgr;
671	dns_qid_t *qid;
672	isc_netaddr_t netaddr;
673	int match;
674
675	UNUSED(task);
676
677	LOCK(&disp->lock);
678
679	mgr = disp->mgr;
680	qid = mgr->qid;
681
682	dispatch_log(disp, LVL(90),
683		     "got packet: requests %d, buffers %d, recvs %d",
684		     disp->requests, disp->mgr->buffers, disp->recv_pending);
685
686	if (ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
687		/*
688		 * Unless the receive event was imported from a listening
689		 * interface, in which case the event type is
690		 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
691		 */
692		INSIST(disp->recv_pending != 0);
693		disp->recv_pending = 0;
694	}
695
696	if (disp->shutting_down) {
697		/*
698		 * This dispatcher is shutting down.
699		 */
700		free_buffer(disp, ev->region.base, ev->region.length);
701
702		isc_event_free(&ev_in);
703		ev = NULL;
704
705		killit = destroy_disp_ok(disp);
706		UNLOCK(&disp->lock);
707		if (killit)
708			isc_task_send(disp->task, &disp->ctlevent);
709
710		return;
711	}
712
713	if (ev->result != ISC_R_SUCCESS) {
714		free_buffer(disp, ev->region.base, ev->region.length);
715
716		if (ev->result != ISC_R_CANCELED)
717			dispatch_log(disp, ISC_LOG_ERROR,
718				     "odd socket result in udp_recv(): %s",
719				     isc_result_totext(ev->result));
720
721		UNLOCK(&disp->lock);
722		isc_event_free(&ev_in);
723		return;
724	}
725
726	/*
727	 * If this is from a blackholed address, drop it.
728	 */
729	isc_netaddr_fromsockaddr(&netaddr, &ev->address);
730	if (disp->mgr->blackhole != NULL &&
731	    dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
732		    	  NULL, &match, NULL) == ISC_R_SUCCESS &&
733	    match > 0)
734	{
735		if (isc_log_wouldlog(dns_lctx, LVL(10))) {
736			char netaddrstr[ISC_NETADDR_FORMATSIZE];
737			isc_netaddr_format(&netaddr, netaddrstr,
738					   sizeof(netaddrstr));
739			dispatch_log(disp, LVL(10),
740				     "blackholed packet from %s",
741				     netaddrstr);
742		}
743		free_buffer(disp, ev->region.base, ev->region.length);
744		goto restart;
745	}
746
747	/*
748	 * Peek into the buffer to see what we can see.
749	 */
750	isc_buffer_init(&source, ev->region.base, ev->region.length);
751	isc_buffer_add(&source, ev->n);
752	dres = dns_message_peekheader(&source, &id, &flags);
753	if (dres != ISC_R_SUCCESS) {
754		free_buffer(disp, ev->region.base, ev->region.length);
755		dispatch_log(disp, LVL(10), "got garbage packet");
756		goto restart;
757	}
758
759	dispatch_log(disp, LVL(92),
760		     "got valid DNS message header, /QR %c, id %u",
761		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
762
763	/*
764	 * Look at flags.  If query, drop it. If response,
765	 * look to see where it goes.
766	 */
767	queue_response = ISC_FALSE;
768	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
769		/* query */
770		free_buffer(disp, ev->region.base, ev->region.length);
771		goto restart;
772	}
773
774	/* response */
775	bucket = dns_hash(qid, &ev->address, id, disp->localport);
776	LOCK(&qid->lock);
777	resp = bucket_search(qid, &ev->address, id, disp->localport, bucket);
778	dispatch_log(disp, LVL(90),
779		     "search for response in bucket %d: %s",
780		     bucket, (resp == NULL ? "not found" : "found"));
781
782	if (resp == NULL) {
783		free_buffer(disp, ev->region.base, ev->region.length);
784		goto unlock;
785	}
786
787	/*
788	 * Now that we have the original dispatch the query was sent
789	 * from check that the address and port the response was
790	 * sent to make sense.
791	 */
792	if (disp != resp->disp) {
793		isc_sockaddr_t a1;
794		isc_sockaddr_t a2;
795
796		/*
797		 * Check that the socket types and ports match.
798		 */
799		if (disp->socktype != resp->disp->socktype ||
800		    isc_sockaddr_getport(&disp->local) !=
801		    isc_sockaddr_getport(&resp->disp->local)) {
802			free_buffer(disp, ev->region.base, ev->region.length);
803			goto unlock;
804		}
805
806		/*
807		 * If both dispatches are bound to an address then fail as
808		 * the addresses can't be equal (enforced by the IP stack).
809		 *
810		 * Note under Linux a packet can be sent out via IPv4 socket
811		 * and the response be received via a IPv6 socket.
812		 *
813		 * Requests sent out via IPv6 should always come back in
814		 * via IPv6.
815		 */
816		if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 &&
817		    isc_sockaddr_pf(&disp->local) != PF_INET6) {
818			free_buffer(disp, ev->region.base, ev->region.length);
819			goto unlock;
820		}
821		isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local));
822		isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local));
823		if (!isc_sockaddr_eqaddr(&a1, &resp->disp->local) &&
824		    !isc_sockaddr_eqaddr(&a2, &disp->local)) {
825			free_buffer(disp, ev->region.base, ev->region.length);
826			goto unlock;
827		}
828	}
829
830	queue_response = resp->item_out;
831	rev = allocate_event(resp->disp);
832	if (rev == NULL) {
833		free_buffer(disp, ev->region.base, ev->region.length);
834		goto unlock;
835	}
836
837	/*
838	 * At this point, rev contains the event we want to fill in, and
839	 * resp contains the information on the place to send it to.
840	 * Send the event off.
841	 */
842	isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length);
843	isc_buffer_add(&rev->buffer, ev->n);
844	rev->result = ISC_R_SUCCESS;
845	rev->id = id;
846	rev->addr = ev->address;
847	rev->pktinfo = ev->pktinfo;
848	rev->attributes = ev->attributes;
849	if (queue_response) {
850		ISC_LIST_APPEND(resp->items, rev, ev_link);
851	} else {
852		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL,
853			       DNS_EVENT_DISPATCH,
854			       resp->action, resp->arg, resp, NULL, NULL);
855		request_log(disp, resp, LVL(90),
856			    "[a] Sent event %p buffer %p len %d to task %p",
857			    rev, rev->buffer.base, rev->buffer.length,
858			    resp->task);
859		resp->item_out = ISC_TRUE;
860		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
861	}
862 unlock:
863	UNLOCK(&qid->lock);
864
865	/*
866	 * Restart recv() to get the next packet.
867	 */
868 restart:
869	startrecv(disp);
870
871	UNLOCK(&disp->lock);
872
873	isc_event_free(&ev_in);
874}
875
876/*
877 * General flow:
878 *
879 * If I/O result == CANCELED, EOF, or error, notify everyone as the
880 * various queues drain.
881 *
882 * If query, restart.
883 *
884 * If response:
885 *	Allocate event, fill in details.
886 *		If cannot allocate, restart.
887 *	find target.  If not found, restart.
888 *	if event queue is not empty, queue.  else, send.
889 *	restart.
890 */
891static void
892tcp_recv(isc_task_t *task, isc_event_t *ev_in) {
893	dns_dispatch_t *disp = ev_in->ev_arg;
894	dns_tcpmsg_t *tcpmsg = &disp->tcpmsg;
895	dns_messageid_t id;
896	isc_result_t dres;
897	unsigned int flags;
898	dns_dispentry_t *resp;
899	dns_dispatchevent_t *rev;
900	unsigned int bucket;
901	isc_boolean_t killit;
902	isc_boolean_t queue_response;
903	dns_qid_t *qid;
904	int level;
905	char buf[ISC_SOCKADDR_FORMATSIZE];
906
907	UNUSED(task);
908
909	REQUIRE(VALID_DISPATCH(disp));
910
911	qid = disp->qid;
912
913	dispatch_log(disp, LVL(90),
914		     "got TCP packet: requests %d, buffers %d, recvs %d",
915		     disp->requests, disp->tcpbuffers, disp->recv_pending);
916
917	LOCK(&disp->lock);
918
919	INSIST(disp->recv_pending != 0);
920	disp->recv_pending = 0;
921
922	if (disp->refcount == 0) {
923		/*
924		 * This dispatcher is shutting down.  Force cancelation.
925		 */
926		tcpmsg->result = ISC_R_CANCELED;
927	}
928
929	if (tcpmsg->result != ISC_R_SUCCESS) {
930		switch (tcpmsg->result) {
931		case ISC_R_CANCELED:
932			break;
933
934		case ISC_R_EOF:
935			dispatch_log(disp, LVL(90), "shutting down on EOF");
936			do_cancel(disp);
937			break;
938
939		case ISC_R_CONNECTIONRESET:
940			level = ISC_LOG_INFO;
941			goto logit;
942
943		default:
944			level = ISC_LOG_ERROR;
945		logit:
946			isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf));
947			dispatch_log(disp, level, "shutting down due to TCP "
948				     "receive error: %s: %s", buf,
949				     isc_result_totext(tcpmsg->result));
950			do_cancel(disp);
951			break;
952		}
953
954		/*
955		 * The event is statically allocated in the tcpmsg
956		 * structure, and destroy_disp() frees the tcpmsg, so we must
957		 * free the event *before* calling destroy_disp().
958		 */
959		isc_event_free(&ev_in);
960
961		disp->shutting_down = 1;
962		disp->shutdown_why = tcpmsg->result;
963
964		/*
965		 * If the recv() was canceled pass the word on.
966		 */
967		killit = destroy_disp_ok(disp);
968		UNLOCK(&disp->lock);
969		if (killit)
970			isc_task_send(disp->task, &disp->ctlevent);
971		return;
972	}
973
974	dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p",
975		     tcpmsg->result,
976		     tcpmsg->buffer.length, tcpmsg->buffer.base);
977
978	/*
979	 * Peek into the buffer to see what we can see.
980	 */
981	dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags);
982	if (dres != ISC_R_SUCCESS) {
983		dispatch_log(disp, LVL(10), "got garbage packet");
984		goto restart;
985	}
986
987	dispatch_log(disp, LVL(92),
988		     "got valid DNS message header, /QR %c, id %u",
989		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
990
991	/*
992	 * Allocate an event to send to the query or response client, and
993	 * allocate a new buffer for our use.
994	 */
995
996	/*
997	 * Look at flags.  If query, drop it. If response,
998	 * look to see where it goes.
999	 */
1000	queue_response = ISC_FALSE;
1001	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
1002		/*
1003		 * Query.
1004		 */
1005		goto restart;
1006	}
1007
1008	/*
1009	 * Response.
1010	 */
1011	bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport);
1012	LOCK(&qid->lock);
1013	resp = bucket_search(qid, &tcpmsg->address, id, disp->localport,
1014			     bucket);
1015	dispatch_log(disp, LVL(90),
1016		     "search for response in bucket %d: %s",
1017		     bucket, (resp == NULL ? "not found" : "found"));
1018
1019	if (resp == NULL)
1020		goto unlock;
1021	queue_response = resp->item_out;
1022	rev = allocate_event(disp);
1023	if (rev == NULL)
1024		goto unlock;
1025
1026	/*
1027	 * At this point, rev contains the event we want to fill in, and
1028	 * resp contains the information on the place to send it to.
1029	 * Send the event off.
1030	 */
1031	dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer);
1032	disp->tcpbuffers++;
1033	rev->result = ISC_R_SUCCESS;
1034	rev->id = id;
1035	rev->addr = tcpmsg->address;
1036	if (queue_response) {
1037		ISC_LIST_APPEND(resp->items, rev, ev_link);
1038	} else {
1039		ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH,
1040			       resp->action, resp->arg, resp, NULL, NULL);
1041		request_log(disp, resp, LVL(90),
1042			    "[b] Sent event %p buffer %p len %d to task %p",
1043			    rev, rev->buffer.base, rev->buffer.length,
1044			    resp->task);
1045		resp->item_out = ISC_TRUE;
1046		isc_task_send(resp->task, ISC_EVENT_PTR(&rev));
1047	}
1048 unlock:
1049	UNLOCK(&qid->lock);
1050
1051	/*
1052	 * Restart recv() to get the next packet.
1053	 */
1054 restart:
1055	startrecv(disp);
1056
1057	UNLOCK(&disp->lock);
1058
1059	isc_event_free(&ev_in);
1060}
1061
1062/*
1063 * disp must be locked.
1064 */
1065static void
1066startrecv(dns_dispatch_t *disp) {
1067	isc_result_t res;
1068	isc_region_t region;
1069
1070	if (disp->shutting_down == 1)
1071		return;
1072
1073	if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1074		return;
1075
1076	if (disp->recv_pending != 0)
1077		return;
1078
1079	if (disp->mgr->buffers >= disp->mgr->maxbuffers)
1080		return;
1081
1082	switch (disp->socktype) {
1083		/*
1084		 * UDP reads are always maximal.
1085		 */
1086	case isc_sockettype_udp:
1087		region.length = disp->mgr->buffersize;
1088		region.base = allocate_udp_buffer(disp);
1089		if (region.base == NULL)
1090			return;
1091		res = isc_socket_recv(disp->socket, &region, 1,
1092				      disp->task, udp_recv, disp);
1093		if (res != ISC_R_SUCCESS) {
1094			free_buffer(disp, region.base, region.length);
1095			disp->shutdown_why = res;
1096			disp->shutting_down = 1;
1097			do_cancel(disp);
1098			return;
1099		}
1100		INSIST(disp->recv_pending == 0);
1101		disp->recv_pending = 1;
1102		break;
1103
1104	case isc_sockettype_tcp:
1105		res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task,
1106					     tcp_recv, disp);
1107		if (res != ISC_R_SUCCESS) {
1108			disp->shutdown_why = res;
1109			disp->shutting_down = 1;
1110			do_cancel(disp);
1111			return;
1112		}
1113		INSIST(disp->recv_pending == 0);
1114		disp->recv_pending = 1;
1115		break;
1116	default:
1117		INSIST(0);
1118		break;
1119	}
1120}
1121
1122/*
1123 * Mgr must be locked when calling this function.
1124 */
1125static isc_boolean_t
1126destroy_mgr_ok(dns_dispatchmgr_t *mgr) {
1127	mgr_log(mgr, LVL(90),
1128		"destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, "
1129		"epool=%d, rpool=%d, dpool=%d",
1130		MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list),
1131		isc_mempool_getallocated(mgr->epool),
1132		isc_mempool_getallocated(mgr->rpool),
1133		isc_mempool_getallocated(mgr->dpool));
1134	if (!MGR_IS_SHUTTINGDOWN(mgr))
1135		return (ISC_FALSE);
1136	if (!ISC_LIST_EMPTY(mgr->list))
1137		return (ISC_FALSE);
1138	if (isc_mempool_getallocated(mgr->epool) != 0)
1139		return (ISC_FALSE);
1140	if (isc_mempool_getallocated(mgr->rpool) != 0)
1141		return (ISC_FALSE);
1142	if (isc_mempool_getallocated(mgr->dpool) != 0)
1143		return (ISC_FALSE);
1144
1145	return (ISC_TRUE);
1146}
1147
1148/*
1149 * Mgr must be unlocked when calling this function.
1150 */
1151static void
1152destroy_mgr(dns_dispatchmgr_t **mgrp) {
1153	isc_mem_t *mctx;
1154	dns_dispatchmgr_t *mgr;
1155
1156	mgr = *mgrp;
1157	*mgrp = NULL;
1158
1159	mctx = mgr->mctx;
1160
1161	mgr->magic = 0;
1162	mgr->mctx = NULL;
1163	DESTROYLOCK(&mgr->lock);
1164	mgr->state = 0;
1165
1166	DESTROYLOCK(&mgr->arc4_lock);
1167
1168	isc_mempool_destroy(&mgr->epool);
1169	isc_mempool_destroy(&mgr->rpool);
1170	isc_mempool_destroy(&mgr->dpool);
1171	isc_mempool_destroy(&mgr->bpool);
1172
1173	DESTROYLOCK(&mgr->pool_lock);
1174
1175	if (mgr->entropy != NULL)
1176		isc_entropy_detach(&mgr->entropy);
1177	if (mgr->qid != NULL)
1178		qid_destroy(mctx, &mgr->qid);
1179
1180	DESTROYLOCK(&mgr->buffer_lock);
1181
1182	if (mgr->blackhole != NULL)
1183		dns_acl_detach(&mgr->blackhole);
1184
1185	if (mgr->portlist != NULL)
1186		dns_portlist_detach(&mgr->portlist);
1187
1188	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1189	isc_mem_detach(&mctx);
1190}
1191
1192static isc_result_t
1193create_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local,
1194	      unsigned int options, isc_socket_t **sockp)
1195{
1196	isc_socket_t *sock;
1197	isc_result_t result;
1198
1199	sock = NULL;
1200	result = isc_socket_create(mgr, isc_sockaddr_pf(local),
1201				   isc_sockettype_udp, &sock);
1202	if (result != ISC_R_SUCCESS)
1203		return (result);
1204
1205#ifndef ISC_ALLOW_MAPPED
1206	isc_socket_ipv6only(sock, ISC_TRUE);
1207#endif
1208	result = isc_socket_bind(sock, local, options);
1209	if (result != ISC_R_SUCCESS) {
1210		isc_socket_detach(&sock);
1211		return (result);
1212	}
1213
1214	*sockp = sock;
1215	return (ISC_R_SUCCESS);
1216}
1217
1218/*
1219 * Publics.
1220 */
1221
1222isc_result_t
1223dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy,
1224		       dns_dispatchmgr_t **mgrp)
1225{
1226	dns_dispatchmgr_t *mgr;
1227	isc_result_t result;
1228
1229	REQUIRE(mctx != NULL);
1230	REQUIRE(mgrp != NULL && *mgrp == NULL);
1231
1232	mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t));
1233	if (mgr == NULL)
1234		return (ISC_R_NOMEMORY);
1235
1236	mgr->mctx = NULL;
1237	isc_mem_attach(mctx, &mgr->mctx);
1238
1239	mgr->blackhole = NULL;
1240	mgr->portlist = NULL;
1241
1242	result = isc_mutex_init(&mgr->lock);
1243	if (result != ISC_R_SUCCESS)
1244		goto deallocate;
1245
1246	result = isc_mutex_init(&mgr->arc4_lock);
1247	if (result != ISC_R_SUCCESS)
1248		goto kill_lock;
1249
1250	result = isc_mutex_init(&mgr->buffer_lock);
1251	if (result != ISC_R_SUCCESS)
1252		goto kill_arc4_lock;
1253
1254	result = isc_mutex_init(&mgr->pool_lock);
1255	if (result != ISC_R_SUCCESS)
1256		goto kill_buffer_lock;
1257
1258	mgr->epool = NULL;
1259	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t),
1260			       &mgr->epool) != ISC_R_SUCCESS) {
1261		result = ISC_R_NOMEMORY;
1262		goto kill_pool_lock;
1263	}
1264
1265	mgr->rpool = NULL;
1266	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t),
1267			       &mgr->rpool) != ISC_R_SUCCESS) {
1268		result = ISC_R_NOMEMORY;
1269		goto kill_epool;
1270	}
1271
1272	mgr->dpool = NULL;
1273	if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t),
1274			       &mgr->dpool) != ISC_R_SUCCESS) {
1275		result = ISC_R_NOMEMORY;
1276		goto kill_rpool;
1277	}
1278
1279	isc_mempool_setname(mgr->epool, "dispmgr_epool");
1280	isc_mempool_setfreemax(mgr->epool, 1024);
1281	isc_mempool_associatelock(mgr->epool, &mgr->pool_lock);
1282
1283	isc_mempool_setname(mgr->rpool, "dispmgr_rpool");
1284	isc_mempool_setfreemax(mgr->rpool, 1024);
1285	isc_mempool_associatelock(mgr->rpool, &mgr->pool_lock);
1286
1287	isc_mempool_setname(mgr->dpool, "dispmgr_dpool");
1288	isc_mempool_setfreemax(mgr->dpool, 1024);
1289	isc_mempool_associatelock(mgr->dpool, &mgr->pool_lock);
1290
1291	mgr->buffers = 0;
1292	mgr->buffersize = 0;
1293	mgr->maxbuffers = 0;
1294	mgr->bpool = NULL;
1295	mgr->entropy = NULL;
1296	mgr->qid = NULL;
1297	mgr->state = 0;
1298	ISC_LIST_INIT(mgr->list);
1299	mgr->magic = DNS_DISPATCHMGR_MAGIC;
1300
1301	if (entropy != NULL)
1302		isc_entropy_attach(entropy, &mgr->entropy);
1303
1304	dispatch_arc4init(&mgr->arc4ctx);
1305
1306	*mgrp = mgr;
1307	return (ISC_R_SUCCESS);
1308
1309 kill_rpool:
1310	isc_mempool_destroy(&mgr->rpool);
1311 kill_epool:
1312	isc_mempool_destroy(&mgr->epool);
1313 kill_pool_lock:
1314	DESTROYLOCK(&mgr->pool_lock);
1315 kill_buffer_lock:
1316	DESTROYLOCK(&mgr->buffer_lock);
1317 kill_arc4_lock:
1318	DESTROYLOCK(&mgr->arc4_lock);
1319 kill_lock:
1320	DESTROYLOCK(&mgr->lock);
1321 deallocate:
1322	isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t));
1323	isc_mem_detach(&mctx);
1324
1325	return (result);
1326}
1327
1328void
1329dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) {
1330	REQUIRE(VALID_DISPATCHMGR(mgr));
1331	if (mgr->blackhole != NULL)
1332		dns_acl_detach(&mgr->blackhole);
1333	dns_acl_attach(blackhole, &mgr->blackhole);
1334}
1335
1336dns_acl_t *
1337dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) {
1338	REQUIRE(VALID_DISPATCHMGR(mgr));
1339	return (mgr->blackhole);
1340}
1341
1342void
1343dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr,
1344				 dns_portlist_t *portlist)
1345{
1346	REQUIRE(VALID_DISPATCHMGR(mgr));
1347	if (mgr->portlist != NULL)
1348		dns_portlist_detach(&mgr->portlist);
1349	if (portlist != NULL)
1350		dns_portlist_attach(portlist, &mgr->portlist);
1351}
1352
1353dns_portlist_t *
1354dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) {
1355	REQUIRE(VALID_DISPATCHMGR(mgr));
1356	return (mgr->portlist);
1357}
1358
1359static isc_result_t
1360dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr,
1361			unsigned int buffersize, unsigned int maxbuffers,
1362			unsigned int buckets, unsigned int increment)
1363{
1364	isc_result_t result;
1365
1366	REQUIRE(VALID_DISPATCHMGR(mgr));
1367	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1368	REQUIRE(maxbuffers > 0);
1369	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
1370	REQUIRE(increment > buckets);
1371
1372	/*
1373	 * Keep some number of items around.  This should be a config
1374	 * option.  For now, keep 8, but later keep at least two even
1375	 * if the caller wants less.  This allows us to ensure certain
1376	 * things, like an event can be "freed" and the next allocation
1377	 * will always succeed.
1378	 *
1379	 * Note that if limits are placed on anything here, we use one
1380	 * event internally, so the actual limit should be "wanted + 1."
1381	 *
1382	 * XXXMLG
1383	 */
1384
1385	if (maxbuffers < 8)
1386		maxbuffers = 8;
1387
1388	LOCK(&mgr->buffer_lock);
1389	if (mgr->bpool != NULL) {
1390		isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1391		mgr->maxbuffers = maxbuffers;
1392		UNLOCK(&mgr->buffer_lock);
1393		return (ISC_R_SUCCESS);
1394	}
1395
1396	if (isc_mempool_create(mgr->mctx, buffersize,
1397			       &mgr->bpool) != ISC_R_SUCCESS) {
1398		UNLOCK(&mgr->buffer_lock);
1399		return (ISC_R_NOMEMORY);
1400	}
1401
1402	isc_mempool_setname(mgr->bpool, "dispmgr_bpool");
1403	isc_mempool_setmaxalloc(mgr->bpool, maxbuffers);
1404	isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock);
1405
1406	result = qid_allocate(mgr, buckets, increment, &mgr->qid);
1407	if (result != ISC_R_SUCCESS)
1408		goto cleanup;
1409
1410	mgr->buffersize = buffersize;
1411	mgr->maxbuffers = maxbuffers;
1412	UNLOCK(&mgr->buffer_lock);
1413	return (ISC_R_SUCCESS);
1414
1415 cleanup:
1416	isc_mempool_destroy(&mgr->bpool);
1417	UNLOCK(&mgr->buffer_lock);
1418	return (ISC_R_NOMEMORY);
1419}
1420
1421void
1422dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) {
1423	dns_dispatchmgr_t *mgr;
1424	isc_boolean_t killit;
1425
1426	REQUIRE(mgrp != NULL);
1427	REQUIRE(VALID_DISPATCHMGR(*mgrp));
1428
1429	mgr = *mgrp;
1430	*mgrp = NULL;
1431
1432	LOCK(&mgr->lock);
1433	mgr->state |= MGR_SHUTTINGDOWN;
1434
1435	killit = destroy_mgr_ok(mgr);
1436	UNLOCK(&mgr->lock);
1437
1438	mgr_log(mgr, LVL(90), "destroy: killit=%d", killit);
1439
1440	if (killit)
1441		destroy_mgr(&mgr);
1442}
1443
1444static isc_boolean_t
1445blacklisted(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1446	    isc_sockaddr_t *sockaddrp)
1447{
1448	isc_sockaddr_t sockaddr;
1449	isc_result_t result;
1450
1451	REQUIRE(sock != NULL || sockaddrp != NULL);
1452
1453	if (mgr->portlist == NULL)
1454		return (ISC_FALSE);
1455
1456	if (sock != NULL) {
1457		sockaddrp = &sockaddr;
1458		result = isc_socket_getsockname(sock, sockaddrp);
1459		if (result != ISC_R_SUCCESS)
1460			return (ISC_FALSE);
1461	}
1462
1463	if (mgr->portlist != NULL &&
1464	    dns_portlist_match(mgr->portlist, isc_sockaddr_pf(sockaddrp),
1465			       isc_sockaddr_getport(sockaddrp)))
1466		return (ISC_TRUE);
1467	return (ISC_FALSE);
1468}
1469
/* True when _a1 and _a2 agree on every bit selected by _mask. */
#define ATTRMATCH(_a1, _a2, _mask) ((((_a1) ^ (_a2)) & (_mask)) == 0)
1471
1472static isc_boolean_t
1473local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) {
1474	isc_sockaddr_t sockaddr;
1475	isc_result_t result;
1476
1477	if (addr == NULL)
1478		return (ISC_TRUE);
1479
1480	/*
1481	 * Don't match wildcard ports against newly blacklisted ports.
1482	 */
1483	if (disp->mgr->portlist != NULL &&
1484	    isc_sockaddr_getport(addr) == 0 &&
1485	    isc_sockaddr_getport(&disp->local) == 0 &&
1486	    blacklisted(disp->mgr, disp->socket, NULL))
1487		return (ISC_FALSE);
1488
1489	/*
1490	 * Check if we match the binding <address,port>.
1491	 * Wildcard ports match/fail here.
1492	 */
1493	if (isc_sockaddr_equal(&disp->local, addr))
1494		return (ISC_TRUE);
1495	if (isc_sockaddr_getport(addr) == 0)
1496		return (ISC_FALSE);
1497
1498	/*
1499	 * Check if we match a bound wildcard port <address,port>.
1500	 */
1501	if (!isc_sockaddr_eqaddr(&disp->local, addr))
1502		return (ISC_FALSE);
1503	result = isc_socket_getsockname(disp->socket, &sockaddr);
1504	if (result != ISC_R_SUCCESS)
1505		return (ISC_FALSE);
1506
1507	return (isc_sockaddr_equal(&sockaddr, addr));
1508}
1509
1510/*
1511 * Requires mgr be locked.
1512 *
1513 * No dispatcher can be locked by this thread when calling this function.
1514 *
1515 *
1516 * NOTE:
1517 *	If a matching dispatcher is found, it is locked after this function
1518 *	returns, and must be unlocked by the caller.
1519 */
1520static isc_result_t
1521dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local,
1522	      unsigned int attributes, unsigned int mask,
1523	      dns_dispatch_t **dispp)
1524{
1525	dns_dispatch_t *disp;
1526	isc_result_t result;
1527
1528	/*
1529	 * Make certain that we will not match a private dispatch.
1530	 */
1531	attributes &= ~DNS_DISPATCHATTR_PRIVATE;
1532	mask |= DNS_DISPATCHATTR_PRIVATE;
1533
1534	disp = ISC_LIST_HEAD(mgr->list);
1535	while (disp != NULL) {
1536		LOCK(&disp->lock);
1537		if ((disp->shutting_down == 0)
1538		    && ATTRMATCH(disp->attributes, attributes, mask)
1539		    && local_addr_match(disp, local))
1540			break;
1541		UNLOCK(&disp->lock);
1542		disp = ISC_LIST_NEXT(disp, link);
1543	}
1544
1545	if (disp == NULL) {
1546		result = ISC_R_NOTFOUND;
1547		goto out;
1548	}
1549
1550	*dispp = disp;
1551	result = ISC_R_SUCCESS;
1552 out:
1553
1554	return (result);
1555}
1556
1557static isc_result_t
1558qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
1559	     unsigned int increment, dns_qid_t **qidp)
1560{
1561	dns_qid_t *qid;
1562	unsigned int i;
1563	isc_result_t result;
1564
1565	REQUIRE(VALID_DISPATCHMGR(mgr));
1566	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
1567	REQUIRE(increment > buckets);
1568	REQUIRE(qidp != NULL && *qidp == NULL);
1569
1570	qid = isc_mem_get(mgr->mctx, sizeof(*qid));
1571	if (qid == NULL)
1572		return (ISC_R_NOMEMORY);
1573
1574	qid->qid_table = isc_mem_get(mgr->mctx,
1575				     buckets * sizeof(dns_displist_t));
1576	if (qid->qid_table == NULL) {
1577		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
1578		return (ISC_R_NOMEMORY);
1579	}
1580
1581	result = isc_mutex_init(&qid->lock);
1582	if (result != ISC_R_SUCCESS) {
1583		isc_mem_put(mgr->mctx, qid->qid_table,
1584			    buckets * sizeof(dns_displist_t));
1585		isc_mem_put(mgr->mctx, qid, sizeof(*qid));
1586		return (result);
1587	}
1588
1589	for (i = 0; i < buckets; i++)
1590		ISC_LIST_INIT(qid->qid_table[i]);
1591
1592	qid->qid_nbuckets = buckets;
1593	qid->qid_increment = increment;
1594	qid->magic = QID_MAGIC;
1595	*qidp = qid;
1596	return (ISC_R_SUCCESS);
1597}
1598
1599static void
1600qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) {
1601	dns_qid_t *qid;
1602
1603	REQUIRE(qidp != NULL);
1604	qid = *qidp;
1605
1606	REQUIRE(VALID_QID(qid));
1607
1608	*qidp = NULL;
1609	qid->magic = 0;
1610	isc_mem_put(mctx, qid->qid_table,
1611		    qid->qid_nbuckets * sizeof(dns_displist_t));
1612	DESTROYLOCK(&qid->lock);
1613	isc_mem_put(mctx, qid, sizeof(*qid));
1614}
1615
1616/*
1617 * Allocate and set important limits.
1618 */
1619static isc_result_t
1620dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests,
1621		  dns_dispatch_t **dispp)
1622{
1623	dns_dispatch_t *disp;
1624	isc_result_t result;
1625
1626	REQUIRE(VALID_DISPATCHMGR(mgr));
1627	REQUIRE(dispp != NULL && *dispp == NULL);
1628
1629	/*
1630	 * Set up the dispatcher, mostly.  Don't bother setting some of
1631	 * the options that are controlled by tcp vs. udp, etc.
1632	 */
1633
1634	disp = isc_mempool_get(mgr->dpool);
1635	if (disp == NULL)
1636		return (ISC_R_NOMEMORY);
1637
1638	disp->magic = 0;
1639	disp->mgr = mgr;
1640	disp->maxrequests = maxrequests;
1641	disp->attributes = 0;
1642	ISC_LINK_INIT(disp, link);
1643	disp->refcount = 1;
1644	disp->recv_pending = 0;
1645	memset(&disp->local, 0, sizeof(disp->local));
1646	disp->localport = 0;
1647	disp->shutting_down = 0;
1648	disp->shutdown_out = 0;
1649	disp->connected = 0;
1650	disp->tcpmsg_valid = 0;
1651	disp->shutdown_why = ISC_R_UNEXPECTED;
1652	disp->requests = 0;
1653	disp->tcpbuffers = 0;
1654	disp->qid = NULL;
1655
1656	result = isc_mutex_init(&disp->lock);
1657	if (result != ISC_R_SUCCESS)
1658		goto deallocate;
1659
1660	disp->failsafe_ev = allocate_event(disp);
1661	if (disp->failsafe_ev == NULL) {
1662		result = ISC_R_NOMEMORY;
1663		goto kill_lock;
1664	}
1665
1666	disp->magic = DISPATCH_MAGIC;
1667
1668	*dispp = disp;
1669	return (ISC_R_SUCCESS);
1670
1671	/*
1672	 * error returns
1673	 */
1674 kill_lock:
1675	DESTROYLOCK(&disp->lock);
1676 deallocate:
1677	isc_mempool_put(mgr->dpool, disp);
1678
1679	return (result);
1680}
1681
1682
1683/*
1684 * MUST be unlocked, and not used by anthing.
1685 */
1686static void
1687dispatch_free(dns_dispatch_t **dispp)
1688{
1689	dns_dispatch_t *disp;
1690	dns_dispatchmgr_t *mgr;
1691
1692	REQUIRE(VALID_DISPATCH(*dispp));
1693	disp = *dispp;
1694	*dispp = NULL;
1695
1696	mgr = disp->mgr;
1697	REQUIRE(VALID_DISPATCHMGR(mgr));
1698
1699	if (disp->tcpmsg_valid) {
1700		dns_tcpmsg_invalidate(&disp->tcpmsg);
1701		disp->tcpmsg_valid = 0;
1702	}
1703
1704	INSIST(disp->tcpbuffers == 0);
1705	INSIST(disp->requests == 0);
1706	INSIST(disp->recv_pending == 0);
1707
1708	isc_mempool_put(mgr->epool, disp->failsafe_ev);
1709	disp->failsafe_ev = NULL;
1710
1711	if (disp->qid != NULL)
1712		qid_destroy(mgr->mctx, &disp->qid);
1713	disp->mgr = NULL;
1714	DESTROYLOCK(&disp->lock);
1715	disp->magic = 0;
1716	isc_mempool_put(mgr->dpool, disp);
1717}
1718
1719isc_result_t
1720dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
1721		       isc_taskmgr_t *taskmgr, unsigned int buffersize,
1722		       unsigned int maxbuffers, unsigned int maxrequests,
1723		       unsigned int buckets, unsigned int increment,
1724		       unsigned int attributes, dns_dispatch_t **dispp)
1725{
1726	isc_result_t result;
1727	dns_dispatch_t *disp;
1728
1729	UNUSED(maxbuffers);
1730	UNUSED(buffersize);
1731
1732	REQUIRE(VALID_DISPATCHMGR(mgr));
1733	REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp);
1734	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0);
1735	REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0);
1736
1737	attributes |= DNS_DISPATCHATTR_PRIVATE;  /* XXXMLG */
1738
1739	LOCK(&mgr->lock);
1740
1741	/*
1742	 * dispatch_allocate() checks mgr for us.
1743	 * qid_allocate() checks buckets and increment for us.
1744	 */
1745	disp = NULL;
1746	result = dispatch_allocate(mgr, maxrequests, &disp);
1747	if (result != ISC_R_SUCCESS) {
1748		UNLOCK(&mgr->lock);
1749		return (result);
1750	}
1751
1752	result = qid_allocate(mgr, buckets, increment, &disp->qid);
1753	if (result != ISC_R_SUCCESS)
1754		goto deallocate_dispatch;
1755
1756	disp->socktype = isc_sockettype_tcp;
1757	disp->socket = NULL;
1758	isc_socket_attach(sock, &disp->socket);
1759
1760	disp->task = NULL;
1761	result = isc_task_create(taskmgr, 0, &disp->task);
1762	if (result != ISC_R_SUCCESS)
1763		goto kill_socket;
1764
1765	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
1766					    DNS_EVENT_DISPATCHCONTROL,
1767					    destroy_disp, disp,
1768					    sizeof(isc_event_t));
1769	if (disp->ctlevent == NULL) {
1770		result = ISC_R_NOMEMORY;
1771		goto kill_task;
1772	}
1773
1774	isc_task_setname(disp->task, "tcpdispatch", disp);
1775
1776	dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg);
1777	disp->tcpmsg_valid = 1;
1778
1779	disp->attributes = attributes;
1780
1781	/*
1782	 * Append it to the dispatcher list.
1783	 */
1784	ISC_LIST_APPEND(mgr->list, disp, link);
1785	UNLOCK(&mgr->lock);
1786
1787	mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp);
1788	dispatch_log(disp, LVL(90), "created task %p", disp->task);
1789
1790	*dispp = disp;
1791
1792	return (ISC_R_SUCCESS);
1793
1794	/*
1795	 * Error returns.
1796	 */
1797 kill_task:
1798	isc_task_detach(&disp->task);
1799 kill_socket:
1800	isc_socket_detach(&disp->socket);
1801 deallocate_dispatch:
1802	dispatch_free(&disp);
1803
1804	UNLOCK(&mgr->lock);
1805
1806	return (result);
1807}
1808
1809isc_result_t
1810dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
1811		    isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr,
1812		    unsigned int buffersize,
1813		    unsigned int maxbuffers, unsigned int maxrequests,
1814		    unsigned int buckets, unsigned int increment,
1815		    unsigned int attributes, unsigned int mask,
1816		    dns_dispatch_t **dispp)
1817{
1818	isc_result_t result;
1819	dns_dispatch_t *disp = NULL;
1820
1821	REQUIRE(VALID_DISPATCHMGR(mgr));
1822	REQUIRE(sockmgr != NULL);
1823	REQUIRE(localaddr != NULL);
1824	REQUIRE(taskmgr != NULL);
1825	REQUIRE(buffersize >= 512 && buffersize < (64 * 1024));
1826	REQUIRE(maxbuffers > 0);
1827	REQUIRE(buckets < 2097169);  /* next prime > 65536 * 32 */
1828	REQUIRE(increment > buckets);
1829	REQUIRE(dispp != NULL && *dispp == NULL);
1830	REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0);
1831
1832	result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers,
1833					buckets, increment);
1834	if (result != ISC_R_SUCCESS)
1835		return (result);
1836
1837	LOCK(&mgr->lock);
1838
1839	if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) {
1840		REQUIRE(isc_sockaddr_getport(localaddr) == 0);
1841		goto createudp;
1842	}
1843
1844	/*
1845	 * First, see if we have a dispatcher that matches.
1846	 */
1847	disp = NULL;
1848	result = dispatch_find(mgr, localaddr, attributes, mask, &disp);
1849	if (result == ISC_R_SUCCESS) {
1850		disp->refcount++;
1851
1852		if (disp->maxrequests < maxrequests)
1853			disp->maxrequests = maxrequests;
1854
1855		if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 &&
1856		    (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0)
1857		{
1858			disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
1859			if (disp->recv_pending != 0)
1860				isc_socket_cancel(disp->socket, disp->task,
1861						  ISC_SOCKCANCEL_RECV);
1862		}
1863
1864		UNLOCK(&disp->lock);
1865		UNLOCK(&mgr->lock);
1866
1867		*dispp = disp;
1868
1869		return (ISC_R_SUCCESS);
1870	}
1871
1872 createudp:
1873	/*
1874	 * Nope, create one.
1875	 */
1876	result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr,
1877				    maxrequests, attributes, &disp);
1878	if (result != ISC_R_SUCCESS) {
1879		UNLOCK(&mgr->lock);
1880		return (result);
1881	}
1882
1883	UNLOCK(&mgr->lock);
1884	*dispp = disp;
1885	return (ISC_R_SUCCESS);
1886}
1887
1888/*
1889 * mgr should be locked.
1890 */
1891
1892#ifndef DNS_DISPATCH_HELD
1893#define DNS_DISPATCH_HELD 20U
1894#endif
1895
1896static isc_result_t
1897dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr,
1898		   isc_taskmgr_t *taskmgr,
1899		   isc_sockaddr_t *localaddr,
1900		   unsigned int maxrequests,
1901		   unsigned int attributes,
1902		   dns_dispatch_t **dispp)
1903{
1904	isc_result_t result;
1905	dns_dispatch_t *disp;
1906	isc_socket_t *sock = NULL;
1907	isc_socket_t *held[DNS_DISPATCH_HELD];
1908	unsigned int i = 0, j = 0, k = 0;
1909	isc_sockaddr_t localaddr_bound;
1910	in_port_t localport = 0;
1911
1912	/*
1913	 * dispatch_allocate() checks mgr for us.
1914	 */
1915	disp = NULL;
1916	result = dispatch_allocate(mgr, maxrequests, &disp);
1917	if (result != ISC_R_SUCCESS)
1918		return (result);
1919
1920	/*
1921	 * Try to allocate a socket that is not on the blacklist.
1922	 * Hold up to DNS_DISPATCH_HELD sockets to prevent the OS
1923	 * from returning the same port to us too quickly.
1924	 */
1925	memset(held, 0, sizeof(held));
1926	localaddr_bound = *localaddr;
1927 getsocket:
1928	if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) {
1929		in_port_t prt;
1930
1931		/* XXX: should the range be configurable? */
1932		prt = 1024 + dispatch_arc4uniformrandom(mgr, 65535 - 1023);
1933		isc_sockaddr_setport(&localaddr_bound, prt);
1934		if (blacklisted(mgr, NULL, &localaddr_bound)) {
1935			if (++k == 1024)
1936				attributes &= ~DNS_DISPATCHATTR_RANDOMPORT;
1937			goto getsocket;
1938		}
1939		result = create_socket(sockmgr, &localaddr_bound, 0, &sock);
1940		if (result == ISC_R_ADDRINUSE) {
1941			if (++k == 1024)
1942				attributes &= ~DNS_DISPATCHATTR_RANDOMPORT;
1943			goto getsocket;
1944		}
1945		localport = prt;
1946	} else
1947		result = create_socket(sockmgr, localaddr,
1948				       ISC_SOCKET_REUSEADDRESS, &sock);
1949	if (result != ISC_R_SUCCESS)
1950		goto deallocate_dispatch;
1951	if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) == 0 &&
1952	    isc_sockaddr_getport(localaddr) == 0 &&
1953	    blacklisted(mgr, sock, NULL))
1954	{
1955		if (held[i] != NULL)
1956			isc_socket_detach(&held[i]);
1957		held[i++] = sock;
1958		sock = NULL;
1959		if (i == DNS_DISPATCH_HELD)
1960			i = 0;
1961		if (j++ == 0xffffU) {
1962			mgr_log(mgr, ISC_LOG_ERROR, "avoid-v%s-udp-ports: "
1963				"unable to allocate a non-blacklisted port",
1964				isc_sockaddr_pf(localaddr) == AF_INET ?
1965					"4" : "6");
1966			result = ISC_R_FAILURE;
1967			goto deallocate_dispatch;
1968		}
1969		goto getsocket;
1970	}
1971
1972	disp->socktype = isc_sockettype_udp;
1973	disp->socket = sock;
1974	disp->local = *localaddr;
1975	disp->localport = localport;
1976
1977	disp->task = NULL;
1978	result = isc_task_create(taskmgr, 0, &disp->task);
1979	if (result != ISC_R_SUCCESS)
1980		goto kill_socket;
1981
1982	disp->ctlevent = isc_event_allocate(mgr->mctx, disp,
1983					    DNS_EVENT_DISPATCHCONTROL,
1984					    destroy_disp, disp,
1985					    sizeof(isc_event_t));
1986	if (disp->ctlevent == NULL) {
1987		result = ISC_R_NOMEMORY;
1988		goto kill_task;
1989	}
1990
1991	isc_task_setname(disp->task, "udpdispatch", disp);
1992
1993	attributes &= ~DNS_DISPATCHATTR_TCP;
1994	attributes |= DNS_DISPATCHATTR_UDP;
1995	disp->attributes = attributes;
1996
1997	/*
1998	 * Append it to the dispatcher list.
1999	 */
2000	ISC_LIST_APPEND(mgr->list, disp, link);
2001
2002	mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp);
2003	dispatch_log(disp, LVL(90), "created task %p", disp->task);
2004	dispatch_log(disp, LVL(90), "created socket %p", disp->socket);
2005
2006	*dispp = disp;
2007
2008	goto cleanheld;
2009
2010	/*
2011	 * Error returns.
2012	 */
2013 kill_task:
2014	isc_task_detach(&disp->task);
2015 kill_socket:
2016	isc_socket_detach(&disp->socket);
2017 deallocate_dispatch:
2018	dispatch_free(&disp);
2019 cleanheld:
2020	for (i = 0; i < DNS_DISPATCH_HELD; i++)
2021		if (held[i] != NULL)
2022			isc_socket_detach(&held[i]);
2023	return (result);
2024}
2025
2026void
2027dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) {
2028	REQUIRE(VALID_DISPATCH(disp));
2029	REQUIRE(dispp != NULL && *dispp == NULL);
2030
2031	LOCK(&disp->lock);
2032	disp->refcount++;
2033	UNLOCK(&disp->lock);
2034
2035	*dispp = disp;
2036}
2037
2038/*
2039 * It is important to lock the manager while we are deleting the dispatch,
2040 * since dns_dispatch_getudp will call dispatch_find, which returns to
2041 * the caller a dispatch but does not attach to it until later.  _getudp
2042 * locks the manager, however, so locking it here will keep us from attaching
2043 * to a dispatcher that is in the process of going away.
2044 */
2045void
2046dns_dispatch_detach(dns_dispatch_t **dispp) {
2047	dns_dispatch_t *disp;
2048	isc_boolean_t killit;
2049
2050	REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp));
2051
2052	disp = *dispp;
2053	*dispp = NULL;
2054
2055	LOCK(&disp->lock);
2056
2057	INSIST(disp->refcount > 0);
2058	disp->refcount--;
2059	killit = ISC_FALSE;
2060	if (disp->refcount == 0) {
2061		if (disp->recv_pending > 0)
2062			isc_socket_cancel(disp->socket, disp->task,
2063					  ISC_SOCKCANCEL_RECV);
2064		disp->shutting_down = 1;
2065	}
2066
2067	dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount);
2068
2069	killit = destroy_disp_ok(disp);
2070	UNLOCK(&disp->lock);
2071	if (killit)
2072		isc_task_send(disp->task, &disp->ctlevent);
2073}
2074
2075isc_result_t
2076dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest,
2077			 isc_task_t *task, isc_taskaction_t action, void *arg,
2078			 dns_messageid_t *idp, dns_dispentry_t **resp)
2079{
2080	dns_dispentry_t *res;
2081	unsigned int bucket;
2082	dns_messageid_t id;
2083	int i;
2084	isc_boolean_t ok;
2085	dns_qid_t *qid;
2086
2087	REQUIRE(VALID_DISPATCH(disp));
2088	REQUIRE(task != NULL);
2089	REQUIRE(dest != NULL);
2090	REQUIRE(resp != NULL && *resp == NULL);
2091	REQUIRE(idp != NULL);
2092
2093	LOCK(&disp->lock);
2094
2095	if (disp->shutting_down == 1) {
2096		UNLOCK(&disp->lock);
2097		return (ISC_R_SHUTTINGDOWN);
2098	}
2099
2100	if (disp->requests >= disp->maxrequests) {
2101		UNLOCK(&disp->lock);
2102		return (ISC_R_QUOTA);
2103	}
2104
2105	/*
2106	 * Try somewhat hard to find an unique ID.
2107	 */
2108	id = (dns_messageid_t)dispatch_arc4random(disp->mgr);
2109	qid = DNS_QID(disp);
2110	LOCK(&qid->lock);
2111	bucket = dns_hash(qid, dest, id, disp->localport);
2112	ok = ISC_FALSE;
2113	for (i = 0; i < 64; i++) {
2114		if (bucket_search(qid, dest, id, disp->localport, bucket) ==
2115		    NULL) {
2116			ok = ISC_TRUE;
2117			break;
2118		}
2119		id += qid->qid_increment;
2120		id &= 0x0000ffff;
2121		bucket = dns_hash(qid, dest, id, disp->localport);
2122	}
2123
2124	if (!ok) {
2125		UNLOCK(&qid->lock);
2126		UNLOCK(&disp->lock);
2127		return (ISC_R_NOMORE);
2128	}
2129
2130	res = isc_mempool_get(disp->mgr->rpool);
2131	if (res == NULL) {
2132		UNLOCK(&qid->lock);
2133		UNLOCK(&disp->lock);
2134		return (ISC_R_NOMEMORY);
2135	}
2136
2137	disp->refcount++;
2138	disp->requests++;
2139	res->task = NULL;
2140	isc_task_attach(task, &res->task);
2141	res->disp = disp;
2142	res->id = id;
2143	res->port = disp->localport;
2144	res->bucket = bucket;
2145	res->host = *dest;
2146	res->action = action;
2147	res->arg = arg;
2148	res->item_out = ISC_FALSE;
2149	ISC_LIST_INIT(res->items);
2150	ISC_LINK_INIT(res, link);
2151	res->magic = RESPONSE_MAGIC;
2152	ISC_LIST_APPEND(qid->qid_table[bucket], res, link);
2153	UNLOCK(&qid->lock);
2154
2155	request_log(disp, res, LVL(90),
2156		    "attached to task %p", res->task);
2157
2158	if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) ||
2159	    ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0))
2160		startrecv(disp);
2161
2162	UNLOCK(&disp->lock);
2163
2164	*idp = id;
2165	*resp = res;
2166
2167	return (ISC_R_SUCCESS);
2168}
2169
2170void
2171dns_dispatch_starttcp(dns_dispatch_t *disp) {
2172
2173	REQUIRE(VALID_DISPATCH(disp));
2174
2175	dispatch_log(disp, LVL(90), "starttcp %p", disp->task);
2176
2177	LOCK(&disp->lock);
2178	disp->attributes |= DNS_DISPATCHATTR_CONNECTED;
2179	startrecv(disp);
2180	UNLOCK(&disp->lock);
2181}
2182
2183void
2184dns_dispatch_removeresponse(dns_dispentry_t **resp,
2185			    dns_dispatchevent_t **sockevent)
2186{
2187	dns_dispatchmgr_t *mgr;
2188	dns_dispatch_t *disp;
2189	dns_dispentry_t *res;
2190	dns_dispatchevent_t *ev;
2191	unsigned int bucket;
2192	isc_boolean_t killit;
2193	unsigned int n;
2194	isc_eventlist_t events;
2195	dns_qid_t *qid;
2196
2197	REQUIRE(resp != NULL);
2198	REQUIRE(VALID_RESPONSE(*resp));
2199
2200	res = *resp;
2201	*resp = NULL;
2202
2203	disp = res->disp;
2204	REQUIRE(VALID_DISPATCH(disp));
2205	mgr = disp->mgr;
2206	REQUIRE(VALID_DISPATCHMGR(mgr));
2207
2208	qid = DNS_QID(disp);
2209
2210	if (sockevent != NULL) {
2211		REQUIRE(*sockevent != NULL);
2212		ev = *sockevent;
2213		*sockevent = NULL;
2214	} else {
2215		ev = NULL;
2216	}
2217
2218	LOCK(&disp->lock);
2219
2220	INSIST(disp->requests > 0);
2221	disp->requests--;
2222	INSIST(disp->refcount > 0);
2223	disp->refcount--;
2224	killit = ISC_FALSE;
2225	if (disp->refcount == 0) {
2226		if (disp->recv_pending > 0)
2227			isc_socket_cancel(disp->socket, disp->task,
2228					  ISC_SOCKCANCEL_RECV);
2229		disp->shutting_down = 1;
2230	}
2231
2232	bucket = res->bucket;
2233
2234	LOCK(&qid->lock);
2235	ISC_LIST_UNLINK(qid->qid_table[bucket], res, link);
2236	UNLOCK(&qid->lock);
2237
2238	if (ev == NULL && res->item_out) {
2239		/*
2240		 * We've posted our event, but the caller hasn't gotten it
2241		 * yet.  Take it back.
2242		 */
2243		ISC_LIST_INIT(events);
2244		n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH,
2245				    NULL, &events);
2246		/*
2247		 * We had better have gotten it back.
2248		 */
2249		INSIST(n == 1);
2250		ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events);
2251	}
2252
2253	if (ev != NULL) {
2254		REQUIRE(res->item_out == ISC_TRUE);
2255		res->item_out = ISC_FALSE;
2256		if (ev->buffer.base != NULL)
2257			free_buffer(disp, ev->buffer.base, ev->buffer.length);
2258		free_event(disp, ev);
2259	}
2260
2261	request_log(disp, res, LVL(90), "detaching from task %p", res->task);
2262	isc_task_detach(&res->task);
2263
2264	/*
2265	 * Free any buffered requests as well
2266	 */
2267	ev = ISC_LIST_HEAD(res->items);
2268	while (ev != NULL) {
2269		ISC_LIST_UNLINK(res->items, ev, ev_link);
2270		if (ev->buffer.base != NULL)
2271			free_buffer(disp, ev->buffer.base, ev->buffer.length);
2272		free_event(disp, ev);
2273		ev = ISC_LIST_HEAD(res->items);
2274	}
2275	res->magic = 0;
2276	isc_mempool_put(disp->mgr->rpool, res);
2277	if (disp->shutting_down == 1)
2278		do_cancel(disp);
2279	else
2280		startrecv(disp);
2281
2282	killit = destroy_disp_ok(disp);
2283	UNLOCK(&disp->lock);
2284	if (killit)
2285		isc_task_send(disp->task, &disp->ctlevent);
2286}
2287
2288static void
2289do_cancel(dns_dispatch_t *disp) {
2290	dns_dispatchevent_t *ev;
2291	dns_dispentry_t *resp;
2292	dns_qid_t *qid;
2293
2294	if (disp->shutdown_out == 1)
2295		return;
2296
2297	qid = DNS_QID(disp);
2298
2299	/*
2300	 * Search for the first response handler without packets outstanding.
2301	 */
2302	LOCK(&qid->lock);
2303	for (resp = linear_first(qid);
2304	     resp != NULL && resp->item_out != ISC_FALSE;
2305	     /* Empty. */)
2306		resp = linear_next(qid, resp);
2307	/*
2308	 * No one to send the cancel event to, so nothing to do.
2309	 */
2310	if (resp == NULL)
2311		goto unlock;
2312
2313	/*
2314	 * Send the shutdown failsafe event to this resp.
2315	 */
2316	ev = disp->failsafe_ev;
2317	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH,
2318		       resp->action, resp->arg, resp, NULL, NULL);
2319	ev->result = disp->shutdown_why;
2320	ev->buffer.base = NULL;
2321	ev->buffer.length = 0;
2322	disp->shutdown_out = 1;
2323	request_log(disp, resp, LVL(10),
2324		    "cancel: failsafe event %p -> task %p",
2325		    ev, resp->task);
2326	resp->item_out = ISC_TRUE;
2327	isc_task_send(resp->task, ISC_EVENT_PTR(&ev));
2328 unlock:
2329	UNLOCK(&qid->lock);
2330}
2331
2332isc_socket_t *
2333dns_dispatch_getsocket(dns_dispatch_t *disp) {
2334	REQUIRE(VALID_DISPATCH(disp));
2335
2336	return (disp->socket);
2337}
2338
2339isc_result_t
2340dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) {
2341
2342	REQUIRE(VALID_DISPATCH(disp));
2343	REQUIRE(addrp != NULL);
2344
2345	if (disp->socktype == isc_sockettype_udp) {
2346		*addrp = disp->local;
2347		return (ISC_R_SUCCESS);
2348	}
2349	return (ISC_R_NOTIMPLEMENTED);
2350}
2351
2352void
2353dns_dispatch_cancel(dns_dispatch_t *disp) {
2354	REQUIRE(VALID_DISPATCH(disp));
2355
2356	LOCK(&disp->lock);
2357
2358	if (disp->shutting_down == 1) {
2359		UNLOCK(&disp->lock);
2360		return;
2361	}
2362
2363	disp->shutdown_why = ISC_R_CANCELED;
2364	disp->shutting_down = 1;
2365	do_cancel(disp);
2366
2367	UNLOCK(&disp->lock);
2368
2369	return;
2370}
2371
2372void
2373dns_dispatch_changeattributes(dns_dispatch_t *disp,
2374			      unsigned int attributes, unsigned int mask)
2375{
2376	REQUIRE(VALID_DISPATCH(disp));
2377
2378	/* XXXMLG
2379	 * Should check for valid attributes here!
2380	 */
2381
2382	LOCK(&disp->lock);
2383
2384	if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) {
2385		if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 &&
2386		    (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) {
2387			disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN;
2388			startrecv(disp);
2389		} else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN)
2390			   == 0 &&
2391			   (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) {
2392			disp->attributes |= DNS_DISPATCHATTR_NOLISTEN;
2393			if (disp->recv_pending != 0)
2394				isc_socket_cancel(disp->socket, disp->task,
2395						  ISC_SOCKCANCEL_RECV);
2396		}
2397	}
2398
2399	disp->attributes &= ~mask;
2400	disp->attributes |= (attributes & mask);
2401	UNLOCK(&disp->lock);
2402}
2403
2404void
2405dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) {
2406	void *buf;
2407	isc_socketevent_t *sevent, *newsevent;
2408
2409	REQUIRE(VALID_DISPATCH(disp));
2410	REQUIRE((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0);
2411	REQUIRE(event != NULL);
2412
2413	sevent = (isc_socketevent_t *)event;
2414
2415	INSIST(sevent->n <= disp->mgr->buffersize);
2416	newsevent = (isc_socketevent_t *)
2417		    isc_event_allocate(disp->mgr->mctx, NULL,
2418				      DNS_EVENT_IMPORTRECVDONE, udp_recv,
2419				      disp, sizeof(isc_socketevent_t));
2420	if (newsevent == NULL)
2421		return;
2422
2423	buf = allocate_udp_buffer(disp);
2424	if (buf == NULL) {
2425		isc_event_free(ISC_EVENT_PTR(&newsevent));
2426		return;
2427	}
2428	memcpy(buf, sevent->region.base, sevent->n);
2429	newsevent->region.base = buf;
2430	newsevent->region.length = disp->mgr->buffersize;
2431	newsevent->n = sevent->n;
2432	newsevent->result = sevent->result;
2433	newsevent->address = sevent->address;
2434	newsevent->timestamp = sevent->timestamp;
2435	newsevent->pktinfo = sevent->pktinfo;
2436	newsevent->attributes = sevent->attributes;
2437
2438	isc_task_send(disp->task, ISC_EVENT_PTR(&newsevent));
2439}
2440
#if 0
/*
 * Debugging aid (currently compiled out): print each dispatch on the
 * manager's list together with its formatted local address.
 */
void
dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) {
	dns_dispatch_t *cur;
	char addrtext[1024];

	for (cur = ISC_LIST_HEAD(mgr->list);
	     cur != NULL;
	     cur = ISC_LIST_NEXT(cur, link)) {
		isc_sockaddr_format(&cur->local, addrtext, sizeof(addrtext));
		printf("\tdispatch %p, addr %s\n", cur, addrtext);
	}
}
#endif
2455