resolver.c revision 296611
1/*
2 * Copyright (C) 2004-2014  Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003  Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/* $Id$ */
19
20/*! \file */
21
22#include <config.h>
23
24#include <isc/log.h>
25#include <isc/platform.h>
26#include <isc/print.h>
27#include <isc/string.h>
28#include <isc/random.h>
29#include <isc/socket.h>
30#include <isc/stats.h>
31#include <isc/task.h>
32#include <isc/timer.h>
33#include <isc/util.h>
34
35#include <dns/acl.h>
36#include <dns/adb.h>
37#include <dns/cache.h>
38#include <dns/db.h>
39#include <dns/dispatch.h>
40#include <dns/ds.h>
41#include <dns/events.h>
42#include <dns/forward.h>
43#include <dns/keytable.h>
44#include <dns/log.h>
45#include <dns/message.h>
46#include <dns/ncache.h>
47#include <dns/nsec.h>
48#include <dns/nsec3.h>
49#include <dns/opcode.h>
50#include <dns/peer.h>
51#include <dns/rbt.h>
52#include <dns/rcode.h>
53#include <dns/rdata.h>
54#include <dns/rdataclass.h>
55#include <dns/rdatalist.h>
56#include <dns/rdataset.h>
57#include <dns/rdatastruct.h>
58#include <dns/rdatatype.h>
59#include <dns/resolver.h>
60#include <dns/result.h>
61#include <dns/rootns.h>
62#include <dns/stats.h>
63#include <dns/tsig.h>
64#include <dns/validator.h>
65
/*
 * Resolver trace logging.
 *
 * Each macro writes one ISC_LOG_DEBUG(3) message to the resolver log
 * category and module.  Apart from RRTRACE, which takes the resolver as
 * an argument, the macros pick their subject up from a variable in the
 * caller's scope: "res" (RTRACE), "fctx" (FCTXTRACE, FCTXTRACE2),
 * "fetch" (FTRACE) or "query" (QTRACE).  When DNS_RESOLVER_TRACE is not
 * defined they all expand to nothing.
 */
#define DNS_RESOLVER_TRACE
#ifdef DNS_RESOLVER_TRACE

/* Leading isc_log_write() arguments shared by every trace macro. */
#define RESTRACE_DEST \
	dns_lctx, DNS_LOGCATEGORY_RESOLVER, DNS_LOGMODULE_RESOLVER, \
	ISC_LOG_DEBUG(3)

#define RTRACE(m) \
	isc_log_write(RESTRACE_DEST, "res %p: %s", res, (m))
#define RRTRACE(r, m) \
	isc_log_write(RESTRACE_DEST, "res %p: %s", (r), (m))
#define FCTXTRACE(m) \
	isc_log_write(RESTRACE_DEST, "fctx %p(%s): %s", \
		      fctx, fctx->info, (m))
#define FCTXTRACE2(m1, m2) \
	isc_log_write(RESTRACE_DEST, "fctx %p(%s): %s %s", \
		      fctx, fctx->info, (m1), (m2))
#define FTRACE(m) \
	isc_log_write(RESTRACE_DEST, "fetch %p (fctx %p(%s)): %s", \
		      fetch, fetch->private, fetch->private->info, (m))
#define QTRACE(m) \
	isc_log_write(RESTRACE_DEST, "resquery %p (fctx %p(%s)): %s", \
		      query, query->fctx, query->fctx->info, (m))

#else /* !DNS_RESOLVER_TRACE */

#define RTRACE(m)
#define RRTRACE(r, m)
#define FCTXTRACE(m)
#define FCTXTRACE2(m1, m2)
#define FTRACE(m)
#define QTRACE(m)

#endif /* DNS_RESOLVER_TRACE */
112
/* Microseconds per second. */
#define US_PER_SEC 1000000U

/*
 * The longest we will wait for a single query, in seconds and in
 * microseconds.
 */
#define MAX_SINGLE_QUERY_TIMEOUT 9U
#define MAX_SINGLE_QUERY_TIMEOUT_US (MAX_SINGLE_QUERY_TIMEOUT * US_PER_SEC)

/*
 * The whole-query timeout must leave room for an individual query to
 * complete or time out.
 */
#define MINIMUM_QUERY_TIMEOUT (MAX_SINGLE_QUERY_TIMEOUT + 1U)

/* Default lifetime of the whole query, in seconds. */
#ifndef DEFAULT_QUERY_TIMEOUT
#define DEFAULT_QUERY_TIMEOUT MINIMUM_QUERY_TIMEOUT
#endif

/* Maximum lifetime of the whole query, in seconds. */
#ifndef MAXIMUM_QUERY_TIMEOUT
#define MAXIMUM_QUERY_TIMEOUT 30
#endif

/* Default maximum number of recursions to follow before giving up. */
#ifndef DEFAULT_RECURSION_DEPTH
#define DEFAULT_RECURSION_DEPTH 7
#endif

/* Default maximum number of iterative queries to allow before giving up. */
#ifndef DEFAULT_MAX_QUERIES
#define DEFAULT_MAX_QUERIES 50
#endif

/*%
 * Maximum EDNS0 input packet size.
 */
#define RECV_BUFFER_SIZE 4096 /* XXXRTH  Constant. */
#define EDNSOPTS 2

/*%
 * The maximum number of timeouts we will permit before we disable
 * EDNS0 on the query.
 */
#define MAX_EDNS0_TIMEOUTS 3
155
156typedef struct fetchctx fetchctx_t;
157
158typedef struct query {
159	/* Locked by task event serialization. */
160	unsigned int			magic;
161	fetchctx_t *			fctx;
162	isc_mem_t *			mctx;
163	dns_dispatchmgr_t *		dispatchmgr;
164	dns_dispatch_t *		dispatch;
165	isc_boolean_t			exclusivesocket;
166	dns_adbaddrinfo_t *		addrinfo;
167	isc_socket_t *			tcpsocket;
168	isc_time_t			start;
169	dns_messageid_t			id;
170	dns_dispentry_t *		dispentry;
171	ISC_LINK(struct query)		link;
172	isc_buffer_t			buffer;
173	isc_buffer_t			*tsig;
174	dns_tsigkey_t			*tsigkey;
175	isc_socketevent_t		sendevent;
176	unsigned int			options;
177	unsigned int			attributes;
178	unsigned int			sends;
179	unsigned int			connects;
180	unsigned char			data[512];
181} resquery_t;
182
/* Magic number identifying a resquery_t, and its validity check. */
#define QUERY_MAGIC ISC_MAGIC('Q', '!', '!', '!')
#define VALID_QUERY(query) ISC_MAGIC_VALID(query, QUERY_MAGIC)

/* Bit in resquery_t.attributes: the query has been canceled. */
#define RESQUERY_ATTR_CANCELED 0x02

/* State predicates on a resquery_t pointer. */
#define RESQUERY_CONNECTING(q) ((q)->connects > 0)
#define RESQUERY_SENDING(q) ((q)->sends > 0)
#define RESQUERY_CANCELED(q) \
	(((q)->attributes & RESQUERY_ATTR_CANCELED) != 0)
192
/* Life-cycle state of a fetch context. */
typedef enum {
	/*% Start event has not run yet. */
	fetchstate_init = 0,
	fetchstate_active = 1,
	/*% FETCHDONE events posted. */
	fetchstate_done = 2
} fetchstate;
198
/* Classification passed to add_bad() as its "badtype" argument. */
typedef enum {
	badns_unreachable = 0,
	badns_response = 1,
	badns_validation = 2
} badnstype_t;
204
205struct fetchctx {
206	/*% Not locked. */
207	unsigned int			magic;
208	dns_resolver_t *		res;
209	dns_name_t			name;
210	dns_rdatatype_t			type;
211	unsigned int			options;
212	unsigned int			bucketnum;
213	char *				info;
214	isc_mem_t *			mctx;
215
216	/*% Locked by appropriate bucket lock. */
217	fetchstate			state;
218	isc_boolean_t			want_shutdown;
219	isc_boolean_t			cloned;
220	isc_boolean_t			spilled;
221	unsigned int			references;
222	isc_event_t			control_event;
223	ISC_LINK(struct fetchctx)       link;
224	ISC_LIST(dns_fetchevent_t)      events;
225	/*% Locked by task event serialization. */
226	dns_name_t			domain;
227	dns_rdataset_t			nameservers;
228	unsigned int			attributes;
229	isc_timer_t *			timer;
230	isc_time_t			expires;
231	isc_interval_t			interval;
232	dns_message_t *			qmessage;
233	dns_message_t *			rmessage;
234	ISC_LIST(resquery_t)		queries;
235	dns_adbfindlist_t		finds;
236	dns_adbfind_t *			find;
237	dns_adbfindlist_t		altfinds;
238	dns_adbfind_t *			altfind;
239	dns_adbaddrinfolist_t		forwaddrs;
240	dns_adbaddrinfolist_t		altaddrs;
241	isc_sockaddrlist_t		forwarders;
242	dns_fwdpolicy_t			fwdpolicy;
243	isc_sockaddrlist_t		bad;
244	isc_sockaddrlist_t		edns;
245	isc_sockaddrlist_t		edns512;
246	isc_sockaddrlist_t		bad_edns;
247	dns_validator_t			*validator;
248	ISC_LIST(dns_validator_t)       validators;
249	dns_db_t *			cache;
250	dns_adb_t *			adb;
251	isc_boolean_t			ns_ttl_ok;
252	isc_uint32_t			ns_ttl;
253
254	/*%
255	 * The number of events we're waiting for.
256	 */
257	unsigned int			pending;
258
259	/*%
260	 * The number of times we've "restarted" the current
261	 * nameserver set.  This acts as a failsafe to prevent
262	 * us from pounding constantly on a particular set of
263	 * servers that, for whatever reason, are not giving
264	 * us useful responses, but are responding in such a
265	 * way that they are not marked "bad".
266	 */
267	unsigned int			restarts;
268
269	/*%
270	 * The number of timeouts that have occurred since we
271	 * last successfully received a response packet.  This
272	 * is used for EDNS0 black hole detection.
273	 */
274	unsigned int			timeouts;
275
276	/*%
277	 * Look aside state for DS lookups.
278	 */
279	dns_name_t 			nsname;
280	dns_fetch_t *			nsfetch;
281	dns_rdataset_t			nsrrset;
282
283	/*%
284	 * Number of queries that reference this context.
285	 */
286	unsigned int			nqueries;
287
288	/*%
289	 * The reason to print when logging a successful
290	 * response to a query.
291	 */
292	const char *			reason;
293
294	/*%
295	 * Random numbers to use for mixing up server addresses.
296	 */
297	isc_uint32_t                    rand_buf;
298	isc_uint32_t                    rand_bits;
299
300	/*%
301	 * Fetch-local statistics for detailed logging.
302	 */
303	isc_result_t			result; /*%< fetch result  */
304	isc_result_t			vresult; /*%< validation result  */
305	int				exitline;
306	isc_time_t			start;
307	isc_uint64_t			duration;
308	isc_boolean_t			logged;
309	unsigned int			querysent;
310	unsigned int			totalqueries;
311	unsigned int			referrals;
312	unsigned int			lamecount;
313	unsigned int			neterr;
314	unsigned int			badresp;
315	unsigned int			adberr;
316	unsigned int			findfail;
317	unsigned int			valfail;
318	isc_boolean_t			timeout;
319	dns_adbaddrinfo_t 		*addrinfo;
320	isc_sockaddr_t			*client;
321	unsigned int			depth;
322};
323
/* Magic number identifying a fetchctx_t, and its validity check. */
#define FCTX_MAGIC ISC_MAGIC('F', '!', '!', '!')
#define VALID_FCTX(fctx) ISC_MAGIC_VALID(fctx, FCTX_MAGIC)

/* Bits stored in fetchctx_t.attributes. */
#define FCTX_ATTR_HAVEANSWER 0x0001
#define FCTX_ATTR_GLUING 0x0002
#define FCTX_ATTR_ADDRWAIT 0x0004
#define FCTX_ATTR_SHUTTINGDOWN 0x0008
#define FCTX_ATTR_WANTCACHE 0x0010
#define FCTX_ATTR_WANTNCACHE 0x0020
#define FCTX_ATTR_NEEDEDNS0 0x0040
#define FCTX_ATTR_TRIEDFIND 0x0080
#define FCTX_ATTR_TRIEDALT 0x0100

/* Nonzero iff attribute bit "a" is set in fetch context "f". */
#define FCTX_ATTR_ISSET(f, a) (((f)->attributes & (a)) != 0)

/* One predicate per attribute bit; each takes a fetchctx_t pointer. */
#define HAVE_ANSWER(f) FCTX_ATTR_ISSET(f, FCTX_ATTR_HAVEANSWER)
#define GLUING(f) FCTX_ATTR_ISSET(f, FCTX_ATTR_GLUING)
#define ADDRWAIT(f) FCTX_ATTR_ISSET(f, FCTX_ATTR_ADDRWAIT)
#define SHUTTINGDOWN(f) FCTX_ATTR_ISSET(f, FCTX_ATTR_SHUTTINGDOWN)
#define WANTCACHE(f) FCTX_ATTR_ISSET(f, FCTX_ATTR_WANTCACHE)
#define WANTNCACHE(f) FCTX_ATTR_ISSET(f, FCTX_ATTR_WANTNCACHE)
#define NEEDEDNS0(f) FCTX_ATTR_ISSET(f, FCTX_ATTR_NEEDEDNS0)
#define TRIEDFIND(f) FCTX_ATTR_ISSET(f, FCTX_ATTR_TRIEDFIND)
#define TRIEDALT(f) FCTX_ATTR_ISSET(f, FCTX_ATTR_TRIEDALT)
350
351typedef struct {
352	dns_adbaddrinfo_t *		addrinfo;
353	fetchctx_t *			fctx;
354} dns_valarg_t;
355
356struct dns_fetch {
357	unsigned int			magic;
358	fetchctx_t *			private;
359};
360
361#define DNS_FETCH_MAGIC			ISC_MAGIC('F', 't', 'c', 'h')
362#define DNS_FETCH_VALID(fetch)		ISC_MAGIC_VALID(fetch, DNS_FETCH_MAGIC)
363
364typedef struct fctxbucket {
365	isc_task_t *			task;
366	isc_mutex_t			lock;
367	ISC_LIST(fetchctx_t)		fctxs;
368	isc_boolean_t			exiting;
369	isc_mem_t *			mctx;
370} fctxbucket_t;
371
372typedef struct alternate {
373	isc_boolean_t			isaddress;
374	union   {
375		isc_sockaddr_t		addr;
376		struct {
377			dns_name_t      name;
378			in_port_t       port;
379		} _n;
380	} _u;
381	ISC_LINK(struct alternate)      link;
382} alternate_t;
383
384typedef struct dns_badcache dns_badcache_t;
385struct dns_badcache {
386	dns_badcache_t *	next;
387	dns_rdatatype_t 	type;
388	isc_time_t		expire;
389	unsigned int		hashval;
390	dns_name_t		name;
391};
392#define DNS_BADCACHE_SIZE 1021
393#define DNS_BADCACHE_TTL(fctx) \
394	(((fctx)->res->lame_ttl > 30 ) ? (fctx)->res->lame_ttl : 30)
395
396struct dns_resolver {
397	/* Unlocked. */
398	unsigned int			magic;
399	isc_mem_t *			mctx;
400	isc_mutex_t			lock;
401	isc_mutex_t			nlock;
402	isc_mutex_t			primelock;
403	dns_rdataclass_t		rdclass;
404	isc_socketmgr_t *		socketmgr;
405	isc_timermgr_t *		timermgr;
406	isc_taskmgr_t *			taskmgr;
407	dns_view_t *			view;
408	isc_boolean_t			frozen;
409	unsigned int			options;
410	dns_dispatchmgr_t *		dispatchmgr;
411	dns_dispatchset_t *		dispatches4;
412	isc_boolean_t			exclusivev4;
413	dns_dispatchset_t *		dispatches6;
414	isc_boolean_t			exclusivev6;
415	unsigned int			nbuckets;
416	fctxbucket_t *			buckets;
417	isc_uint32_t			lame_ttl;
418	ISC_LIST(alternate_t)		alternates;
419	isc_uint16_t			udpsize;
420#if USE_ALGLOCK
421	isc_rwlock_t			alglock;
422#endif
423	dns_rbt_t *			algorithms;
424#if USE_MBSLOCK
425	isc_rwlock_t			mbslock;
426#endif
427	dns_rbt_t *			mustbesecure;
428	unsigned int			spillatmax;
429	unsigned int			spillatmin;
430	isc_timer_t *			spillattimer;
431	isc_boolean_t			zero_no_soa_ttl;
432	unsigned int			query_timeout;
433	unsigned int			maxdepth;
434
435	/* Locked by lock. */
436	unsigned int			references;
437	isc_boolean_t			exiting;
438	isc_eventlist_t			whenshutdown;
439	unsigned int			activebuckets;
440	isc_boolean_t			priming;
441	unsigned int			spillat;	/* clients-per-query */
442
443	/* Bad cache. */
444	dns_badcache_t  ** 		badcache;
445	unsigned int 			badcount;
446	unsigned int 			badhash;
447	unsigned int 			badsweep;
448
449	/* Locked by primelock. */
450	dns_fetch_t *			primefetch;
451	/* Locked by nlock. */
452	unsigned int			nfctx;
453};
454
455#define RES_MAGIC			ISC_MAGIC('R', 'e', 's', '!')
456#define VALID_RESOLVER(res)		ISC_MAGIC_VALID(res, RES_MAGIC)
457
/*%
 * Private addrinfo flags.  DNS_FETCHOPT_NOEDNS0 is also used as an
 * addrinfo flag, so these values must not conflict with it.
 */
#define FCTX_ADDRINFO_MARK 0x0001
#define FCTX_ADDRINFO_FORWARDER 0x1000
#define FCTX_ADDRINFO_TRIED 0x2000

/* Flag tests on an addrinfo pointer. */
#define UNMARKED(a) (((a)->flags & FCTX_ADDRINFO_MARK) == 0)
#define ISFORWARDER(a) (((a)->flags & FCTX_ADDRINFO_FORWARDER) != 0)
#define TRIED(a) (((a)->flags & FCTX_ADDRINFO_TRIED) != 0)

/* Attribute tests on an rdataset pointer. */
#define NXDOMAIN(r) (((r)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
#define NEGATIVE(r) (((r)->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
474
475static void destroy(dns_resolver_t *res);
476static void empty_bucket(dns_resolver_t *res);
477static isc_result_t resquery_send(resquery_t *query);
478static void resquery_response(isc_task_t *task, isc_event_t *event);
479static void resquery_connected(isc_task_t *task, isc_event_t *event);
480static void fctx_try(fetchctx_t *fctx, isc_boolean_t retrying,
481		     isc_boolean_t badcache);
482static void fctx_destroy(fetchctx_t *fctx);
483static isc_boolean_t fctx_unlink(fetchctx_t *fctx);
484static isc_result_t ncache_adderesult(dns_message_t *message,
485				      dns_db_t *cache, dns_dbnode_t *node,
486				      dns_rdatatype_t covers,
487				      isc_stdtime_t now, dns_ttl_t maxttl,
488				      isc_boolean_t optout,
489				      isc_boolean_t secure,
490				      dns_rdataset_t *ardataset,
491				      isc_result_t *eresultp);
492static void validated(isc_task_t *task, isc_event_t *event);
493static isc_boolean_t maybe_destroy(fetchctx_t *fctx, isc_boolean_t locked);
494static void add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
495		    isc_result_t reason, badnstype_t badtype);
496static inline isc_result_t findnoqname(fetchctx_t *fctx, dns_name_t *name,
497				       dns_rdatatype_t type,
498				       dns_name_t **noqname);
499
500/*%
501 * Increment resolver-related statistics counters.
502 */
503static inline void
504inc_stats(dns_resolver_t *res, isc_statscounter_t counter) {
505	if (res->view->resstats != NULL)
506		isc_stats_increment(res->view->resstats, counter);
507}
508
509static isc_result_t
510valcreate(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, dns_name_t *name,
511	  dns_rdatatype_t type, dns_rdataset_t *rdataset,
512	  dns_rdataset_t *sigrdataset, unsigned int valoptions,
513	  isc_task_t *task)
514{
515	dns_validator_t *validator = NULL;
516	dns_valarg_t *valarg;
517	isc_result_t result;
518
519	valarg = isc_mem_get(fctx->mctx, sizeof(*valarg));
520	if (valarg == NULL)
521		return (ISC_R_NOMEMORY);
522
523	valarg->fctx = fctx;
524	valarg->addrinfo = addrinfo;
525
526	if (!ISC_LIST_EMPTY(fctx->validators))
527		INSIST((valoptions & DNS_VALIDATOR_DEFER) != 0);
528
529	result = dns_validator_create(fctx->res->view, name, type, rdataset,
530				      sigrdataset, fctx->rmessage,
531				      valoptions, task, validated, valarg,
532				      &validator);
533	if (result == ISC_R_SUCCESS) {
534		inc_stats(fctx->res, dns_resstatscounter_val);
535		if ((valoptions & DNS_VALIDATOR_DEFER) == 0) {
536			INSIST(fctx->validator == NULL);
537			fctx->validator = validator;
538		}
539		ISC_LIST_APPEND(fctx->validators, validator, link);
540	} else
541		isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
542	return (result);
543}
544
545static isc_boolean_t
546rrsig_fromchildzone(fetchctx_t *fctx, dns_rdataset_t *rdataset) {
547	dns_namereln_t namereln;
548	dns_rdata_rrsig_t rrsig;
549	dns_rdata_t rdata = DNS_RDATA_INIT;
550	int order;
551	isc_result_t result;
552	unsigned int labels;
553
554	for (result = dns_rdataset_first(rdataset);
555	     result == ISC_R_SUCCESS;
556	     result = dns_rdataset_next(rdataset)) {
557		dns_rdataset_current(rdataset, &rdata);
558		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
559		RUNTIME_CHECK(result == ISC_R_SUCCESS);
560		namereln = dns_name_fullcompare(&rrsig.signer, &fctx->domain,
561						&order, &labels);
562		if (namereln == dns_namereln_subdomain)
563			return (ISC_TRUE);
564		dns_rdata_reset(&rdata);
565	}
566	return (ISC_FALSE);
567}
568
569static isc_boolean_t
570fix_mustbedelegationornxdomain(dns_message_t *message, fetchctx_t *fctx) {
571	dns_name_t *name;
572	dns_name_t *domain = &fctx->domain;
573	dns_rdataset_t *rdataset;
574	dns_rdatatype_t type;
575	isc_result_t result;
576	isc_boolean_t keep_auth = ISC_FALSE;
577
578	if (message->rcode == dns_rcode_nxdomain)
579		return (ISC_FALSE);
580
581	/*
582	 * A DS RRset can appear anywhere in a zone, even for a delegation-only
583	 * zone.  So a response to an explicit query for this type should be
584	 * excluded from delegation-only fixup.
585	 *
586	 * SOA, NS, and DNSKEY can only exist at a zone apex, so a postive
587	 * response to a query for these types can never violate the
588	 * delegation-only assumption: if the query name is below a
589	 * zone cut, the response should normally be a referral, which should
590	 * be accepted; if the query name is below a zone cut but the server
591	 * happens to have authority for the zone of the query name, the
592	 * response is a (non-referral) answer.  But this does not violate
593	 * delegation-only because the query name must be in a different zone
594	 * due to the "apex-only" nature of these types.  Note that if the
595	 * remote server happens to have authority for a child zone of a
596	 * delegation-only zone, we may still incorrectly "fix" the response
597	 * with NXDOMAIN for queries for other types.  Unfortunately it's
598	 * generally impossible to differentiate this case from violation of
599	 * the delegation-only assumption.  Once the resolver learns the
600	 * correct zone cut, possibly via a separate query for an "apex-only"
601	 * type, queries for other types will be resolved correctly.
602	 *
603	 * A query for type ANY will be accepted if it hits an exceptional
604	 * type above in the answer section as it should be from a child
605	 * zone.
606	 *
607	 * Also accept answers with RRSIG records from the child zone.
608	 * Direct queries for RRSIG records should not be answered from
609	 * the parent zone.
610	 */
611
612	if (message->counts[DNS_SECTION_ANSWER] != 0 &&
613	    (fctx->type == dns_rdatatype_ns ||
614	     fctx->type == dns_rdatatype_ds ||
615	     fctx->type == dns_rdatatype_soa ||
616	     fctx->type == dns_rdatatype_any ||
617	     fctx->type == dns_rdatatype_rrsig ||
618	     fctx->type == dns_rdatatype_dnskey)) {
619		result = dns_message_firstname(message, DNS_SECTION_ANSWER);
620		while (result == ISC_R_SUCCESS) {
621			name = NULL;
622			dns_message_currentname(message, DNS_SECTION_ANSWER,
623						&name);
624			for (rdataset = ISC_LIST_HEAD(name->list);
625			     rdataset != NULL;
626			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
627				if (!dns_name_equal(name, &fctx->name))
628					continue;
629				type = rdataset->type;
630				/*
631				 * RRsig from child?
632				 */
633				if (type == dns_rdatatype_rrsig &&
634				    rrsig_fromchildzone(fctx, rdataset))
635					return (ISC_FALSE);
636				/*
637				 * Direct query for apex records or DS.
638				 */
639				if (fctx->type == type &&
640				    (type == dns_rdatatype_ds ||
641				     type == dns_rdatatype_ns ||
642				     type == dns_rdatatype_soa ||
643				     type == dns_rdatatype_dnskey))
644					return (ISC_FALSE);
645				/*
646				 * Indirect query for apex records or DS.
647				 */
648				if (fctx->type == dns_rdatatype_any &&
649				    (type == dns_rdatatype_ns ||
650				     type == dns_rdatatype_ds ||
651				     type == dns_rdatatype_soa ||
652				     type == dns_rdatatype_dnskey))
653					return (ISC_FALSE);
654			}
655			result = dns_message_nextname(message,
656						      DNS_SECTION_ANSWER);
657		}
658	}
659
660	/*
661	 * A NODATA response to a DS query?
662	 */
663	if (fctx->type == dns_rdatatype_ds &&
664	    message->counts[DNS_SECTION_ANSWER] == 0)
665		return (ISC_FALSE);
666
667	/* Look for referral or indication of answer from child zone? */
668	if (message->counts[DNS_SECTION_AUTHORITY] == 0)
669		goto munge;
670
671	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
672	while (result == ISC_R_SUCCESS) {
673		name = NULL;
674		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
675		for (rdataset = ISC_LIST_HEAD(name->list);
676		     rdataset != NULL;
677		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
678			type = rdataset->type;
679			if (type == dns_rdatatype_soa &&
680			    dns_name_equal(name, domain))
681				keep_auth = ISC_TRUE;
682
683			if (type != dns_rdatatype_ns &&
684			    type != dns_rdatatype_soa &&
685			    type != dns_rdatatype_rrsig)
686				continue;
687
688			if (type == dns_rdatatype_rrsig) {
689				if (rrsig_fromchildzone(fctx, rdataset))
690					return (ISC_FALSE);
691				else
692					continue;
693			}
694
695			/* NS or SOA records. */
696			if (dns_name_equal(name, domain)) {
697				/*
698				 * If a query for ANY causes a negative
699				 * response, we can be sure that this is
700				 * an empty node.  For other type of queries
701				 * we cannot differentiate an empty node
702				 * from a node that just doesn't have that
703				 * type of record.  We only accept the former
704				 * case.
705				 */
706				if (message->counts[DNS_SECTION_ANSWER] == 0 &&
707				    fctx->type == dns_rdatatype_any)
708					return (ISC_FALSE);
709			} else if (dns_name_issubdomain(name, domain)) {
710				/* Referral or answer from child zone. */
711				return (ISC_FALSE);
712			}
713		}
714		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
715	}
716
717 munge:
718	message->rcode = dns_rcode_nxdomain;
719	message->counts[DNS_SECTION_ANSWER] = 0;
720	if (!keep_auth)
721		message->counts[DNS_SECTION_AUTHORITY] = 0;
722	message->counts[DNS_SECTION_ADDITIONAL] = 0;
723	return (ISC_TRUE);
724}
725
726static inline isc_result_t
727fctx_starttimer(fetchctx_t *fctx) {
728	/*
729	 * Start the lifetime timer for fctx.
730	 *
731	 * This is also used for stopping the idle timer; in that
732	 * case we must purge events already posted to ensure that
733	 * no further idle events are delivered.
734	 */
735	return (isc_timer_reset(fctx->timer, isc_timertype_once,
736				&fctx->expires, NULL, ISC_TRUE));
737}
738
739static inline void
740fctx_stoptimer(fetchctx_t *fctx) {
741	isc_result_t result;
742
743	/*
744	 * We don't return a result if resetting the timer to inactive fails
745	 * since there's nothing to be done about it.  Resetting to inactive
746	 * should never fail anyway, since the code as currently written
747	 * cannot fail in that case.
748	 */
749	result = isc_timer_reset(fctx->timer, isc_timertype_inactive,
750				  NULL, NULL, ISC_TRUE);
751	if (result != ISC_R_SUCCESS) {
752		UNEXPECTED_ERROR(__FILE__, __LINE__,
753				 "isc_timer_reset(): %s",
754				 isc_result_totext(result));
755	}
756}
757
758
759static inline isc_result_t
760fctx_startidletimer(fetchctx_t *fctx, isc_interval_t *interval) {
761	/*
762	 * Start the idle timer for fctx.  The lifetime timer continues
763	 * to be in effect.
764	 */
765	return (isc_timer_reset(fctx->timer, isc_timertype_once,
766				&fctx->expires, interval, ISC_FALSE));
767}
768
769/*
770 * Stopping the idle timer is equivalent to calling fctx_starttimer(), but
771 * we use fctx_stopidletimer for readability in the code below.
772 */
773#define fctx_stopidletimer      fctx_starttimer
774
775
776static inline void
777resquery_destroy(resquery_t **queryp) {
778	resquery_t *query;
779
780	REQUIRE(queryp != NULL);
781	query = *queryp;
782	REQUIRE(!ISC_LINK_LINKED(query, link));
783
784	INSIST(query->tcpsocket == NULL);
785
786	query->fctx->nqueries--;
787	if (SHUTTINGDOWN(query->fctx)) {
788		dns_resolver_t *res = query->fctx->res;
789		if (maybe_destroy(query->fctx, ISC_FALSE))
790			empty_bucket(res);
791	}
792	query->magic = 0;
793	isc_mem_put(query->mctx, query, sizeof(*query));
794	*queryp = NULL;
795}
796
797static void
798fctx_cancelquery(resquery_t **queryp, dns_dispatchevent_t **deventp,
799		 isc_time_t *finish, isc_boolean_t no_response)
800{
801	fetchctx_t *fctx;
802	resquery_t *query;
803	unsigned int rtt, rttms;
804	unsigned int factor;
805	dns_adbfind_t *find;
806	dns_adbaddrinfo_t *addrinfo;
807	isc_socket_t *socket;
808
809	query = *queryp;
810	fctx = query->fctx;
811
812	FCTXTRACE("cancelquery");
813
814	REQUIRE(!RESQUERY_CANCELED(query));
815
816	query->attributes |= RESQUERY_ATTR_CANCELED;
817
818	/*
819	 * Should we update the RTT?
820	 */
821	if (finish != NULL || no_response) {
822		if (finish != NULL) {
823			/*
824			 * We have both the start and finish times for this
825			 * packet, so we can compute a real RTT.
826			 */
827			rtt = (unsigned int)isc_time_microdiff(finish,
828							       &query->start);
829			factor = DNS_ADB_RTTADJDEFAULT;
830
831			rttms = rtt / 1000;
832			if (rttms < DNS_RESOLVER_QRYRTTCLASS0) {
833				inc_stats(fctx->res,
834					  dns_resstatscounter_queryrtt0);
835			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS1) {
836				inc_stats(fctx->res,
837					  dns_resstatscounter_queryrtt1);
838			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS2) {
839				inc_stats(fctx->res,
840					  dns_resstatscounter_queryrtt2);
841			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS3) {
842				inc_stats(fctx->res,
843					  dns_resstatscounter_queryrtt3);
844			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS4) {
845				inc_stats(fctx->res,
846					  dns_resstatscounter_queryrtt4);
847			} else {
848				inc_stats(fctx->res,
849					  dns_resstatscounter_queryrtt5);
850			}
851		} else {
852			/*
853			 * We don't have an RTT for this query.  Maybe the
854			 * packet was lost, or maybe this server is very
855			 * slow.  We don't know.  Increase the RTT.
856			 */
857			INSIST(no_response);
858			rtt = query->addrinfo->srtt + 200000;
859			if (rtt > MAX_SINGLE_QUERY_TIMEOUT_US)
860				rtt = MAX_SINGLE_QUERY_TIMEOUT_US;
861			/*
862			 * Replace the current RTT with our value.
863			 */
864			factor = DNS_ADB_RTTADJREPLACE;
865		}
866		dns_adb_adjustsrtt(fctx->adb, query->addrinfo, rtt, factor);
867	}
868
869	/* Remember that the server has been tried. */
870	if (!TRIED(query->addrinfo)) {
871		dns_adb_changeflags(fctx->adb, query->addrinfo,
872				    FCTX_ADDRINFO_TRIED, FCTX_ADDRINFO_TRIED);
873	}
874
875	/*
876	 * Age RTTs of servers not tried.
877	 */
878	factor = DNS_ADB_RTTADJAGE;
879	if (finish != NULL)
880		for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
881		     addrinfo != NULL;
882		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
883			if (UNMARKED(addrinfo))
884				dns_adb_adjustsrtt(fctx->adb, addrinfo,
885						   0, factor);
886
887	if (finish != NULL && TRIEDFIND(fctx))
888		for (find = ISC_LIST_HEAD(fctx->finds);
889		     find != NULL;
890		     find = ISC_LIST_NEXT(find, publink))
891			for (addrinfo = ISC_LIST_HEAD(find->list);
892			     addrinfo != NULL;
893			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
894				if (UNMARKED(addrinfo))
895					dns_adb_adjustsrtt(fctx->adb, addrinfo,
896							   0, factor);
897
898	if (finish != NULL && TRIEDALT(fctx)) {
899		for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
900		     addrinfo != NULL;
901		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
902			if (UNMARKED(addrinfo))
903				dns_adb_adjustsrtt(fctx->adb, addrinfo,
904						   0, factor);
905		for (find = ISC_LIST_HEAD(fctx->altfinds);
906		     find != NULL;
907		     find = ISC_LIST_NEXT(find, publink))
908			for (addrinfo = ISC_LIST_HEAD(find->list);
909			     addrinfo != NULL;
910			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
911				if (UNMARKED(addrinfo))
912					dns_adb_adjustsrtt(fctx->adb, addrinfo,
913							   0, factor);
914	}
915
916	/*
917	 * Check for any outstanding socket events.  If they exist, cancel
918	 * them and let the event handlers finish the cleanup.  The resolver
919	 * only needs to worry about managing the connect and send events;
920	 * the dispatcher manages the recv events.
921	 */
922	if (RESQUERY_CONNECTING(query)) {
923		/*
924		 * Cancel the connect.
925		 */
926		if (query->tcpsocket != NULL) {
927			isc_socket_cancel(query->tcpsocket, NULL,
928					  ISC_SOCKCANCEL_CONNECT);
929		} else if (query->dispentry != NULL) {
930			INSIST(query->exclusivesocket);
931			socket = dns_dispatch_getentrysocket(query->dispentry);
932			if (socket != NULL)
933				isc_socket_cancel(socket, NULL,
934						  ISC_SOCKCANCEL_CONNECT);
935		}
936	} else if (RESQUERY_SENDING(query)) {
937		/*
938		 * Cancel the pending send.
939		 */
940		if (query->exclusivesocket && query->dispentry != NULL)
941			socket = dns_dispatch_getentrysocket(query->dispentry);
942		else
943			socket = dns_dispatch_getsocket(query->dispatch);
944		if (socket != NULL)
945			isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_SEND);
946	}
947
948	if (query->dispentry != NULL)
949		dns_dispatch_removeresponse(&query->dispentry, deventp);
950
951	ISC_LIST_UNLINK(fctx->queries, query, link);
952
953	if (query->tsig != NULL)
954		isc_buffer_free(&query->tsig);
955
956	if (query->tsigkey != NULL)
957		dns_tsigkey_detach(&query->tsigkey);
958
959	if (query->dispatch != NULL)
960		dns_dispatch_detach(&query->dispatch);
961
962	if (! (RESQUERY_CONNECTING(query) || RESQUERY_SENDING(query)))
963		/*
964		 * It's safe to destroy the query now.
965		 */
966		resquery_destroy(&query);
967}
968
969static void
970fctx_cancelqueries(fetchctx_t *fctx, isc_boolean_t no_response) {
971	resquery_t *query, *next_query;
972
973	FCTXTRACE("cancelqueries");
974
975	for (query = ISC_LIST_HEAD(fctx->queries);
976	     query != NULL;
977	     query = next_query) {
978		next_query = ISC_LIST_NEXT(query, link);
979		fctx_cancelquery(&query, NULL, NULL, no_response);
980	}
981}
982
983static void
984fctx_cleanupfinds(fetchctx_t *fctx) {
985	dns_adbfind_t *find, *next_find;
986
987	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
988
989	for (find = ISC_LIST_HEAD(fctx->finds);
990	     find != NULL;
991	     find = next_find) {
992		next_find = ISC_LIST_NEXT(find, publink);
993		ISC_LIST_UNLINK(fctx->finds, find, publink);
994		dns_adb_destroyfind(&find);
995	}
996	fctx->find = NULL;
997}
998
999static void
1000fctx_cleanupaltfinds(fetchctx_t *fctx) {
1001	dns_adbfind_t *find, *next_find;
1002
1003	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1004
1005	for (find = ISC_LIST_HEAD(fctx->altfinds);
1006	     find != NULL;
1007	     find = next_find) {
1008		next_find = ISC_LIST_NEXT(find, publink);
1009		ISC_LIST_UNLINK(fctx->altfinds, find, publink);
1010		dns_adb_destroyfind(&find);
1011	}
1012	fctx->altfind = NULL;
1013}
1014
1015static void
1016fctx_cleanupforwaddrs(fetchctx_t *fctx) {
1017	dns_adbaddrinfo_t *addr, *next_addr;
1018
1019	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1020
1021	for (addr = ISC_LIST_HEAD(fctx->forwaddrs);
1022	     addr != NULL;
1023	     addr = next_addr) {
1024		next_addr = ISC_LIST_NEXT(addr, publink);
1025		ISC_LIST_UNLINK(fctx->forwaddrs, addr, publink);
1026		dns_adb_freeaddrinfo(fctx->adb, &addr);
1027	}
1028}
1029
1030static void
1031fctx_cleanupaltaddrs(fetchctx_t *fctx) {
1032	dns_adbaddrinfo_t *addr, *next_addr;
1033
1034	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1035
1036	for (addr = ISC_LIST_HEAD(fctx->altaddrs);
1037	     addr != NULL;
1038	     addr = next_addr) {
1039		next_addr = ISC_LIST_NEXT(addr, publink);
1040		ISC_LIST_UNLINK(fctx->altaddrs, addr, publink);
1041		dns_adb_freeaddrinfo(fctx->adb, &addr);
1042	}
1043}
1044
1045static inline void
1046fctx_stopeverything(fetchctx_t *fctx, isc_boolean_t no_response) {
1047	FCTXTRACE("stopeverything");
1048	fctx_cancelqueries(fctx, no_response);
1049	fctx_cleanupfinds(fctx);
1050	fctx_cleanupaltfinds(fctx);
1051	fctx_cleanupforwaddrs(fctx);
1052	fctx_cleanupaltaddrs(fctx);
1053	fctx_stoptimer(fctx);
1054}
1055
1056static inline void
1057fctx_sendevents(fetchctx_t *fctx, isc_result_t result, int line) {
1058	dns_fetchevent_t *event, *next_event;
1059	isc_task_t *task;
1060	unsigned int count = 0;
1061	isc_interval_t i;
1062	isc_boolean_t logit = ISC_FALSE;
1063	isc_time_t now;
1064	unsigned int old_spillat;
1065	unsigned int new_spillat = 0;	/* initialized to silence
1066					   compiler warnings */
1067
1068	/*
1069	 * Caller must be holding the appropriate bucket lock.
1070	 */
1071	REQUIRE(fctx->state == fetchstate_done);
1072
1073	FCTXTRACE("sendevents");
1074
1075	/*
1076	 * Keep some record of fetch result for logging later (if required).
1077	 */
1078	fctx->result = result;
1079	fctx->exitline = line;
1080	TIME_NOW(&now);
1081	fctx->duration = isc_time_microdiff(&now, &fctx->start);
1082
1083	for (event = ISC_LIST_HEAD(fctx->events);
1084	     event != NULL;
1085	     event = next_event) {
1086		next_event = ISC_LIST_NEXT(event, ev_link);
1087		ISC_LIST_UNLINK(fctx->events, event, ev_link);
1088		task = event->ev_sender;
1089		event->ev_sender = fctx;
1090		event->vresult = fctx->vresult;
1091		if (!HAVE_ANSWER(fctx))
1092			event->result = result;
1093
1094		INSIST(result != ISC_R_SUCCESS ||
1095		       dns_rdataset_isassociated(event->rdataset) ||
1096		       fctx->type == dns_rdatatype_any ||
1097		       fctx->type == dns_rdatatype_rrsig ||
1098		       fctx->type == dns_rdatatype_sig);
1099
1100		/*
1101		 * Negative results must be indicated in event->result.
1102		 */
1103		if (dns_rdataset_isassociated(event->rdataset) &&
1104		    NEGATIVE(event->rdataset)) {
1105			INSIST(event->result == DNS_R_NCACHENXDOMAIN ||
1106			       event->result == DNS_R_NCACHENXRRSET);
1107		}
1108
1109		event->qtotal = fctx->totalqueries;
1110		isc_task_sendanddetach(&task, ISC_EVENT_PTR(&event));
1111		count++;
1112	}
1113
1114	if ((fctx->attributes & FCTX_ATTR_HAVEANSWER) != 0 &&
1115	    fctx->spilled &&
1116	    (count < fctx->res->spillatmax || fctx->res->spillatmax == 0)) {
1117		LOCK(&fctx->res->lock);
1118		if (count == fctx->res->spillat && !fctx->res->exiting) {
1119			old_spillat = fctx->res->spillat;
1120			fctx->res->spillat += 5;
1121			if (fctx->res->spillat > fctx->res->spillatmax &&
1122			    fctx->res->spillatmax != 0)
1123				fctx->res->spillat = fctx->res->spillatmax;
1124			new_spillat = fctx->res->spillat;
1125			if (new_spillat != old_spillat) {
1126				logit = ISC_TRUE;
1127			}
1128			isc_interval_set(&i, 20 * 60, 0);
1129			result = isc_timer_reset(fctx->res->spillattimer,
1130						 isc_timertype_ticker, NULL,
1131						 &i, ISC_TRUE);
1132			RUNTIME_CHECK(result == ISC_R_SUCCESS);
1133		}
1134		UNLOCK(&fctx->res->lock);
1135		if (logit)
1136			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
1137				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
1138				      "clients-per-query increased to %u",
1139				      new_spillat);
1140	}
1141}
1142
1143static inline void
1144log_edns(fetchctx_t *fctx) {
1145	char domainbuf[DNS_NAME_FORMATSIZE];
1146
1147	if (fctx->reason == NULL)
1148		return;
1149
1150	/*
1151	 * We do not know if fctx->domain is the actual domain the record
1152	 * lives in or a parent domain so we have a '?' after it.
1153	 */
1154	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
1155	isc_log_write(dns_lctx, DNS_LOGCATEGORY_EDNS_DISABLED,
1156		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1157		      "success resolving '%s' (in '%s'?) after %s",
1158		      fctx->info, domainbuf, fctx->reason);
1159
1160	fctx->reason = NULL;
1161}
1162
1163static void
1164fctx_done(fetchctx_t *fctx, isc_result_t result, int line) {
1165	dns_resolver_t *res;
1166	isc_boolean_t no_response;
1167
1168	REQUIRE(line >= 0);
1169
1170	FCTXTRACE("done");
1171
1172	res = fctx->res;
1173
1174	if (result == ISC_R_SUCCESS) {
1175		/*%
1176		 * Log any deferred EDNS timeout messages.
1177		 */
1178		log_edns(fctx);
1179		no_response = ISC_TRUE;
1180	 } else
1181		no_response = ISC_FALSE;
1182
1183	fctx->reason = NULL;
1184	fctx_stopeverything(fctx, no_response);
1185
1186	LOCK(&res->buckets[fctx->bucketnum].lock);
1187
1188	fctx->state = fetchstate_done;
1189	fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1190	fctx_sendevents(fctx, result, line);
1191
1192	UNLOCK(&res->buckets[fctx->bucketnum].lock);
1193}
1194
1195static void
1196process_sendevent(resquery_t *query, isc_event_t *event) {
1197	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
1198	isc_boolean_t retry = ISC_FALSE;
1199	isc_result_t result;
1200	fetchctx_t *fctx;
1201
1202	fctx = query->fctx;
1203
1204	if (RESQUERY_CANCELED(query)) {
1205		if (query->sends == 0 && query->connects == 0) {
1206			/*
1207			 * This query was canceled while the
1208			 * isc_socket_sendto/connect() was in progress.
1209			 */
1210			if (query->tcpsocket != NULL)
1211				isc_socket_detach(&query->tcpsocket);
1212			resquery_destroy(&query);
1213		}
1214	} else {
1215		switch (sevent->result) {
1216		case ISC_R_SUCCESS:
1217			break;
1218
1219		case ISC_R_HOSTUNREACH:
1220		case ISC_R_NETUNREACH:
1221		case ISC_R_NOPERM:
1222		case ISC_R_ADDRNOTAVAIL:
1223		case ISC_R_CONNREFUSED:
1224
1225			/*
1226			 * No route to remote.
1227			 */
1228			add_bad(fctx, query->addrinfo, sevent->result,
1229				badns_unreachable);
1230			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
1231			retry = ISC_TRUE;
1232			break;
1233
1234		default:
1235			fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
1236			break;
1237		}
1238	}
1239
1240	if (event->ev_type == ISC_SOCKEVENT_CONNECT)
1241		isc_event_free(&event);
1242
1243	if (retry) {
1244		/*
1245		 * Behave as if the idle timer has expired.  For TCP
1246		 * this may not actually reflect the latest timer.
1247		 */
1248		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1249		result = fctx_stopidletimer(fctx);
1250		if (result != ISC_R_SUCCESS)
1251			fctx_done(fctx, result, __LINE__);
1252		else
1253			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
1254	}
1255}
1256
1257static void
1258resquery_udpconnected(isc_task_t *task, isc_event_t *event) {
1259	resquery_t *query = event->ev_arg;
1260
1261	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
1262
1263	QTRACE("udpconnected");
1264
1265	UNUSED(task);
1266
1267	INSIST(RESQUERY_CONNECTING(query));
1268
1269	query->connects--;
1270
1271	process_sendevent(query, event);
1272}
1273
1274static void
1275resquery_senddone(isc_task_t *task, isc_event_t *event) {
1276	resquery_t *query = event->ev_arg;
1277
1278	REQUIRE(event->ev_type == ISC_SOCKEVENT_SENDDONE);
1279
1280	QTRACE("senddone");
1281
1282	/*
1283	 * XXXRTH
1284	 *
1285	 * Currently we don't wait for the senddone event before retrying
1286	 * a query.  This means that if we get really behind, we may end
1287	 * up doing extra work!
1288	 */
1289
1290	UNUSED(task);
1291
1292	INSIST(RESQUERY_SENDING(query));
1293
1294	query->sends--;
1295
1296	process_sendevent(query, event);
1297}
1298
1299static inline isc_result_t
1300fctx_addopt(dns_message_t *message, unsigned int version,
1301	    isc_uint16_t udpsize, dns_ednsopt_t *ednsopts, size_t count)
1302{
1303	dns_rdataset_t *rdataset = NULL;
1304	isc_result_t result;
1305
1306	result = dns_message_buildopt(message, &rdataset, version, udpsize,
1307				      DNS_MESSAGEEXTFLAG_DO, ednsopts, count);
1308	if (result != ISC_R_SUCCESS)
1309		return (result);
1310	return (dns_message_setopt(message, rdataset));
1311}
1312
1313static inline void
1314fctx_setretryinterval(fetchctx_t *fctx, unsigned int rtt) {
1315	unsigned int seconds;
1316	unsigned int us;
1317
1318	/*
1319	 * We retry every .8 seconds the first two times through the address
1320	 * list, and then we do exponential back-off.
1321	 */
1322	if (fctx->restarts < 3)
1323		us = 800000;
1324	else
1325		us = (800000 << (fctx->restarts - 2));
1326
1327	/*
1328	 * Add a fudge factor to the expected rtt based on the current
1329	 * estimate.
1330	 */
1331	if (rtt < 50000)
1332		rtt += 50000;
1333	else if (rtt < 100000)
1334		rtt += 100000;
1335	else
1336		rtt += 200000;
1337
1338	/*
1339	 * Always wait for at least the expected rtt.
1340	 */
1341	if (us < rtt)
1342		us = rtt;
1343
1344	/*
1345	 * But don't ever wait for more than 10 seconds.
1346	 */
1347	if (us > MAX_SINGLE_QUERY_TIMEOUT_US)
1348		us = MAX_SINGLE_QUERY_TIMEOUT_US;
1349
1350	seconds = us / US_PER_SEC;
1351	us -= seconds * US_PER_SEC;
1352	isc_interval_set(&fctx->interval, seconds, us * 1000);
1353}
1354
/*
 * Start one query for 'fctx' to the server described by 'addrinfo'.
 *
 * The idle timer is armed from the server's smoothed RTT, a resquery_t
 * is allocated and initialized, and then either a TCP connect is started
 * (DNS_FETCHOPT_TCP set in 'options') or the query is sent right away
 * through resquery_send().  On success the query is appended to
 * fctx->queries and the query counters and statistics are bumped.
 *
 * Returns ISC_R_SUCCESS, or the failing result after the cleanup labels
 * at the bottom have undone what was set up (socket, dispatch reference,
 * query memory, idle timer).
 */
1355static isc_result_t
1356fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
1357	   unsigned int options)
1358{
1359	dns_resolver_t *res;
1360	isc_task_t *task;
1361	isc_result_t result;
1362	resquery_t *query;
1363	isc_sockaddr_t addr;
1364	isc_boolean_t have_addr = ISC_FALSE;
1365	unsigned int srtt;
1366
1367	FCTXTRACE("query");
1368
1369	res = fctx->res;
1370	task = res->buckets[fctx->bucketnum].task;
1371
1372	srtt = addrinfo->srtt;
1373
1374	/*
1375	 * A forwarder needs to make multiple queries. Give it at least
1376	 * a second to do these in.
1377	 */
1378	if (ISFORWARDER(addrinfo) && srtt < 1000000)
1379		srtt = 1000000;
1380
1381	fctx_setretryinterval(fctx, srtt);
1382	result = fctx_startidletimer(fctx, &fctx->interval);
1383	if (result != ISC_R_SUCCESS)
1384		return (result);
1385
1386	INSIST(ISC_LIST_EMPTY(fctx->validators));
1387
1388	dns_message_reset(fctx->rmessage, DNS_MESSAGE_INTENTPARSE);
1389
1390	query = isc_mem_get(fctx->mctx, sizeof(*query));
1391	if (query == NULL) {
1392		result = ISC_R_NOMEMORY;
1393		goto stop_idle_timer;
1394	}
1395	query->mctx = fctx->mctx;
1396	query->options = options;
1397	query->attributes = 0;
1398	query->sends = 0;
1399	query->connects = 0;
1400	/*
1401	 * Note that the caller MUST guarantee that 'addrinfo' will remain
1402	 * valid until this query is canceled.
1403	 */
1404	query->addrinfo = addrinfo;
1405	TIME_NOW(&query->start);
1406
1407	/*
1408	 * If this is a TCP query, then we need to make a socket and
1409	 * a dispatch for it here.  Otherwise we use the resolver's
1410	 * shared dispatch.
1411	 */
1412	query->dispatchmgr = res->dispatchmgr;
1413	query->dispatch = NULL;
1414	query->exclusivesocket = ISC_FALSE;
1415	query->tcpsocket = NULL;
	/*
	 * Look for a query-source address configured for this particular
	 * server.  'have_addr' records whether one was found; if so it is
	 * in 'addr'.
	 */
1416	if (res->view->peers != NULL) {
1417		dns_peer_t *peer = NULL;
1418		isc_netaddr_t dstip;
1419		isc_netaddr_fromsockaddr(&dstip, &addrinfo->sockaddr);
1420		result = dns_peerlist_peerbyaddr(res->view->peers,
1421						 &dstip, &peer);
1422		if (result == ISC_R_SUCCESS) {
1423			result = dns_peer_getquerysource(peer, &addr);
1424			if (result == ISC_R_SUCCESS)
1425				have_addr = ISC_TRUE;
1426		}
1427	}
1428
1429	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1430		int pf;
1431
1432		pf = isc_sockaddr_pf(&addrinfo->sockaddr);
		/*
		 * Without a per-server source address, borrow the local
		 * address of the first dispatch of the matching family.
		 */
1433		if (!have_addr) {
1434			switch (pf) {
1435			case PF_INET:
1436				result = dns_dispatch_getlocaladdress(
1437					      res->dispatches4->dispatches[0],
1438					      &addr);
1439				break;
1440			case PF_INET6:
1441				result = dns_dispatch_getlocaladdress(
1442					      res->dispatches6->dispatches[0],
1443					      &addr);
1444				break;
1445			default:
1446				result = ISC_R_NOTIMPLEMENTED;
1447				break;
1448			}
1449			if (result != ISC_R_SUCCESS)
1450				goto cleanup_query;
1451		}
		/* Only the address is reused; the port is reset to 0. */
1452		isc_sockaddr_setport(&addr, 0);
1453
1454		result = isc_socket_create(res->socketmgr, pf,
1455					   isc_sockettype_tcp,
1456					   &query->tcpsocket);
1457		if (result != ISC_R_SUCCESS)
1458			goto cleanup_query;
1459
1460#ifndef BROKEN_TCP_BIND_BEFORE_CONNECT
1461		result = isc_socket_bind(query->tcpsocket, &addr, 0);
1462		if (result != ISC_R_SUCCESS)
1463			goto cleanup_socket;
1464#endif
1465
1466		/*
1467		 * A dispatch will be created once the connect succeeds.
1468		 */
1469	} else {
1470		if (have_addr) {
			/*
			 * A per-server source address is configured: get a
			 * UDP dispatch for exactly that address.
			 */
1471			unsigned int attrs, attrmask;
1472			attrs = DNS_DISPATCHATTR_UDP;
1473			switch (isc_sockaddr_pf(&addr)) {
1474			case AF_INET:
1475				attrs |= DNS_DISPATCHATTR_IPV4;
1476				break;
1477			case AF_INET6:
1478				attrs |= DNS_DISPATCHATTR_IPV6;
1479				break;
1480			default:
1481				result = ISC_R_NOTIMPLEMENTED;
1482				goto cleanup_query;
1483			}
1484			attrmask = DNS_DISPATCHATTR_UDP;
1485			attrmask |= DNS_DISPATCHATTR_TCP;
1486			attrmask |= DNS_DISPATCHATTR_IPV4;
1487			attrmask |= DNS_DISPATCHATTR_IPV6;
1488			result = dns_dispatch_getudp(res->dispatchmgr,
1489						     res->socketmgr,
1490						     res->taskmgr, &addr,
1491						     4096, 1000, 32768, 16411,
1492						     16433, attrs, attrmask,
1493						     &query->dispatch);
1494			if (result != ISC_R_SUCCESS)
1495				goto cleanup_query;
1496		} else {
			/*
			 * Otherwise attach to the resolver's dispatch for
			 * the destination's address family and inherit its
			 * exclusive-socket setting.
			 */
1497			switch (isc_sockaddr_pf(&addrinfo->sockaddr)) {
1498			case PF_INET:
1499				dns_dispatch_attach(
1500				    dns_resolver_dispatchv4(res),
1501				    &query->dispatch);
1502				query->exclusivesocket = res->exclusivev4;
1503				break;
1504			case PF_INET6:
1505				dns_dispatch_attach(
1506				    dns_resolver_dispatchv6(res),
1507				    &query->dispatch);
1508				query->exclusivesocket = res->exclusivev6;
1509				break;
1510			default:
1511				result = ISC_R_NOTIMPLEMENTED;
1512				goto cleanup_query;
1513			}
1514		}
1515		/*
1516		 * We should always have a valid dispatcher here.  If we
1517		 * don't support a protocol family, then its dispatcher
1518		 * will be NULL, but we shouldn't be finding addresses for
1519		 * protocol types we don't support, so the dispatcher
1520		 * we found should never be NULL.
1521		 */
1522		INSIST(query->dispatch != NULL);
1523	}
1524
1525	query->dispentry = NULL;
1526	query->fctx = fctx;
1527	query->tsig = NULL;
1528	query->tsigkey = NULL;
1529	ISC_LINK_INIT(query, link);
1530	query->magic = QUERY_MAGIC;
1531
1532	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1533		/*
1534		 * Connect to the remote server.
1535		 *
1536		 * XXXRTH  Should we attach to the socket?
1537		 */
1538		result = isc_socket_connect(query->tcpsocket,
1539					    &addrinfo->sockaddr, task,
1540					    resquery_connected, query);
1541		if (result != ISC_R_SUCCESS)
1542			goto cleanup_socket;
1543		query->connects++;
1544		QTRACE("connecting via TCP");
1545	} else {
1546		result = resquery_send(query);
1547		if (result != ISC_R_SUCCESS)
1548			goto cleanup_dispatch;
1549	}
1550
	/* The query is under way: account for it and link it to 'fctx'. */
1551	fctx->querysent++;
1552	fctx->totalqueries++;
1553
1554	ISC_LIST_APPEND(fctx->queries, query, link);
1555	query->fctx->nqueries++;
1556	if (isc_sockaddr_pf(&addrinfo->sockaddr) == PF_INET)
1557		inc_stats(res, dns_resstatscounter_queryv4);
1558	else
1559		inc_stats(res, dns_resstatscounter_queryv6);
1560	if (res->view->resquerystats != NULL)
1561		dns_rdatatypestats_increment(res->view->resquerystats,
1562					     fctx->type);
1563
1564	return (ISC_R_SUCCESS);
1565
	/*
	 * Error exits.  The labels fall through one into the next, so a
	 * jump to an earlier label also performs every later step.
	 */
1566 cleanup_socket:
1567	isc_socket_detach(&query->tcpsocket);
1568
1569 cleanup_dispatch:
1570	if (query->dispatch != NULL)
1571		dns_dispatch_detach(&query->dispatch);
1572
1573 cleanup_query:
	/*
	 * A non-zero connect count means a connect is still outstanding
	 * for this query (resquery_send() marks the query canceled in
	 * that case), so its memory must not be released here.
	 */
1574	if (query->connects == 0) {
1575		query->magic = 0;
1576		isc_mem_put(fctx->mctx, query, sizeof(*query));
1577	}
1578
1579 stop_idle_timer:
1580	RUNTIME_CHECK(fctx_stopidletimer(fctx) == ISC_R_SUCCESS);
1581
1582	return (result);
1583}
1584
1585static isc_boolean_t
1586bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1587	isc_sockaddr_t *sa;
1588
1589	for (sa = ISC_LIST_HEAD(fctx->bad_edns);
1590	     sa != NULL;
1591	     sa = ISC_LIST_NEXT(sa, link)) {
1592		if (isc_sockaddr_equal(sa, address))
1593			return (ISC_TRUE);
1594	}
1595
1596	return (ISC_FALSE);
1597}
1598
1599static void
1600add_bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1601	isc_sockaddr_t *sa;
1602
1603	if (bad_edns(fctx, address))
1604		return;
1605
1606	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1607	if (sa == NULL)
1608		return;
1609
1610	*sa = *address;
1611	ISC_LIST_INITANDAPPEND(fctx->bad_edns, sa, link);
1612}
1613
1614static isc_boolean_t
1615triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1616	isc_sockaddr_t *sa;
1617
1618	for (sa = ISC_LIST_HEAD(fctx->edns);
1619	     sa != NULL;
1620	     sa = ISC_LIST_NEXT(sa, link)) {
1621		if (isc_sockaddr_equal(sa, address))
1622			return (ISC_TRUE);
1623	}
1624
1625	return (ISC_FALSE);
1626}
1627
1628static void
1629add_triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1630	isc_sockaddr_t *sa;
1631
1632	if (triededns(fctx, address))
1633		return;
1634
1635	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1636	if (sa == NULL)
1637		return;
1638
1639	*sa = *address;
1640	ISC_LIST_INITANDAPPEND(fctx->edns, sa, link);
1641}
1642
1643static isc_boolean_t
1644triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1645	isc_sockaddr_t *sa;
1646
1647	for (sa = ISC_LIST_HEAD(fctx->edns512);
1648	     sa != NULL;
1649	     sa = ISC_LIST_NEXT(sa, link)) {
1650		if (isc_sockaddr_equal(sa, address))
1651			return (ISC_TRUE);
1652	}
1653
1654	return (ISC_FALSE);
1655}
1656
1657static void
1658add_triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1659	isc_sockaddr_t *sa;
1660
1661	if (triededns512(fctx, address))
1662		return;
1663
1664	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1665	if (sa == NULL)
1666		return;
1667
1668	*sa = *address;
1669	ISC_LIST_INITANDAPPEND(fctx->edns512, sa, link);
1670}
1671
/*
 * Render the question for 'query' into query->data and hand it to the
 * socket layer.
 *
 * In order: register with the dispatch to obtain a query id, build the
 * question section, set the RD and CD flags, decide whether and how to
 * use EDNS0 (peer configuration, ADB flags and earlier timeouts all feed
 * into this), pick up a TSIG key when one is found for the peer, render
 * the message, and start the send.  When the query uses an exclusive UDP
 * socket, a connect is started on it just before the send.  For TCP the
 * message is preceded by its two-byte length.
 *
 * Returns ISC_R_SUCCESS once the send has been started, with
 * query->sends incremented.  Failures after the dispatch entry was
 * obtained remove that entry again and reset the query message.
 */
1672static isc_result_t
1673resquery_send(resquery_t *query) {
1674	fetchctx_t *fctx;
1675	isc_result_t result;
1676	dns_name_t *qname = NULL;
1677	dns_rdataset_t *qrdataset = NULL;
1678	isc_region_t r;
1679	dns_resolver_t *res;
1680	isc_task_t *task;
1681	isc_socket_t *socket;
1682	isc_buffer_t tcpbuffer;
1683	isc_sockaddr_t *address;
1684	isc_buffer_t *buffer;
1685	isc_netaddr_t ipaddr;
1686	dns_tsigkey_t *tsigkey = NULL;
1687	dns_peer_t *peer = NULL;
1688	isc_boolean_t useedns;
1689	dns_compress_t cctx;
1690	isc_boolean_t cleanup_cctx = ISC_FALSE;
1691	isc_boolean_t secure_domain;
1692	isc_boolean_t connecting = ISC_FALSE;
1693	dns_ednsopt_t ednsopts[EDNSOPTS];
1694	unsigned ednsopt = 0;
1695
1696	fctx = query->fctx;
1697	QTRACE("send");
1698
1699	res = fctx->res;
1700	task = res->buckets[fctx->bucketnum].task;
1701	address = NULL;
1702
	/*
	 * 'buffer' is what finally gets sent; query->buffer is where the
	 * DNS message is rendered.  They differ only for TCP, where
	 * 'tcpbuffer' spans all of query->data and query->buffer starts
	 * two bytes in.
	 */
1703	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1704		/*
1705		 * Reserve space for the TCP message length.
1706		 */
1707		isc_buffer_init(&tcpbuffer, query->data, sizeof(query->data));
1708		isc_buffer_init(&query->buffer, query->data + 2,
1709				sizeof(query->data) - 2);
1710		buffer = &tcpbuffer;
1711	} else {
1712		isc_buffer_init(&query->buffer, query->data,
1713				sizeof(query->data));
1714		buffer = &query->buffer;
1715	}
1716
1717	result = dns_message_gettempname(fctx->qmessage, &qname);
1718	if (result != ISC_R_SUCCESS)
1719		goto cleanup_temps;
1720	result = dns_message_gettemprdataset(fctx->qmessage, &qrdataset);
1721	if (result != ISC_R_SUCCESS)
1722		goto cleanup_temps;
1723
1724	/*
1725	 * Get a query id from the dispatch.
1726	 */
1727	result = dns_dispatch_addresponse2(query->dispatch,
1728					   &query->addrinfo->sockaddr,
1729					   task,
1730					   resquery_response,
1731					   query,
1732					   &query->id,
1733					   &query->dispentry,
1734					   res->socketmgr);
1735	if (result != ISC_R_SUCCESS)
1736		goto cleanup_temps;
1737
1738	fctx->qmessage->opcode = dns_opcode_query;
1739
1740	/*
1741	 * Set up question.
1742	 */
1743	dns_name_init(qname, NULL);
1744	dns_name_clone(&fctx->name, qname);
1745	dns_rdataset_init(qrdataset);
1746	dns_rdataset_makequestion(qrdataset, res->rdclass, fctx->type);
1747	ISC_LIST_APPEND(qname->list, qrdataset, link);
1748	dns_message_addname(fctx->qmessage, qname, DNS_SECTION_QUESTION);
	/*
	 * Clear the local pointers so that the cleanup_temps code below
	 * does not put these temporaries back.
	 */
1749	qname = NULL;
1750	qrdataset = NULL;
1751
1752	/*
1753	 * Set RD if the client has requested that we do a recursive query,
1754	 * or if we're sending to a forwarder.
1755	 */
1756	if ((query->options & DNS_FETCHOPT_RECURSIVE) != 0 ||
1757	    ISFORWARDER(query->addrinfo))
1758		fctx->qmessage->flags |= DNS_MESSAGEFLAG_RD;
1759
1760	/*
1761	 * Set CD if the client says don't validate or the question is
1762	 * under a secure entry point.
1763	 */
1764	if ((query->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
1765		fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
1766	} else if (res->view->enablevalidation) {
1767		result = dns_view_issecuredomain(res->view, &fctx->name,
1768						 &secure_domain);
1769		if (result != ISC_R_SUCCESS)
1770			secure_domain = ISC_FALSE;
1771		if (res->view->dlv != NULL)
1772			secure_domain = ISC_TRUE;
1773		if (secure_domain)
1774			fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
1775	}
1776
1777	/*
1778	 * We don't have to set opcode because it defaults to query.
1779	 */
1780	fctx->qmessage->id = query->id;
1781
1782	/*
1783	 * Convert the question to wire format.
1784	 */
1785	result = dns_compress_init(&cctx, -1, fctx->res->mctx);
1786	if (result != ISC_R_SUCCESS)
1787		goto cleanup_message;
1788	cleanup_cctx = ISC_TRUE;
1789
1790	result = dns_message_renderbegin(fctx->qmessage, &cctx,
1791					 &query->buffer);
1792	if (result != ISC_R_SUCCESS)
1793		goto cleanup_message;
1794
1795	result = dns_message_rendersection(fctx->qmessage,
1796					   DNS_SECTION_QUESTION, 0);
1797	if (result != ISC_R_SUCCESS)
1798		goto cleanup_message;
1799
	/*
	 * Look up any per-server configuration; 'peer' stays NULL when
	 * nothing is found.
	 *
	 * NOTE(review): fctx_query() checks res->view->peers for NULL
	 * before making this same lookup, while here the list is passed
	 * unchecked -- confirm that dns_peerlist_peerbyaddr() accepts a
	 * NULL list or that 'peers' can never be NULL at this point.
	 */
1800	peer = NULL;
1801	isc_netaddr_fromsockaddr(&ipaddr, &query->addrinfo->sockaddr);
1802	(void) dns_peerlist_peerbyaddr(fctx->res->view->peers, &ipaddr, &peer);
1803
1804	/*
1805	 * The ADB does not know about servers with "edns no".  Check this,
1806	 * and then inform the ADB for future use.
1807	 */
1808	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0 &&
1809	    peer != NULL &&
1810	    dns_peer_getsupportedns(peer, &useedns) == ISC_R_SUCCESS &&
1811	    !useedns)
1812	{
1813		query->options |= DNS_FETCHOPT_NOEDNS0;
1814		dns_adb_changeflags(fctx->adb, query->addrinfo,
1815				    DNS_FETCHOPT_NOEDNS0,
1816				    DNS_FETCHOPT_NOEDNS0);
1817	}
1818
1819	/* Sync NOEDNS0 flag in addrinfo->flags and options now. */
1820	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) != 0)
1821		query->options |= DNS_FETCHOPT_NOEDNS0;
1822
1823	/*
1824	 * Handle timeouts by reducing the UDP response size to 512 bytes
1825	 * then if that doesn't work disabling EDNS (includes DO) and CD.
1826	 *
1827	 * These timeout can be due to:
1828	 *	* broken nameservers that don't respond to EDNS queries.
1829	 *	* broken/misconfigured firewalls and NAT implementations
1830	 *	  that don't handle IP fragmentation.
1831	 *	* broken/misconfigured firewalls that don't handle responses
1832	 *	  greater than 512 bytes.
1833	 *	* broken/misconfigured firewalls that don't handle EDNS, DO
1834	 *	  or CD.
1835	 *	* packet loss / link outage.
1836	 */
1837	if (fctx->timeout) {
1838		if ((triededns512(fctx, &query->addrinfo->sockaddr) ||
1839		     fctx->timeouts >= (MAX_EDNS0_TIMEOUTS * 2)) &&
1840		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1841			query->options |= DNS_FETCHOPT_NOEDNS0;
1842			fctx->reason = "disabling EDNS";
1843		} else if ((triededns(fctx, &query->addrinfo->sockaddr) ||
1844			    fctx->timeouts >= MAX_EDNS0_TIMEOUTS) &&
1845			   (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1846			query->options |= DNS_FETCHOPT_EDNS512;
1847			fctx->reason = "reducing the advertised EDNS UDP "
1848				       "packet size to 512 octets";
1849		}
1850		fctx->timeout = ISC_FALSE;
1851	}
1852
1853	/*
1854	 * Use EDNS0, unless the caller doesn't want it, or we know that
1855	 * the remote server doesn't like it.
1856	 */
1857	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1858		if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0) {
1859			unsigned int version = 0;       /* Default version. */
1860			unsigned int flags;
1861			isc_uint16_t udpsize = res->udpsize;
1862			isc_boolean_t reqnsid = res->view->requestnsid;
1863
			/*
			 * The ADB flags may carry an explicit EDNS version
			 * for this server.
			 */
1864			flags = query->addrinfo->flags;
1865			if ((flags & DNS_FETCHOPT_EDNSVERSIONSET) != 0) {
1866				version = flags & DNS_FETCHOPT_EDNSVERSIONMASK;
1867				version >>= DNS_FETCHOPT_EDNSVERSIONSHIFT;
1868			}
1869			if ((query->options & DNS_FETCHOPT_EDNS512) != 0)
1870				udpsize = 512;
1871			else if (peer != NULL)
1872				(void)dns_peer_getudpsize(peer, &udpsize);
1873
1874			/* request NSID for current view or peer? */
1875			if (peer != NULL)
1876				(void) dns_peer_getrequestnsid(peer, &reqnsid);
1877			if (reqnsid) {
1878				INSIST(ednsopt < EDNSOPTS);
1879				ednsopts[ednsopt].code = DNS_OPT_NSID;
1880				ednsopts[ednsopt].length = 0;
1881				ednsopts[ednsopt].value = NULL;
1882				ednsopt++;
1883			}
1884			result = fctx_addopt(fctx->qmessage, version,
1885					     udpsize, ednsopts, ednsopt);
1886			if (reqnsid && result == ISC_R_SUCCESS) {
1887				query->options |= DNS_FETCHOPT_WANTNSID;
1888			} else if (result != ISC_R_SUCCESS) {
1889				/*
1890				 * We couldn't add the OPT, but we'll press on.
1891				 * We're not using EDNS0, so set the NOEDNS0
1892				 * bit.
1893				 */
1894				query->options |= DNS_FETCHOPT_NOEDNS0;
1895			}
1896		} else {
1897			/*
1898			 * We know this server doesn't like EDNS0, so we
1899			 * won't use it.  Set the NOEDNS0 bit since we're
1900			 * not using EDNS0.
1901			 */
1902			query->options |= DNS_FETCHOPT_NOEDNS0;
1903		}
1904	}
1905
1906	/*
1907	 * If we need EDNS0 to do this query and aren't using it, we lose.
1908	 */
1909	if (NEEDEDNS0(fctx) && (query->options & DNS_FETCHOPT_NOEDNS0) != 0) {
1910		result = DNS_R_SERVFAIL;
1911		goto cleanup_message;
1912	}
1913
	/*
	 * Remember what was tried against this address; the timeout
	 * handling above consults these lists on later attempts.
	 */
1914	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0)
1915		add_triededns(fctx, &query->addrinfo->sockaddr);
1916
1917	if ((query->options & DNS_FETCHOPT_EDNS512) != 0)
1918		add_triededns512(fctx, &query->addrinfo->sockaddr);
1919
1920	/*
1921	 * Clear CD if EDNS is not in use.
1922	 */
1923	if ((query->options & DNS_FETCHOPT_NOEDNS0) != 0)
1924		fctx->qmessage->flags &= ~DNS_MESSAGEFLAG_CD;
1925
1926	/*
1927	 * Add TSIG record tailored to the current recipient.
1928	 */
1929	result = dns_view_getpeertsig(fctx->res->view, &ipaddr, &tsigkey);
1930	if (result != ISC_R_SUCCESS && result != ISC_R_NOTFOUND)
1931		goto cleanup_message;
1932
1933	if (tsigkey != NULL) {
1934		result = dns_message_settsigkey(fctx->qmessage, tsigkey);
1935		dns_tsigkey_detach(&tsigkey);
1936		if (result != ISC_R_SUCCESS)
1937			goto cleanup_message;
1938	}
1939
1940	result = dns_message_rendersection(fctx->qmessage,
1941					   DNS_SECTION_ADDITIONAL, 0);
1942	if (result != ISC_R_SUCCESS)
1943		goto cleanup_message;
1944
1945	result = dns_message_renderend(fctx->qmessage);
1946	if (result != ISC_R_SUCCESS)
1947		goto cleanup_message;
1948
1949	dns_compress_invalidate(&cctx);
1950	cleanup_cctx = ISC_FALSE;
1951
	/*
	 * Keep the key and the query's TSIG on the query itself.
	 *
	 * NOTE(review): the cleanup_message path below does not release
	 * query->tsigkey or query->tsig, so a failure from here on leaves
	 * them set on the query -- confirm that every caller's error
	 * handling disposes of them.
	 */
1952	if (dns_message_gettsigkey(fctx->qmessage) != NULL) {
1953		dns_tsigkey_attach(dns_message_gettsigkey(fctx->qmessage),
1954				   &query->tsigkey);
1955		result = dns_message_getquerytsig(fctx->qmessage,
1956						  fctx->res->mctx,
1957						  &query->tsig);
1958		if (result != ISC_R_SUCCESS)
1959			goto cleanup_message;
1960	}
1961
1962	/*
1963	 * If using TCP, write the length of the message at the beginning
1964	 * of the buffer.
1965	 */
1966	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1967		isc_buffer_usedregion(&query->buffer, &r);
1968		isc_buffer_putuint16(&tcpbuffer, (isc_uint16_t)r.length);
1969		isc_buffer_add(&tcpbuffer, r.length);
1970	}
1971
1972	/*
1973	 * We're now done with the query message.
1974	 */
1975	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
1976
	/*
	 * An exclusive-socket query sends on the socket of its own
	 * dispatch entry; otherwise the dispatch's socket is used.
	 */
1977	if (query->exclusivesocket)
1978		socket = dns_dispatch_getentrysocket(query->dispentry);
1979	else
1980		socket = dns_dispatch_getsocket(query->dispatch);
1981	/*
1982	 * Send the query!
1983	 */
1984	if ((query->options & DNS_FETCHOPT_TCP) == 0) {
1985		address = &query->addrinfo->sockaddr;
1986		if (query->exclusivesocket) {
1987			result = isc_socket_connect(socket, address, task,
1988						    resquery_udpconnected,
1989						    query);
1990			if (result != ISC_R_SUCCESS)
1991				goto cleanup_message;
1992			connecting = ISC_TRUE;
1993			query->connects++;
1994		}
1995	}
1996	isc_buffer_usedregion(buffer, &r);
1997
1998	/*
1999	 * XXXRTH  Make sure we don't send to ourselves!  We should probably
2000	 *		prune out these addresses when we get them from the ADB.
2001	 */
	/*
	 * The send-done event lives inside the query itself.  For TCP
	 * 'address' is still NULL at this point; only the UDP path above
	 * sets it.
	 */
2002	ISC_EVENT_INIT(&query->sendevent, sizeof(query->sendevent), 0, NULL,
2003		       ISC_SOCKEVENT_SENDDONE, resquery_senddone, query,
2004		       NULL, NULL, NULL);
2005	result = isc_socket_sendto2(socket, &r, task, address, NULL,
2006				    &query->sendevent, 0);
2007	if (result != ISC_R_SUCCESS) {
2008		if (connecting) {
2009			/*
2010			 * This query is still connecting.
2011			 * Mark it as canceled so that it will just be
2012			 * cleaned up when the connected event is received.
2013			 * Keep fctx around until the event is processed.
2014			 */
2015			query->fctx->nqueries++;
2016			query->attributes |= RESQUERY_ATTR_CANCELED;
2017		}
2018		goto cleanup_message;
2019	}
2020
2021	query->sends++;
2022
2023	QTRACE("sent");
2024
2025	return (ISC_R_SUCCESS);
2026
2027 cleanup_message:
2028	if (cleanup_cctx)
2029		dns_compress_invalidate(&cctx);
2030
2031	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
2032
2033	/*
2034	 * Stop the dispatcher from listening.
2035	 */
2036	dns_dispatch_removeresponse(&query->dispentry, NULL);
2037
2038 cleanup_temps:
	/*
	 * Non-NULL only when the failure happened before the temporaries
	 * were added to the question section.
	 */
2039	if (qname != NULL)
2040		dns_message_puttempname(fctx->qmessage, &qname);
2041	if (qrdataset != NULL)
2042		dns_message_puttemprdataset(fctx->qmessage, &qrdataset);
2043
2044	return (result);
2045}
2046
/*
 * Socket event handler run when the TCP connect started for a query
 * completes.  The event type must be ISC_SOCKEVENT_CONNECT and the
 * query is passed in event->ev_arg.
 *
 * - If the query was canceled while the connect was in progress, the
 *   socket reference is dropped and the query is destroyed.
 * - On a successful connect, the idle timer is extended, a TCP dispatch
 *   is created on the connected socket and the query is sent; any
 *   failure along the way cancels the query and finishes the fetch.
 * - For the listed "cannot reach the server" results, the query is
 *   canceled and, after the event is freed, the fetch is retried via
 *   fctx_try().
 * - For any other result the query is just canceled.
 *
 * The event is always freed before returning.
 */
static void
resquery_connected(isc_task_t *task, isc_event_t *event) {
	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
	resquery_t *query = event->ev_arg;
	isc_boolean_t retry = ISC_FALSE;
	isc_interval_t interval;
	isc_result_t result;
	unsigned int attrs;
	fetchctx_t *fctx;

	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
	REQUIRE(VALID_QUERY(query));

	QTRACE("connected");

	UNUSED(task);

	/*
	 * XXXRTH
	 *
	 * Currently we don't wait for the connect event before retrying
	 * a query.  This means that if we get really behind, we may end
	 * up doing extra work!
	 */

	query->connects--;
	/*
	 * Remember the fctx now: it is still needed below after the query
	 * has been handed to fctx_cancelquery() / resquery_destroy().
	 */
	fctx = query->fctx;

	if (RESQUERY_CANCELED(query)) {
		/*
		 * This query was canceled while the connect() was in
		 * progress.
		 */
		isc_socket_detach(&query->tcpsocket);
		resquery_destroy(&query);
	} else {
		switch (sevent->result) {
		case ISC_R_SUCCESS:

			/*
			 * Extend the idle timer for TCP.  20 seconds
			 * should be long enough for a TCP connection to be
			 * established, a single DNS request to be sent,
			 * and the response received.
			 */
			isc_interval_set(&interval, 20, 0);
			result = fctx_startidletimer(query->fctx, &interval);
			if (result != ISC_R_SUCCESS) {
				fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
				fctx_done(fctx, result, __LINE__);
				break;
			}
			/*
			 * We are connected.  Create a dispatcher and
			 * send the query.
			 */
			attrs = 0;
			attrs |= DNS_DISPATCHATTR_TCP;
			attrs |= DNS_DISPATCHATTR_PRIVATE;
			attrs |= DNS_DISPATCHATTR_CONNECTED;
			if (isc_sockaddr_pf(&query->addrinfo->sockaddr) ==
			    AF_INET)
				attrs |= DNS_DISPATCHATTR_IPV4;
			else
				attrs |= DNS_DISPATCHATTR_IPV6;
			attrs |= DNS_DISPATCHATTR_MAKEQUERY;

			result = dns_dispatch_createtcp(query->dispatchmgr,
						     query->tcpsocket,
						     query->fctx->res->taskmgr,
						     4096, 2, 1, 1, 3, attrs,
						     &query->dispatch);

			/*
			 * Regardless of whether dns_dispatch_createtcp()
			 * succeeded or not, we don't need our reference
			 * to the socket anymore.
			 */
			isc_socket_detach(&query->tcpsocket);

			if (result == ISC_R_SUCCESS)
				result = resquery_send(query);

			/*
			 * Either the dispatch could not be created or the
			 * send failed: give up on the whole fetch.
			 */
			if (result != ISC_R_SUCCESS) {
				fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
				fctx_done(fctx, result, __LINE__);
			}
			break;

		case ISC_R_NETUNREACH:
		case ISC_R_HOSTUNREACH:
		case ISC_R_CONNREFUSED:
		case ISC_R_NOPERM:
		case ISC_R_ADDRNOTAVAIL:
		case ISC_R_CONNECTIONRESET:
			/*
			 * No route to remote.  Cancel this query and try
			 * again (see the 'retry' handling below).
			 */
			isc_socket_detach(&query->tcpsocket);
			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
			retry = ISC_TRUE;
			break;

		default:
			/*
			 * Any other connect result: drop the query without
			 * retrying from here.
			 */
			isc_socket_detach(&query->tcpsocket);
			fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
			break;
		}
	}

	isc_event_free(&event);

	if (retry) {
		/*
		 * Behave as if the idle timer has expired.  For TCP
		 * connections this may not actually reflect the latest timer.
		 */
		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
		result = fctx_stopidletimer(fctx);
		if (result != ISC_R_SUCCESS)
			fctx_done(fctx, result, __LINE__);
		else
			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
	}
}
2172
/*
 * ADB find completion handler.  event->ev_sender is the find that has
 * finished and event->ev_arg is the fetch context that started it.
 *
 * While holding the fetch's bucket lock, the pending-find count is
 * decremented and one follow-up action is selected; the action itself
 * is carried out after the lock has been released and the event and
 * find have been freed:
 *
 *   want_try  - the fctx was in ADDRWAIT and the event is
 *               DNS_EVENT_ADBMOREADDRESSES: call fctx_try().
 *   want_done - the fctx was in ADDRWAIT, the event was of another
 *               type and no finds remain pending: fail the fetch
 *               with ISC_R_FAILURE.
 *   destroy   - the fctx is shutting down with no pending finds, no
 *               outstanding queries, no validators and no references:
 *               unlink and destroy it.
 */
static void
fctx_finddone(isc_task_t *task, isc_event_t *event) {
	fetchctx_t *fctx;
	dns_adbfind_t *find;
	dns_resolver_t *res;
	isc_boolean_t want_try = ISC_FALSE;
	isc_boolean_t want_done = ISC_FALSE;
	isc_boolean_t bucket_empty = ISC_FALSE;
	unsigned int bucketnum;
	isc_boolean_t destroy = ISC_FALSE;

	find = event->ev_sender;
	fctx = event->ev_arg;
	REQUIRE(VALID_FCTX(fctx));
	res = fctx->res;

	UNUSED(task);

	FCTXTRACE("finddone");

	bucketnum = fctx->bucketnum;
	LOCK(&res->buckets[bucketnum].lock);

	INSIST(fctx->pending > 0);
	fctx->pending--;

	if (ADDRWAIT(fctx)) {
		/*
		 * The fetch is waiting for a name to be found.
		 */
		INSIST(!SHUTTINGDOWN(fctx));
		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
		if (event->ev_type == DNS_EVENT_ADBMOREADDRESSES) {
			want_try = ISC_TRUE;
			/* Fold the find's query count into the fetch total. */
			fctx->totalqueries += find->qtotal;
		} else {
			fctx->findfail++;
			if (fctx->pending == 0) {
				/*
				 * We've got nothing else to wait for and don't
				 * know the answer.  There's nothing to do but
				 * fail the fctx.
				 */
				want_done = ISC_TRUE;
			}
		}
	} else if (SHUTTINGDOWN(fctx) && fctx->pending == 0 &&
		   fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators)) {

		/*
		 * This was the last thing the shutting-down fctx was
		 * waiting for; if nobody references it, it can go away.
		 */
		if (fctx->references == 0) {
			bucket_empty = fctx_unlink(fctx);
			destroy = ISC_TRUE;
		}
	}
	UNLOCK(&res->buckets[bucketnum].lock);

	isc_event_free(&event);
	dns_adb_destroyfind(&find);

	/* Carry out the action chosen above, now that the lock is released. */
	if (want_try)
		fctx_try(fctx, ISC_TRUE, ISC_FALSE);
	else if (want_done)
		fctx_done(fctx, ISC_R_FAILURE, __LINE__);
	else if (destroy) {
			fctx_destroy(fctx);
		if (bucket_empty)
			empty_bucket(res);
	}
}
2242
2243
2244static inline isc_boolean_t
2245bad_server(fetchctx_t *fctx, isc_sockaddr_t *address) {
2246	isc_sockaddr_t *sa;
2247
2248	for (sa = ISC_LIST_HEAD(fctx->bad);
2249	     sa != NULL;
2250	     sa = ISC_LIST_NEXT(sa, link)) {
2251		if (isc_sockaddr_equal(sa, address))
2252			return (ISC_TRUE);
2253	}
2254
2255	return (ISC_FALSE);
2256}
2257
2258static inline isc_boolean_t
2259mark_bad(fetchctx_t *fctx) {
2260	dns_adbfind_t *curr;
2261	dns_adbaddrinfo_t *addrinfo;
2262	isc_boolean_t all_bad = ISC_TRUE;
2263
2264	/*
2265	 * Mark all known bad servers, so we don't try to talk to them
2266	 * again.
2267	 */
2268
2269	/*
2270	 * Mark any bad nameservers.
2271	 */
2272	for (curr = ISC_LIST_HEAD(fctx->finds);
2273	     curr != NULL;
2274	     curr = ISC_LIST_NEXT(curr, publink)) {
2275		for (addrinfo = ISC_LIST_HEAD(curr->list);
2276		     addrinfo != NULL;
2277		     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2278			if (bad_server(fctx, &addrinfo->sockaddr))
2279				addrinfo->flags |= FCTX_ADDRINFO_MARK;
2280			else
2281				all_bad = ISC_FALSE;
2282		}
2283	}
2284
2285	/*
2286	 * Mark any bad forwarders.
2287	 */
2288	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
2289	     addrinfo != NULL;
2290	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2291		if (bad_server(fctx, &addrinfo->sockaddr))
2292			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2293		else
2294			all_bad = ISC_FALSE;
2295	}
2296
2297	/*
2298	 * Mark any bad alternates.
2299	 */
2300	for (curr = ISC_LIST_HEAD(fctx->altfinds);
2301	     curr != NULL;
2302	     curr = ISC_LIST_NEXT(curr, publink)) {
2303		for (addrinfo = ISC_LIST_HEAD(curr->list);
2304		     addrinfo != NULL;
2305		     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2306			if (bad_server(fctx, &addrinfo->sockaddr))
2307				addrinfo->flags |= FCTX_ADDRINFO_MARK;
2308			else
2309				all_bad = ISC_FALSE;
2310		}
2311	}
2312
2313	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
2314	     addrinfo != NULL;
2315	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2316		if (bad_server(fctx, &addrinfo->sockaddr))
2317			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2318		else
2319			all_bad = ISC_FALSE;
2320	}
2321
2322	return (all_bad);
2323}
2324
2325static void
2326add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_result_t reason,
2327	badnstype_t badtype)
2328{
2329	char namebuf[DNS_NAME_FORMATSIZE];
2330	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
2331	char classbuf[64];
2332	char typebuf[64];
2333	char code[64];
2334	isc_buffer_t b;
2335	isc_sockaddr_t *sa;
2336	const char *spc = "";
2337	isc_sockaddr_t *address = &addrinfo->sockaddr;
2338
2339	if (reason == DNS_R_LAME)
2340		fctx->lamecount++;
2341	else {
2342		switch (badtype) {
2343		case badns_unreachable:
2344			fctx->neterr++;
2345			break;
2346		case badns_response:
2347			fctx->badresp++;
2348			break;
2349		case badns_validation:
2350			break;	/* counted as 'valfail' */
2351		}
2352	}
2353
2354	if (bad_server(fctx, address)) {
2355		/*
2356		 * We already know this server is bad.
2357		 */
2358		return;
2359	}
2360
2361	FCTXTRACE("add_bad");
2362
2363	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
2364	if (sa == NULL)
2365		return;
2366	*sa = *address;
2367	ISC_LIST_INITANDAPPEND(fctx->bad, sa, link);
2368
2369	if (reason == DNS_R_LAME)       /* already logged */
2370		return;
2371
2372	if (reason == DNS_R_UNEXPECTEDRCODE &&
2373	    fctx->rmessage->rcode == dns_rcode_servfail &&
2374	    ISFORWARDER(addrinfo))
2375		return;
2376
2377	if (reason == DNS_R_UNEXPECTEDRCODE) {
2378		isc_buffer_init(&b, code, sizeof(code) - 1);
2379		dns_rcode_totext(fctx->rmessage->rcode, &b);
2380		code[isc_buffer_usedlength(&b)] = '\0';
2381		spc = " ";
2382	} else if (reason == DNS_R_UNEXPECTEDOPCODE) {
2383		isc_buffer_init(&b, code, sizeof(code) - 1);
2384		dns_opcode_totext((dns_opcode_t)fctx->rmessage->opcode, &b);
2385		code[isc_buffer_usedlength(&b)] = '\0';
2386		spc = " ";
2387	} else {
2388		code[0] = '\0';
2389	}
2390	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
2391	dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
2392	dns_rdataclass_format(fctx->res->rdclass, classbuf, sizeof(classbuf));
2393	isc_sockaddr_format(address, addrbuf, sizeof(addrbuf));
2394	isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
2395		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
2396		      "error (%s%s%s) resolving '%s/%s/%s': %s",
2397		      dns_result_totext(reason), spc, code,
2398		      namebuf, typebuf, classbuf, addrbuf);
2399}
2400
2401/*
2402 * Sort addrinfo list by RTT.
2403 */
2404static void
2405sort_adbfind(dns_adbfind_t *find) {
2406	dns_adbaddrinfo_t *best, *curr;
2407	dns_adbaddrinfolist_t sorted;
2408
2409	/* Lame N^2 bubble sort. */
2410	ISC_LIST_INIT(sorted);
2411	while (!ISC_LIST_EMPTY(find->list)) {
2412		best = ISC_LIST_HEAD(find->list);
2413		curr = ISC_LIST_NEXT(best, publink);
2414		while (curr != NULL) {
2415			if (curr->srtt < best->srtt)
2416				best = curr;
2417			curr = ISC_LIST_NEXT(curr, publink);
2418		}
2419		ISC_LIST_UNLINK(find->list, best, publink);
2420		ISC_LIST_APPEND(sorted, best, publink);
2421	}
2422	find->list = sorted;
2423}
2424
2425/*
2426 * Sort a list of finds by server RTT.
2427 */
2428static void
2429sort_finds(dns_adbfindlist_t *findlist) {
2430	dns_adbfind_t *best, *curr;
2431	dns_adbfindlist_t sorted;
2432	dns_adbaddrinfo_t *addrinfo, *bestaddrinfo;
2433
2434	/* Sort each find's addrinfo list by SRTT. */
2435	for (curr = ISC_LIST_HEAD(*findlist);
2436	     curr != NULL;
2437	     curr = ISC_LIST_NEXT(curr, publink))
2438		sort_adbfind(curr);
2439
2440	/* Lame N^2 bubble sort. */
2441	ISC_LIST_INIT(sorted);
2442	while (!ISC_LIST_EMPTY(*findlist)) {
2443		best = ISC_LIST_HEAD(*findlist);
2444		bestaddrinfo = ISC_LIST_HEAD(best->list);
2445		INSIST(bestaddrinfo != NULL);
2446		curr = ISC_LIST_NEXT(best, publink);
2447		while (curr != NULL) {
2448			addrinfo = ISC_LIST_HEAD(curr->list);
2449			INSIST(addrinfo != NULL);
2450			if (addrinfo->srtt < bestaddrinfo->srtt) {
2451				best = curr;
2452				bestaddrinfo = addrinfo;
2453			}
2454			curr = ISC_LIST_NEXT(curr, publink);
2455		}
2456		ISC_LIST_UNLINK(*findlist, best, publink);
2457		ISC_LIST_APPEND(sorted, best, publink);
2458	}
2459	*findlist = sorted;
2460}
2461
2462static void
2463findname(fetchctx_t *fctx, dns_name_t *name, in_port_t port,
2464	 unsigned int options, unsigned int flags, isc_stdtime_t now,
2465	 isc_boolean_t *need_alternate)
2466{
2467	dns_adbaddrinfo_t *ai;
2468	dns_adbfind_t *find;
2469	dns_resolver_t *res;
2470	isc_boolean_t unshared;
2471	isc_result_t result;
2472
2473	res = fctx->res;
2474	unshared = ISC_TF((fctx->options & DNS_FETCHOPT_UNSHARED) != 0);
2475	/*
2476	 * If this name is a subdomain of the query domain, tell
2477	 * the ADB to start looking using zone/hint data. This keeps us
2478	 * from getting stuck if the nameserver is beneath the zone cut
2479	 * and we don't know its address (e.g. because the A record has
2480	 * expired).
2481	 */
2482	if (dns_name_issubdomain(name, &fctx->domain))
2483		options |= DNS_ADBFIND_STARTATZONE;
2484	options |= DNS_ADBFIND_GLUEOK;
2485	options |= DNS_ADBFIND_HINTOK;
2486
2487	/*
2488	 * See what we know about this address.
2489	 */
2490	find = NULL;
2491	result = dns_adb_createfind2(fctx->adb,
2492				     res->buckets[fctx->bucketnum].task,
2493				     fctx_finddone, fctx, name,
2494				     &fctx->name, fctx->type,
2495				     options, now, NULL,
2496				     res->view->dstport,
2497				     fctx->depth + 1, &find);
2498	if (result != ISC_R_SUCCESS) {
2499		if (result == DNS_R_ALIAS) {
2500			/*
2501			 * XXXRTH  Follow the CNAME/DNAME chain?
2502			 */
2503			dns_adb_destroyfind(&find);
2504			fctx->adberr++;
2505		}
2506	} else if (!ISC_LIST_EMPTY(find->list)) {
2507		/*
2508		 * We have at least some of the addresses for the
2509		 * name.
2510		 */
2511		INSIST((find->options & DNS_ADBFIND_WANTEVENT) == 0);
2512		if (flags != 0 || port != 0) {
2513			for (ai = ISC_LIST_HEAD(find->list);
2514			     ai != NULL;
2515			     ai = ISC_LIST_NEXT(ai, publink)) {
2516				ai->flags |= flags;
2517				if (port != 0)
2518					isc_sockaddr_setport(&ai->sockaddr,
2519							     port);
2520			}
2521		}
2522		if ((flags & FCTX_ADDRINFO_FORWARDER) != 0)
2523			ISC_LIST_APPEND(fctx->altfinds, find, publink);
2524		else
2525			ISC_LIST_APPEND(fctx->finds, find, publink);
2526	} else {
2527		/*
2528		 * We don't know any of the addresses for this
2529		 * name.
2530		 */
2531		if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) {
2532			/*
2533			 * We're looking for them and will get an
2534			 * event about it later.
2535			 */
2536			fctx->pending++;
2537			/*
2538			 * Bootstrap.
2539			 */
2540			if (need_alternate != NULL &&
2541			    !*need_alternate && unshared &&
2542			    ((res->dispatches4 == NULL &&
2543			      find->result_v6 != DNS_R_NXDOMAIN) ||
2544			     (res->dispatches6 == NULL &&
2545			      find->result_v4 != DNS_R_NXDOMAIN)))
2546				*need_alternate = ISC_TRUE;
2547		} else {
2548			if ((find->options & DNS_ADBFIND_LAMEPRUNED) != 0)
2549				fctx->lamecount++; /* cached lame server */
2550			else
2551				fctx->adberr++; /* unreachable server, etc. */
2552
2553			/*
2554			 * If we know there are no addresses for
2555			 * the family we are using then try to add
2556			 * an alternative server.
2557			 */
2558			if (need_alternate != NULL && !*need_alternate &&
2559			    ((res->dispatches4 == NULL &&
2560			      find->result_v6 == DNS_R_NXRRSET) ||
2561			     (res->dispatches6 == NULL &&
2562			      find->result_v4 == DNS_R_NXRRSET)))
2563				*need_alternate = ISC_TRUE;
2564			dns_adb_destroyfind(&find);
2565		}
2566	}
2567}
2568
2569static isc_boolean_t
2570isstrictsubdomain(dns_name_t *name1, dns_name_t *name2) {
2571	int order;
2572	unsigned int nlabels;
2573	dns_namereln_t namereln;
2574
2575	namereln = dns_name_fullcompare(name1, name2, &order, &nlabels);
2576	return (ISC_TF(namereln == dns_namereln_subdomain));
2577}
2578
/*
 * (Re)build the sets of candidate server addresses for 'fctx':
 *
 *   fctx->forwaddrs - addresses of the forwarders to use, kept in
 *                     ascending SRTT order;
 *   fctx->finds     - ADB finds for each NS name in fctx->nameservers
 *                     (skipped when the forward policy is "only");
 *   fctx->altfinds,
 *   fctx->altaddrs  - alternate servers, consulted only when findname()
 *                     reported that an alternate is needed.
 *
 * The forwarder, alternate-address and find lists must be empty on
 * entry.  Afterwards all addresses already known to be bad are marked.
 *
 * 'badcache' controls whether, on total failure, a bad-cache entry is
 * added for DNSKEY, DLV and DS fetches.
 *
 * Returns:
 *   ISC_R_SUCCESS   - at least one address not known to be bad exists;
 *   DNS_R_WAIT      - no usable address yet, but ADB finds are pending;
 *   DNS_R_SERVFAIL  - the restart limit or maximum depth was exceeded;
 *   ISC_R_FAILURE   - no usable address and nothing pending;
 *   other           - an error from dns_name_dup() or from iterating
 *                     the nameserver rdataset.
 */
static isc_result_t
fctx_getaddresses(fetchctx_t *fctx, isc_boolean_t badcache) {
	dns_rdata_t rdata = DNS_RDATA_INIT;
	isc_result_t result;
	dns_resolver_t *res;
	isc_stdtime_t now;
	unsigned int stdoptions = 0;
	isc_sockaddr_t *sa;
	dns_adbaddrinfo_t *ai;
	isc_boolean_t all_bad;
	dns_rdata_ns_t ns;
	isc_boolean_t need_alternate = ISC_FALSE;

	FCTXTRACE("getaddresses");

	/*
	 * Don't pound on remote servers.  (Failsafe!)
	 */
	fctx->restarts++;
	if (fctx->restarts > 10) {
		FCTXTRACE("too many restarts");
		return (DNS_R_SERVFAIL);
	}

	res = fctx->res;

	if (fctx->depth > res->maxdepth) {
		FCTXTRACE("too much NS indirection");
		return (DNS_R_SERVFAIL);
	}

	/*
	 * Forwarders.
	 */

	INSIST(ISC_LIST_EMPTY(fctx->forwaddrs));
	INSIST(ISC_LIST_EMPTY(fctx->altaddrs));

	/*
	 * If this fctx has forwarders, use them; otherwise use any
	 * selective forwarders specified in the view; otherwise use the
	 * resolver's forwarders (if any).
	 */
	sa = ISC_LIST_HEAD(fctx->forwarders);
	if (sa == NULL) {
		dns_forwarders_t *forwarders = NULL;
		dns_name_t *name = &fctx->name;
		dns_name_t suffix;
		unsigned int labels;
		dns_fixedname_t fixed;
		dns_name_t *domain;

		/*
		 * DS records are found in the parent server.
		 * Strip label to get the correct forwarder (if any).
		 */
		if (dns_rdatatype_atparent(fctx->type) &&
		    dns_name_countlabels(name) > 1) {
			dns_name_init(&suffix, NULL);
			labels = dns_name_countlabels(name);
			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
			name = &suffix;
		}

		dns_fixedname_init(&fixed);
		domain = dns_fixedname_name(&fixed);
		result = dns_fwdtable_find2(fctx->res->view->fwdtable, name,
					    domain, &forwarders);
		if (result == ISC_R_SUCCESS) {
			sa = ISC_LIST_HEAD(forwarders->addrs);
			fctx->fwdpolicy = forwarders->fwdpolicy;
			/*
			 * With "forward only", if the matching forward
			 * domain lies below the fetch's current domain,
			 * replace the fetch's domain with it.
			 */
			if (fctx->fwdpolicy == dns_fwdpolicy_only &&
			    isstrictsubdomain(domain, &fctx->domain)) {
				dns_name_free(&fctx->domain, fctx->mctx);
				dns_name_init(&fctx->domain, NULL);
				result = dns_name_dup(domain, fctx->mctx,
						      &fctx->domain);
				if (result != ISC_R_SUCCESS)
					return (result);
			}
		}
	}

	while (sa != NULL) {
		/*
		 * Skip forwarders in an address family for which the
		 * resolver has no dispatch.
		 */
		if ((isc_sockaddr_pf(sa) == AF_INET &&
			 fctx->res->dispatches4 == NULL) ||
		    (isc_sockaddr_pf(sa) == AF_INET6 &&
			fctx->res->dispatches6 == NULL)) {
				sa = ISC_LIST_NEXT(sa, link);
				continue;
		}
		ai = NULL;
		result = dns_adb_findaddrinfo(fctx->adb,
					      sa, &ai, 0);  /* XXXMLG */
		if (result == ISC_R_SUCCESS) {
			dns_adbaddrinfo_t *cur;
			ai->flags |= FCTX_ADDRINFO_FORWARDER;
			/*
			 * Insert before the first entry whose SRTT is not
			 * smaller, keeping the list in ascending SRTT order.
			 */
			cur = ISC_LIST_HEAD(fctx->forwaddrs);
			while (cur != NULL && cur->srtt < ai->srtt)
				cur = ISC_LIST_NEXT(cur, publink);
			if (cur != NULL)
				ISC_LIST_INSERTBEFORE(fctx->forwaddrs, cur,
						      ai, publink);
			else
				ISC_LIST_APPEND(fctx->forwaddrs, ai, publink);
		}
		sa = ISC_LIST_NEXT(sa, link);
	}

	/*
	 * If the forwarding policy is "only", we don't need the addresses
	 * of the nameservers.
	 */
	if (fctx->fwdpolicy == dns_fwdpolicy_only)
		goto out;

	/*
	 * Normal nameservers.
	 */

	stdoptions = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_EMPTYEVENT;
	if (fctx->restarts == 1) {
		/*
		 * To avoid sending out a flood of queries likely to
		 * result in NXRRSET, we suppress fetches for address
		 * families we don't have the first time through,
		 * provided that we have addresses in some family we
		 * can use.
		 *
		 * We don't want to set this option all the time, since
		 * if fctx->restarts > 1, we've clearly been having trouble
		 * with the addresses we had, so getting more could help.
		 */
		stdoptions |= DNS_ADBFIND_AVOIDFETCHES;
	}
	if (res->dispatches4 != NULL)
		stdoptions |= DNS_ADBFIND_INET;
	if (res->dispatches6 != NULL)
		stdoptions |= DNS_ADBFIND_INET6;
	isc_stdtime_get(&now);

	INSIST(ISC_LIST_EMPTY(fctx->finds));
	INSIST(ISC_LIST_EMPTY(fctx->altfinds));

	for (result = dns_rdataset_first(&fctx->nameservers);
	     result == ISC_R_SUCCESS;
	     result = dns_rdataset_next(&fctx->nameservers))
	{
		dns_rdataset_current(&fctx->nameservers, &rdata);
		/*
		 * Extract the name from the NS record.
		 *
		 * NOTE(review): on the 'continue' path below rdata is not
		 * reset before the next dns_rdataset_current() call --
		 * confirm that this is acceptable if tostruct can fail.
		 */
		result = dns_rdata_tostruct(&rdata, &ns, NULL);
		if (result != ISC_R_SUCCESS)
			continue;

		findname(fctx, &ns.name, 0, stdoptions, 0, now,
			 &need_alternate);
		dns_rdata_reset(&rdata);
		dns_rdata_freestruct(&ns);
	}
	/* Anything other than a clean end of iteration is an error. */
	if (result != ISC_R_NOMORE)
		return (result);

	/*
	 * Do we need to use 6 to 4?
	 */
	if (need_alternate) {
		int family;
		alternate_t *a;
		family = (res->dispatches6 != NULL) ? AF_INET6 : AF_INET;
		for (a = ISC_LIST_HEAD(fctx->res->alternates);
		     a != NULL;
		     a = ISC_LIST_NEXT(a, link)) {
			/*
			 * Alternates given by name are looked up through
			 * the ADB; findname() files them under altfinds
			 * because of the FORWARDER flag.
			 */
			if (!a->isaddress) {
				findname(fctx, &a->_u._n.name, a->_u._n.port,
					 stdoptions, FCTX_ADDRINFO_FORWARDER,
					 now, NULL);
				continue;
			}
			if (isc_sockaddr_pf(&a->_u.addr) != family)
				continue;
			ai = NULL;
			result = dns_adb_findaddrinfo(fctx->adb, &a->_u.addr,
						      &ai, 0);
			if (result == ISC_R_SUCCESS) {
				dns_adbaddrinfo_t *cur;
				ai->flags |= FCTX_ADDRINFO_FORWARDER;
				/* Keep altaddrs in ascending SRTT order. */
				cur = ISC_LIST_HEAD(fctx->altaddrs);
				while (cur != NULL && cur->srtt < ai->srtt)
					cur = ISC_LIST_NEXT(cur, publink);
				if (cur != NULL)
					ISC_LIST_INSERTBEFORE(fctx->altaddrs,
							      cur, ai, publink);
				else
					ISC_LIST_APPEND(fctx->altaddrs, ai,
							publink);
			}
		}
	}

 out:
	/*
	 * Mark all known bad servers.
	 */
	all_bad = mark_bad(fctx);

	/*
	 * How are we doing?
	 */
	if (all_bad) {
		/*
		 * We've got no addresses.
		 */
		if (fctx->pending > 0) {
			/*
			 * We're fetching the addresses, but don't have any
			 * yet.   Tell the caller to wait for an answer.
			 */
			result = DNS_R_WAIT;
		} else {
			isc_time_t expire;
			isc_interval_t i;
			/*
			 * We've lost completely.  We don't know any
			 * addresses, and the ADB has told us it can't get
			 * them.
			 */
			FCTXTRACE("no addresses");
			isc_interval_set(&i, DNS_BADCACHE_TTL(fctx), 0);
			result = isc_time_nowplusinterval(&expire, &i);
			/*
			 * Only DNSKEY, DLV and DS fetches are entered in
			 * the bad cache, and only if the caller asked for
			 * it and the expiry time could be computed.
			 */
			if (badcache &&
			    (fctx->type == dns_rdatatype_dnskey ||
			     fctx->type == dns_rdatatype_dlv ||
			     fctx->type == dns_rdatatype_ds) &&
			     result == ISC_R_SUCCESS)
				dns_resolver_addbadcache(fctx->res,
							 &fctx->name,
							 fctx->type, &expire);
			result = ISC_R_FAILURE;
		}
	} else {
		/*
		 * We've found some addresses.  We might still be looking
		 * for more addresses.
		 */
		sort_finds(&fctx->finds);
		sort_finds(&fctx->altfinds);
		result = ISC_R_SUCCESS;
	}

	return (result);
}
2832
2833static inline void
2834possibly_mark(fetchctx_t *fctx, dns_adbaddrinfo_t *addr)
2835{
2836	isc_netaddr_t na;
2837	char buf[ISC_NETADDR_FORMATSIZE];
2838	isc_sockaddr_t *sa;
2839	isc_boolean_t aborted = ISC_FALSE;
2840	isc_boolean_t bogus;
2841	dns_acl_t *blackhole;
2842	isc_netaddr_t ipaddr;
2843	dns_peer_t *peer = NULL;
2844	dns_resolver_t *res;
2845	const char *msg = NULL;
2846
2847	sa = &addr->sockaddr;
2848
2849	res = fctx->res;
2850	isc_netaddr_fromsockaddr(&ipaddr, sa);
2851	blackhole = dns_dispatchmgr_getblackhole(res->dispatchmgr);
2852	(void) dns_peerlist_peerbyaddr(res->view->peers, &ipaddr, &peer);
2853
2854	if (blackhole != NULL) {
2855		int match;
2856
2857		if (dns_acl_match(&ipaddr, NULL, blackhole,
2858				  &res->view->aclenv,
2859				  &match, NULL) == ISC_R_SUCCESS &&
2860		    match > 0)
2861			aborted = ISC_TRUE;
2862	}
2863
2864	if (peer != NULL &&
2865	    dns_peer_getbogus(peer, &bogus) == ISC_R_SUCCESS &&
2866	    bogus)
2867		aborted = ISC_TRUE;
2868
2869	if (aborted) {
2870		addr->flags |= FCTX_ADDRINFO_MARK;
2871		msg = "ignoring blackholed / bogus server: ";
2872	} else if (isc_sockaddr_ismulticast(sa)) {
2873		addr->flags |= FCTX_ADDRINFO_MARK;
2874		msg = "ignoring multicast address: ";
2875	} else if (isc_sockaddr_isexperimental(sa)) {
2876		addr->flags |= FCTX_ADDRINFO_MARK;
2877		msg = "ignoring experimental address: ";
2878	} else if (sa->type.sa.sa_family != AF_INET6) {
2879		return;
2880	} else if (IN6_IS_ADDR_V4MAPPED(&sa->type.sin6.sin6_addr)) {
2881		addr->flags |= FCTX_ADDRINFO_MARK;
2882		msg = "ignoring IPv6 mapped IPV4 address: ";
2883	} else if (IN6_IS_ADDR_V4COMPAT(&sa->type.sin6.sin6_addr)) {
2884		addr->flags |= FCTX_ADDRINFO_MARK;
2885		msg = "ignoring IPv6 compatibility IPV4 address: ";
2886	} else
2887		return;
2888
2889	if (!isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(3)))
2890		return;
2891
2892	isc_netaddr_fromsockaddr(&na, sa);
2893	isc_netaddr_format(&na, buf, sizeof(buf));
2894	FCTXTRACE2(msg, buf);
2895}
2896
/*
 * Pick the next server address to query for 'fctx'.
 *
 * The address returned is flagged FCTX_ADDRINFO_MARK so that it will not
 * be returned again; addresses rejected by possibly_mark() are skipped.
 * Candidates are considered in this order:
 *
 *   1. forwarder addresses (fctx->forwaddrs);
 *   2. nameserver finds (fctx->finds), starting with the find after
 *      fctx->find and wrapping around the list once;
 *   3. alternates: the next usable address from fctx->altfinds (walked
 *      the same way, starting after fctx->altfind), unless an unmarked
 *      entry in fctx->altaddrs has a lower SRTT, in which case that
 *      entry is used instead.
 *
 * Returns the chosen address, or NULL if no unmarked address is left.
 */
static inline dns_adbaddrinfo_t *
fctx_nextaddress(fetchctx_t *fctx) {
	dns_adbfind_t *find, *start;
	dns_adbaddrinfo_t *addrinfo;
	dns_adbaddrinfo_t *faddrinfo;

	/*
	 * Return the next untried address, if any.
	 */

	/*
	 * Find the first unmarked forwarder (if any).
	 */
	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
	     addrinfo != NULL;
	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
		if (!UNMARKED(addrinfo))
			continue;
		/* possibly_mark() may flag the address as unusable. */
		possibly_mark(fctx, addrinfo);
		if (UNMARKED(addrinfo)) {
			addrinfo->flags |= FCTX_ADDRINFO_MARK;
			fctx->find = NULL;
			return (addrinfo);
		}
	}

	/*
	 * No forwarders.  Move to the next find.
	 */

	fctx->attributes |= FCTX_ATTR_TRIEDFIND;

	/*
	 * Resume after the find used last time, wrapping to the head of
	 * the list.
	 */
	find = fctx->find;
	if (find == NULL)
		find = ISC_LIST_HEAD(fctx->finds);
	else {
		find = ISC_LIST_NEXT(find, publink);
		if (find == NULL)
			find = ISC_LIST_HEAD(fctx->finds);
	}

	/*
	 * Find the first unmarked addrinfo.
	 */
	addrinfo = NULL;
	if (find != NULL) {
		start = find;
		do {
			for (addrinfo = ISC_LIST_HEAD(find->list);
			     addrinfo != NULL;
			     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
				if (!UNMARKED(addrinfo))
					continue;
				possibly_mark(fctx, addrinfo);
				if (UNMARKED(addrinfo)) {
					addrinfo->flags |= FCTX_ADDRINFO_MARK;
					break;
				}
			}
			if (addrinfo != NULL)
				break;
			find = ISC_LIST_NEXT(find, publink);
			if (find == NULL)
				find = ISC_LIST_HEAD(fctx->finds);
		} while (find != start);
	}

	/* Remember where we stopped, for the next call. */
	fctx->find = find;
	if (addrinfo != NULL)
		return (addrinfo);

	/*
	 * No nameservers left.  Try alternates.
	 */

	fctx->attributes |= FCTX_ATTR_TRIEDALT;

	find = fctx->altfind;
	if (find == NULL)
		find = ISC_LIST_HEAD(fctx->altfinds);
	else {
		find = ISC_LIST_NEXT(find, publink);
		if (find == NULL)
			find = ISC_LIST_HEAD(fctx->altfinds);
	}

	/*
	 * Find the first unmarked addrinfo.
	 */
	addrinfo = NULL;
	if (find != NULL) {
		start = find;
		do {
			for (addrinfo = ISC_LIST_HEAD(find->list);
			     addrinfo != NULL;
			     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
				if (!UNMARKED(addrinfo))
					continue;
				possibly_mark(fctx, addrinfo);
				if (UNMARKED(addrinfo)) {
					addrinfo->flags |= FCTX_ADDRINFO_MARK;
					break;
				}
			}
			if (addrinfo != NULL)
				break;
			find = ISC_LIST_NEXT(find, publink);
			if (find == NULL)
				find = ISC_LIST_HEAD(fctx->altfinds);
		} while (find != start);
	}

	/* Candidate from the alternate finds (may be NULL). */
	faddrinfo = addrinfo;

	/*
	 * See if we have a better alternate server by address.
	 */

	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
	     addrinfo != NULL;
	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
		if (!UNMARKED(addrinfo))
			continue;
		possibly_mark(fctx, addrinfo);
		if (UNMARKED(addrinfo) &&
		    (faddrinfo == NULL ||
		     addrinfo->srtt < faddrinfo->srtt)) {
			/*
			 * This address wins: release the candidate taken
			 * from the alternate finds so it can be used later.
			 */
			if (faddrinfo != NULL)
				faddrinfo->flags &= ~FCTX_ADDRINFO_MARK;
			addrinfo->flags |= FCTX_ADDRINFO_MARK;
			break;
		}
	}

	/*
	 * No better alternate address: fall back to the candidate from
	 * the alternate finds and only then advance the altfind cursor.
	 */
	if (addrinfo == NULL) {
		addrinfo = faddrinfo;
		fctx->altfind = find;
	}

	return (addrinfo);
}
3038
3039static void
3040fctx_try(fetchctx_t *fctx, isc_boolean_t retrying, isc_boolean_t badcache) {
3041	isc_result_t result;
3042	dns_adbaddrinfo_t *addrinfo;
3043
3044	FCTXTRACE("try");
3045
3046	REQUIRE(!ADDRWAIT(fctx));
3047
3048	if (fctx->totalqueries > DEFAULT_MAX_QUERIES)
3049		fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
3050
3051	addrinfo = fctx_nextaddress(fctx);
3052	if (addrinfo == NULL) {
3053		/*
3054		 * We have no more addresses.  Start over.
3055		 */
3056		fctx_cancelqueries(fctx, ISC_TRUE);
3057		fctx_cleanupfinds(fctx);
3058		fctx_cleanupaltfinds(fctx);
3059		fctx_cleanupforwaddrs(fctx);
3060		fctx_cleanupaltaddrs(fctx);
3061		result = fctx_getaddresses(fctx, badcache);
3062		if (result == DNS_R_WAIT) {
3063			/*
3064			 * Sleep waiting for addresses.
3065			 */
3066			FCTXTRACE("addrwait");
3067			fctx->attributes |= FCTX_ATTR_ADDRWAIT;
3068			return;
3069		} else if (result != ISC_R_SUCCESS) {
3070			/*
3071			 * Something bad happened.
3072			 */
3073			fctx_done(fctx, result, __LINE__);
3074			return;
3075		}
3076
3077		addrinfo = fctx_nextaddress(fctx);
3078		/*
3079		 * While we may have addresses from the ADB, they
3080		 * might be bad ones.  In this case, return SERVFAIL.
3081		 */
3082		if (addrinfo == NULL) {
3083			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
3084			return;
3085		}
3086	}
3087
3088	result = fctx_query(fctx, addrinfo, fctx->options);
3089	if (result != ISC_R_SUCCESS)
3090		fctx_done(fctx, result, __LINE__);
3091	else if (retrying)
3092		inc_stats(fctx->res, dns_resstatscounter_retry);
3093}
3094
3095static isc_boolean_t
3096fctx_unlink(fetchctx_t *fctx) {
3097	dns_resolver_t *res;
3098	unsigned int bucketnum;
3099
3100	/*
3101	 * Caller must be holding the bucket lock.
3102	 */
3103
3104	REQUIRE(VALID_FCTX(fctx));
3105	REQUIRE(fctx->state == fetchstate_done ||
3106		fctx->state == fetchstate_init);
3107	REQUIRE(ISC_LIST_EMPTY(fctx->events));
3108	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
3109	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
3110	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
3111	REQUIRE(fctx->pending == 0);
3112	REQUIRE(fctx->references == 0);
3113	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
3114
3115	FCTXTRACE("unlink");
3116
3117	res = fctx->res;
3118	bucketnum = fctx->bucketnum;
3119
3120	ISC_LIST_UNLINK(res->buckets[bucketnum].fctxs, fctx, link);
3121
3122	LOCK(&res->nlock);
3123	res->nfctx--;
3124	UNLOCK(&res->nlock);
3125
3126	if (res->buckets[bucketnum].exiting &&
3127	    ISC_LIST_EMPTY(res->buckets[bucketnum].fctxs))
3128		return (ISC_TRUE);
3129
3130	return (ISC_FALSE);
3131}
3132
3133static void
3134fctx_destroy(fetchctx_t *fctx) {
3135	isc_sockaddr_t *sa, *next_sa;
3136
3137	REQUIRE(VALID_FCTX(fctx));
3138	REQUIRE(fctx->state == fetchstate_done ||
3139		fctx->state == fetchstate_init);
3140	REQUIRE(ISC_LIST_EMPTY(fctx->events));
3141	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
3142	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
3143	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
3144	REQUIRE(fctx->pending == 0);
3145	REQUIRE(fctx->references == 0);
3146	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
3147	REQUIRE(!ISC_LINK_LINKED(fctx, link));
3148
3149	FCTXTRACE("destroy");
3150
3151	/*
3152	 * Free bad.
3153	 */
3154	for (sa = ISC_LIST_HEAD(fctx->bad);
3155	     sa != NULL;
3156	     sa = next_sa) {
3157		next_sa = ISC_LIST_NEXT(sa, link);
3158		ISC_LIST_UNLINK(fctx->bad, sa, link);
3159		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3160	}
3161
3162	for (sa = ISC_LIST_HEAD(fctx->edns);
3163	     sa != NULL;
3164	     sa = next_sa) {
3165		next_sa = ISC_LIST_NEXT(sa, link);
3166		ISC_LIST_UNLINK(fctx->edns, sa, link);
3167		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3168	}
3169
3170	for (sa = ISC_LIST_HEAD(fctx->edns512);
3171	     sa != NULL;
3172	     sa = next_sa) {
3173		next_sa = ISC_LIST_NEXT(sa, link);
3174		ISC_LIST_UNLINK(fctx->edns512, sa, link);
3175		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3176	}
3177
3178	for (sa = ISC_LIST_HEAD(fctx->bad_edns);
3179	     sa != NULL;
3180	     sa = next_sa) {
3181		next_sa = ISC_LIST_NEXT(sa, link);
3182		ISC_LIST_UNLINK(fctx->bad_edns, sa, link);
3183		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3184	}
3185
3186	isc_timer_detach(&fctx->timer);
3187	dns_message_destroy(&fctx->rmessage);
3188	dns_message_destroy(&fctx->qmessage);
3189	if (dns_name_countlabels(&fctx->domain) > 0)
3190		dns_name_free(&fctx->domain, fctx->mctx);
3191	if (dns_rdataset_isassociated(&fctx->nameservers))
3192		dns_rdataset_disassociate(&fctx->nameservers);
3193	dns_name_free(&fctx->name, fctx->mctx);
3194	dns_db_detach(&fctx->cache);
3195	dns_adb_detach(&fctx->adb);
3196	isc_mem_free(fctx->mctx, fctx->info);
3197	isc_mem_putanddetach(&fctx->mctx, fctx, sizeof(*fctx));
3198}
3199
3200/*
3201 * Fetch event handlers.
3202 */
3203
3204static void
3205fctx_timeout(isc_task_t *task, isc_event_t *event) {
3206	fetchctx_t *fctx = event->ev_arg;
3207	isc_timerevent_t *tevent = (isc_timerevent_t *)event;
3208	resquery_t *query;
3209
3210	REQUIRE(VALID_FCTX(fctx));
3211
3212	UNUSED(task);
3213
3214	FCTXTRACE("timeout");
3215
3216	inc_stats(fctx->res, dns_resstatscounter_querytimeout);
3217
3218	if (event->ev_type == ISC_TIMEREVENT_LIFE) {
3219		fctx->reason = NULL;
3220		fctx_done(fctx, ISC_R_TIMEDOUT, __LINE__);
3221	} else {
3222		isc_result_t result;
3223
3224		fctx->timeouts++;
3225		fctx->timeout = ISC_TRUE;
3226		/*
3227		 * We could cancel the running queries here, or we could let
3228		 * them keep going.  Since we normally use separate sockets for
3229		 * different queries, we adopt the former approach to reduce
3230		 * the number of open sockets: cancel the oldest query if it
3231		 * expired after the query had started (this is usually the
3232		 * case but is not always so, depending on the task schedule
3233		 * timing).
3234		 */
3235		query = ISC_LIST_HEAD(fctx->queries);
3236		if (query != NULL &&
3237		    isc_time_compare(&tevent->due, &query->start) >= 0) {
3238			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
3239		}
3240		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
3241		/*
3242		 * Our timer has triggered.  Reestablish the fctx lifetime
3243		 * timer.
3244		 */
3245		result = fctx_starttimer(fctx);
3246		if (result != ISC_R_SUCCESS)
3247			fctx_done(fctx, result, __LINE__);
3248		else
3249			/*
3250			 * Keep trying.
3251			 */
3252			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
3253	}
3254
3255	isc_event_free(&event);
3256}
3257
3258static void
3259fctx_shutdown(fetchctx_t *fctx) {
3260	isc_event_t *cevent;
3261
3262	/*
3263	 * Start the shutdown process for fctx, if it isn't already underway.
3264	 */
3265
3266	FCTXTRACE("shutdown");
3267
3268	/*
3269	 * The caller must be holding the appropriate bucket lock.
3270	 */
3271
3272	if (fctx->want_shutdown)
3273		return;
3274
3275	fctx->want_shutdown = ISC_TRUE;
3276
3277	/*
3278	 * Unless we're still initializing (in which case the
3279	 * control event is still outstanding), we need to post
3280	 * the control event to tell the fetch we want it to
3281	 * exit.
3282	 */
3283	if (fctx->state != fetchstate_init) {
3284		cevent = &fctx->control_event;
3285		isc_task_send(fctx->res->buckets[fctx->bucketnum].task,
3286			      &cevent);
3287	}
3288}
3289
3290static void
3291fctx_doshutdown(isc_task_t *task, isc_event_t *event) {
3292	fetchctx_t *fctx = event->ev_arg;
3293	isc_boolean_t bucket_empty = ISC_FALSE;
3294	dns_resolver_t *res;
3295	unsigned int bucketnum;
3296	dns_validator_t *validator;
3297	isc_boolean_t destroy = ISC_FALSE;
3298
3299	REQUIRE(VALID_FCTX(fctx));
3300
3301	UNUSED(task);
3302
3303	res = fctx->res;
3304	bucketnum = fctx->bucketnum;
3305
3306	FCTXTRACE("doshutdown");
3307
3308	/*
3309	 * An fctx that is shutting down is no longer in ADDRWAIT mode.
3310	 */
3311	fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
3312
3313	/*
3314	 * Cancel all pending validators.  Note that this must be done
3315	 * without the bucket lock held, since that could cause deadlock.
3316	 */
3317	validator = ISC_LIST_HEAD(fctx->validators);
3318	while (validator != NULL) {
3319		dns_validator_cancel(validator);
3320		validator = ISC_LIST_NEXT(validator, link);
3321	}
3322
3323	if (fctx->nsfetch != NULL)
3324		dns_resolver_cancelfetch(fctx->nsfetch);
3325
3326	/*
3327	 * Shut down anything that is still running on behalf of this
3328	 * fetch.  To avoid deadlock with the ADB, we must do this
3329	 * before we lock the bucket lock.
3330	 */
3331	fctx_stopeverything(fctx, ISC_FALSE);
3332
3333	LOCK(&res->buckets[bucketnum].lock);
3334
3335	fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN;
3336
3337	INSIST(fctx->state == fetchstate_active ||
3338	       fctx->state == fetchstate_done);
3339	INSIST(fctx->want_shutdown);
3340
3341	if (fctx->state != fetchstate_done) {
3342		fctx->state = fetchstate_done;
3343		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
3344	}
3345
3346	if (fctx->references == 0 && fctx->pending == 0 &&
3347	    fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators)) {
3348		bucket_empty = fctx_unlink(fctx);
3349		destroy = ISC_TRUE;
3350	}
3351
3352	UNLOCK(&res->buckets[bucketnum].lock);
3353
3354	if (destroy) {
3355		fctx_destroy(fctx);
3356		if (bucket_empty)
3357			empty_bucket(res);
3358	}
3359}
3360
3361static void
3362fctx_start(isc_task_t *task, isc_event_t *event) {
3363	fetchctx_t *fctx = event->ev_arg;
3364	isc_boolean_t done = ISC_FALSE, bucket_empty = ISC_FALSE;
3365	dns_resolver_t *res;
3366	unsigned int bucketnum;
3367	isc_boolean_t destroy = ISC_FALSE;
3368
3369	REQUIRE(VALID_FCTX(fctx));
3370
3371	UNUSED(task);
3372
3373	res = fctx->res;
3374	bucketnum = fctx->bucketnum;
3375
3376	FCTXTRACE("start");
3377
3378	LOCK(&res->buckets[bucketnum].lock);
3379
3380	INSIST(fctx->state == fetchstate_init);
3381	if (fctx->want_shutdown) {
3382		/*
3383		 * We haven't started this fctx yet, and we've been requested
3384		 * to shut it down.
3385		 */
3386		fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN;
3387		fctx->state = fetchstate_done;
3388		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
3389		/*
3390		 * Since we haven't started, we INSIST that we have no
3391		 * pending ADB finds and no pending validations.
3392		 */
3393		INSIST(fctx->pending == 0);
3394		INSIST(fctx->nqueries == 0);
3395		INSIST(ISC_LIST_EMPTY(fctx->validators));
3396		if (fctx->references == 0) {
3397			/*
3398			 * It's now safe to destroy this fctx.
3399			 */
3400			bucket_empty = fctx_unlink(fctx);
3401			destroy = ISC_TRUE;
3402		}
3403		done = ISC_TRUE;
3404	} else {
3405		/*
3406		 * Normal fctx startup.
3407		 */
3408		fctx->state = fetchstate_active;
3409		fctx->totalqueries = 0;
3410		/*
3411		 * Reset the control event for later use in shutting down
3412		 * the fctx.
3413		 */
3414		ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
3415			       DNS_EVENT_FETCHCONTROL, fctx_doshutdown, fctx,
3416			       NULL, NULL, NULL);
3417	}
3418
3419	UNLOCK(&res->buckets[bucketnum].lock);
3420
3421	if (!done) {
3422		isc_result_t result;
3423
3424		INSIST(!destroy);
3425
3426		/*
3427		 * All is well.  Start working on the fetch.
3428		 */
3429		result = fctx_starttimer(fctx);
3430		if (result != ISC_R_SUCCESS)
3431			fctx_done(fctx, result, __LINE__);
3432		else
3433			fctx_try(fctx, ISC_FALSE, ISC_FALSE);
3434	} else if (destroy) {
3435			fctx_destroy(fctx);
3436		if (bucket_empty)
3437			empty_bucket(res);
3438	}
3439}
3440
3441/*
3442 * Fetch Creation, Joining, and Cancelation.
3443 */
3444
3445static inline isc_result_t
3446fctx_join(fetchctx_t *fctx, isc_task_t *task, isc_sockaddr_t *client,
3447	  dns_messageid_t id, isc_taskaction_t action, void *arg,
3448	  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
3449	  dns_fetch_t *fetch)
3450{
3451	isc_task_t *clone;
3452	dns_fetchevent_t *event;
3453
3454	FCTXTRACE("join");
3455
3456	/*
3457	 * We store the task we're going to send this event to in the
3458	 * sender field.  We'll make the fetch the sender when we actually
3459	 * send the event.
3460	 */
3461	clone = NULL;
3462	isc_task_attach(task, &clone);
3463	event = (dns_fetchevent_t *)
3464		isc_event_allocate(fctx->res->mctx, clone, DNS_EVENT_FETCHDONE,
3465				   action, arg, sizeof(*event));
3466	if (event == NULL) {
3467		isc_task_detach(&clone);
3468		return (ISC_R_NOMEMORY);
3469	}
3470	event->result = DNS_R_SERVFAIL;
3471	event->qtype = fctx->type;
3472	event->db = NULL;
3473	event->node = NULL;
3474	event->rdataset = rdataset;
3475	event->sigrdataset = sigrdataset;
3476	event->fetch = fetch;
3477	event->client = client;
3478	event->id = id;
3479	event->qtotal = 0;
3480	dns_fixedname_init(&event->foundname);
3481
3482	/*
3483	 * Make sure that we can store the sigrdataset in the
3484	 * first event if it is needed by any of the events.
3485	 */
3486	if (event->sigrdataset != NULL)
3487		ISC_LIST_PREPEND(fctx->events, event, ev_link);
3488	else
3489		ISC_LIST_APPEND(fctx->events, event, ev_link);
3490	fctx->references++;
3491	fctx->client = client;
3492
3493	fetch->magic = DNS_FETCH_MAGIC;
3494	fetch->private = fctx;
3495
3496	return (ISC_R_SUCCESS);
3497}
3498
3499static inline void
3500log_ns_ttl(fetchctx_t *fctx, const char *where) {
3501	char namebuf[DNS_NAME_FORMATSIZE];
3502	char domainbuf[DNS_NAME_FORMATSIZE];
3503
3504	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
3505	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
3506	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3507		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
3508		      "log_ns_ttl: fctx %p: %s: %s (in '%s'?): %u %u",
3509		      fctx, where, namebuf, domainbuf,
3510		      fctx->ns_ttl_ok, fctx->ns_ttl);
3511}
3512
3513static isc_result_t
3514fctx_create(dns_resolver_t *res, dns_name_t *name, dns_rdatatype_t type,
3515	    dns_name_t *domain, dns_rdataset_t *nameservers,
3516	    unsigned int options, unsigned int bucketnum, unsigned int depth,
3517	    fetchctx_t **fctxp)
3518{
3519	fetchctx_t *fctx;
3520	isc_result_t result;
3521	isc_result_t iresult;
3522	isc_interval_t interval;
3523	dns_fixedname_t fixed;
3524	unsigned int findoptions = 0;
3525	char buf[DNS_NAME_FORMATSIZE + DNS_RDATATYPE_FORMATSIZE];
3526	char typebuf[DNS_RDATATYPE_FORMATSIZE];
3527	dns_name_t suffix;
3528	isc_mem_t *mctx;
3529
3530	/*
3531	 * Caller must be holding the lock for bucket number 'bucketnum'.
3532	 */
3533	REQUIRE(fctxp != NULL && *fctxp == NULL);
3534
3535	mctx = res->buckets[bucketnum].mctx;
3536	fctx = isc_mem_get(mctx, sizeof(*fctx));
3537	if (fctx == NULL)
3538		return (ISC_R_NOMEMORY);
3539	dns_name_format(name, buf, sizeof(buf));
3540	dns_rdatatype_format(type, typebuf, sizeof(typebuf));
3541	strcat(buf, "/");       /* checked */
3542	strcat(buf, typebuf);   /* checked */
3543	fctx->info = isc_mem_strdup(mctx, buf);
3544	if (fctx->info == NULL) {
3545		result = ISC_R_NOMEMORY;
3546		goto cleanup_fetch;
3547	}
3548	FCTXTRACE("create");
3549	dns_name_init(&fctx->name, NULL);
3550	result = dns_name_dup(name, mctx, &fctx->name);
3551	if (result != ISC_R_SUCCESS)
3552		goto cleanup_info;
3553	dns_name_init(&fctx->domain, NULL);
3554	dns_rdataset_init(&fctx->nameservers);
3555
3556	fctx->type = type;
3557	fctx->options = options;
3558	/*
3559	 * Note!  We do not attach to the task.  We are relying on the
3560	 * resolver to ensure that this task doesn't go away while we are
3561	 * using it.
3562	 */
3563	fctx->res = res;
3564	fctx->references = 0;
3565	fctx->bucketnum = bucketnum;
3566	fctx->state = fetchstate_init;
3567	fctx->want_shutdown = ISC_FALSE;
3568	fctx->cloned = ISC_FALSE;
3569	fctx->depth = depth;
3570	ISC_LIST_INIT(fctx->queries);
3571	ISC_LIST_INIT(fctx->finds);
3572	ISC_LIST_INIT(fctx->altfinds);
3573	ISC_LIST_INIT(fctx->forwaddrs);
3574	ISC_LIST_INIT(fctx->altaddrs);
3575	ISC_LIST_INIT(fctx->forwarders);
3576	fctx->fwdpolicy = dns_fwdpolicy_none;
3577	ISC_LIST_INIT(fctx->bad);
3578	ISC_LIST_INIT(fctx->edns);
3579	ISC_LIST_INIT(fctx->edns512);
3580	ISC_LIST_INIT(fctx->bad_edns);
3581	ISC_LIST_INIT(fctx->validators);
3582	fctx->validator = NULL;
3583	fctx->find = NULL;
3584	fctx->altfind = NULL;
3585	fctx->pending = 0;
3586	fctx->restarts = 0;
3587	fctx->querysent = 0;
3588	fctx->totalqueries = 0;
3589	fctx->referrals = 0;
3590	TIME_NOW(&fctx->start);
3591	fctx->timeouts = 0;
3592	fctx->lamecount = 0;
3593	fctx->adberr = 0;
3594	fctx->neterr = 0;
3595	fctx->badresp = 0;
3596	fctx->findfail = 0;
3597	fctx->valfail = 0;
3598	fctx->result = ISC_R_FAILURE;
3599	fctx->vresult = ISC_R_SUCCESS;
3600	fctx->exitline = -1;	/* sentinel */
3601	fctx->logged = ISC_FALSE;
3602	fctx->attributes = 0;
3603	fctx->spilled = ISC_FALSE;
3604	fctx->nqueries = 0;
3605	fctx->reason = NULL;
3606	fctx->rand_buf = 0;
3607	fctx->rand_bits = 0;
3608	fctx->timeout = ISC_FALSE;
3609	fctx->addrinfo = NULL;
3610	fctx->client = NULL;
3611	fctx->ns_ttl = 0;
3612	fctx->ns_ttl_ok = ISC_FALSE;
3613
3614	dns_name_init(&fctx->nsname, NULL);
3615	fctx->nsfetch = NULL;
3616	dns_rdataset_init(&fctx->nsrrset);
3617
3618	if (domain == NULL) {
3619		dns_forwarders_t *forwarders = NULL;
3620		unsigned int labels;
3621		dns_name_t *fwdname = name;
3622
3623		/*
3624		 * DS records are found in the parent server.
3625		 * Strip label to get the correct forwarder (if any).
3626		 */
3627		if (dns_rdatatype_atparent(fctx->type) &&
3628		    dns_name_countlabels(name) > 1) {
3629			dns_name_init(&suffix, NULL);
3630			labels = dns_name_countlabels(name);
3631			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
3632			fwdname = &suffix;
3633		}
3634		dns_fixedname_init(&fixed);
3635		domain = dns_fixedname_name(&fixed);
3636		result = dns_fwdtable_find2(fctx->res->view->fwdtable, fwdname,
3637					    domain, &forwarders);
3638		if (result == ISC_R_SUCCESS)
3639			fctx->fwdpolicy = forwarders->fwdpolicy;
3640
3641		if (fctx->fwdpolicy != dns_fwdpolicy_only) {
3642			/*
3643			 * The caller didn't supply a query domain and
3644			 * nameservers, and we're not in forward-only mode,
3645			 * so find the best nameservers to use.
3646			 */
3647			if (dns_rdatatype_atparent(fctx->type))
3648				findoptions |= DNS_DBFIND_NOEXACT;
3649			result = dns_view_findzonecut(res->view, fwdname,
3650						      domain, 0, findoptions,
3651						      ISC_TRUE,
3652						      &fctx->nameservers,
3653						      NULL);
3654			if (result != ISC_R_SUCCESS)
3655				goto cleanup_name;
3656
3657			result = dns_name_dup(domain, mctx, &fctx->domain);
3658			if (result != ISC_R_SUCCESS) {
3659				dns_rdataset_disassociate(&fctx->nameservers);
3660				goto cleanup_name;
3661			}
3662			fctx->ns_ttl = fctx->nameservers.ttl;
3663			fctx->ns_ttl_ok = ISC_TRUE;
3664		} else {
3665			/*
3666			 * We're in forward-only mode.  Set the query domain.
3667			 */
3668			result = dns_name_dup(domain, mctx, &fctx->domain);
3669			if (result != ISC_R_SUCCESS)
3670				goto cleanup_name;
3671		}
3672	} else {
3673		result = dns_name_dup(domain, mctx, &fctx->domain);
3674		if (result != ISC_R_SUCCESS)
3675			goto cleanup_name;
3676		dns_rdataset_clone(nameservers, &fctx->nameservers);
3677		fctx->ns_ttl = fctx->nameservers.ttl;
3678		fctx->ns_ttl_ok = ISC_TRUE;
3679	}
3680
3681	log_ns_ttl(fctx, "fctx_create");
3682
3683	INSIST(dns_name_issubdomain(&fctx->name, &fctx->domain));
3684
3685	fctx->qmessage = NULL;
3686	result = dns_message_create(mctx, DNS_MESSAGE_INTENTRENDER,
3687				    &fctx->qmessage);
3688
3689	if (result != ISC_R_SUCCESS)
3690		goto cleanup_domain;
3691
3692	fctx->rmessage = NULL;
3693	result = dns_message_create(mctx, DNS_MESSAGE_INTENTPARSE,
3694				    &fctx->rmessage);
3695
3696	if (result != ISC_R_SUCCESS)
3697		goto cleanup_qmessage;
3698
3699	/*
3700	 * Compute an expiration time for the entire fetch.
3701	 */
3702	isc_interval_set(&interval, res->query_timeout, 0);
3703	iresult = isc_time_nowplusinterval(&fctx->expires, &interval);
3704	if (iresult != ISC_R_SUCCESS) {
3705		UNEXPECTED_ERROR(__FILE__, __LINE__,
3706				 "isc_time_nowplusinterval: %s",
3707				 isc_result_totext(iresult));
3708		result = ISC_R_UNEXPECTED;
3709		goto cleanup_rmessage;
3710	}
3711
3712	/*
3713	 * Default retry interval initialization.  We set the interval now
3714	 * mostly so it won't be uninitialized.  It will be set to the
3715	 * correct value before a query is issued.
3716	 */
3717	isc_interval_set(&fctx->interval, 2, 0);
3718
3719	/*
3720	 * Create an inactive timer.  It will be made active when the fetch
3721	 * is actually started.
3722	 */
3723	fctx->timer = NULL;
3724	iresult = isc_timer_create(res->timermgr, isc_timertype_inactive,
3725				   NULL, NULL,
3726				   res->buckets[bucketnum].task, fctx_timeout,
3727				   fctx, &fctx->timer);
3728	if (iresult != ISC_R_SUCCESS) {
3729		UNEXPECTED_ERROR(__FILE__, __LINE__,
3730				 "isc_timer_create: %s",
3731				 isc_result_totext(iresult));
3732		result = ISC_R_UNEXPECTED;
3733		goto cleanup_rmessage;
3734	}
3735
3736	/*
3737	 * Attach to the view's cache and adb.
3738	 */
3739	fctx->cache = NULL;
3740	dns_db_attach(res->view->cachedb, &fctx->cache);
3741	fctx->adb = NULL;
3742	dns_adb_attach(res->view->adb, &fctx->adb);
3743	fctx->mctx = NULL;
3744	isc_mem_attach(mctx, &fctx->mctx);
3745
3746	ISC_LIST_INIT(fctx->events);
3747	ISC_LINK_INIT(fctx, link);
3748	fctx->magic = FCTX_MAGIC;
3749
3750	ISC_LIST_APPEND(res->buckets[bucketnum].fctxs, fctx, link);
3751
3752	LOCK(&res->nlock);
3753	res->nfctx++;
3754	UNLOCK(&res->nlock);
3755
3756	*fctxp = fctx;
3757
3758	return (ISC_R_SUCCESS);
3759
3760 cleanup_rmessage:
3761	dns_message_destroy(&fctx->rmessage);
3762
3763 cleanup_qmessage:
3764	dns_message_destroy(&fctx->qmessage);
3765
3766 cleanup_domain:
3767	if (dns_name_countlabels(&fctx->domain) > 0)
3768		dns_name_free(&fctx->domain, mctx);
3769	if (dns_rdataset_isassociated(&fctx->nameservers))
3770		dns_rdataset_disassociate(&fctx->nameservers);
3771
3772 cleanup_name:
3773	dns_name_free(&fctx->name, mctx);
3774
3775 cleanup_info:
3776	isc_mem_free(mctx, fctx->info);
3777
3778 cleanup_fetch:
3779	isc_mem_put(mctx, fctx, sizeof(*fctx));
3780
3781	return (result);
3782}
3783
3784/*
3785 * Handle Responses
3786 */
3787static inline isc_boolean_t
3788is_lame(fetchctx_t *fctx) {
3789	dns_message_t *message = fctx->rmessage;
3790	dns_name_t *name;
3791	dns_rdataset_t *rdataset;
3792	isc_result_t result;
3793
3794	if (message->rcode != dns_rcode_noerror &&
3795	    message->rcode != dns_rcode_nxdomain)
3796		return (ISC_FALSE);
3797
3798	if (message->counts[DNS_SECTION_ANSWER] != 0)
3799		return (ISC_FALSE);
3800
3801	if (message->counts[DNS_SECTION_AUTHORITY] == 0)
3802		return (ISC_FALSE);
3803
3804	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
3805	while (result == ISC_R_SUCCESS) {
3806		name = NULL;
3807		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
3808		for (rdataset = ISC_LIST_HEAD(name->list);
3809		     rdataset != NULL;
3810		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
3811			dns_namereln_t namereln;
3812			int order;
3813			unsigned int labels;
3814			if (rdataset->type != dns_rdatatype_ns)
3815				continue;
3816			namereln = dns_name_fullcompare(name, &fctx->domain,
3817							&order, &labels);
3818			if (namereln == dns_namereln_equal &&
3819			    (message->flags & DNS_MESSAGEFLAG_AA) != 0)
3820				return (ISC_FALSE);
3821			if (namereln == dns_namereln_subdomain)
3822				return (ISC_FALSE);
3823			return (ISC_TRUE);
3824		}
3825		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
3826	}
3827
3828	return (ISC_FALSE);
3829}
3830
3831static inline void
3832log_lame(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo) {
3833	char namebuf[DNS_NAME_FORMATSIZE];
3834	char domainbuf[DNS_NAME_FORMATSIZE];
3835	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3836
3837	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
3838	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
3839	isc_sockaddr_format(&addrinfo->sockaddr, addrbuf, sizeof(addrbuf));
3840	isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
3841		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
3842		      "lame server resolving '%s' (in '%s'?): %s",
3843		      namebuf, domainbuf, addrbuf);
3844}
3845
3846static inline void
3847log_formerr(fetchctx_t *fctx, const char *format, ...) {
3848	char nsbuf[ISC_SOCKADDR_FORMATSIZE];
3849	char clbuf[ISC_SOCKADDR_FORMATSIZE];
3850	const char *clmsg = "";
3851	char msgbuf[2048];
3852	va_list args;
3853
3854	va_start(args, format);
3855	vsnprintf(msgbuf, sizeof(msgbuf), format, args);
3856	va_end(args);
3857
3858	isc_sockaddr_format(&fctx->addrinfo->sockaddr, nsbuf, sizeof(nsbuf));
3859
3860	if (fctx->client != NULL) {
3861		clmsg = " for client ";
3862		isc_sockaddr_format(fctx->client, clbuf, sizeof(clbuf));
3863	} else {
3864		clbuf[0] = '\0';
3865	}
3866
3867	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3868		      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
3869		      "DNS format error from %s resolving %s%s%s: %s",
3870		      nsbuf, fctx->info, clmsg, clbuf, msgbuf);
3871}
3872
3873static inline isc_result_t
3874same_question(fetchctx_t *fctx) {
3875	isc_result_t result;
3876	dns_message_t *message = fctx->rmessage;
3877	dns_name_t *name;
3878	dns_rdataset_t *rdataset;
3879
3880	/*
3881	 * Caller must be holding the fctx lock.
3882	 */
3883
3884	/*
3885	 * XXXRTH  Currently we support only one question.
3886	 */
3887	if (message->counts[DNS_SECTION_QUESTION] != 1) {
3888		log_formerr(fctx, "too many questions");
3889		return (DNS_R_FORMERR);
3890	}
3891
3892	result = dns_message_firstname(message, DNS_SECTION_QUESTION);
3893	if (result != ISC_R_SUCCESS)
3894		return (result);
3895	name = NULL;
3896	dns_message_currentname(message, DNS_SECTION_QUESTION, &name);
3897	rdataset = ISC_LIST_HEAD(name->list);
3898	INSIST(rdataset != NULL);
3899	INSIST(ISC_LIST_NEXT(rdataset, link) == NULL);
3900
3901	if (fctx->type != rdataset->type ||
3902	    fctx->res->rdclass != rdataset->rdclass ||
3903	    !dns_name_equal(&fctx->name, name)) {
3904		char namebuf[DNS_NAME_FORMATSIZE];
3905		char class[DNS_RDATACLASS_FORMATSIZE];
3906		char type[DNS_RDATATYPE_FORMATSIZE];
3907
3908		dns_name_format(name, namebuf, sizeof(namebuf));
3909		dns_rdataclass_format(rdataset->rdclass, class, sizeof(class));
3910		dns_rdatatype_format(rdataset->type, type, sizeof(type));
3911		log_formerr(fctx, "question section mismatch: got %s/%s/%s",
3912			    namebuf, class, type);
3913		return (DNS_R_FORMERR);
3914	}
3915
3916	return (ISC_R_SUCCESS);
3917}
3918
3919static void
3920clone_results(fetchctx_t *fctx) {
3921	dns_fetchevent_t *event, *hevent;
3922	isc_result_t result;
3923	dns_name_t *name, *hname;
3924
3925	FCTXTRACE("clone_results");
3926
3927	/*
3928	 * Set up any other events to have the same data as the first
3929	 * event.
3930	 *
3931	 * Caller must be holding the appropriate lock.
3932	 */
3933
3934	fctx->cloned = ISC_TRUE;
3935	hevent = ISC_LIST_HEAD(fctx->events);
3936	if (hevent == NULL)
3937		return;
3938	hname = dns_fixedname_name(&hevent->foundname);
3939	for (event = ISC_LIST_NEXT(hevent, ev_link);
3940	     event != NULL;
3941	     event = ISC_LIST_NEXT(event, ev_link)) {
3942		name = dns_fixedname_name(&event->foundname);
3943		result = dns_name_copy(hname, name, NULL);
3944		if (result != ISC_R_SUCCESS)
3945			event->result = result;
3946		else
3947			event->result = hevent->result;
3948		dns_db_attach(hevent->db, &event->db);
3949		dns_db_attachnode(hevent->db, hevent->node, &event->node);
3950		INSIST(hevent->rdataset != NULL);
3951		INSIST(event->rdataset != NULL);
3952		if (dns_rdataset_isassociated(hevent->rdataset))
3953			dns_rdataset_clone(hevent->rdataset, event->rdataset);
3954		INSIST(! (hevent->sigrdataset == NULL &&
3955			  event->sigrdataset != NULL));
3956		if (hevent->sigrdataset != NULL &&
3957		    dns_rdataset_isassociated(hevent->sigrdataset) &&
3958		    event->sigrdataset != NULL)
3959			dns_rdataset_clone(hevent->sigrdataset,
3960					   event->sigrdataset);
3961	}
3962}
3963
/*
 * Predicates on the 'attributes' field of the rdataset pointed to by 'r'.
 * Each one is true when the corresponding DNS_RDATASETATTR_* bit is set.
 */
#define RDATASET_ATTR_ISSET(r, bit)	(((r)->attributes & (bit)) != 0)

#define CACHE(r)	RDATASET_ATTR_ISSET(r, DNS_RDATASETATTR_CACHE)
#define ANSWER(r)	RDATASET_ATTR_ISSET(r, DNS_RDATASETATTR_ANSWER)
#define ANSWERSIG(r)	RDATASET_ATTR_ISSET(r, DNS_RDATASETATTR_ANSWERSIG)
#define EXTERNAL(r)	RDATASET_ATTR_ISSET(r, DNS_RDATASETATTR_EXTERNAL)
#define CHAINING(r)	RDATASET_ATTR_ISSET(r, DNS_RDATASETATTR_CHAINING)
#define CHASE(r)	RDATASET_ATTR_ISSET(r, DNS_RDATASETATTR_CHASE)
#define CHECKNAMES(r)	RDATASET_ATTR_ISSET(r, DNS_RDATASETATTR_CHECKNAMES)
3971
3972
3973/*
3974 * Destroy '*fctx' if it is ready to be destroyed (i.e., if it has
3975 * no references and is no longer waiting for any events).
3976 *
3977 * Requires:
3978 *      '*fctx' is shutting down.
3979 *
3980 * Returns:
3981 *	true if the resolver is exiting and this is the last fctx in the bucket.
3982 */
3983static isc_boolean_t
3984maybe_destroy(fetchctx_t *fctx, isc_boolean_t locked) {
3985	unsigned int bucketnum;
3986	isc_boolean_t bucket_empty = ISC_FALSE;
3987	dns_resolver_t *res = fctx->res;
3988	dns_validator_t *validator, *next_validator;
3989	isc_boolean_t destroy = ISC_FALSE;
3990
3991	REQUIRE(SHUTTINGDOWN(fctx));
3992
3993	bucketnum = fctx->bucketnum;
3994	if (!locked)
3995		LOCK(&res->buckets[bucketnum].lock);
3996	if (fctx->pending != 0 || fctx->nqueries != 0)
3997		goto unlock;
3998
3999	for (validator = ISC_LIST_HEAD(fctx->validators);
4000	     validator != NULL; validator = next_validator) {
4001		next_validator = ISC_LIST_NEXT(validator, link);
4002		dns_validator_cancel(validator);
4003	}
4004
4005	if (fctx->references == 0 && ISC_LIST_EMPTY(fctx->validators)) {
4006		bucket_empty = fctx_unlink(fctx);
4007		destroy = ISC_TRUE;
4008	}
4009 unlock:
4010	if (!locked)
4011		UNLOCK(&res->buckets[bucketnum].lock);
4012	if (destroy)
4013		fctx_destroy(fctx);
4014	return (bucket_empty);
4015}
4016
4017/*
4018 * The validator has finished.
4019 */
4020static void
4021validated(isc_task_t *task, isc_event_t *event) {
4022	dns_adbaddrinfo_t *addrinfo;
4023	dns_dbnode_t *node = NULL;
4024	dns_dbnode_t *nsnode = NULL;
4025	dns_fetchevent_t *hevent;
4026	dns_name_t *name;
4027	dns_rdataset_t *ardataset = NULL;
4028	dns_rdataset_t *asigrdataset = NULL;
4029	dns_rdataset_t *rdataset;
4030	dns_rdataset_t *sigrdataset;
4031	dns_resolver_t *res;
4032	dns_valarg_t *valarg;
4033	dns_validatorevent_t *vevent;
4034	fetchctx_t *fctx;
4035	isc_boolean_t chaining;
4036	isc_boolean_t negative;
4037	isc_boolean_t sentresponse;
4038	isc_result_t eresult = ISC_R_SUCCESS;
4039	isc_result_t result = ISC_R_SUCCESS;
4040	isc_stdtime_t now;
4041	isc_uint32_t ttl;
4042
4043	UNUSED(task); /* for now */
4044
4045	REQUIRE(event->ev_type == DNS_EVENT_VALIDATORDONE);
4046	valarg = event->ev_arg;
4047	fctx = valarg->fctx;
4048	res = fctx->res;
4049	addrinfo = valarg->addrinfo;
4050	REQUIRE(VALID_FCTX(fctx));
4051	REQUIRE(!ISC_LIST_EMPTY(fctx->validators));
4052
4053	vevent = (dns_validatorevent_t *)event;
4054	fctx->vresult = vevent->result;
4055
4056	FCTXTRACE("received validation completion event");
4057
4058	LOCK(&res->buckets[fctx->bucketnum].lock);
4059
4060	ISC_LIST_UNLINK(fctx->validators, vevent->validator, link);
4061	fctx->validator = NULL;
4062
4063	/*
4064	 * Destroy the validator early so that we can
4065	 * destroy the fctx if necessary.
4066	 */
4067	dns_validator_destroy(&vevent->validator);
4068	isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
4069
4070	negative = ISC_TF(vevent->rdataset == NULL);
4071
4072	sentresponse = ISC_TF((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0);
4073
4074	/*
4075	 * If shutting down, ignore the results.  Check to see if we're
4076	 * done waiting for validator completions and ADB pending events; if
4077	 * so, destroy the fctx.
4078	 */
4079	if (SHUTTINGDOWN(fctx) && !sentresponse) {
4080		isc_uint32_t bucketnum = fctx->bucketnum;
4081		isc_boolean_t bucket_empty;
4082		bucket_empty = maybe_destroy(fctx, ISC_TRUE);
4083		UNLOCK(&res->buckets[bucketnum].lock);
4084		if (bucket_empty)
4085			empty_bucket(res);
4086		goto cleanup_event;
4087	}
4088
4089	isc_stdtime_get(&now);
4090
4091	/*
4092	 * If chaining, we need to make sure that the right result code is
4093	 * returned, and that the rdatasets are bound.
4094	 */
4095	if (vevent->result == ISC_R_SUCCESS &&
4096	    !negative &&
4097	    vevent->rdataset != NULL &&
4098	    CHAINING(vevent->rdataset))
4099	{
4100		if (vevent->rdataset->type == dns_rdatatype_cname)
4101			eresult = DNS_R_CNAME;
4102		else {
4103			INSIST(vevent->rdataset->type == dns_rdatatype_dname);
4104			eresult = DNS_R_DNAME;
4105		}
4106		chaining = ISC_TRUE;
4107	} else
4108		chaining = ISC_FALSE;
4109
4110	/*
4111	 * Either we're not shutting down, or we are shutting down but want
4112	 * to cache the result anyway (if this was a validation started by
4113	 * a query with cd set)
4114	 */
4115
4116	hevent = ISC_LIST_HEAD(fctx->events);
4117	if (hevent != NULL) {
4118		if (!negative && !chaining &&
4119		    (fctx->type == dns_rdatatype_any ||
4120		     fctx->type == dns_rdatatype_rrsig ||
4121		     fctx->type == dns_rdatatype_sig)) {
4122			/*
4123			 * Don't bind rdatasets; the caller
4124			 * will iterate the node.
4125			 */
4126		} else {
4127			ardataset = hevent->rdataset;
4128			asigrdataset = hevent->sigrdataset;
4129		}
4130	}
4131
4132	if (vevent->result != ISC_R_SUCCESS) {
4133		FCTXTRACE("validation failed");
4134		inc_stats(res, dns_resstatscounter_valfail);
4135		fctx->valfail++;
4136		fctx->vresult = vevent->result;
4137		if (fctx->vresult != DNS_R_BROKENCHAIN) {
4138			result = ISC_R_NOTFOUND;
4139			if (vevent->rdataset != NULL)
4140				result = dns_db_findnode(fctx->cache,
4141							 vevent->name,
4142							 ISC_TRUE, &node);
4143			if (result == ISC_R_SUCCESS)
4144				(void)dns_db_deleterdataset(fctx->cache, node,
4145							     NULL,
4146							    vevent->type, 0);
4147			if (result == ISC_R_SUCCESS &&
4148			     vevent->sigrdataset != NULL)
4149				(void)dns_db_deleterdataset(fctx->cache, node,
4150							    NULL,
4151							    dns_rdatatype_rrsig,
4152							    vevent->type);
4153			if (result == ISC_R_SUCCESS)
4154				dns_db_detachnode(fctx->cache, &node);
4155		}
4156		if (fctx->vresult == DNS_R_BROKENCHAIN && !negative) {
4157			/*
4158			 * Cache the data as pending for later validation.
4159			 */
4160			result = ISC_R_NOTFOUND;
4161			if (vevent->rdataset != NULL)
4162				result = dns_db_findnode(fctx->cache,
4163							 vevent->name,
4164							 ISC_TRUE, &node);
4165			if (result == ISC_R_SUCCESS) {
4166				(void)dns_db_addrdataset(fctx->cache, node,
4167							 NULL, now,
4168							 vevent->rdataset, 0,
4169							 NULL);
4170			}
4171			if (result == ISC_R_SUCCESS &&
4172			    vevent->sigrdataset != NULL)
4173				(void)dns_db_addrdataset(fctx->cache, node,
4174							 NULL, now,
4175							 vevent->sigrdataset,
4176							 0, NULL);
4177			if (result == ISC_R_SUCCESS)
4178				dns_db_detachnode(fctx->cache, &node);
4179		}
4180		result = fctx->vresult;
4181		add_bad(fctx, addrinfo, result, badns_validation);
4182		isc_event_free(&event);
4183		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4184		INSIST(fctx->validator == NULL);
4185		fctx->validator = ISC_LIST_HEAD(fctx->validators);
4186		if (fctx->validator != NULL)
4187			dns_validator_send(fctx->validator);
4188		else if (sentresponse)
4189			fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4190		else if (result == DNS_R_BROKENCHAIN) {
4191			isc_result_t tresult;
4192			isc_time_t expire;
4193			isc_interval_t i;
4194
4195			isc_interval_set(&i, DNS_BADCACHE_TTL(fctx), 0);
4196			tresult = isc_time_nowplusinterval(&expire, &i);
4197			if (negative &&
4198			    (fctx->type == dns_rdatatype_dnskey ||
4199			     fctx->type == dns_rdatatype_dlv ||
4200			     fctx->type == dns_rdatatype_ds) &&
4201			     tresult == ISC_R_SUCCESS)
4202				dns_resolver_addbadcache(res, &fctx->name,
4203							 fctx->type, &expire);
4204			fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4205		} else
4206			fctx_try(fctx, ISC_TRUE, ISC_TRUE); /* Locks bucket. */
4207		return;
4208	}
4209
4210
4211	if (negative) {
4212		dns_rdatatype_t covers;
4213		FCTXTRACE("nonexistence validation OK");
4214
4215		inc_stats(res, dns_resstatscounter_valnegsuccess);
4216
4217		if (fctx->rmessage->rcode == dns_rcode_nxdomain)
4218			covers = dns_rdatatype_any;
4219		else
4220			covers = fctx->type;
4221
4222		result = dns_db_findnode(fctx->cache, vevent->name, ISC_TRUE,
4223					 &node);
4224		if (result != ISC_R_SUCCESS)
4225			goto noanswer_response;
4226
4227		/*
4228		 * If we are asking for a SOA record set the cache time
4229		 * to zero to facilitate locating the containing zone of
4230		 * a arbitrary zone.
4231		 */
4232		ttl = res->view->maxncachettl;
4233		if (fctx->type == dns_rdatatype_soa &&
4234		    covers == dns_rdatatype_any && res->zero_no_soa_ttl)
4235			ttl = 0;
4236
4237		result = ncache_adderesult(fctx->rmessage, fctx->cache, node,
4238					   covers, now, ttl, vevent->optout,
4239					   vevent->secure, ardataset, &eresult);
4240		if (result != ISC_R_SUCCESS)
4241			goto noanswer_response;
4242		goto answer_response;
4243	} else
4244		inc_stats(res, dns_resstatscounter_valsuccess);
4245
4246	FCTXTRACE("validation OK");
4247
4248	if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL) {
4249		result = dns_rdataset_addnoqname(vevent->rdataset,
4250				   vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF]);
4251		RUNTIME_CHECK(result == ISC_R_SUCCESS);
4252		INSIST(vevent->sigrdataset != NULL);
4253		vevent->sigrdataset->ttl = vevent->rdataset->ttl;
4254		if (vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER] != NULL) {
4255			result = dns_rdataset_addclosest(vevent->rdataset,
4256				 vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER]);
4257			RUNTIME_CHECK(result == ISC_R_SUCCESS);
4258		}
4259	} else if (vevent->rdataset->trust == dns_trust_answer &&
4260		   vevent->rdataset->type != dns_rdatatype_rrsig)
4261	{
4262		isc_result_t tresult;
4263		dns_name_t *noqname = NULL;
4264		tresult = findnoqname(fctx, vevent->name,
4265				      vevent->rdataset->type, &noqname);
4266		if (tresult == ISC_R_SUCCESS && noqname != NULL) {
4267			tresult = dns_rdataset_addnoqname(vevent->rdataset,
4268							  noqname);
4269			RUNTIME_CHECK(tresult == ISC_R_SUCCESS);
4270		}
4271	}
4272
4273	/*
4274	 * The data was already cached as pending data.
4275	 * Re-cache it as secure and bind the cached
4276	 * rdatasets to the first event on the fetch
4277	 * event list.
4278	 */
4279	result = dns_db_findnode(fctx->cache, vevent->name, ISC_TRUE, &node);
4280	if (result != ISC_R_SUCCESS)
4281		goto noanswer_response;
4282
4283	result = dns_db_addrdataset(fctx->cache, node, NULL, now,
4284				    vevent->rdataset, 0, ardataset);
4285	if (result != ISC_R_SUCCESS &&
4286	    result != DNS_R_UNCHANGED)
4287		goto noanswer_response;
4288	if (ardataset != NULL && NEGATIVE(ardataset)) {
4289		if (NXDOMAIN(ardataset))
4290			eresult = DNS_R_NCACHENXDOMAIN;
4291		else
4292			eresult = DNS_R_NCACHENXRRSET;
4293	} else if (vevent->sigrdataset != NULL) {
4294		result = dns_db_addrdataset(fctx->cache, node, NULL, now,
4295					    vevent->sigrdataset, 0,
4296					    asigrdataset);
4297		if (result != ISC_R_SUCCESS &&
4298		    result != DNS_R_UNCHANGED)
4299			goto noanswer_response;
4300	}
4301
4302	if (sentresponse) {
4303		isc_boolean_t bucket_empty = ISC_FALSE;
4304		/*
4305		 * If we only deferred the destroy because we wanted to cache
4306		 * the data, destroy now.
4307		 */
4308		dns_db_detachnode(fctx->cache, &node);
4309		if (SHUTTINGDOWN(fctx))
4310			bucket_empty = maybe_destroy(fctx, ISC_TRUE);
4311		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4312		if (bucket_empty)
4313			empty_bucket(res);
4314		goto cleanup_event;
4315	}
4316
4317	if (!ISC_LIST_EMPTY(fctx->validators)) {
4318		INSIST(!negative);
4319		INSIST(fctx->type == dns_rdatatype_any ||
4320		       fctx->type == dns_rdatatype_rrsig ||
4321		       fctx->type == dns_rdatatype_sig);
4322		/*
4323		 * Don't send a response yet - we have
4324		 * more rdatasets that still need to
4325		 * be validated.
4326		 */
4327		dns_db_detachnode(fctx->cache, &node);
4328		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4329		dns_validator_send(ISC_LIST_HEAD(fctx->validators));
4330		goto cleanup_event;
4331	}
4332
4333 answer_response:
4334	/*
4335	 * Cache any NS/NSEC records that happened to be validated.
4336	 */
4337	result = dns_message_firstname(fctx->rmessage, DNS_SECTION_AUTHORITY);
4338	while (result == ISC_R_SUCCESS) {
4339		name = NULL;
4340		dns_message_currentname(fctx->rmessage, DNS_SECTION_AUTHORITY,
4341					&name);
4342		for (rdataset = ISC_LIST_HEAD(name->list);
4343		     rdataset != NULL;
4344		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
4345			if ((rdataset->type != dns_rdatatype_ns &&
4346			     rdataset->type != dns_rdatatype_nsec) ||
4347			    rdataset->trust != dns_trust_secure)
4348				continue;
4349			for (sigrdataset = ISC_LIST_HEAD(name->list);
4350			     sigrdataset != NULL;
4351			     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
4352				if (sigrdataset->type != dns_rdatatype_rrsig ||
4353				    sigrdataset->covers != rdataset->type)
4354					continue;
4355				break;
4356			}
4357			if (sigrdataset == NULL ||
4358			    sigrdataset->trust != dns_trust_secure)
4359				continue;
4360			result = dns_db_findnode(fctx->cache, name, ISC_TRUE,
4361						 &nsnode);
4362			if (result != ISC_R_SUCCESS)
4363				continue;
4364
4365			result = dns_db_addrdataset(fctx->cache, nsnode, NULL,
4366						    now, rdataset, 0, NULL);
4367			if (result == ISC_R_SUCCESS)
4368				result = dns_db_addrdataset(fctx->cache, nsnode,
4369							    NULL, now,
4370							    sigrdataset, 0,
4371							    NULL);
4372			dns_db_detachnode(fctx->cache, &nsnode);
4373			if (result != ISC_R_SUCCESS)
4374				continue;
4375		}
4376		result = dns_message_nextname(fctx->rmessage,
4377					      DNS_SECTION_AUTHORITY);
4378	}
4379
4380	result = ISC_R_SUCCESS;
4381
4382	/*
4383	 * Respond with an answer, positive or negative,
4384	 * as opposed to an error.  'node' must be non-NULL.
4385	 */
4386
4387	fctx->attributes |= FCTX_ATTR_HAVEANSWER;
4388
4389	if (hevent != NULL) {
4390		/*
4391		 * Negative results must be indicated in event->result.
4392		 */
4393		if (dns_rdataset_isassociated(hevent->rdataset) &&
4394		    NEGATIVE(hevent->rdataset)) {
4395			INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
4396			       eresult == DNS_R_NCACHENXRRSET);
4397		}
4398		hevent->result = eresult;
4399		RUNTIME_CHECK(dns_name_copy(vevent->name,
4400			      dns_fixedname_name(&hevent->foundname), NULL)
4401			      == ISC_R_SUCCESS);
4402		dns_db_attach(fctx->cache, &hevent->db);
4403		dns_db_transfernode(fctx->cache, &node, &hevent->node);
4404		clone_results(fctx);
4405	}
4406
4407 noanswer_response:
4408	if (node != NULL)
4409		dns_db_detachnode(fctx->cache, &node);
4410
4411	UNLOCK(&res->buckets[fctx->bucketnum].lock);
4412	fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4413
4414 cleanup_event:
4415	INSIST(node == NULL);
4416	isc_event_free(&event);
4417}
4418
4419static void
4420fctx_log(void *arg, int level, const char *fmt, ...) {
4421	char msgbuf[2048];
4422	va_list args;
4423	fetchctx_t *fctx = arg;
4424
4425	va_start(args, fmt);
4426	vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
4427	va_end(args);
4428
4429	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4430		      DNS_LOGMODULE_RESOLVER, level,
4431		      "fctx %p(%s): %s", fctx, fctx->info, msgbuf);
4432}
4433
4434static inline isc_result_t
4435findnoqname(fetchctx_t *fctx, dns_name_t *name, dns_rdatatype_t type,
4436	    dns_name_t **noqnamep)
4437{
4438	dns_rdataset_t *nrdataset, *next, *sigrdataset;
4439	dns_rdata_rrsig_t rrsig;
4440	isc_result_t result;
4441	unsigned int labels;
4442	dns_section_t section;
4443	dns_name_t *zonename;
4444	dns_fixedname_t fzonename;
4445	dns_name_t *closest;
4446	dns_fixedname_t fclosest;
4447	dns_name_t *nearest;
4448	dns_fixedname_t fnearest;
4449	dns_rdatatype_t found = dns_rdatatype_none;
4450	dns_name_t *noqname = NULL;
4451
4452	FCTXTRACE("findnoqname");
4453
4454	REQUIRE(noqnamep != NULL && *noqnamep == NULL);
4455
4456	/*
4457	 * Find the SIG for this rdataset, if we have it.
4458	 */
4459	for (sigrdataset = ISC_LIST_HEAD(name->list);
4460	     sigrdataset != NULL;
4461	     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
4462		if (sigrdataset->type == dns_rdatatype_rrsig &&
4463		    sigrdataset->covers == type)
4464			break;
4465	}
4466
4467	if (sigrdataset == NULL)
4468		return (ISC_R_NOTFOUND);
4469
4470	labels = dns_name_countlabels(name);
4471
4472	for (result = dns_rdataset_first(sigrdataset);
4473	     result == ISC_R_SUCCESS;
4474	     result = dns_rdataset_next(sigrdataset)) {
4475		dns_rdata_t rdata = DNS_RDATA_INIT;
4476		dns_rdataset_current(sigrdataset, &rdata);
4477		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
4478		RUNTIME_CHECK(result == ISC_R_SUCCESS);
4479		/* Wildcard has rrsig.labels < labels - 1. */
4480		if (rrsig.labels + 1U >= labels)
4481			continue;
4482		break;
4483	}
4484
4485	if (result == ISC_R_NOMORE)
4486		return (ISC_R_NOTFOUND);
4487	if (result != ISC_R_SUCCESS)
4488		return (result);
4489
4490	dns_fixedname_init(&fzonename);
4491	zonename = dns_fixedname_name(&fzonename);
4492	dns_fixedname_init(&fclosest);
4493	closest = dns_fixedname_name(&fclosest);
4494	dns_fixedname_init(&fnearest);
4495	nearest = dns_fixedname_name(&fnearest);
4496
4497#define NXND(x) ((x) == ISC_R_SUCCESS)
4498
4499	section = DNS_SECTION_AUTHORITY;
4500	for (result = dns_message_firstname(fctx->rmessage, section);
4501	     result == ISC_R_SUCCESS;
4502	     result = dns_message_nextname(fctx->rmessage, section)) {
4503		dns_name_t *nsec = NULL;
4504		dns_message_currentname(fctx->rmessage, section, &nsec);
4505		for (nrdataset = ISC_LIST_HEAD(nsec->list);
4506		      nrdataset != NULL; nrdataset = next) {
4507			isc_boolean_t data = ISC_FALSE, exists = ISC_FALSE;
4508			isc_boolean_t optout = ISC_FALSE, unknown = ISC_FALSE;
4509			isc_boolean_t setclosest = ISC_FALSE;
4510			isc_boolean_t setnearest = ISC_FALSE;
4511
4512			next = ISC_LIST_NEXT(nrdataset, link);
4513			if (nrdataset->type != dns_rdatatype_nsec &&
4514			    nrdataset->type != dns_rdatatype_nsec3)
4515				continue;
4516
4517			if (nrdataset->type == dns_rdatatype_nsec &&
4518			    NXND(dns_nsec_noexistnodata(type, name, nsec,
4519							nrdataset, &exists,
4520							&data, NULL, fctx_log,
4521							fctx)))
4522			{
4523				if (!exists) {
4524					noqname = nsec;
4525					found = dns_rdatatype_nsec;
4526				}
4527			}
4528
4529			if (nrdataset->type == dns_rdatatype_nsec3 &&
4530			    NXND(dns_nsec3_noexistnodata(type, name, nsec,
4531							 nrdataset, zonename,
4532							 &exists, &data,
4533							 &optout, &unknown,
4534							 &setclosest,
4535							 &setnearest,
4536							 closest, nearest,
4537							 fctx_log, fctx)))
4538			{
4539				if (!exists && setnearest) {
4540					noqname = nsec;
4541					found = dns_rdatatype_nsec3;
4542				}
4543			}
4544		}
4545	}
4546	if (result == ISC_R_NOMORE)
4547		result = ISC_R_SUCCESS;
4548	if (noqname != NULL) {
4549		for (sigrdataset = ISC_LIST_HEAD(noqname->list);
4550		     sigrdataset != NULL;
4551		     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
4552			if (sigrdataset->type == dns_rdatatype_rrsig &&
4553			    sigrdataset->covers == found)
4554				break;
4555		}
4556		if (sigrdataset != NULL)
4557			*noqnamep = noqname;
4558	}
4559	return (result);
4560}
4561
4562static inline isc_result_t
4563cache_name(fetchctx_t *fctx, dns_name_t *name, dns_adbaddrinfo_t *addrinfo,
4564	   isc_stdtime_t now)
4565{
4566	dns_rdataset_t *rdataset, *sigrdataset;
4567	dns_rdataset_t *addedrdataset, *ardataset, *asigrdataset;
4568	dns_rdataset_t *valrdataset = NULL, *valsigrdataset = NULL;
4569	dns_dbnode_t *node, **anodep;
4570	dns_db_t **adbp;
4571	dns_name_t *aname;
4572	dns_resolver_t *res;
4573	isc_boolean_t need_validation, secure_domain, have_answer;
4574	isc_result_t result, eresult;
4575	dns_fetchevent_t *event;
4576	unsigned int options;
4577	isc_task_t *task;
4578	isc_boolean_t fail;
4579	unsigned int valoptions = 0;
4580
4581	/*
4582	 * The appropriate bucket lock must be held.
4583	 */
4584
4585	res = fctx->res;
4586	need_validation = ISC_FALSE;
4587	POST(need_validation);
4588	secure_domain = ISC_FALSE;
4589	have_answer = ISC_FALSE;
4590	eresult = ISC_R_SUCCESS;
4591	task = res->buckets[fctx->bucketnum].task;
4592
4593	/*
4594	 * Is DNSSEC validation required for this name?
4595	 */
4596	if (res->view->enablevalidation) {
4597		result = dns_view_issecuredomain(res->view, name,
4598						 &secure_domain);
4599		if (result != ISC_R_SUCCESS)
4600			return (result);
4601
4602		if (!secure_domain && res->view->dlv != NULL) {
4603			valoptions = DNS_VALIDATOR_DLV;
4604			secure_domain = ISC_TRUE;
4605		}
4606	}
4607
4608	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0)
4609		need_validation = ISC_FALSE;
4610	else
4611		need_validation = secure_domain;
4612
4613	adbp = NULL;
4614	aname = NULL;
4615	anodep = NULL;
4616	ardataset = NULL;
4617	asigrdataset = NULL;
4618	event = NULL;
4619	if ((name->attributes & DNS_NAMEATTR_ANSWER) != 0 &&
4620	    !need_validation) {
4621		have_answer = ISC_TRUE;
4622		event = ISC_LIST_HEAD(fctx->events);
4623		if (event != NULL) {
4624			adbp = &event->db;
4625			aname = dns_fixedname_name(&event->foundname);
4626			result = dns_name_copy(name, aname, NULL);
4627			if (result != ISC_R_SUCCESS)
4628				return (result);
4629			anodep = &event->node;
4630			/*
4631			 * If this is an ANY, SIG or RRSIG query, we're not
4632			 * going to return any rdatasets, unless we encountered
4633			 * a CNAME or DNAME as "the answer".  In this case,
4634			 * we're going to return DNS_R_CNAME or DNS_R_DNAME
4635			 * and we must set up the rdatasets.
4636			 */
4637			if ((fctx->type != dns_rdatatype_any &&
4638			     fctx->type != dns_rdatatype_rrsig &&
4639			     fctx->type != dns_rdatatype_sig) ||
4640			    (name->attributes & DNS_NAMEATTR_CHAINING) != 0) {
4641				ardataset = event->rdataset;
4642				asigrdataset = event->sigrdataset;
4643			}
4644		}
4645	}
4646
4647	/*
4648	 * Find or create the cache node.
4649	 */
4650	node = NULL;
4651	result = dns_db_findnode(fctx->cache, name, ISC_TRUE, &node);
4652	if (result != ISC_R_SUCCESS)
4653		return (result);
4654
4655	/*
4656	 * Cache or validate each cacheable rdataset.
4657	 */
4658	fail = ISC_TF((fctx->res->options & DNS_RESOLVER_CHECKNAMESFAIL) != 0);
4659	for (rdataset = ISC_LIST_HEAD(name->list);
4660	     rdataset != NULL;
4661	     rdataset = ISC_LIST_NEXT(rdataset, link)) {
4662		if (!CACHE(rdataset))
4663			continue;
4664		if (CHECKNAMES(rdataset)) {
4665			char namebuf[DNS_NAME_FORMATSIZE];
4666			char typebuf[DNS_RDATATYPE_FORMATSIZE];
4667			char classbuf[DNS_RDATATYPE_FORMATSIZE];
4668
4669			dns_name_format(name, namebuf, sizeof(namebuf));
4670			dns_rdatatype_format(rdataset->type, typebuf,
4671					     sizeof(typebuf));
4672			dns_rdataclass_format(rdataset->rdclass, classbuf,
4673					      sizeof(classbuf));
4674			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4675				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
4676				      "check-names %s %s/%s/%s",
4677				      fail ? "failure" : "warning",
4678				      namebuf, typebuf, classbuf);
4679			if (fail) {
4680				if (ANSWER(rdataset)) {
4681					dns_db_detachnode(fctx->cache, &node);
4682					return (DNS_R_BADNAME);
4683				}
4684				continue;
4685			}
4686		}
4687
4688		/*
4689		 * Enforce the configure maximum cache TTL.
4690		 */
4691		if (rdataset->ttl > res->view->maxcachettl)
4692			rdataset->ttl = res->view->maxcachettl;
4693
4694		/*
4695		 * Find the SIG for this rdataset, if we have it.
4696		 */
4697		for (sigrdataset = ISC_LIST_HEAD(name->list);
4698		     sigrdataset != NULL;
4699		     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
4700			if (sigrdataset->type == dns_rdatatype_rrsig &&
4701			    sigrdataset->covers == rdataset->type)
4702				break;
4703		}
4704
4705		/*
4706		 * If this RRset is in a secure domain, is in bailiwick,
4707		 * and is not glue, attempt DNSSEC validation.	(We do not
4708		 * attempt to validate glue or out-of-bailiwick data--even
4709		 * though there might be some performance benefit to doing
4710		 * so--because it makes it simpler and safer to ensure that
4711		 * records from a secure domain are only cached if validated
4712		 * within the context of a query to the domain that owns
4713		 * them.)
4714		 */
4715		if (secure_domain && rdataset->trust != dns_trust_glue &&
4716		    !EXTERNAL(rdataset)) {
4717			dns_trust_t trust;
4718
4719			/*
4720			 * RRSIGs are validated as part of validating the
4721			 * type they cover.
4722			 */
4723			if (rdataset->type == dns_rdatatype_rrsig)
4724				continue;
4725
4726			if (sigrdataset == NULL) {
4727				if (!ANSWER(rdataset) && need_validation) {
4728					/*
4729					 * Ignore non-answer rdatasets that
4730					 * are missing signatures.
4731					 */
4732					continue;
4733				}
4734			}
4735
4736			/*
4737			 * Normalize the rdataset and sigrdataset TTLs.
4738			 */
4739			if (sigrdataset != NULL) {
4740				rdataset->ttl = ISC_MIN(rdataset->ttl,
4741							sigrdataset->ttl);
4742				sigrdataset->ttl = rdataset->ttl;
4743			}
4744
4745			/*
4746			 * Cache this rdataset/sigrdataset pair as
4747			 * pending data.  Track whether it was additional
4748			 * or not.
4749			 */
4750			if (rdataset->trust == dns_trust_additional)
4751				trust = dns_trust_pending_additional;
4752			else
4753				trust = dns_trust_pending_answer;
4754
4755			rdataset->trust = trust;
4756			if (sigrdataset != NULL)
4757				sigrdataset->trust = trust;
4758			if (!need_validation || !ANSWER(rdataset)) {
4759				if (ANSWER(rdataset) &&
4760				   rdataset->type != dns_rdatatype_rrsig) {
4761					isc_result_t tresult;
4762					dns_name_t *noqname = NULL;
4763					tresult = findnoqname(fctx, name,
4764							      rdataset->type,
4765							      &noqname);
4766					if (tresult == ISC_R_SUCCESS &&
4767					    noqname != NULL) {
4768						tresult =
4769						     dns_rdataset_addnoqname(
4770							    rdataset, noqname);
4771						RUNTIME_CHECK(tresult ==
4772							      ISC_R_SUCCESS);
4773					}
4774				}
4775				addedrdataset = ardataset;
4776				result = dns_db_addrdataset(fctx->cache, node,
4777							    NULL, now, rdataset,
4778							    0, addedrdataset);
4779				if (result == DNS_R_UNCHANGED) {
4780					result = ISC_R_SUCCESS;
4781					if (!need_validation &&
4782					    ardataset != NULL &&
4783					    NEGATIVE(ardataset)) {
4784						/*
4785						 * The answer in the cache is
4786						 * better than the answer we
4787						 * found, and is a negative
4788						 * cache entry, so we must set
4789						 * eresult appropriately.
4790						 */
4791						if (NXDOMAIN(ardataset))
4792							eresult =
4793							   DNS_R_NCACHENXDOMAIN;
4794						else
4795							eresult =
4796							   DNS_R_NCACHENXRRSET;
4797						/*
4798						 * We have a negative response
4799						 * from the cache so don't
4800						 * attempt to add the RRSIG
4801						 * rrset.
4802						 */
4803						continue;
4804					}
4805				}
4806				if (result != ISC_R_SUCCESS)
4807					break;
4808				if (sigrdataset != NULL) {
4809					addedrdataset = asigrdataset;
4810					result = dns_db_addrdataset(fctx->cache,
4811								node, NULL, now,
4812								sigrdataset, 0,
4813								addedrdataset);
4814					if (result == DNS_R_UNCHANGED)
4815						result = ISC_R_SUCCESS;
4816					if (result != ISC_R_SUCCESS)
4817						break;
4818				} else if (!ANSWER(rdataset))
4819					continue;
4820			}
4821
4822			if (ANSWER(rdataset) && need_validation) {
4823				if (fctx->type != dns_rdatatype_any &&
4824				    fctx->type != dns_rdatatype_rrsig &&
4825				    fctx->type != dns_rdatatype_sig) {
4826					/*
4827					 * This is The Answer.  We will
4828					 * validate it, but first we cache
4829					 * the rest of the response - it may
4830					 * contain useful keys.
4831					 */
4832					INSIST(valrdataset == NULL &&
4833					       valsigrdataset == NULL);
4834					valrdataset = rdataset;
4835					valsigrdataset = sigrdataset;
4836				} else {
4837					/*
4838					 * This is one of (potentially)
4839					 * multiple answers to an ANY
4840					 * or SIG query.  To keep things
4841					 * simple, we just start the
4842					 * validator right away rather
4843					 * than caching first and
4844					 * having to remember which
4845					 * rdatasets needed validation.
4846					 */
4847					result = valcreate(fctx, addrinfo,
4848							   name, rdataset->type,
4849							   rdataset,
4850							   sigrdataset,
4851							   valoptions, task);
4852					/*
4853					 * Defer any further validations.
4854					 * This prevents multiple validators
4855					 * from manipulating fctx->rmessage
4856					 * simultaneously.
4857					 */
4858					valoptions |= DNS_VALIDATOR_DEFER;
4859				}
4860			} else if (CHAINING(rdataset)) {
4861				if (rdataset->type == dns_rdatatype_cname)
4862					eresult = DNS_R_CNAME;
4863				else {
4864					INSIST(rdataset->type ==
4865					       dns_rdatatype_dname);
4866					eresult = DNS_R_DNAME;
4867				}
4868			}
4869		} else if (!EXTERNAL(rdataset)) {
4870			/*
4871			 * It's OK to cache this rdataset now.
4872			 */
4873			if (ANSWER(rdataset))
4874				addedrdataset = ardataset;
4875			else if (ANSWERSIG(rdataset))
4876				addedrdataset = asigrdataset;
4877			else
4878				addedrdataset = NULL;
4879			if (CHAINING(rdataset)) {
4880				if (rdataset->type == dns_rdatatype_cname)
4881					eresult = DNS_R_CNAME;
4882				else {
4883					INSIST(rdataset->type ==
4884					       dns_rdatatype_dname);
4885					eresult = DNS_R_DNAME;
4886				}
4887			}
4888			if (rdataset->trust == dns_trust_glue &&
4889			    (rdataset->type == dns_rdatatype_ns ||
4890			     (rdataset->type == dns_rdatatype_rrsig &&
4891			      rdataset->covers == dns_rdatatype_ns))) {
4892				/*
4893				 * If the trust level is 'dns_trust_glue'
4894				 * then we are adding data from a referral
4895				 * we got while executing the search algorithm.
4896				 * New referral data always takes precedence
4897				 * over the existing cache contents.
4898				 */
4899				options = DNS_DBADD_FORCE;
4900			} else
4901				options = 0;
4902
4903			if (ANSWER(rdataset) &&
4904			   rdataset->type != dns_rdatatype_rrsig) {
4905				isc_result_t tresult;
4906				dns_name_t *noqname = NULL;
4907				tresult = findnoqname(fctx, name,
4908						      rdataset->type, &noqname);
4909				if (tresult == ISC_R_SUCCESS &&
4910				    noqname != NULL) {
4911					tresult = dns_rdataset_addnoqname(
4912							    rdataset, noqname);
4913					RUNTIME_CHECK(tresult == ISC_R_SUCCESS);
4914				}
4915			}
4916
4917			/*
4918			 * Now we can add the rdataset.
4919			 */
4920			result = dns_db_addrdataset(fctx->cache,
4921						    node, NULL, now,
4922						    rdataset,
4923						    options,
4924						    addedrdataset);
4925
4926			if (result == DNS_R_UNCHANGED) {
4927				if (ANSWER(rdataset) &&
4928				    ardataset != NULL &&
4929				    NEGATIVE(ardataset)) {
4930					/*
4931					 * The answer in the cache is better
4932					 * than the answer we found, and is
4933					 * a negative cache entry, so we
4934					 * must set eresult appropriately.
4935					 */
4936					if (NXDOMAIN(ardataset))
4937						eresult = DNS_R_NCACHENXDOMAIN;
4938					else
4939						eresult = DNS_R_NCACHENXRRSET;
4940				}
4941				result = ISC_R_SUCCESS;
4942			} else if (result != ISC_R_SUCCESS)
4943				break;
4944		}
4945	}
4946
4947	if (valrdataset != NULL)
4948		result = valcreate(fctx, addrinfo, name, fctx->type,
4949				   valrdataset, valsigrdataset, valoptions,
4950				   task);
4951
4952	if (result == ISC_R_SUCCESS && have_answer) {
4953		fctx->attributes |= FCTX_ATTR_HAVEANSWER;
4954		if (event != NULL) {
4955			/*
4956			 * Negative results must be indicated in event->result.
4957			 */
4958			if (dns_rdataset_isassociated(event->rdataset) &&
4959			    NEGATIVE(event->rdataset)) {
4960				INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
4961				       eresult == DNS_R_NCACHENXRRSET);
4962			}
4963			event->result = eresult;
4964			dns_db_attach(fctx->cache, adbp);
4965			dns_db_transfernode(fctx->cache, &node, anodep);
4966			clone_results(fctx);
4967		}
4968	}
4969
4970	if (node != NULL)
4971		dns_db_detachnode(fctx->cache, &node);
4972
4973	return (result);
4974}
4975
4976static inline isc_result_t
4977cache_message(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_stdtime_t now)
4978{
4979	isc_result_t result;
4980	dns_section_t section;
4981	dns_name_t *name;
4982
4983	FCTXTRACE("cache_message");
4984
4985	fctx->attributes &= ~FCTX_ATTR_WANTCACHE;
4986
4987	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4988
4989	for (section = DNS_SECTION_ANSWER;
4990	     section <= DNS_SECTION_ADDITIONAL;
4991	     section++) {
4992		result = dns_message_firstname(fctx->rmessage, section);
4993		while (result == ISC_R_SUCCESS) {
4994			name = NULL;
4995			dns_message_currentname(fctx->rmessage, section,
4996						&name);
4997			if ((name->attributes & DNS_NAMEATTR_CACHE) != 0) {
4998				result = cache_name(fctx, name, addrinfo, now);
4999				if (result != ISC_R_SUCCESS)
5000					break;
5001			}
5002			result = dns_message_nextname(fctx->rmessage, section);
5003		}
5004		if (result != ISC_R_NOMORE)
5005			break;
5006	}
5007	if (result == ISC_R_NOMORE)
5008		result = ISC_R_SUCCESS;
5009
5010	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
5011
5012	return (result);
5013}
5014
5015/*
5016 * Do what dns_ncache_addoptout() does, and then compute an appropriate eresult.
5017 */
5018static isc_result_t
5019ncache_adderesult(dns_message_t *message, dns_db_t *cache, dns_dbnode_t *node,
5020		  dns_rdatatype_t covers, isc_stdtime_t now, dns_ttl_t maxttl,
5021		  isc_boolean_t optout, isc_boolean_t secure,
5022		  dns_rdataset_t *ardataset, isc_result_t *eresultp)
5023{
5024	isc_result_t result;
5025	dns_rdataset_t rdataset;
5026
5027	if (ardataset == NULL) {
5028		dns_rdataset_init(&rdataset);
5029		ardataset = &rdataset;
5030	}
5031	if (secure)
5032		result = dns_ncache_addoptout(message, cache, node, covers,
5033					      now, maxttl, optout, ardataset);
5034	else
5035		result = dns_ncache_add(message, cache, node, covers, now,
5036					maxttl, ardataset);
5037	if (result == DNS_R_UNCHANGED || result == ISC_R_SUCCESS) {
5038		/*
5039		 * If the cache now contains a negative entry and we
5040		 * care about whether it is DNS_R_NCACHENXDOMAIN or
5041		 * DNS_R_NCACHENXRRSET then extract it.
5042		 */
5043		if (NEGATIVE(ardataset)) {
5044			/*
5045			 * The cache data is a negative cache entry.
5046			 */
5047			if (NXDOMAIN(ardataset))
5048				*eresultp = DNS_R_NCACHENXDOMAIN;
5049			else
5050				*eresultp = DNS_R_NCACHENXRRSET;
5051		} else {
5052			/*
5053			 * Either we don't care about the nature of the
5054			 * cache rdataset (because no fetch is interested
5055			 * in the outcome), or the cache rdataset is not
5056			 * a negative cache entry.  Whichever case it is,
5057			 * we can return success.
5058			 *
5059			 * XXXRTH  There's a CNAME/DNAME problem here.
5060			 */
5061			*eresultp = ISC_R_SUCCESS;
5062		}
5063		result = ISC_R_SUCCESS;
5064	}
5065	if (ardataset == &rdataset && dns_rdataset_isassociated(ardataset))
5066		dns_rdataset_disassociate(ardataset);
5067
5068	return (result);
5069}
5070
5071static inline isc_result_t
5072ncache_message(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
5073	       dns_rdatatype_t covers, isc_stdtime_t now)
5074{
5075	isc_result_t result, eresult;
5076	dns_name_t *name;
5077	dns_resolver_t *res;
5078	dns_db_t **adbp;
5079	dns_dbnode_t *node, **anodep;
5080	dns_rdataset_t *ardataset;
5081	isc_boolean_t need_validation, secure_domain;
5082	dns_name_t *aname;
5083	dns_fetchevent_t *event;
5084	isc_uint32_t ttl;
5085	unsigned int valoptions = 0;
5086
5087	FCTXTRACE("ncache_message");
5088
5089	fctx->attributes &= ~FCTX_ATTR_WANTNCACHE;
5090
5091	res = fctx->res;
5092	need_validation = ISC_FALSE;
5093	POST(need_validation);
5094	secure_domain = ISC_FALSE;
5095	eresult = ISC_R_SUCCESS;
5096	name = &fctx->name;
5097	node = NULL;
5098
5099	/*
5100	 * XXXMPA remove when we follow cnames and adjust the setting
5101	 * of FCTX_ATTR_WANTNCACHE in noanswer_response().
5102	 */
5103	INSIST(fctx->rmessage->counts[DNS_SECTION_ANSWER] == 0);
5104
5105	/*
5106	 * Is DNSSEC validation required for this name?
5107	 */
5108	if (fctx->res->view->enablevalidation) {
5109		result = dns_view_issecuredomain(res->view, name,
5110						 &secure_domain);
5111		if (result != ISC_R_SUCCESS)
5112			return (result);
5113
5114		if (!secure_domain && res->view->dlv != NULL) {
5115			valoptions = DNS_VALIDATOR_DLV;
5116			secure_domain = ISC_TRUE;
5117		}
5118	}
5119
5120	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0)
5121		need_validation = ISC_FALSE;
5122	else
5123		need_validation = secure_domain;
5124
5125	if (secure_domain) {
5126		/*
5127		 * Mark all rdatasets as pending.
5128		 */
5129		dns_rdataset_t *trdataset;
5130		dns_name_t *tname;
5131
5132		result = dns_message_firstname(fctx->rmessage,
5133					       DNS_SECTION_AUTHORITY);
5134		while (result == ISC_R_SUCCESS) {
5135			tname = NULL;
5136			dns_message_currentname(fctx->rmessage,
5137						DNS_SECTION_AUTHORITY,
5138						&tname);
5139			for (trdataset = ISC_LIST_HEAD(tname->list);
5140			     trdataset != NULL;
5141			     trdataset = ISC_LIST_NEXT(trdataset, link))
5142				trdataset->trust = dns_trust_pending_answer;
5143			result = dns_message_nextname(fctx->rmessage,
5144						      DNS_SECTION_AUTHORITY);
5145		}
5146		if (result != ISC_R_NOMORE)
5147			return (result);
5148
5149	}
5150
5151	if (need_validation) {
5152		/*
5153		 * Do negative response validation.
5154		 */
5155		result = valcreate(fctx, addrinfo, name, fctx->type,
5156				   NULL, NULL, valoptions,
5157				   res->buckets[fctx->bucketnum].task);
5158		/*
5159		 * If validation is necessary, return now.  Otherwise continue
5160		 * to process the message, letting the validation complete
5161		 * in its own good time.
5162		 */
5163		return (result);
5164	}
5165
5166	LOCK(&res->buckets[fctx->bucketnum].lock);
5167
5168	adbp = NULL;
5169	aname = NULL;
5170	anodep = NULL;
5171	ardataset = NULL;
5172	if (!HAVE_ANSWER(fctx)) {
5173		event = ISC_LIST_HEAD(fctx->events);
5174		if (event != NULL) {
5175			adbp = &event->db;
5176			aname = dns_fixedname_name(&event->foundname);
5177			result = dns_name_copy(name, aname, NULL);
5178			if (result != ISC_R_SUCCESS)
5179				goto unlock;
5180			anodep = &event->node;
5181			ardataset = event->rdataset;
5182		}
5183	} else
5184		event = NULL;
5185
5186	result = dns_db_findnode(fctx->cache, name, ISC_TRUE, &node);
5187	if (result != ISC_R_SUCCESS)
5188		goto unlock;
5189
5190	/*
5191	 * If we are asking for a SOA record set the cache time
5192	 * to zero to facilitate locating the containing zone of
5193	 * a arbitrary zone.
5194	 */
5195	ttl = fctx->res->view->maxncachettl;
5196	if (fctx->type == dns_rdatatype_soa &&
5197	    covers == dns_rdatatype_any &&
5198	    fctx->res->zero_no_soa_ttl)
5199		ttl = 0;
5200
5201	result = ncache_adderesult(fctx->rmessage, fctx->cache, node,
5202				   covers, now, ttl, ISC_FALSE,
5203				   ISC_FALSE, ardataset, &eresult);
5204	if (result != ISC_R_SUCCESS)
5205		goto unlock;
5206
5207	if (!HAVE_ANSWER(fctx)) {
5208		fctx->attributes |= FCTX_ATTR_HAVEANSWER;
5209		if (event != NULL) {
5210			event->result = eresult;
5211			dns_db_attach(fctx->cache, adbp);
5212			dns_db_transfernode(fctx->cache, &node, anodep);
5213			clone_results(fctx);
5214		}
5215	}
5216
5217 unlock:
5218	UNLOCK(&res->buckets[fctx->bucketnum].lock);
5219
5220	if (node != NULL)
5221		dns_db_detachnode(fctx->cache, &node);
5222
5223	return (result);
5224}
5225
5226static inline void
5227mark_related(dns_name_t *name, dns_rdataset_t *rdataset,
5228	     isc_boolean_t external, isc_boolean_t gluing)
5229{
5230	name->attributes |= DNS_NAMEATTR_CACHE;
5231	if (gluing) {
5232		rdataset->trust = dns_trust_glue;
5233		/*
5234		 * Glue with 0 TTL causes problems.  We force the TTL to
5235		 * 1 second to prevent this.
5236		 */
5237		if (rdataset->ttl == 0)
5238			rdataset->ttl = 1;
5239	} else
5240		rdataset->trust = dns_trust_additional;
5241	/*
5242	 * Avoid infinite loops by only marking new rdatasets.
5243	 */
5244	if (!CACHE(rdataset)) {
5245		name->attributes |= DNS_NAMEATTR_CHASE;
5246		rdataset->attributes |= DNS_RDATASETATTR_CHASE;
5247	}
5248	rdataset->attributes |= DNS_RDATASETATTR_CACHE;
5249	if (external)
5250		rdataset->attributes |= DNS_RDATASETATTR_EXTERNAL;
5251}
5252
5253static isc_result_t
5254check_section(void *arg, dns_name_t *addname, dns_rdatatype_t type,
5255	      dns_section_t section)
5256{
5257	fetchctx_t *fctx = arg;
5258	isc_result_t result;
5259	dns_name_t *name;
5260	dns_rdataset_t *rdataset;
5261	isc_boolean_t external;
5262	dns_rdatatype_t rtype;
5263	isc_boolean_t gluing;
5264
5265	REQUIRE(VALID_FCTX(fctx));
5266
5267#if CHECK_FOR_GLUE_IN_ANSWER
5268	if (section == DNS_SECTION_ANSWER && type != dns_rdatatype_a)
5269		return (ISC_R_SUCCESS);
5270#endif
5271
5272	if (GLUING(fctx))
5273		gluing = ISC_TRUE;
5274	else
5275		gluing = ISC_FALSE;
5276	name = NULL;
5277	rdataset = NULL;
5278	result = dns_message_findname(fctx->rmessage, section, addname,
5279				      dns_rdatatype_any, 0, &name, NULL);
5280	if (result == ISC_R_SUCCESS) {
5281		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
5282		if (type == dns_rdatatype_a) {
5283			for (rdataset = ISC_LIST_HEAD(name->list);
5284			     rdataset != NULL;
5285			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5286				if (rdataset->type == dns_rdatatype_rrsig)
5287					rtype = rdataset->covers;
5288				else
5289					rtype = rdataset->type;
5290				if (rtype == dns_rdatatype_a ||
5291				    rtype == dns_rdatatype_aaaa)
5292					mark_related(name, rdataset, external,
5293						     gluing);
5294			}
5295		} else {
5296			result = dns_message_findtype(name, type, 0,
5297						      &rdataset);
5298			if (result == ISC_R_SUCCESS) {
5299				mark_related(name, rdataset, external, gluing);
5300				/*
5301				 * Do we have its SIG too?
5302				 */
5303				rdataset = NULL;
5304				result = dns_message_findtype(name,
5305						      dns_rdatatype_rrsig,
5306						      type, &rdataset);
5307				if (result == ISC_R_SUCCESS)
5308					mark_related(name, rdataset, external,
5309						     gluing);
5310			}
5311		}
5312	}
5313
5314	return (ISC_R_SUCCESS);
5315}
5316
5317static isc_result_t
5318check_related(void *arg, dns_name_t *addname, dns_rdatatype_t type) {
5319	return (check_section(arg, addname, type, DNS_SECTION_ADDITIONAL));
5320}
5321
/*
 * Looking for glue in the answer section is off unless the build
 * explicitly turns it on.
 */
#if !defined(CHECK_FOR_GLUE_IN_ANSWER)
#define CHECK_FOR_GLUE_IN_ANSWER 0
#endif

#if CHECK_FOR_GLUE_IN_ANSWER
/*
 * Additional-data callback: run check_section() against the answer
 * section of the response.
 */
static isc_result_t
check_answer(void *arg, dns_name_t *addname, dns_rdatatype_t type) {
	const dns_section_t section = DNS_SECTION_ANSWER;

	return (check_section(arg, addname, type, section));
}
#endif
5331
5332static void
5333chase_additional(fetchctx_t *fctx) {
5334	isc_boolean_t rescan;
5335	dns_section_t section = DNS_SECTION_ADDITIONAL;
5336	isc_result_t result;
5337
5338 again:
5339	rescan = ISC_FALSE;
5340
5341	for (result = dns_message_firstname(fctx->rmessage, section);
5342	     result == ISC_R_SUCCESS;
5343	     result = dns_message_nextname(fctx->rmessage, section)) {
5344		dns_name_t *name = NULL;
5345		dns_rdataset_t *rdataset;
5346		dns_message_currentname(fctx->rmessage, DNS_SECTION_ADDITIONAL,
5347					&name);
5348		if ((name->attributes & DNS_NAMEATTR_CHASE) == 0)
5349			continue;
5350		name->attributes &= ~DNS_NAMEATTR_CHASE;
5351		for (rdataset = ISC_LIST_HEAD(name->list);
5352		     rdataset != NULL;
5353		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5354			if (CHASE(rdataset)) {
5355				rdataset->attributes &= ~DNS_RDATASETATTR_CHASE;
5356				(void)dns_rdataset_additionaldata(rdataset,
5357								  check_related,
5358								  fctx);
5359				rescan = ISC_TRUE;
5360			}
5361		}
5362	}
5363	if (rescan)
5364		goto again;
5365}
5366
5367static inline isc_result_t
5368cname_target(dns_rdataset_t *rdataset, dns_name_t *tname) {
5369	isc_result_t result;
5370	dns_rdata_t rdata = DNS_RDATA_INIT;
5371	dns_rdata_cname_t cname;
5372
5373	result = dns_rdataset_first(rdataset);
5374	if (result != ISC_R_SUCCESS)
5375		return (result);
5376	dns_rdataset_current(rdataset, &rdata);
5377	result = dns_rdata_tostruct(&rdata, &cname, NULL);
5378	if (result != ISC_R_SUCCESS)
5379		return (result);
5380	dns_name_init(tname, NULL);
5381	dns_name_clone(&cname.cname, tname);
5382	dns_rdata_freestruct(&cname);
5383
5384	return (ISC_R_SUCCESS);
5385}
5386
5387static inline isc_result_t
5388dname_target(dns_rdataset_t *rdataset, dns_name_t *qname,
5389	     unsigned int nlabels, dns_fixedname_t *fixeddname)
5390{
5391	isc_result_t result;
5392	dns_rdata_t rdata = DNS_RDATA_INIT;
5393	dns_rdata_dname_t dname;
5394	dns_fixedname_t prefix;
5395
5396	/*
5397	 * Get the target name of the DNAME.
5398	 */
5399	result = dns_rdataset_first(rdataset);
5400	if (result != ISC_R_SUCCESS)
5401		return (result);
5402	dns_rdataset_current(rdataset, &rdata);
5403	result = dns_rdata_tostruct(&rdata, &dname, NULL);
5404	if (result != ISC_R_SUCCESS)
5405		return (result);
5406
5407	dns_fixedname_init(&prefix);
5408	dns_name_split(qname, nlabels, dns_fixedname_name(&prefix), NULL);
5409	dns_fixedname_init(fixeddname);
5410	result = dns_name_concatenate(dns_fixedname_name(&prefix),
5411				      &dname.dname,
5412				      dns_fixedname_name(fixeddname), NULL);
5413	dns_rdata_freestruct(&dname);
5414	return (result);
5415}
5416
5417static isc_boolean_t
5418is_answeraddress_allowed(dns_view_t *view, dns_name_t *name,
5419			 dns_rdataset_t *rdataset)
5420{
5421	isc_result_t result;
5422	dns_rdata_t rdata = DNS_RDATA_INIT;
5423	struct in_addr ina;
5424	struct in6_addr in6a;
5425	isc_netaddr_t netaddr;
5426	char addrbuf[ISC_NETADDR_FORMATSIZE];
5427	char namebuf[DNS_NAME_FORMATSIZE];
5428	char classbuf[64];
5429	char typebuf[64];
5430	int match;
5431
5432	/* By default, we allow any addresses. */
5433	if (view->denyansweracl == NULL)
5434		return (ISC_TRUE);
5435
5436	/*
5437	 * If the owner name matches one in the exclusion list, either exactly
5438	 * or partially, allow it.
5439	 */
5440	if (view->answeracl_exclude != NULL) {
5441		dns_rbtnode_t *node = NULL;
5442
5443		result = dns_rbt_findnode(view->answeracl_exclude, name, NULL,
5444					  &node, NULL, 0, NULL, NULL);
5445
5446		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
5447			return (ISC_TRUE);
5448	}
5449
5450	/*
5451	 * Otherwise, search the filter list for a match for each address
5452	 * record.  If a match is found, the address should be filtered,
5453	 * so should the entire answer.
5454	 */
5455	for (result = dns_rdataset_first(rdataset);
5456	     result == ISC_R_SUCCESS;
5457	     result = dns_rdataset_next(rdataset)) {
5458		dns_rdata_reset(&rdata);
5459		dns_rdataset_current(rdataset, &rdata);
5460		if (rdataset->type == dns_rdatatype_a) {
5461			INSIST(rdata.length == sizeof(ina.s_addr));
5462			memmove(&ina.s_addr, rdata.data, sizeof(ina.s_addr));
5463			isc_netaddr_fromin(&netaddr, &ina);
5464		} else {
5465			INSIST(rdata.length == sizeof(in6a.s6_addr));
5466			memmove(in6a.s6_addr, rdata.data, sizeof(in6a.s6_addr));
5467			isc_netaddr_fromin6(&netaddr, &in6a);
5468		}
5469
5470		result = dns_acl_match(&netaddr, NULL, view->denyansweracl,
5471				       &view->aclenv, &match, NULL);
5472
5473		if (result == ISC_R_SUCCESS && match > 0) {
5474			isc_netaddr_format(&netaddr, addrbuf, sizeof(addrbuf));
5475			dns_name_format(name, namebuf, sizeof(namebuf));
5476			dns_rdatatype_format(rdataset->type, typebuf,
5477					     sizeof(typebuf));
5478			dns_rdataclass_format(rdataset->rdclass, classbuf,
5479					      sizeof(classbuf));
5480			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5481				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5482				      "answer address %s denied for %s/%s/%s",
5483				      addrbuf, namebuf, typebuf, classbuf);
5484			return (ISC_FALSE);
5485		}
5486	}
5487
5488	return (ISC_TRUE);
5489}
5490
5491static isc_boolean_t
5492is_answertarget_allowed(dns_view_t *view, dns_name_t *name,
5493			dns_rdatatype_t type, dns_name_t *tname,
5494			dns_name_t *domain)
5495{
5496	isc_result_t result;
5497	dns_rbtnode_t *node = NULL;
5498	char qnamebuf[DNS_NAME_FORMATSIZE];
5499	char tnamebuf[DNS_NAME_FORMATSIZE];
5500	char classbuf[64];
5501	char typebuf[64];
5502
5503	/* By default, we allow any target name. */
5504	if (view->denyanswernames == NULL)
5505		return (ISC_TRUE);
5506
5507	/*
5508	 * If the owner name matches one in the exclusion list, either exactly
5509	 * or partially, allow it.
5510	 */
5511	if (view->answernames_exclude != NULL) {
5512		result = dns_rbt_findnode(view->answernames_exclude, name, NULL,
5513					  &node, NULL, 0, NULL, NULL);
5514		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
5515			return (ISC_TRUE);
5516	}
5517
5518	/*
5519	 * If the target name is a subdomain of the search domain, allow it.
5520	 */
5521	if (dns_name_issubdomain(tname, domain))
5522		return (ISC_TRUE);
5523
5524	/*
5525	 * Otherwise, apply filters.
5526	 */
5527	result = dns_rbt_findnode(view->denyanswernames, tname, NULL, &node,
5528				  NULL, 0, NULL, NULL);
5529	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
5530		dns_name_format(name, qnamebuf, sizeof(qnamebuf));
5531		dns_name_format(tname, tnamebuf, sizeof(tnamebuf));
5532		dns_rdatatype_format(type, typebuf, sizeof(typebuf));
5533		dns_rdataclass_format(view->rdclass, classbuf,
5534				      sizeof(classbuf));
5535		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5536			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5537			      "%s target %s denied for %s/%s",
5538			      typebuf, tnamebuf, qnamebuf, classbuf);
5539		return (ISC_FALSE);
5540	}
5541
5542	return (ISC_TRUE);
5543}
5544
5545static void
5546trim_ns_ttl(fetchctx_t *fctx, dns_name_t *name, dns_rdataset_t *rdataset) {
5547	char ns_namebuf[DNS_NAME_FORMATSIZE];
5548	char namebuf[DNS_NAME_FORMATSIZE];
5549	char tbuf[DNS_RDATATYPE_FORMATSIZE];
5550
5551	if (fctx->ns_ttl_ok && rdataset->ttl > fctx->ns_ttl) {
5552		dns_name_format(name, ns_namebuf, sizeof(ns_namebuf));
5553		dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
5554		dns_rdatatype_format(fctx->type, tbuf, sizeof(tbuf));
5555
5556		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5557			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
5558			      "fctx %p: trimming ttl of %s/NS for %s/%s: "
5559			      "%u -> %u", fctx, ns_namebuf, namebuf, tbuf,
5560			      rdataset->ttl, fctx->ns_ttl);
5561		rdataset->ttl = fctx->ns_ttl;
5562	}
5563}
5564
5565/*
5566 * Handle a no-answer response (NXDOMAIN, NXRRSET, or referral).
5567 * If look_in_options has LOOK_FOR_NS_IN_ANSWER then we look in the answer
5568 * section for the NS RRset if the query type is NS; if it has
5569 * LOOK_FOR_GLUE_IN_ANSWER we look for glue incorrectly returned in the answer
5570 * section for A and AAAA queries.
5571 */
5572#define LOOK_FOR_NS_IN_ANSWER 0x1
5573#define LOOK_FOR_GLUE_IN_ANSWER 0x2
5574
5575static isc_result_t
5576noanswer_response(fetchctx_t *fctx, dns_name_t *oqname,
5577		  unsigned int look_in_options)
5578{
5579	isc_result_t result;
5580	dns_message_t *message;
5581	dns_name_t *name, *qname, *ns_name, *soa_name, *ds_name, *save_name;
5582	dns_rdataset_t *rdataset, *ns_rdataset;
5583	isc_boolean_t aa, negative_response;
5584	dns_rdatatype_t type, save_type;
5585	dns_section_t section;
5586
5587	FCTXTRACE("noanswer_response");
5588
5589	if ((look_in_options & LOOK_FOR_NS_IN_ANSWER) != 0) {
5590		INSIST(fctx->type == dns_rdatatype_ns);
5591		section = DNS_SECTION_ANSWER;
5592	} else
5593		section = DNS_SECTION_AUTHORITY;
5594
5595	message = fctx->rmessage;
5596
5597	/*
5598	 * Setup qname.
5599	 */
5600	if (oqname == NULL) {
5601		/*
5602		 * We have a normal, non-chained negative response or
5603		 * referral.
5604		 */
5605		if ((message->flags & DNS_MESSAGEFLAG_AA) != 0)
5606			aa = ISC_TRUE;
5607		else
5608			aa = ISC_FALSE;
5609		qname = &fctx->name;
5610	} else {
5611		/*
5612		 * We're being invoked by answer_response() after it has
5613		 * followed a CNAME/DNAME chain.
5614		 */
5615		qname = oqname;
5616		aa = ISC_FALSE;
5617		/*
5618		 * If the current qname is not a subdomain of the query
5619		 * domain, there's no point in looking at the authority
5620		 * section without doing DNSSEC validation.
5621		 *
5622		 * Until we do that validation, we'll just return success
5623		 * in this case.
5624		 */
5625		if (!dns_name_issubdomain(qname, &fctx->domain))
5626			return (ISC_R_SUCCESS);
5627	}
5628
5629	/*
5630	 * We have to figure out if this is a negative response, or a
5631	 * referral.
5632	 */
5633
5634	/*
5635	 * Sometimes we can tell if its a negative response by looking at
5636	 * the message header.
5637	 */
5638	negative_response = ISC_FALSE;
5639	if (message->rcode == dns_rcode_nxdomain ||
5640	    (message->counts[DNS_SECTION_ANSWER] == 0 &&
5641	     message->counts[DNS_SECTION_AUTHORITY] == 0))
5642		negative_response = ISC_TRUE;
5643
5644	/*
5645	 * Process the authority section.
5646	 */
5647	ns_name = NULL;
5648	ns_rdataset = NULL;
5649	soa_name = NULL;
5650	ds_name = NULL;
5651	save_name = NULL;
5652	save_type = dns_rdatatype_none;
5653	result = dns_message_firstname(message, section);
5654	while (result == ISC_R_SUCCESS) {
5655		name = NULL;
5656		dns_message_currentname(message, section, &name);
5657		if (dns_name_issubdomain(name, &fctx->domain)) {
5658			/*
5659			 * Look for NS/SOA RRsets first.
5660			 */
5661			for (rdataset = ISC_LIST_HEAD(name->list);
5662			     rdataset != NULL;
5663			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5664				type = rdataset->type;
5665				if (type == dns_rdatatype_rrsig)
5666					type = rdataset->covers;
5667				if (((type == dns_rdatatype_ns ||
5668				      type == dns_rdatatype_soa) &&
5669				     !dns_name_issubdomain(qname, name))) {
5670					char qbuf[DNS_NAME_FORMATSIZE];
5671					char nbuf[DNS_NAME_FORMATSIZE];
5672					char tbuf[DNS_RDATATYPE_FORMATSIZE];
5673					dns_rdatatype_format(fctx->type, tbuf,
5674							     sizeof(tbuf));
5675					dns_name_format(name, nbuf,
5676							     sizeof(nbuf));
5677					dns_name_format(qname, qbuf,
5678							     sizeof(qbuf));
5679					log_formerr(fctx,
5680						    "unrelated %s %s in "
5681						    "%s authority section",
5682						    tbuf, qbuf, nbuf);
5683					return (DNS_R_FORMERR);
5684				}
5685				if (type == dns_rdatatype_ns) {
5686					/*
5687					 * NS or RRSIG NS.
5688					 *
5689					 * Only one set of NS RRs is allowed.
5690					 */
5691					if (rdataset->type ==
5692					    dns_rdatatype_ns) {
5693						if (ns_name != NULL &&
5694						    name != ns_name) {
5695							log_formerr(fctx,
5696								"multiple NS "
5697								"RRsets in "
5698								"authority "
5699								"section");
5700							return (DNS_R_FORMERR);
5701						}
5702						ns_name = name;
5703						ns_rdataset = rdataset;
5704					}
5705					name->attributes |=
5706						DNS_NAMEATTR_CACHE;
5707					rdataset->attributes |=
5708						DNS_RDATASETATTR_CACHE;
5709					rdataset->trust = dns_trust_glue;
5710				}
5711				if (type == dns_rdatatype_soa) {
5712					/*
5713					 * SOA, or RRSIG SOA.
5714					 *
5715					 * Only one SOA is allowed.
5716					 */
5717					if (rdataset->type ==
5718					    dns_rdatatype_soa) {
5719						if (soa_name != NULL &&
5720						    name != soa_name) {
5721							log_formerr(fctx,
5722								"multiple SOA "
5723								"RRs in "
5724								"authority "
5725								"section");
5726							return (DNS_R_FORMERR);
5727						}
5728						soa_name = name;
5729					}
5730					name->attributes |=
5731						DNS_NAMEATTR_NCACHE;
5732					rdataset->attributes |=
5733						DNS_RDATASETATTR_NCACHE;
5734					if (aa)
5735						rdataset->trust =
5736						    dns_trust_authauthority;
5737					else if (ISFORWARDER(fctx->addrinfo))
5738						rdataset->trust =
5739							dns_trust_answer;
5740					else
5741						rdataset->trust =
5742							dns_trust_additional;
5743				}
5744			}
5745		}
5746		result = dns_message_nextname(message, section);
5747		if (result == ISC_R_NOMORE)
5748			break;
5749		else if (result != ISC_R_SUCCESS)
5750			return (result);
5751	}
5752
5753	log_ns_ttl(fctx, "noanswer_response");
5754
5755	if (ns_rdataset != NULL && dns_name_equal(&fctx->domain, ns_name) &&
5756	    !dns_name_equal(ns_name, dns_rootname))
5757		trim_ns_ttl(fctx, ns_name, ns_rdataset);
5758
5759	/*
5760	 * A negative response has a SOA record (Type 2)
5761	 * and a optional NS RRset (Type 1) or it has neither
5762	 * a SOA or a NS RRset (Type 3, handled above) or
5763	 * rcode is NXDOMAIN (handled above) in which case
5764	 * the NS RRset is allowed (Type 4).
5765	 */
5766	if (soa_name != NULL)
5767		negative_response = ISC_TRUE;
5768
5769	result = dns_message_firstname(message, section);
5770	while (result == ISC_R_SUCCESS) {
5771		name = NULL;
5772		dns_message_currentname(message, section, &name);
5773		if (dns_name_issubdomain(name, &fctx->domain)) {
5774			for (rdataset = ISC_LIST_HEAD(name->list);
5775			     rdataset != NULL;
5776			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5777				type = rdataset->type;
5778				if (type == dns_rdatatype_rrsig)
5779					type = rdataset->covers;
5780				if (type == dns_rdatatype_nsec ||
5781				    type == dns_rdatatype_nsec3) {
5782					/*
5783					 * NSEC or RRSIG NSEC.
5784					 */
5785					if (negative_response) {
5786						name->attributes |=
5787							DNS_NAMEATTR_NCACHE;
5788						rdataset->attributes |=
5789							DNS_RDATASETATTR_NCACHE;
5790					} else if (type == dns_rdatatype_nsec) {
5791						name->attributes |=
5792							DNS_NAMEATTR_CACHE;
5793						rdataset->attributes |=
5794							DNS_RDATASETATTR_CACHE;
5795					}
5796					if (aa)
5797						rdataset->trust =
5798						    dns_trust_authauthority;
5799					else if (ISFORWARDER(fctx->addrinfo))
5800						rdataset->trust =
5801							dns_trust_answer;
5802					else
5803						rdataset->trust =
5804							dns_trust_additional;
5805					/*
5806					 * No additional data needs to be
5807					 * marked.
5808					 */
5809				} else if (type == dns_rdatatype_ds) {
5810					/*
5811					 * DS or SIG DS.
5812					 *
5813					 * These should only be here if
5814					 * this is a referral, and there
5815					 * should only be one DS RRset.
5816					 */
5817					if (ns_name == NULL) {
5818						log_formerr(fctx,
5819							    "DS with no "
5820							    "referral");
5821						return (DNS_R_FORMERR);
5822					}
5823					if (rdataset->type ==
5824					    dns_rdatatype_ds) {
5825						if (ds_name != NULL &&
5826						    name != ds_name) {
5827							log_formerr(fctx,
5828								"DS doesn't "
5829								"match "
5830								"referral "
5831								"(NS)");
5832							return (DNS_R_FORMERR);
5833						}
5834						ds_name = name;
5835					}
5836					name->attributes |=
5837						DNS_NAMEATTR_CACHE;
5838					rdataset->attributes |=
5839						DNS_RDATASETATTR_CACHE;
5840					if (aa)
5841						rdataset->trust =
5842						    dns_trust_authauthority;
5843					else if (ISFORWARDER(fctx->addrinfo))
5844						rdataset->trust =
5845							dns_trust_answer;
5846					else
5847						rdataset->trust =
5848							dns_trust_additional;
5849				}
5850			}
5851		} else {
5852			save_name = name;
5853			save_type = ISC_LIST_HEAD(name->list)->type;
5854		}
5855		result = dns_message_nextname(message, section);
5856		if (result == ISC_R_NOMORE)
5857			break;
5858		else if (result != ISC_R_SUCCESS)
5859			return (result);
5860	}
5861
5862	/*
5863	 * Trigger lookups for DNS nameservers.
5864	 */
5865	if (negative_response && message->rcode == dns_rcode_noerror &&
5866	    fctx->type == dns_rdatatype_ds && soa_name != NULL &&
5867	    dns_name_equal(soa_name, qname) &&
5868	    !dns_name_equal(qname, dns_rootname))
5869		return (DNS_R_CHASEDSSERVERS);
5870
5871	/*
5872	 * Did we find anything?
5873	 */
5874	if (!negative_response && ns_name == NULL) {
5875		/*
5876		 * Nope.
5877		 */
5878		if (oqname != NULL) {
5879			/*
5880			 * We've already got a partial CNAME/DNAME chain,
5881			 * and haven't found else anything useful here, but
5882			 * no error has occurred since we have an answer.
5883			 */
5884			return (ISC_R_SUCCESS);
5885		} else {
5886			/*
5887			 * The responder is insane.
5888			 */
5889			if (save_name == NULL) {
5890				log_formerr(fctx, "invalid response");
5891				return (DNS_R_FORMERR);
5892			}
5893			if (!dns_name_issubdomain(save_name, &fctx->domain)) {
5894				char nbuf[DNS_NAME_FORMATSIZE];
5895				char dbuf[DNS_NAME_FORMATSIZE];
5896				char tbuf[DNS_RDATATYPE_FORMATSIZE];
5897
5898				dns_rdatatype_format(save_type, tbuf,
5899					sizeof(tbuf));
5900				dns_name_format(save_name, nbuf, sizeof(nbuf));
5901				dns_name_format(&fctx->domain, dbuf,
5902					sizeof(dbuf));
5903
5904				log_formerr(fctx, "Name %s (%s) not subdomain"
5905					" of zone %s -- invalid response",
5906					nbuf, tbuf, dbuf);
5907			} else {
5908				log_formerr(fctx, "invalid response");
5909			}
5910			return (DNS_R_FORMERR);
5911		}
5912	}
5913
5914	/*
5915	 * If we found both NS and SOA, they should be the same name.
5916	 */
5917	if (ns_name != NULL && soa_name != NULL && ns_name != soa_name) {
5918		log_formerr(fctx, "NS/SOA mismatch");
5919		return (DNS_R_FORMERR);
5920	}
5921
5922	/*
5923	 * Do we have a referral?  (We only want to follow a referral if
5924	 * we're not following a chain.)
5925	 */
5926	if (!negative_response && ns_name != NULL && oqname == NULL) {
5927		/*
5928		 * We already know ns_name is a subdomain of fctx->domain.
5929		 * If ns_name is equal to fctx->domain, we're not making
5930		 * progress.  We return DNS_R_FORMERR so that we'll keep
5931		 * trying other servers.
5932		 */
5933		if (dns_name_equal(ns_name, &fctx->domain)) {
5934			log_formerr(fctx, "non-improving referral");
5935			return (DNS_R_FORMERR);
5936		}
5937
5938		/*
5939		 * If the referral name is not a parent of the query
5940		 * name, consider the responder insane.
5941		 */
5942		if (! dns_name_issubdomain(&fctx->name, ns_name)) {
5943			/* Logged twice */
5944			log_formerr(fctx, "referral to non-parent");
5945			FCTXTRACE("referral to non-parent");
5946			return (DNS_R_FORMERR);
5947		}
5948
5949		/*
5950		 * Mark any additional data related to this rdataset.
5951		 * It's important that we do this before we change the
5952		 * query domain.
5953		 */
5954		INSIST(ns_rdataset != NULL);
5955		fctx->attributes |= FCTX_ATTR_GLUING;
5956		(void)dns_rdataset_additionaldata(ns_rdataset, check_related,
5957						  fctx);
5958#if CHECK_FOR_GLUE_IN_ANSWER
5959		/*
5960		 * Look in the answer section for "glue" that is incorrectly
5961		 * returned as a answer.  This is needed if the server also
5962		 * minimizes the response size by not adding records to the
5963		 * additional section that are in the answer section or if
5964		 * the record gets dropped due to message size constraints.
5965		 */
5966		if ((look_in_options & LOOK_FOR_GLUE_IN_ANSWER) != 0 &&
5967		    (fctx->type == dns_rdatatype_aaaa ||
5968		     fctx->type == dns_rdatatype_a))
5969			(void)dns_rdataset_additionaldata(ns_rdataset,
5970							  check_answer, fctx);
5971#endif
5972		fctx->attributes &= ~FCTX_ATTR_GLUING;
5973		/*
5974		 * NS rdatasets with 0 TTL cause problems.
5975		 * dns_view_findzonecut() will not find them when we
5976		 * try to follow the referral, and we'll SERVFAIL
5977		 * because the best nameservers are now above QDOMAIN.
5978		 * We force the TTL to 1 second to prevent this.
5979		 */
5980		if (ns_rdataset->ttl == 0)
5981			ns_rdataset->ttl = 1;
5982		/*
5983		 * Set the current query domain to the referral name.
5984		 *
5985		 * XXXRTH  We should check if we're in forward-only mode, and
5986		 *		if so we should bail out.
5987		 */
5988		INSIST(dns_name_countlabels(&fctx->domain) > 0);
5989		dns_name_free(&fctx->domain, fctx->mctx);
5990		if (dns_rdataset_isassociated(&fctx->nameservers))
5991			dns_rdataset_disassociate(&fctx->nameservers);
5992		dns_name_init(&fctx->domain, NULL);
5993		result = dns_name_dup(ns_name, fctx->mctx, &fctx->domain);
5994		if (result != ISC_R_SUCCESS)
5995			return (result);
5996		fctx->attributes |= FCTX_ATTR_WANTCACHE;
5997		fctx->ns_ttl_ok = ISC_FALSE;
5998		log_ns_ttl(fctx, "DELEGATION");
5999		return (DNS_R_DELEGATION);
6000	}
6001
6002	/*
6003	 * Since we're not doing a referral, we don't want to cache any
6004	 * NS RRs we may have found.
6005	 */
6006	if (ns_name != NULL)
6007		ns_name->attributes &= ~DNS_NAMEATTR_CACHE;
6008
6009	if (negative_response && oqname == NULL)
6010		fctx->attributes |= FCTX_ATTR_WANTNCACHE;
6011
6012	return (ISC_R_SUCCESS);
6013}
6014
6015static isc_result_t
6016answer_response(fetchctx_t *fctx) {
6017	isc_result_t result;
6018	dns_message_t *message;
6019	dns_name_t *name, *dname = NULL, *qname, tname, *ns_name;
6020	dns_rdataset_t *rdataset, *ns_rdataset;
6021	isc_boolean_t done, external, chaining, aa, found, want_chaining;
6022	isc_boolean_t have_answer, found_cname, found_type, wanted_chaining;
6023	unsigned int aflag;
6024	dns_rdatatype_t type;
6025	dns_fixedname_t fdname, fqname;
6026	dns_view_t *view;
6027
6028	FCTXTRACE("answer_response");
6029
6030	message = fctx->rmessage;
6031
6032	/*
6033	 * Examine the answer section, marking those rdatasets which are
6034	 * part of the answer and should be cached.
6035	 */
6036
6037	done = ISC_FALSE;
6038	found_cname = ISC_FALSE;
6039	found_type = ISC_FALSE;
6040	chaining = ISC_FALSE;
6041	have_answer = ISC_FALSE;
6042	want_chaining = ISC_FALSE;
6043	POST(want_chaining);
6044	if ((message->flags & DNS_MESSAGEFLAG_AA) != 0)
6045		aa = ISC_TRUE;
6046	else
6047		aa = ISC_FALSE;
6048	qname = &fctx->name;
6049	type = fctx->type;
6050	view = fctx->res->view;
6051	result = dns_message_firstname(message, DNS_SECTION_ANSWER);
6052	while (!done && result == ISC_R_SUCCESS) {
6053		dns_namereln_t namereln;
6054		int order;
6055		unsigned int nlabels;
6056
6057		name = NULL;
6058		dns_message_currentname(message, DNS_SECTION_ANSWER, &name);
6059		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
6060		namereln = dns_name_fullcompare(qname, name, &order, &nlabels);
6061		if (namereln == dns_namereln_equal) {
6062			wanted_chaining = ISC_FALSE;
6063			for (rdataset = ISC_LIST_HEAD(name->list);
6064			     rdataset != NULL;
6065			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6066				found = ISC_FALSE;
6067				want_chaining = ISC_FALSE;
6068				aflag = 0;
6069				if (rdataset->type == dns_rdatatype_nsec3) {
6070					/*
6071					 * NSEC3 records are not allowed to
6072					 * appear in the answer section.
6073					 */
6074					log_formerr(fctx, "NSEC3 in answer");
6075					return (DNS_R_FORMERR);
6076				}
6077
6078				/*
6079				 * Apply filters, if given, on answers to reject
6080				 * a malicious attempt of rebinding.
6081				 */
6082				if ((rdataset->type == dns_rdatatype_a ||
6083				     rdataset->type == dns_rdatatype_aaaa) &&
6084				    !is_answeraddress_allowed(view, name,
6085							      rdataset)) {
6086					return (DNS_R_SERVFAIL);
6087				}
6088
6089				if (rdataset->type == type && !found_cname) {
6090					/*
6091					 * We've found an ordinary answer.
6092					 */
6093					found = ISC_TRUE;
6094					found_type = ISC_TRUE;
6095					done = ISC_TRUE;
6096					aflag = DNS_RDATASETATTR_ANSWER;
6097				} else if (type == dns_rdatatype_any) {
6098					/*
6099					 * We've found an answer matching
6100					 * an ANY query.  There may be
6101					 * more.
6102					 */
6103					found = ISC_TRUE;
6104					aflag = DNS_RDATASETATTR_ANSWER;
6105				} else if (rdataset->type == dns_rdatatype_rrsig
6106					   && rdataset->covers == type
6107					   && !found_cname) {
6108					/*
6109					 * We've found a signature that
6110					 * covers the type we're looking for.
6111					 */
6112					found = ISC_TRUE;
6113					found_type = ISC_TRUE;
6114					aflag = DNS_RDATASETATTR_ANSWERSIG;
6115				} else if (rdataset->type ==
6116					   dns_rdatatype_cname
6117					   && !found_type) {
6118					/*
6119					 * We're looking for something else,
6120					 * but we found a CNAME.
6121					 *
6122					 * Getting a CNAME response for some
6123					 * query types is an error, see
6124					 * RFC 4035, Section 2.5.
6125					 */
6126					if (type == dns_rdatatype_rrsig ||
6127					    type == dns_rdatatype_key ||
6128					    type == dns_rdatatype_nsec) {
6129						char buf[DNS_RDATATYPE_FORMATSIZE];
6130						dns_rdatatype_format(fctx->type,
6131							      buf, sizeof(buf));
6132						log_formerr(fctx,
6133							    "CNAME response "
6134							    "for %s RR", buf);
6135						return (DNS_R_FORMERR);
6136					}
6137					found = ISC_TRUE;
6138					found_cname = ISC_TRUE;
6139					want_chaining = ISC_TRUE;
6140					aflag = DNS_RDATASETATTR_ANSWER;
6141					result = cname_target(rdataset,
6142							      &tname);
6143					if (result != ISC_R_SUCCESS)
6144						return (result);
6145					/* Apply filters on the target name. */
6146					if (!is_answertarget_allowed(view,
6147							name,
6148							rdataset->type,
6149							&tname,
6150							&fctx->domain)) {
6151						return (DNS_R_SERVFAIL);
6152					}
6153				} else if (rdataset->type == dns_rdatatype_rrsig
6154					   && rdataset->covers ==
6155					   dns_rdatatype_cname
6156					   && !found_type) {
6157					/*
6158					 * We're looking for something else,
6159					 * but we found a SIG CNAME.
6160					 */
6161					found = ISC_TRUE;
6162					found_cname = ISC_TRUE;
6163					aflag = DNS_RDATASETATTR_ANSWERSIG;
6164				}
6165
6166				if (found) {
6167					/*
6168					 * We've found an answer to our
6169					 * question.
6170					 */
6171					name->attributes |=
6172						DNS_NAMEATTR_CACHE;
6173					rdataset->attributes |=
6174						DNS_RDATASETATTR_CACHE;
6175					rdataset->trust = dns_trust_answer;
6176					if (!chaining) {
6177						/*
6178						 * This data is "the" answer
6179						 * to our question only if
6180						 * we're not chaining (i.e.
6181						 * if we haven't followed
6182						 * a CNAME or DNAME).
6183						 */
6184						INSIST(!external);
6185						if (aflag ==
6186						    DNS_RDATASETATTR_ANSWER) {
6187							have_answer = ISC_TRUE;
6188							name->attributes |=
6189								DNS_NAMEATTR_ANSWER;
6190						}
6191						rdataset->attributes |= aflag;
6192						if (aa)
6193							rdataset->trust =
6194							  dns_trust_authanswer;
6195					} else if (external) {
6196						/*
6197						 * This data is outside of
6198						 * our query domain, and
6199						 * may not be cached.
6200						 */
6201						rdataset->attributes |=
6202						    DNS_RDATASETATTR_EXTERNAL;
6203					}
6204
6205					/*
6206					 * Mark any additional data related
6207					 * to this rdataset.
6208					 */
6209					(void)dns_rdataset_additionaldata(
6210							rdataset,
6211							check_related,
6212							fctx);
6213
6214					/*
6215					 * CNAME chaining.
6216					 */
6217					if (want_chaining) {
6218						wanted_chaining = ISC_TRUE;
6219						name->attributes |=
6220							DNS_NAMEATTR_CHAINING;
6221						rdataset->attributes |=
6222						    DNS_RDATASETATTR_CHAINING;
6223						qname = &tname;
6224					}
6225				}
6226				/*
6227				 * We could add an "else" clause here and
6228				 * log that we're ignoring this rdataset.
6229				 */
6230			}
6231			/*
6232			 * If wanted_chaining is true, we've done
6233			 * some chaining as the result of processing
6234			 * this node, and thus we need to set
6235			 * chaining to true.
6236			 *
6237			 * We don't set chaining inside of the
6238			 * rdataset loop because doing that would
6239			 * cause us to ignore the signatures of
6240			 * CNAMEs.
6241			 */
6242			if (wanted_chaining)
6243				chaining = ISC_TRUE;
6244		} else {
6245			dns_rdataset_t *dnameset = NULL;
6246
6247			/*
6248			 * Look for a DNAME (or its SIG).  Anything else is
6249			 * ignored.
6250			 */
6251			wanted_chaining = ISC_FALSE;
6252			for (rdataset = ISC_LIST_HEAD(name->list);
6253			     rdataset != NULL;
6254			     rdataset = ISC_LIST_NEXT(rdataset, link))
6255			{
6256				/*
6257				 * Only pass DNAME or RRSIG(DNAME).
6258				 */
6259				if (rdataset->type != dns_rdatatype_dname &&
6260				    (rdataset->type != dns_rdatatype_rrsig ||
6261				     rdataset->covers != dns_rdatatype_dname))
6262					continue;
6263
6264				/*
6265				 * If we're not chaining, then the DNAME and
6266				 * its signature should not be external.
6267				 */
6268				if (!chaining && external) {
6269					char qbuf[DNS_NAME_FORMATSIZE];
6270					char obuf[DNS_NAME_FORMATSIZE];
6271
6272					dns_name_format(name, qbuf,
6273							sizeof(qbuf));
6274					dns_name_format(&fctx->domain, obuf,
6275							sizeof(obuf));
6276					log_formerr(fctx, "external DNAME or "
6277						    "RRSIG covering DNAME "
6278						    "in answer: %s is "
6279						    "not in %s", qbuf, obuf);
6280					return (DNS_R_FORMERR);
6281				}
6282
6283				if (namereln != dns_namereln_subdomain) {
6284					char qbuf[DNS_NAME_FORMATSIZE];
6285					char obuf[DNS_NAME_FORMATSIZE];
6286
6287					dns_name_format(qname, qbuf,
6288							sizeof(qbuf));
6289					dns_name_format(name, obuf,
6290							sizeof(obuf));
6291					log_formerr(fctx, "unrelated DNAME "
6292						    "in answer: %s is "
6293						    "not in %s", qbuf, obuf);
6294					return (DNS_R_FORMERR);
6295				}
6296
6297				aflag = 0;
6298				if (rdataset->type == dns_rdatatype_dname) {
6299					want_chaining = ISC_TRUE;
6300					POST(want_chaining);
6301					aflag = DNS_RDATASETATTR_ANSWER;
6302					result = dname_target(rdataset, qname,
6303							      nlabels, &fdname);
6304					if (result == ISC_R_NOSPACE) {
6305						/*
6306						 * We can't construct the
6307						 * DNAME target.  Do not
6308						 * try to continue.
6309						 */
6310						want_chaining = ISC_FALSE;
6311						POST(want_chaining);
6312					} else if (result != ISC_R_SUCCESS)
6313						return (result);
6314					else
6315						dnameset = rdataset;
6316
6317					dname = dns_fixedname_name(&fdname);
6318					if (!is_answertarget_allowed(view,
6319							qname, rdataset->type,
6320							dname, &fctx->domain)) {
6321						return (DNS_R_SERVFAIL);
6322					}
6323				} else {
6324					/*
6325					 * We've found a signature that
6326					 * covers the DNAME.
6327					 */
6328					aflag = DNS_RDATASETATTR_ANSWERSIG;
6329				}
6330
6331				/*
6332				 * We've found an answer to our
6333				 * question.
6334				 */
6335				name->attributes |= DNS_NAMEATTR_CACHE;
6336				rdataset->attributes |= DNS_RDATASETATTR_CACHE;
6337				rdataset->trust = dns_trust_answer;
6338				if (!chaining) {
6339					/*
6340					 * This data is "the" answer to
6341					 * our question only if we're
6342					 * not chaining.
6343					 */
6344					INSIST(!external);
6345					if (aflag == DNS_RDATASETATTR_ANSWER) {
6346						have_answer = ISC_TRUE;
6347						name->attributes |=
6348							DNS_NAMEATTR_ANSWER;
6349					}
6350					rdataset->attributes |= aflag;
6351					if (aa)
6352						rdataset->trust =
6353						  dns_trust_authanswer;
6354				} else if (external) {
6355					rdataset->attributes |=
6356					    DNS_RDATASETATTR_EXTERNAL;
6357				}
6358			}
6359
6360			/*
6361			 * DNAME chaining.
6362			 */
6363			if (dnameset != NULL) {
6364				/*
6365				 * Copy the dname into the qname fixed name.
6366				 *
6367				 * Although we check for failure of the copy
6368				 * operation, in practice it should never fail
6369				 * since we already know that the  result fits
6370				 * in a fixedname.
6371				 */
6372				dns_fixedname_init(&fqname);
6373				qname = dns_fixedname_name(&fqname);
6374				result = dns_name_copy(dname, qname, NULL);
6375				if (result != ISC_R_SUCCESS)
6376					return (result);
6377				wanted_chaining = ISC_TRUE;
6378				name->attributes |= DNS_NAMEATTR_CHAINING;
6379				dnameset->attributes |=
6380					    DNS_RDATASETATTR_CHAINING;
6381			}
6382			if (wanted_chaining)
6383				chaining = ISC_TRUE;
6384		}
6385		result = dns_message_nextname(message, DNS_SECTION_ANSWER);
6386	}
6387	if (result == ISC_R_NOMORE)
6388		result = ISC_R_SUCCESS;
6389	if (result != ISC_R_SUCCESS)
6390		return (result);
6391
6392	/*
6393	 * We should have found an answer.
6394	 */
6395	if (!have_answer) {
6396		log_formerr(fctx, "reply has no answer");
6397		return (DNS_R_FORMERR);
6398	}
6399
6400	/*
6401	 * This response is now potentially cacheable.
6402	 */
6403	fctx->attributes |= FCTX_ATTR_WANTCACHE;
6404
6405	/*
6406	 * Did chaining end before we got the final answer?
6407	 */
6408	if (chaining) {
6409		/*
6410		 * Yes.  This may be a negative reply, so hand off
6411		 * authority section processing to the noanswer code.
6412		 * If it isn't a noanswer response, no harm will be
6413		 * done.
6414		 */
6415		return (noanswer_response(fctx, qname, 0));
6416	}
6417
6418	/*
6419	 * We didn't end with an incomplete chain, so the rcode should be
6420	 * "no error".
6421	 */
6422	if (message->rcode != dns_rcode_noerror) {
6423		log_formerr(fctx, "CNAME/DNAME chain complete, but RCODE "
6424				  "indicates error");
6425		return (DNS_R_FORMERR);
6426	}
6427
6428	/*
6429	 * Examine the authority section (if there is one).
6430	 *
6431	 * We expect there to be only one owner name for all the rdatasets
6432	 * in this section, and we expect that it is not external.
6433	 */
6434	done = ISC_FALSE;
6435	ns_name = NULL;
6436	ns_rdataset = NULL;
6437	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
6438	while (!done && result == ISC_R_SUCCESS) {
6439		name = NULL;
6440		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
6441		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
6442		if (!external) {
6443			/*
6444			 * We expect to find NS or SIG NS rdatasets, and
6445			 * nothing else.
6446			 */
6447			for (rdataset = ISC_LIST_HEAD(name->list);
6448			     rdataset != NULL;
6449			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6450				if (rdataset->type == dns_rdatatype_ns ||
6451				    (rdataset->type == dns_rdatatype_rrsig &&
6452				     rdataset->covers == dns_rdatatype_ns)) {
6453					name->attributes |=
6454						DNS_NAMEATTR_CACHE;
6455					rdataset->attributes |=
6456						DNS_RDATASETATTR_CACHE;
6457					if (aa && !chaining)
6458						rdataset->trust =
6459						    dns_trust_authauthority;
6460					else
6461						rdataset->trust =
6462						    dns_trust_additional;
6463
6464					if (rdataset->type == dns_rdatatype_ns) {
6465						ns_name = name;
6466						ns_rdataset = rdataset;
6467					}
6468					/*
6469					 * Mark any additional data related
6470					 * to this rdataset.
6471					 */
6472					(void)dns_rdataset_additionaldata(
6473							rdataset,
6474							check_related,
6475							fctx);
6476					done = ISC_TRUE;
6477				}
6478			}
6479		}
6480		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
6481	}
6482	if (result == ISC_R_NOMORE)
6483		result = ISC_R_SUCCESS;
6484
6485	log_ns_ttl(fctx, "answer_response");
6486
6487	if (ns_rdataset != NULL && dns_name_equal(&fctx->domain, ns_name) &&
6488	    !dns_name_equal(ns_name, dns_rootname))
6489		trim_ns_ttl(fctx, ns_name, ns_rdataset);
6490
6491	return (result);
6492}
6493
6494static isc_boolean_t
6495fctx_decreference(fetchctx_t *fctx) {
6496	isc_boolean_t bucket_empty = ISC_FALSE;
6497
6498	INSIST(fctx->references > 0);
6499	fctx->references--;
6500	if (fctx->references == 0) {
6501		/*
6502		 * No one cares about the result of this fetch anymore.
6503		 */
6504		if (fctx->pending == 0 && fctx->nqueries == 0 &&
6505		    ISC_LIST_EMPTY(fctx->validators) && SHUTTINGDOWN(fctx)) {
6506			/*
6507			 * This fctx is already shutdown; we were just
6508			 * waiting for the last reference to go away.
6509			 */
6510			bucket_empty = fctx_unlink(fctx);
6511			fctx_destroy(fctx);
6512		} else {
6513			/*
6514			 * Initiate shutdown.
6515			 */
6516			fctx_shutdown(fctx);
6517		}
6518	}
6519	return (bucket_empty);
6520}
6521
6522static void
6523resume_dslookup(isc_task_t *task, isc_event_t *event) {
6524	dns_fetchevent_t *fevent;
6525	dns_resolver_t *res;
6526	fetchctx_t *fctx;
6527	isc_result_t result;
6528	isc_boolean_t bucket_empty;
6529	isc_boolean_t locked = ISC_FALSE;
6530	unsigned int bucketnum;
6531	dns_rdataset_t nameservers;
6532	dns_fixedname_t fixed;
6533	dns_name_t *domain;
6534
6535	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
6536	fevent = (dns_fetchevent_t *)event;
6537	fctx = event->ev_arg;
6538	REQUIRE(VALID_FCTX(fctx));
6539	res = fctx->res;
6540
6541	UNUSED(task);
6542	FCTXTRACE("resume_dslookup");
6543
6544	if (fevent->node != NULL)
6545		dns_db_detachnode(fevent->db, &fevent->node);
6546	if (fevent->db != NULL)
6547		dns_db_detach(&fevent->db);
6548
6549	dns_rdataset_init(&nameservers);
6550
6551	bucketnum = fctx->bucketnum;
6552	if (fevent->result == ISC_R_CANCELED) {
6553		dns_resolver_destroyfetch(&fctx->nsfetch);
6554		fctx_done(fctx, ISC_R_CANCELED, __LINE__);
6555	} else if (fevent->result == ISC_R_SUCCESS) {
6556
6557		FCTXTRACE("resuming DS lookup");
6558
6559		dns_resolver_destroyfetch(&fctx->nsfetch);
6560		if (dns_rdataset_isassociated(&fctx->nameservers))
6561			dns_rdataset_disassociate(&fctx->nameservers);
6562		dns_rdataset_clone(fevent->rdataset, &fctx->nameservers);
6563		fctx->ns_ttl = fctx->nameservers.ttl;
6564		fctx->ns_ttl_ok = ISC_TRUE;
6565		log_ns_ttl(fctx, "resume_dslookup");
6566		dns_name_free(&fctx->domain, fctx->mctx);
6567		dns_name_init(&fctx->domain, NULL);
6568		result = dns_name_dup(&fctx->nsname, fctx->mctx, &fctx->domain);
6569		if (result != ISC_R_SUCCESS) {
6570			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
6571			goto cleanup;
6572		}
6573		/*
6574		 * Try again.
6575		 */
6576		fctx_try(fctx, ISC_TRUE, ISC_FALSE);
6577	} else {
6578		unsigned int n;
6579		dns_rdataset_t *nsrdataset = NULL;
6580
6581		/*
6582		 * Retrieve state from fctx->nsfetch before we destroy it.
6583		 */
6584		dns_fixedname_init(&fixed);
6585		domain = dns_fixedname_name(&fixed);
6586		dns_name_copy(&fctx->nsfetch->private->domain, domain, NULL);
6587		if (dns_name_equal(&fctx->nsname, domain)) {
6588			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
6589			dns_resolver_destroyfetch(&fctx->nsfetch);
6590			goto cleanup;
6591		}
6592		if (dns_rdataset_isassociated(
6593		    &fctx->nsfetch->private->nameservers)) {
6594			dns_rdataset_clone(
6595			    &fctx->nsfetch->private->nameservers,
6596			    &nameservers);
6597			nsrdataset = &nameservers;
6598		} else
6599			domain = NULL;
6600		dns_resolver_destroyfetch(&fctx->nsfetch);
6601		n = dns_name_countlabels(&fctx->nsname);
6602		dns_name_getlabelsequence(&fctx->nsname, 1, n - 1,
6603					  &fctx->nsname);
6604
6605		if (dns_rdataset_isassociated(fevent->rdataset))
6606			dns_rdataset_disassociate(fevent->rdataset);
6607		FCTXTRACE("continuing to look for parent's NS records");
6608		result = dns_resolver_createfetch(fctx->res, &fctx->nsname,
6609						  dns_rdatatype_ns, domain,
6610						  nsrdataset, NULL, 0, task,
6611						  resume_dslookup, fctx,
6612						  &fctx->nsrrset, NULL,
6613						  &fctx->nsfetch);
6614		if (result != ISC_R_SUCCESS)
6615			fctx_done(fctx, result, __LINE__);
6616		else {
6617			LOCK(&res->buckets[bucketnum].lock);
6618			locked = ISC_TRUE;
6619			fctx->references++;
6620		}
6621	}
6622
6623 cleanup:
6624	if (dns_rdataset_isassociated(&nameservers))
6625		dns_rdataset_disassociate(&nameservers);
6626	if (dns_rdataset_isassociated(fevent->rdataset))
6627		dns_rdataset_disassociate(fevent->rdataset);
6628	INSIST(fevent->sigrdataset == NULL);
6629	isc_event_free(&event);
6630	if (!locked)
6631		LOCK(&res->buckets[bucketnum].lock);
6632	bucket_empty = fctx_decreference(fctx);
6633	UNLOCK(&res->buckets[bucketnum].lock);
6634	if (bucket_empty)
6635		empty_bucket(res);
6636}
6637
6638static inline void
6639checknamessection(dns_message_t *message, dns_section_t section) {
6640	isc_result_t result;
6641	dns_name_t *name;
6642	dns_rdata_t rdata = DNS_RDATA_INIT;
6643	dns_rdataset_t *rdataset;
6644
6645	for (result = dns_message_firstname(message, section);
6646	     result == ISC_R_SUCCESS;
6647	     result = dns_message_nextname(message, section))
6648	{
6649		name = NULL;
6650		dns_message_currentname(message, section, &name);
6651		for (rdataset = ISC_LIST_HEAD(name->list);
6652		     rdataset != NULL;
6653		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6654			for (result = dns_rdataset_first(rdataset);
6655			     result == ISC_R_SUCCESS;
6656			     result = dns_rdataset_next(rdataset)) {
6657				dns_rdataset_current(rdataset, &rdata);
6658				if (!dns_rdata_checkowner(name, rdata.rdclass,
6659							  rdata.type,
6660							  ISC_FALSE) ||
6661				    !dns_rdata_checknames(&rdata, name, NULL))
6662				{
6663					rdataset->attributes |=
6664						DNS_RDATASETATTR_CHECKNAMES;
6665				}
6666				dns_rdata_reset(&rdata);
6667			}
6668		}
6669	}
6670}
6671
6672static void
6673checknames(dns_message_t *message) {
6674
6675	checknamessection(message, DNS_SECTION_ANSWER);
6676	checknamessection(message, DNS_SECTION_AUTHORITY);
6677	checknamessection(message, DNS_SECTION_ADDITIONAL);
6678}
6679
6680/*
6681 * Log server NSID at log level 'level'
6682 */
6683static void
6684log_nsid(isc_buffer_t *opt, size_t nsid_len, resquery_t *query,
6685	 int level, isc_mem_t *mctx)
6686{
6687	static const char hex[17] = "0123456789abcdef";
6688	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
6689	isc_uint16_t buflen, i;
6690	unsigned char *p, *buf, *nsid;
6691
6692	/* Allocate buffer for storing hex version of the NSID */
6693	buflen = (isc_uint16_t)nsid_len * 2 + 1;
6694	buf = isc_mem_get(mctx, buflen);
6695	if (buf == NULL)
6696		return;
6697
6698	/* Convert to hex */
6699	p = buf;
6700	nsid = isc_buffer_current(opt);
6701	for (i = 0; i < nsid_len; i++) {
6702		*p++ = hex[(nsid[0] >> 4) & 0xf];
6703		*p++ = hex[nsid[0] & 0xf];
6704		nsid++;
6705	}
6706	*p = '\0';
6707
6708	isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
6709			    sizeof(addrbuf));
6710	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6711		      DNS_LOGMODULE_RESOLVER, level,
6712		      "received NSID '%s' from %s", buf, addrbuf);
6713
6714	/* Clean up */
6715	isc_mem_put(mctx, buf, buflen);
6716	return;
6717}
6718
6719static void
6720log_packet(dns_message_t *message, int level, isc_mem_t *mctx) {
6721	isc_buffer_t buffer;
6722	char *buf = NULL;
6723	int len = 1024;
6724	isc_result_t result;
6725
6726	if (! isc_log_wouldlog(dns_lctx, level))
6727		return;
6728
6729	/*
6730	 * Note that these are multiline debug messages.  We want a newline
6731	 * to appear in the log after each message.
6732	 */
6733
6734	do {
6735		buf = isc_mem_get(mctx, len);
6736		if (buf == NULL)
6737			break;
6738		isc_buffer_init(&buffer, buf, len);
6739		result = dns_message_totext(message, &dns_master_style_debug,
6740					    0, &buffer);
6741		if (result == ISC_R_NOSPACE) {
6742			isc_mem_put(mctx, buf, len);
6743			len += 1024;
6744		} else if (result == ISC_R_SUCCESS)
6745			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6746				      DNS_LOGMODULE_RESOLVER, level,
6747				      "received packet:\n%.*s",
6748				      (int)isc_buffer_usedlength(&buffer),
6749				      buf);
6750	} while (result == ISC_R_NOSPACE);
6751
6752	if (buf != NULL)
6753		isc_mem_put(mctx, buf, len);
6754}
6755
6756static isc_boolean_t
6757iscname(fetchctx_t *fctx) {
6758	isc_result_t result;
6759
6760	result = dns_message_findname(fctx->rmessage, DNS_SECTION_ANSWER,
6761				      &fctx->name, dns_rdatatype_cname, 0,
6762				      NULL, NULL);
6763	return (result == ISC_R_SUCCESS ? ISC_TRUE : ISC_FALSE);
6764}
6765
6766static isc_boolean_t
6767betterreferral(fetchctx_t *fctx) {
6768	isc_result_t result;
6769	dns_name_t *name;
6770	dns_rdataset_t *rdataset;
6771	dns_message_t *message = fctx->rmessage;
6772
6773	for (result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
6774	     result == ISC_R_SUCCESS;
6775	     result = dns_message_nextname(message, DNS_SECTION_AUTHORITY)) {
6776		name = NULL;
6777		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
6778		if (!isstrictsubdomain(name, &fctx->domain))
6779			continue;
6780		for (rdataset = ISC_LIST_HEAD(name->list);
6781		     rdataset != NULL;
6782		     rdataset = ISC_LIST_NEXT(rdataset, link))
6783			if (rdataset->type == dns_rdatatype_ns)
6784				return (ISC_TRUE);
6785	}
6786	return (ISC_FALSE);
6787}
6788
6789static void
6790process_opt(resquery_t *query, dns_rdataset_t *opt) {
6791	dns_rdata_t rdata;
6792	isc_buffer_t optbuf;
6793	isc_result_t result;
6794	isc_uint16_t optcode;
6795	isc_uint16_t optlen;
6796
6797	result = dns_rdataset_first(opt);
6798	if (result == ISC_R_SUCCESS) {
6799		dns_rdata_init(&rdata);
6800		dns_rdataset_current(opt, &rdata);
6801		isc_buffer_init(&optbuf, rdata.data, rdata.length);
6802		isc_buffer_add(&optbuf, rdata.length);
6803		while (isc_buffer_remaininglength(&optbuf) >= 4) {
6804			optcode = isc_buffer_getuint16(&optbuf);
6805			optlen = isc_buffer_getuint16(&optbuf);
6806			INSIST(optlen <= isc_buffer_remaininglength(&optbuf));
6807			switch (optcode) {
6808			case DNS_OPT_NSID:
6809				if (query->options & DNS_FETCHOPT_WANTNSID)
6810					log_nsid(&optbuf, optlen, query,
6811						 ISC_LOG_INFO,
6812						 query->fctx->res->mctx);
6813				isc_buffer_forward(&optbuf, optlen);
6814				break;
6815			default:
6816				isc_buffer_forward(&optbuf, optlen);
6817				break;
6818			}
6819		}
6820		INSIST(isc_buffer_remaininglength(&optbuf) == 0U);
6821	}
6822}
6823
6824static void
6825resquery_response(isc_task_t *task, isc_event_t *event) {
6826	isc_result_t result = ISC_R_SUCCESS;
6827	resquery_t *query = event->ev_arg;
6828	dns_dispatchevent_t *devent = (dns_dispatchevent_t *)event;
6829	isc_boolean_t keep_trying, get_nameservers, resend;
6830	isc_boolean_t truncated;
6831	dns_message_t *message;
6832	dns_rdataset_t *opt;
6833	fetchctx_t *fctx;
6834	dns_name_t *fname;
6835	dns_fixedname_t foundname;
6836	isc_stdtime_t now;
6837	isc_time_t tnow, *finish;
6838	dns_adbaddrinfo_t *addrinfo;
6839	unsigned int options;
6840	unsigned int findoptions;
6841	isc_result_t broken_server;
6842	badnstype_t broken_type = badns_response;
6843	isc_boolean_t no_response;
6844
6845	REQUIRE(VALID_QUERY(query));
6846	fctx = query->fctx;
6847	options = query->options;
6848	REQUIRE(VALID_FCTX(fctx));
6849	REQUIRE(event->ev_type == DNS_EVENT_DISPATCH);
6850
6851	QTRACE("response");
6852
6853	if (isc_sockaddr_pf(&query->addrinfo->sockaddr) == PF_INET)
6854		inc_stats(fctx->res, dns_resstatscounter_responsev4);
6855	else
6856		inc_stats(fctx->res, dns_resstatscounter_responsev6);
6857
6858	(void)isc_timer_touch(fctx->timer);
6859
6860	keep_trying = ISC_FALSE;
6861	broken_server = ISC_R_SUCCESS;
6862	get_nameservers = ISC_FALSE;
6863	resend = ISC_FALSE;
6864	truncated = ISC_FALSE;
6865	finish = NULL;
6866	no_response = ISC_FALSE;
6867
6868	if (fctx->res->exiting) {
6869		result = ISC_R_SHUTTINGDOWN;
6870		goto done;
6871	}
6872
6873	fctx->timeouts = 0;
6874	fctx->timeout = ISC_FALSE;
6875	fctx->addrinfo = query->addrinfo;
6876
6877	/*
6878	 * XXXRTH  We should really get the current time just once.  We
6879	 *		need a routine to convert from an isc_time_t to an
6880	 *		isc_stdtime_t.
6881	 */
6882	TIME_NOW(&tnow);
6883	finish = &tnow;
6884	isc_stdtime_get(&now);
6885
6886	/*
6887	 * Did the dispatcher have a problem?
6888	 */
6889	if (devent->result != ISC_R_SUCCESS) {
6890		if (devent->result == ISC_R_EOF &&
6891		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
6892			/*
6893			 * The problem might be that they
6894			 * don't understand EDNS0.  Turn it
6895			 * off and try again.
6896			 */
6897			options |= DNS_FETCHOPT_NOEDNS0;
6898			resend = ISC_TRUE;
6899			add_bad_edns(fctx, &query->addrinfo->sockaddr);
6900		} else {
6901			/*
6902			 * There's no hope for this query.
6903			 */
6904			keep_trying = ISC_TRUE;
6905
6906			/*
6907			 * If this is a network error on an exclusive query
6908			 * socket, mark the server as bad so that we won't try
6909			 * it for this fetch again.  Also adjust finish and
6910			 * no_response so that we penalize this address in SRTT
6911			 * adjustment later.
6912			 */
6913			if (query->exclusivesocket &&
6914			    (devent->result == ISC_R_HOSTUNREACH ||
6915			     devent->result == ISC_R_NETUNREACH ||
6916			     devent->result == ISC_R_CONNREFUSED ||
6917			     devent->result == ISC_R_CANCELED)) {
6918				    broken_server = devent->result;
6919				    broken_type = badns_unreachable;
6920				    finish = NULL;
6921				    no_response = ISC_TRUE;
6922			}
6923		}
6924		goto done;
6925	}
6926
6927	message = fctx->rmessage;
6928
6929	if (query->tsig != NULL) {
6930		result = dns_message_setquerytsig(message, query->tsig);
6931		if (result != ISC_R_SUCCESS)
6932			goto done;
6933	}
6934
6935	if (query->tsigkey) {
6936		result = dns_message_settsigkey(message, query->tsigkey);
6937		if (result != ISC_R_SUCCESS)
6938			goto done;
6939	}
6940
6941	dns_message_setclass(message, fctx->res->rdclass);
6942
6943	result = dns_message_parse(message, &devent->buffer, 0);
6944	if (result != ISC_R_SUCCESS) {
6945		switch (result) {
6946		case ISC_R_UNEXPECTEDEND:
6947			if (!message->question_ok ||
6948			    (message->flags & DNS_MESSAGEFLAG_TC) == 0 ||
6949			    (options & DNS_FETCHOPT_TCP) != 0) {
6950				/*
6951				 * Either the message ended prematurely,
6952				 * and/or wasn't marked as being truncated,
6953				 * and/or this is a response to a query we
6954				 * sent over TCP.  In all of these cases,
6955				 * something is wrong with the remote
6956				 * server and we don't want to retry using
6957				 * TCP.
6958				 */
6959				if ((query->options & DNS_FETCHOPT_NOEDNS0)
6960				    == 0) {
6961					/*
6962					 * The problem might be that they
6963					 * don't understand EDNS0.  Turn it
6964					 * off and try again.
6965					 */
6966					options |= DNS_FETCHOPT_NOEDNS0;
6967					resend = ISC_TRUE;
6968					add_bad_edns(fctx,
6969						    &query->addrinfo->sockaddr);
6970					inc_stats(fctx->res,
6971						 dns_resstatscounter_edns0fail);
6972				} else {
6973					broken_server = result;
6974					keep_trying = ISC_TRUE;
6975				}
6976				goto done;
6977			}
6978			/*
6979			 * We defer retrying via TCP for a bit so we can
6980			 * check out this message further.
6981			 */
6982			truncated = ISC_TRUE;
6983			break;
6984		case DNS_R_FORMERR:
6985			if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
6986				/*
6987				 * The problem might be that they
6988				 * don't understand EDNS0.  Turn it
6989				 * off and try again.
6990				 */
6991				options |= DNS_FETCHOPT_NOEDNS0;
6992				resend = ISC_TRUE;
6993				add_bad_edns(fctx, &query->addrinfo->sockaddr);
6994				inc_stats(fctx->res,
6995						 dns_resstatscounter_edns0fail);
6996			} else {
6997				broken_server = DNS_R_UNEXPECTEDRCODE;
6998				keep_trying = ISC_TRUE;
6999			}
7000			goto done;
7001		default:
7002			/*
7003			 * Something bad has happened.
7004			 */
7005			goto done;
7006		}
7007	}
7008
7009
7010	/*
7011	 * Log the incoming packet.
7012	 */
7013	log_packet(message, ISC_LOG_DEBUG(10), fctx->res->mctx);
7014
7015	if (message->rdclass != fctx->res->rdclass) {
7016		resend = ISC_TRUE;
7017		FCTXTRACE("bad class");
7018		goto done;
7019	}
7020
7021	/*
7022	 * Process receive opt record.
7023	 */
7024	opt = dns_message_getopt(message);
7025	if (opt != NULL)
7026		process_opt(query, opt);
7027
7028	/*
7029	 * If the message is signed, check the signature.  If not, this
7030	 * returns success anyway.
7031	 */
7032	result = dns_message_checksig(message, fctx->res->view);
7033	if (result != ISC_R_SUCCESS)
7034		goto done;
7035
7036	/*
7037	 * The dispatcher should ensure we only get responses with QR set.
7038	 */
7039	INSIST((message->flags & DNS_MESSAGEFLAG_QR) != 0);
7040	/*
7041	 * INSIST() that the message comes from the place we sent it to,
7042	 * since the dispatch code should ensure this.
7043	 *
7044	 * INSIST() that the message id is correct (this should also be
7045	 * ensured by the dispatch code).
7046	 */
7047
7048	/*
7049	 * We have an affirmative response to the query and we have
7050	 * previously got a response from this server which indicated
7051	 * EDNS may not be supported so we can now cache the lack of
7052	 * EDNS support.
7053	 */
7054	if (opt == NULL &&
7055	    (message->rcode == dns_rcode_noerror ||
7056	     message->rcode == dns_rcode_nxdomain ||
7057	     message->rcode == dns_rcode_refused ||
7058	     message->rcode == dns_rcode_yxdomain) &&
7059	     bad_edns(fctx, &query->addrinfo->sockaddr)) {
7060		char addrbuf[ISC_SOCKADDR_FORMATSIZE];
7061		isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
7062				    sizeof(addrbuf));
7063		dns_adb_changeflags(fctx->adb, query->addrinfo,
7064				    DNS_FETCHOPT_NOEDNS0,
7065				    DNS_FETCHOPT_NOEDNS0);
7066	}
7067
7068	/*
7069	 * Deal with truncated responses by retrying using TCP.
7070	 */
7071	if ((message->flags & DNS_MESSAGEFLAG_TC) != 0)
7072		truncated = ISC_TRUE;
7073
7074	if (truncated) {
7075		inc_stats(fctx->res, dns_resstatscounter_truncated);
7076		if ((options & DNS_FETCHOPT_TCP) != 0) {
7077			broken_server = DNS_R_TRUNCATEDTCP;
7078			keep_trying = ISC_TRUE;
7079		} else {
7080			options |= DNS_FETCHOPT_TCP;
7081			resend = ISC_TRUE;
7082		}
7083		goto done;
7084	}
7085
7086	/*
7087	 * Is it a query response?
7088	 */
7089	if (message->opcode != dns_opcode_query) {
7090		/* XXXRTH Log */
7091		broken_server = DNS_R_UNEXPECTEDOPCODE;
7092		keep_trying = ISC_TRUE;
7093		goto done;
7094	}
7095
7096	/*
7097	 * Update statistics about erroneous responses.
7098	 */
7099	if (message->rcode != dns_rcode_noerror) {
7100		switch (message->rcode) {
7101		case dns_rcode_nxdomain:
7102			inc_stats(fctx->res, dns_resstatscounter_nxdomain);
7103			break;
7104		case dns_rcode_servfail:
7105			inc_stats(fctx->res, dns_resstatscounter_servfail);
7106			break;
7107		case dns_rcode_formerr:
7108			inc_stats(fctx->res, dns_resstatscounter_formerr);
7109			break;
7110		default:
7111			inc_stats(fctx->res, dns_resstatscounter_othererror);
7112			break;
7113		}
7114	}
7115
7116	/*
7117	 * Is the remote server broken, or does it dislike us?
7118	 */
7119	if (message->rcode != dns_rcode_noerror &&
7120	    message->rcode != dns_rcode_nxdomain) {
7121		if (((message->rcode == dns_rcode_formerr ||
7122		      message->rcode == dns_rcode_notimp) ||
7123		     (message->rcode == dns_rcode_servfail &&
7124		      dns_message_getopt(message) == NULL)) &&
7125		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
7126			/*
7127			 * It's very likely they don't like EDNS0.
7128			 * If the response code is SERVFAIL, also check if the
7129			 * response contains an OPT RR and don't cache the
7130			 * failure since it can be returned for various other
7131			 * reasons.
7132			 *
7133			 * XXXRTH  We should check if the question
7134			 *		we're asking requires EDNS0, and
7135			 *		if so, we should bail out.
7136			 */
7137			options |= DNS_FETCHOPT_NOEDNS0;
7138			resend = ISC_TRUE;
7139			/*
7140			 * Remember that they may not like EDNS0.
7141			 */
7142			add_bad_edns(fctx, &query->addrinfo->sockaddr);
7143			inc_stats(fctx->res, dns_resstatscounter_edns0fail);
7144		} else if (message->rcode == dns_rcode_formerr) {
7145			if (ISFORWARDER(query->addrinfo)) {
7146				/*
7147				 * This forwarder doesn't understand us,
7148				 * but other forwarders might.  Keep trying.
7149				 */
7150				broken_server = DNS_R_REMOTEFORMERR;
7151				keep_trying = ISC_TRUE;
7152			} else {
7153				/*
7154				 * The server doesn't understand us.  Since
7155				 * all servers for a zone need similar
7156				 * capabilities, we assume that we will get
7157				 * FORMERR from all servers, and thus we
7158				 * cannot make any more progress with this
7159				 * fetch.
7160				 */
7161				log_formerr(fctx, "server sent FORMERR");
7162				result = DNS_R_FORMERR;
7163			}
7164		} else if (message->rcode == dns_rcode_yxdomain) {
7165			/*
7166			 * DNAME mapping failed because the new name
7167			 * was too long.  There's no chance of success
7168			 * for this fetch.
7169			 */
7170			result = DNS_R_YXDOMAIN;
7171		} else if (message->rcode == dns_rcode_badvers) {
7172			unsigned int flags, mask;
7173			unsigned int version;
7174
7175			resend = ISC_TRUE;
7176			INSIST(opt != NULL);
7177			version = (opt->ttl >> 16) & 0xff;
7178			flags = (version << DNS_FETCHOPT_EDNSVERSIONSHIFT) |
7179				DNS_FETCHOPT_EDNSVERSIONSET;
7180			mask = DNS_FETCHOPT_EDNSVERSIONMASK |
7181			       DNS_FETCHOPT_EDNSVERSIONSET;
7182			switch (version) {
7183			case 0:
7184				dns_adb_changeflags(fctx->adb, query->addrinfo,
7185						    flags, mask);
7186				break;
7187			default:
7188				broken_server = DNS_R_BADVERS;
7189				keep_trying = ISC_TRUE;
7190				break;
7191			}
7192		} else {
7193			/*
7194			 * XXXRTH log.
7195			 */
7196			broken_server = DNS_R_UNEXPECTEDRCODE;
7197			INSIST(broken_server != ISC_R_SUCCESS);
7198			keep_trying = ISC_TRUE;
7199		}
7200		goto done;
7201	}
7202
7203	/*
7204	 * Is the question the same as the one we asked?
7205	 */
7206	result = same_question(fctx);
7207	if (result != ISC_R_SUCCESS) {
7208		/* XXXRTH Log */
7209		if (result == DNS_R_FORMERR)
7210			keep_trying = ISC_TRUE;
7211		goto done;
7212	}
7213
7214	/*
7215	 * Is the server lame?
7216	 */
7217	if (fctx->res->lame_ttl != 0 && !ISFORWARDER(query->addrinfo) &&
7218	    is_lame(fctx)) {
7219		inc_stats(fctx->res, dns_resstatscounter_lame);
7220		log_lame(fctx, query->addrinfo);
7221		result = dns_adb_marklame(fctx->adb, query->addrinfo,
7222					  &fctx->name, fctx->type,
7223					  now + fctx->res->lame_ttl);
7224		if (result != ISC_R_SUCCESS)
7225			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7226				      DNS_LOGMODULE_RESOLVER, ISC_LOG_ERROR,
7227				      "could not mark server as lame: %s",
7228				      isc_result_totext(result));
7229		broken_server = DNS_R_LAME;
7230		keep_trying = ISC_TRUE;
7231		goto done;
7232	}
7233
7234	/*
7235	 * Enforce delegations only zones like NET and COM.
7236	 */
7237	if (!ISFORWARDER(query->addrinfo) &&
7238	    dns_view_isdelegationonly(fctx->res->view, &fctx->domain) &&
7239	    !dns_name_equal(&fctx->domain, &fctx->name) &&
7240	    fix_mustbedelegationornxdomain(message, fctx)) {
7241		char namebuf[DNS_NAME_FORMATSIZE];
7242		char domainbuf[DNS_NAME_FORMATSIZE];
7243		char addrbuf[ISC_SOCKADDR_FORMATSIZE];
7244		char classbuf[64];
7245		char typebuf[64];
7246
7247		dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
7248		dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
7249		dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
7250		dns_rdataclass_format(fctx->res->rdclass, classbuf,
7251				      sizeof(classbuf));
7252		isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
7253				    sizeof(addrbuf));
7254
7255		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DELEGATION_ONLY,
7256			     DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
7257			     "enforced delegation-only for '%s' (%s/%s/%s) "
7258			     "from %s",
7259			     domainbuf, namebuf, typebuf, classbuf, addrbuf);
7260	}
7261
7262	if ((fctx->res->options & DNS_RESOLVER_CHECKNAMES) != 0)
7263		checknames(message);
7264
7265	/*
7266	 * Clear cache bits.
7267	 */
7268	fctx->attributes &= ~(FCTX_ATTR_WANTNCACHE | FCTX_ATTR_WANTCACHE);
7269
7270	/*
7271	 * Did we get any answers?
7272	 */
7273	if (message->counts[DNS_SECTION_ANSWER] > 0 &&
7274	    (message->rcode == dns_rcode_noerror ||
7275	     message->rcode == dns_rcode_nxdomain)) {
7276		/*
7277		 * [normal case]
7278		 * We've got answers.  If it has an authoritative answer or an
7279		 * answer from a forwarder, we're done.
7280		 */
7281		if ((message->flags & DNS_MESSAGEFLAG_AA) != 0 ||
7282		    ISFORWARDER(query->addrinfo))
7283			result = answer_response(fctx);
7284		else if (iscname(fctx) &&
7285			 fctx->type != dns_rdatatype_any &&
7286			 fctx->type != dns_rdatatype_cname) {
7287			/*
7288			 * A BIND8 server could return a non-authoritative
7289			 * answer when a CNAME is followed.  We should treat
7290			 * it as a valid answer.
7291			 */
7292			result = answer_response(fctx);
7293		} else if (fctx->type != dns_rdatatype_ns &&
7294			   !betterreferral(fctx)) {
7295			/*
7296			 * Lame response !!!.
7297			 */
7298			result = answer_response(fctx);
7299		} else {
7300			if (fctx->type == dns_rdatatype_ns) {
7301				/*
7302				 * A BIND 8 server could incorrectly return a
7303				 * non-authoritative answer to an NS query
7304				 * instead of a referral. Since this answer
7305				 * lacks the SIGs necessary to do DNSSEC
7306				 * validation, we must invoke the following
7307				 * special kludge to treat it as a referral.
7308				 */
7309				result = noanswer_response(fctx, NULL,
7310						   LOOK_FOR_NS_IN_ANSWER);
7311			} else {
7312				/*
7313				 * Some other servers may still somehow include
7314				 * an answer when it should return a referral
7315				 * with an empty answer.  Check to see if we can
7316				 * treat this as a referral by ignoring the
7317				 * answer.  Further more, there may be an
7318				 * implementation that moves A/AAAA glue records
7319				 * to the answer section for that type of
7320				 * delegation when the query is for that glue
7321				 * record.  LOOK_FOR_GLUE_IN_ANSWER will handle
7322				 * such a corner case.
7323				 */
7324				result = noanswer_response(fctx, NULL,
7325						   LOOK_FOR_GLUE_IN_ANSWER);
7326			}
7327			if (result != DNS_R_DELEGATION) {
7328				/*
7329				 * At this point, AA is not set, the response
7330				 * is not a referral, and the server is not a
7331				 * forwarder.  It is technically lame and it's
7332				 * easier to treat it as such than to figure out
7333				 * some more elaborate course of action.
7334				 */
7335				broken_server = DNS_R_LAME;
7336				keep_trying = ISC_TRUE;
7337				goto done;
7338			}
7339			goto force_referral;
7340		}
7341		if (result != ISC_R_SUCCESS) {
7342			if (result == DNS_R_FORMERR)
7343				keep_trying = ISC_TRUE;
7344			goto done;
7345		}
7346	} else if (message->counts[DNS_SECTION_AUTHORITY] > 0 ||
7347		   message->rcode == dns_rcode_noerror ||
7348		   message->rcode == dns_rcode_nxdomain) {
7349		/*
7350		 * NXDOMAIN, NXRDATASET, or referral.
7351		 */
7352		result = noanswer_response(fctx, NULL, 0);
7353		switch (result) {
7354		case ISC_R_SUCCESS:
7355		case DNS_R_CHASEDSSERVERS:
7356			break;
7357		case DNS_R_DELEGATION:
7358 force_referral:
7359			/*
7360			 * We don't have the answer, but we know a better
7361			 * place to look.
7362			 */
7363			get_nameservers = ISC_TRUE;
7364			keep_trying = ISC_TRUE;
7365			/*
7366			 * We have a new set of name servers, and it
7367			 * has not experienced any restarts yet.
7368			 */
7369			fctx->restarts = 0;
7370
7371			/*
7372			 * Update local statistics counters collected for each
7373			 * new zone.
7374			 */
7375			fctx->referrals++;
7376			fctx->querysent = 0;
7377			fctx->lamecount = 0;
7378			fctx->neterr = 0;
7379			fctx->badresp = 0;
7380			fctx->adberr = 0;
7381
7382			result = ISC_R_SUCCESS;
7383			break;
7384		default:
7385			/*
7386			 * Something has gone wrong.
7387			 */
7388			if (result == DNS_R_FORMERR)
7389				keep_trying = ISC_TRUE;
7390			goto done;
7391		}
7392	} else {
7393		/*
7394		 * The server is insane.
7395		 */
7396		/* XXXRTH Log */
7397		broken_server = DNS_R_UNEXPECTEDRCODE;
7398		keep_trying = ISC_TRUE;
7399		goto done;
7400	}
7401
7402	/*
7403	 * Follow additional section data chains.
7404	 */
7405	chase_additional(fctx);
7406
7407	/*
7408	 * Cache the cacheable parts of the message.  This may also cause
7409	 * work to be queued to the DNSSEC validator.
7410	 */
7411	if (WANTCACHE(fctx)) {
7412		result = cache_message(fctx, query->addrinfo, now);
7413		if (result != ISC_R_SUCCESS)
7414			goto done;
7415	}
7416
7417	/*
7418	 * Ncache the negatively cacheable parts of the message.  This may
7419	 * also cause work to be queued to the DNSSEC validator.
7420	 */
7421	if (WANTNCACHE(fctx)) {
7422		dns_rdatatype_t covers;
7423		if (message->rcode == dns_rcode_nxdomain)
7424			covers = dns_rdatatype_any;
7425		else
7426			covers = fctx->type;
7427
7428		/*
7429		 * Cache any negative cache entries in the message.
7430		 */
7431		result = ncache_message(fctx, query->addrinfo, covers, now);
7432	}
7433
7434 done:
7435	/*
7436	 * Remember the query's addrinfo, in case we need to mark the
7437	 * server as broken.
7438	 */
7439	addrinfo = query->addrinfo;
7440
7441	/*
7442	 * Cancel the query.
7443	 *
7444	 * XXXRTH  Don't cancel the query if waiting for validation?
7445	 */
7446	fctx_cancelquery(&query, &devent, finish, no_response);
7447
7448	if (keep_trying) {
7449		if (result == DNS_R_FORMERR)
7450			broken_server = DNS_R_FORMERR;
7451		if (broken_server != ISC_R_SUCCESS) {
7452			/*
7453			 * Add this server to the list of bad servers for
7454			 * this fctx.
7455			 */
7456			add_bad(fctx, addrinfo, broken_server, broken_type);
7457		}
7458
7459		if (get_nameservers) {
7460			dns_name_t *name;
7461			dns_fixedname_init(&foundname);
7462			fname = dns_fixedname_name(&foundname);
7463			if (result != ISC_R_SUCCESS) {
7464				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7465				return;
7466			}
7467			findoptions = 0;
7468			if (dns_rdatatype_atparent(fctx->type))
7469				findoptions |= DNS_DBFIND_NOEXACT;
7470			if ((options & DNS_FETCHOPT_UNSHARED) == 0)
7471				name = &fctx->name;
7472			else
7473				name = &fctx->domain;
7474			result = dns_view_findzonecut(fctx->res->view,
7475						      name, fname,
7476						      now, findoptions,
7477						      ISC_TRUE,
7478						      &fctx->nameservers,
7479						      NULL);
7480			if (result != ISC_R_SUCCESS) {
7481				FCTXTRACE("couldn't find a zonecut");
7482				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7483				return;
7484			}
7485			if (!dns_name_issubdomain(fname, &fctx->domain)) {
7486				/*
7487				 * The best nameservers are now above our
7488				 * QDOMAIN.
7489				 */
7490				FCTXTRACE("nameservers now above QDOMAIN");
7491				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7492				return;
7493			}
7494			dns_name_free(&fctx->domain, fctx->mctx);
7495			dns_name_init(&fctx->domain, NULL);
7496			result = dns_name_dup(fname, fctx->mctx, &fctx->domain);
7497			if (result != ISC_R_SUCCESS) {
7498				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
7499				return;
7500			}
7501			fctx->ns_ttl = fctx->nameservers.ttl;
7502			fctx->ns_ttl_ok = ISC_TRUE;
7503			fctx_cancelqueries(fctx, ISC_TRUE);
7504			fctx_cleanupfinds(fctx);
7505			fctx_cleanupaltfinds(fctx);
7506			fctx_cleanupforwaddrs(fctx);
7507			fctx_cleanupaltaddrs(fctx);
7508		}
7509		/*
7510		 * Try again.
7511		 */
7512		fctx_try(fctx, !get_nameservers, ISC_FALSE);
7513	} else if (resend) {
7514		/*
7515		 * Resend (probably with changed options).
7516		 */
7517		FCTXTRACE("resend");
7518		inc_stats(fctx->res, dns_resstatscounter_retry);
7519		result = fctx_query(fctx, addrinfo, options);
7520		if (result != ISC_R_SUCCESS)
7521			fctx_done(fctx, result, __LINE__);
7522	} else if (result == ISC_R_SUCCESS && !HAVE_ANSWER(fctx)) {
7523		/*
7524		 * All has gone well so far, but we are waiting for the
7525		 * DNSSEC validator to validate the answer.
7526		 */
7527		FCTXTRACE("wait for validator");
7528		fctx_cancelqueries(fctx, ISC_TRUE);
7529		/*
7530		 * We must not retransmit while the validator is working;
7531		 * it has references to the current rmessage.
7532		 */
7533		result = fctx_stopidletimer(fctx);
7534		if (result != ISC_R_SUCCESS)
7535			fctx_done(fctx, result, __LINE__);
7536	} else if (result == DNS_R_CHASEDSSERVERS) {
7537		unsigned int n;
7538		add_bad(fctx, addrinfo, result, broken_type);
7539		fctx_cancelqueries(fctx, ISC_TRUE);
7540		fctx_cleanupfinds(fctx);
7541		fctx_cleanupforwaddrs(fctx);
7542
7543		n = dns_name_countlabels(&fctx->name);
7544		dns_name_getlabelsequence(&fctx->name, 1, n - 1, &fctx->nsname);
7545
7546		FCTXTRACE("suspending DS lookup to find parent's NS records");
7547
7548		result = dns_resolver_createfetch(fctx->res, &fctx->nsname,
7549						  dns_rdatatype_ns,
7550						  NULL, NULL, NULL, 0, task,
7551						  resume_dslookup, fctx,
7552						  &fctx->nsrrset, NULL,
7553						  &fctx->nsfetch);
7554		if (result != ISC_R_SUCCESS)
7555			fctx_done(fctx, result, __LINE__);
7556		else {
7557			LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
7558			fctx->references++;
7559			UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
7560			result = fctx_stopidletimer(fctx);
7561			if (result != ISC_R_SUCCESS)
7562				fctx_done(fctx, result, __LINE__);
7563		}
7564	} else {
7565		/*
7566		 * We're done.
7567		 */
7568		fctx_done(fctx, result, __LINE__);
7569	}
7570}
7571
7572
7573/***
7574 *** Resolver Methods
7575 ***/
7576static void
7577destroy_badcache(dns_resolver_t *res) {
7578	dns_badcache_t *bad, *next;
7579	unsigned int i;
7580
7581	if (res->badcache != NULL) {
7582		for (i = 0; i < res->badhash; i++)
7583			for (bad = res->badcache[i]; bad != NULL;
7584			     bad = next) {
7585				next = bad->next;
7586				isc_mem_put(res->mctx, bad, sizeof(*bad) +
7587					    bad->name.length);
7588				res->badcount--;
7589			}
7590		isc_mem_put(res->mctx, res->badcache,
7591			    sizeof(*res->badcache) * res->badhash);
7592		res->badcache = NULL;
7593		res->badhash = 0;
7594		INSIST(res->badcount == 0);
7595	}
7596}
7597
7598static void
7599destroy(dns_resolver_t *res) {
7600	unsigned int i;
7601	alternate_t *a;
7602
7603	REQUIRE(res->references == 0);
7604	REQUIRE(!res->priming);
7605	REQUIRE(res->primefetch == NULL);
7606
7607	RTRACE("destroy");
7608
7609	INSIST(res->nfctx == 0);
7610
7611	DESTROYLOCK(&res->primelock);
7612	DESTROYLOCK(&res->nlock);
7613	DESTROYLOCK(&res->lock);
7614	for (i = 0; i < res->nbuckets; i++) {
7615		INSIST(ISC_LIST_EMPTY(res->buckets[i].fctxs));
7616		isc_task_shutdown(res->buckets[i].task);
7617		isc_task_detach(&res->buckets[i].task);
7618		DESTROYLOCK(&res->buckets[i].lock);
7619		isc_mem_detach(&res->buckets[i].mctx);
7620	}
7621	isc_mem_put(res->mctx, res->buckets,
7622		    res->nbuckets * sizeof(fctxbucket_t));
7623	if (res->dispatches4 != NULL)
7624		dns_dispatchset_destroy(&res->dispatches4);
7625	if (res->dispatches6 != NULL)
7626		dns_dispatchset_destroy(&res->dispatches6);
7627	while ((a = ISC_LIST_HEAD(res->alternates)) != NULL) {
7628		ISC_LIST_UNLINK(res->alternates, a, link);
7629		if (!a->isaddress)
7630			dns_name_free(&a->_u._n.name, res->mctx);
7631		isc_mem_put(res->mctx, a, sizeof(*a));
7632	}
7633	dns_resolver_reset_algorithms(res);
7634	destroy_badcache(res);
7635	dns_resolver_resetmustbesecure(res);
7636#if USE_ALGLOCK
7637	isc_rwlock_destroy(&res->alglock);
7638#endif
7639#if USE_MBSLOCK
7640	isc_rwlock_destroy(&res->mbslock);
7641#endif
7642	isc_timer_detach(&res->spillattimer);
7643	res->magic = 0;
7644	isc_mem_put(res->mctx, res, sizeof(*res));
7645}
7646
7647static void
7648send_shutdown_events(dns_resolver_t *res) {
7649	isc_event_t *event, *next_event;
7650	isc_task_t *etask;
7651
7652	/*
7653	 * Caller must be holding the resolver lock.
7654	 */
7655
7656	for (event = ISC_LIST_HEAD(res->whenshutdown);
7657	     event != NULL;
7658	     event = next_event) {
7659		next_event = ISC_LIST_NEXT(event, ev_link);
7660		ISC_LIST_UNLINK(res->whenshutdown, event, ev_link);
7661		etask = event->ev_sender;
7662		event->ev_sender = res;
7663		isc_task_sendanddetach(&etask, &event);
7664	}
7665}
7666
7667static void
7668empty_bucket(dns_resolver_t *res) {
7669	RTRACE("empty_bucket");
7670
7671	LOCK(&res->lock);
7672
7673	INSIST(res->activebuckets > 0);
7674	res->activebuckets--;
7675	if (res->activebuckets == 0)
7676		send_shutdown_events(res);
7677
7678	UNLOCK(&res->lock);
7679}
7680
7681static void
7682spillattimer_countdown(isc_task_t *task, isc_event_t *event) {
7683	dns_resolver_t *res = event->ev_arg;
7684	isc_result_t result;
7685	unsigned int count;
7686	isc_boolean_t logit = ISC_FALSE;
7687
7688	REQUIRE(VALID_RESOLVER(res));
7689
7690	UNUSED(task);
7691
7692	LOCK(&res->lock);
7693	INSIST(!res->exiting);
7694	if (res->spillat > res->spillatmin) {
7695		res->spillat--;
7696		logit = ISC_TRUE;
7697	}
7698	if (res->spillat <= res->spillatmin) {
7699		result = isc_timer_reset(res->spillattimer,
7700					 isc_timertype_inactive, NULL,
7701					 NULL, ISC_TRUE);
7702		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7703	}
7704	count = res->spillat;
7705	UNLOCK(&res->lock);
7706	if (logit)
7707		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7708			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
7709			      "clients-per-query decreased to %u", count);
7710
7711	isc_event_free(&event);
7712}
7713
7714isc_result_t
7715dns_resolver_create(dns_view_t *view,
7716		    isc_taskmgr_t *taskmgr,
7717		    unsigned int ntasks, unsigned int ndisp,
7718		    isc_socketmgr_t *socketmgr,
7719		    isc_timermgr_t *timermgr,
7720		    unsigned int options,
7721		    dns_dispatchmgr_t *dispatchmgr,
7722		    dns_dispatch_t *dispatchv4,
7723		    dns_dispatch_t *dispatchv6,
7724		    dns_resolver_t **resp)
7725{
7726	dns_resolver_t *res;
7727	isc_result_t result = ISC_R_SUCCESS;
7728	unsigned int i, buckets_created = 0;
7729	isc_task_t *task = NULL;
7730	char name[16];
7731	unsigned dispattr;
7732
7733	/*
7734	 * Create a resolver.
7735	 */
7736
7737	REQUIRE(DNS_VIEW_VALID(view));
7738	REQUIRE(ntasks > 0);
7739	REQUIRE(ndisp > 0);
7740	REQUIRE(resp != NULL && *resp == NULL);
7741	REQUIRE(dispatchmgr != NULL);
7742	REQUIRE(dispatchv4 != NULL || dispatchv6 != NULL);
7743
7744	res = isc_mem_get(view->mctx, sizeof(*res));
7745	if (res == NULL)
7746		return (ISC_R_NOMEMORY);
7747	RTRACE("create");
7748	res->mctx = view->mctx;
7749	res->rdclass = view->rdclass;
7750	res->socketmgr = socketmgr;
7751	res->timermgr = timermgr;
7752	res->taskmgr = taskmgr;
7753	res->dispatchmgr = dispatchmgr;
7754	res->view = view;
7755	res->options = options;
7756	res->lame_ttl = 0;
7757	ISC_LIST_INIT(res->alternates);
7758	res->udpsize = RECV_BUFFER_SIZE;
7759	res->algorithms = NULL;
7760	res->badcache = NULL;
7761	res->badcount = 0;
7762	res->badhash = 0;
7763	res->badsweep = 0;
7764	res->mustbesecure = NULL;
7765	res->spillatmin = res->spillat = 10;
7766	res->spillatmax = 100;
7767	res->spillattimer = NULL;
7768	res->zero_no_soa_ttl = ISC_FALSE;
7769	res->query_timeout = DEFAULT_QUERY_TIMEOUT;
7770	res->maxdepth = DEFAULT_RECURSION_DEPTH;
7771	res->nbuckets = ntasks;
7772	res->activebuckets = ntasks;
7773	res->buckets = isc_mem_get(view->mctx,
7774				   ntasks * sizeof(fctxbucket_t));
7775	if (res->buckets == NULL) {
7776		result = ISC_R_NOMEMORY;
7777		goto cleanup_res;
7778	}
7779	for (i = 0; i < ntasks; i++) {
7780		result = isc_mutex_init(&res->buckets[i].lock);
7781		if (result != ISC_R_SUCCESS)
7782			goto cleanup_buckets;
7783		res->buckets[i].task = NULL;
7784		result = isc_task_create(taskmgr, 0, &res->buckets[i].task);
7785		if (result != ISC_R_SUCCESS) {
7786			DESTROYLOCK(&res->buckets[i].lock);
7787			goto cleanup_buckets;
7788		}
7789		res->buckets[i].mctx = NULL;
7790		snprintf(name, sizeof(name), "res%u", i);
7791#ifdef ISC_PLATFORM_USETHREADS
7792		/*
7793		 * Use a separate memory context for each bucket to reduce
7794		 * contention among multiple threads.  Do this only when
7795		 * enabling threads because it will be require more memory.
7796		 */
7797		result = isc_mem_create(0, 0, &res->buckets[i].mctx);
7798		if (result != ISC_R_SUCCESS) {
7799			isc_task_detach(&res->buckets[i].task);
7800			DESTROYLOCK(&res->buckets[i].lock);
7801			goto cleanup_buckets;
7802		}
7803		isc_mem_setname(res->buckets[i].mctx, name, NULL);
7804#else
7805		isc_mem_attach(view->mctx, &res->buckets[i].mctx);
7806#endif
7807		isc_task_setname(res->buckets[i].task, name, res);
7808		ISC_LIST_INIT(res->buckets[i].fctxs);
7809		res->buckets[i].exiting = ISC_FALSE;
7810		buckets_created++;
7811	}
7812
7813	res->dispatches4 = NULL;
7814	if (dispatchv4 != NULL) {
7815		dns_dispatchset_create(view->mctx, socketmgr, taskmgr,
7816				       dispatchv4, &res->dispatches4, ndisp);
7817		dispattr = dns_dispatch_getattributes(dispatchv4);
7818		res->exclusivev4 =
7819			ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0);
7820	}
7821
7822	res->dispatches6 = NULL;
7823	if (dispatchv6 != NULL) {
7824		dns_dispatchset_create(view->mctx, socketmgr, taskmgr,
7825				       dispatchv6, &res->dispatches6, ndisp);
7826		dispattr = dns_dispatch_getattributes(dispatchv6);
7827		res->exclusivev6 =
7828			ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0);
7829	}
7830
7831	res->references = 1;
7832	res->exiting = ISC_FALSE;
7833	res->frozen = ISC_FALSE;
7834	ISC_LIST_INIT(res->whenshutdown);
7835	res->priming = ISC_FALSE;
7836	res->primefetch = NULL;
7837	res->nfctx = 0;
7838
7839	result = isc_mutex_init(&res->lock);
7840	if (result != ISC_R_SUCCESS)
7841		goto cleanup_dispatches;
7842
7843	result = isc_mutex_init(&res->nlock);
7844	if (result != ISC_R_SUCCESS)
7845		goto cleanup_lock;
7846
7847	result = isc_mutex_init(&res->primelock);
7848	if (result != ISC_R_SUCCESS)
7849		goto cleanup_nlock;
7850
7851	task = NULL;
7852	result = isc_task_create(taskmgr, 0, &task);
7853	if (result != ISC_R_SUCCESS)
7854		goto cleanup_primelock;
7855
7856	result = isc_timer_create(timermgr, isc_timertype_inactive, NULL, NULL,
7857				  task, spillattimer_countdown, res,
7858				  &res->spillattimer);
7859	isc_task_detach(&task);
7860	if (result != ISC_R_SUCCESS)
7861		goto cleanup_primelock;
7862
7863#if USE_ALGLOCK
7864	result = isc_rwlock_init(&res->alglock, 0, 0);
7865	if (result != ISC_R_SUCCESS)
7866		goto cleanup_spillattimer;
7867#endif
7868#if USE_MBSLOCK
7869	result = isc_rwlock_init(&res->mbslock, 0, 0);
7870	if (result != ISC_R_SUCCESS)
7871		goto cleanup_alglock;
7872#endif
7873
7874	res->magic = RES_MAGIC;
7875
7876	*resp = res;
7877
7878	return (ISC_R_SUCCESS);
7879
7880#if USE_MBSLOCK
7881 cleanup_alglock:
7882#if USE_ALGLOCK
7883	isc_rwlock_destroy(&res->alglock);
7884#endif
7885#endif
7886#if USE_ALGLOCK || USE_MBSLOCK
7887 cleanup_spillattimer:
7888	isc_timer_detach(&res->spillattimer);
7889#endif
7890
7891 cleanup_primelock:
7892	DESTROYLOCK(&res->primelock);
7893
7894 cleanup_nlock:
7895	DESTROYLOCK(&res->nlock);
7896
7897 cleanup_lock:
7898	DESTROYLOCK(&res->lock);
7899
7900 cleanup_dispatches:
7901	if (res->dispatches6 != NULL)
7902		dns_dispatchset_destroy(&res->dispatches6);
7903	if (res->dispatches4 != NULL)
7904		dns_dispatchset_destroy(&res->dispatches4);
7905
7906 cleanup_buckets:
7907	for (i = 0; i < buckets_created; i++) {
7908		isc_mem_detach(&res->buckets[i].mctx);
7909		DESTROYLOCK(&res->buckets[i].lock);
7910		isc_task_shutdown(res->buckets[i].task);
7911		isc_task_detach(&res->buckets[i].task);
7912	}
7913	isc_mem_put(view->mctx, res->buckets,
7914		    res->nbuckets * sizeof(fctxbucket_t));
7915
7916 cleanup_res:
7917	isc_mem_put(view->mctx, res, sizeof(*res));
7918
7919	return (result);
7920}
7921
7922#ifdef BIND9
7923static void
7924prime_done(isc_task_t *task, isc_event_t *event) {
7925	dns_resolver_t *res;
7926	dns_fetchevent_t *fevent;
7927	dns_fetch_t *fetch;
7928	dns_db_t *db = NULL;
7929
7930	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
7931	fevent = (dns_fetchevent_t *)event;
7932	res = event->ev_arg;
7933	REQUIRE(VALID_RESOLVER(res));
7934
7935	UNUSED(task);
7936
7937	LOCK(&res->lock);
7938
7939	INSIST(res->priming);
7940	res->priming = ISC_FALSE;
7941	LOCK(&res->primelock);
7942	fetch = res->primefetch;
7943	res->primefetch = NULL;
7944	UNLOCK(&res->primelock);
7945
7946	UNLOCK(&res->lock);
7947
7948	if (fevent->result == ISC_R_SUCCESS &&
7949	    res->view->cache != NULL && res->view->hints != NULL) {
7950		dns_cache_attachdb(res->view->cache, &db);
7951		dns_root_checkhints(res->view, res->view->hints, db);
7952		dns_db_detach(&db);
7953	}
7954
7955	if (fevent->node != NULL)
7956		dns_db_detachnode(fevent->db, &fevent->node);
7957	if (fevent->db != NULL)
7958		dns_db_detach(&fevent->db);
7959	if (dns_rdataset_isassociated(fevent->rdataset))
7960		dns_rdataset_disassociate(fevent->rdataset);
7961	INSIST(fevent->sigrdataset == NULL);
7962
7963	isc_mem_put(res->mctx, fevent->rdataset, sizeof(*fevent->rdataset));
7964
7965	isc_event_free(&event);
7966	dns_resolver_destroyfetch(&fetch);
7967}
7968
7969void
7970dns_resolver_prime(dns_resolver_t *res) {
7971	isc_boolean_t want_priming = ISC_FALSE;
7972	dns_rdataset_t *rdataset;
7973	isc_result_t result;
7974
7975	REQUIRE(VALID_RESOLVER(res));
7976	REQUIRE(res->frozen);
7977
7978	RTRACE("dns_resolver_prime");
7979
7980	LOCK(&res->lock);
7981
7982	if (!res->exiting && !res->priming) {
7983		INSIST(res->primefetch == NULL);
7984		res->priming = ISC_TRUE;
7985		want_priming = ISC_TRUE;
7986	}
7987
7988	UNLOCK(&res->lock);
7989
7990	if (want_priming) {
7991		/*
7992		 * To avoid any possible recursive locking problems, we
7993		 * start the priming fetch like any other fetch, and holding
7994		 * no resolver locks.  No one else will try to start it
7995		 * because we're the ones who set res->priming to true.
7996		 * Any other callers of dns_resolver_prime() while we're
7997		 * running will see that res->priming is already true and
7998		 * do nothing.
7999		 */
8000		RTRACE("priming");
8001		rdataset = isc_mem_get(res->mctx, sizeof(*rdataset));
8002		if (rdataset == NULL) {
8003			LOCK(&res->lock);
8004			INSIST(res->priming);
8005			INSIST(res->primefetch == NULL);
8006			res->priming = ISC_FALSE;
8007			UNLOCK(&res->lock);
8008			return;
8009		}
8010		dns_rdataset_init(rdataset);
8011		LOCK(&res->primelock);
8012		result = dns_resolver_createfetch(res, dns_rootname,
8013						  dns_rdatatype_ns,
8014						  NULL, NULL, NULL, 0,
8015						  res->buckets[0].task,
8016						  prime_done,
8017						  res, rdataset, NULL,
8018						  &res->primefetch);
8019		UNLOCK(&res->primelock);
8020		if (result != ISC_R_SUCCESS) {
8021			LOCK(&res->lock);
8022			INSIST(res->priming);
8023			res->priming = ISC_FALSE;
8024			UNLOCK(&res->lock);
8025		}
8026	}
8027}
8028#endif /* BIND9 */
8029
8030void
8031dns_resolver_freeze(dns_resolver_t *res) {
8032	/*
8033	 * Freeze resolver.
8034	 */
8035
8036	REQUIRE(VALID_RESOLVER(res));
8037
8038	res->frozen = ISC_TRUE;
8039}
8040
8041void
8042dns_resolver_attach(dns_resolver_t *source, dns_resolver_t **targetp) {
8043	REQUIRE(VALID_RESOLVER(source));
8044	REQUIRE(targetp != NULL && *targetp == NULL);
8045
8046	RRTRACE(source, "attach");
8047	LOCK(&source->lock);
8048	REQUIRE(!source->exiting);
8049
8050	INSIST(source->references > 0);
8051	source->references++;
8052	INSIST(source->references != 0);
8053	UNLOCK(&source->lock);
8054
8055	*targetp = source;
8056}
8057
8058void
8059dns_resolver_whenshutdown(dns_resolver_t *res, isc_task_t *task,
8060			  isc_event_t **eventp)
8061{
8062	isc_task_t *clone;
8063	isc_event_t *event;
8064
8065	REQUIRE(VALID_RESOLVER(res));
8066	REQUIRE(eventp != NULL);
8067
8068	event = *eventp;
8069	*eventp = NULL;
8070
8071	LOCK(&res->lock);
8072
8073	if (res->exiting && res->activebuckets == 0) {
8074		/*
8075		 * We're already shutdown.  Send the event.
8076		 */
8077		event->ev_sender = res;
8078		isc_task_send(task, &event);
8079	} else {
8080		clone = NULL;
8081		isc_task_attach(task, &clone);
8082		event->ev_sender = clone;
8083		ISC_LIST_APPEND(res->whenshutdown, event, ev_link);
8084	}
8085
8086	UNLOCK(&res->lock);
8087}
8088
8089void
8090dns_resolver_shutdown(dns_resolver_t *res) {
8091	unsigned int i;
8092	fetchctx_t *fctx;
8093	isc_result_t result;
8094
8095	REQUIRE(VALID_RESOLVER(res));
8096
8097	RTRACE("shutdown");
8098
8099	LOCK(&res->lock);
8100
8101	if (!res->exiting) {
8102		RTRACE("exiting");
8103		res->exiting = ISC_TRUE;
8104
8105		for (i = 0; i < res->nbuckets; i++) {
8106			LOCK(&res->buckets[i].lock);
8107			for (fctx = ISC_LIST_HEAD(res->buckets[i].fctxs);
8108			     fctx != NULL;
8109			     fctx = ISC_LIST_NEXT(fctx, link))
8110				fctx_shutdown(fctx);
8111			if (res->dispatches4 != NULL && !res->exclusivev4) {
8112				dns_dispatchset_cancelall(res->dispatches4,
8113							  res->buckets[i].task);
8114			}
8115			if (res->dispatches6 != NULL && !res->exclusivev6) {
8116				dns_dispatchset_cancelall(res->dispatches6,
8117							  res->buckets[i].task);
8118			}
8119			res->buckets[i].exiting = ISC_TRUE;
8120			if (ISC_LIST_EMPTY(res->buckets[i].fctxs)) {
8121				INSIST(res->activebuckets > 0);
8122				res->activebuckets--;
8123			}
8124			UNLOCK(&res->buckets[i].lock);
8125		}
8126		if (res->activebuckets == 0)
8127			send_shutdown_events(res);
8128		result = isc_timer_reset(res->spillattimer,
8129					 isc_timertype_inactive, NULL,
8130					 NULL, ISC_TRUE);
8131		RUNTIME_CHECK(result == ISC_R_SUCCESS);
8132	}
8133
8134	UNLOCK(&res->lock);
8135}
8136
8137void
8138dns_resolver_detach(dns_resolver_t **resp) {
8139	dns_resolver_t *res;
8140	isc_boolean_t need_destroy = ISC_FALSE;
8141
8142	REQUIRE(resp != NULL);
8143	res = *resp;
8144	REQUIRE(VALID_RESOLVER(res));
8145
8146	RTRACE("detach");
8147
8148	LOCK(&res->lock);
8149
8150	INSIST(res->references > 0);
8151	res->references--;
8152	if (res->references == 0) {
8153		INSIST(res->exiting && res->activebuckets == 0);
8154		need_destroy = ISC_TRUE;
8155	}
8156
8157	UNLOCK(&res->lock);
8158
8159	if (need_destroy)
8160		destroy(res);
8161
8162	*resp = NULL;
8163}
8164
8165static inline isc_boolean_t
8166fctx_match(fetchctx_t *fctx, dns_name_t *name, dns_rdatatype_t type,
8167	   unsigned int options)
8168{
8169	/*
8170	 * Don't match fetch contexts that are shutting down.
8171	 */
8172	if (fctx->cloned || fctx->state == fetchstate_done ||
8173	    ISC_LIST_EMPTY(fctx->events))
8174		return (ISC_FALSE);
8175
8176	if (fctx->type != type || fctx->options != options)
8177		return (ISC_FALSE);
8178	return (dns_name_equal(&fctx->name, name));
8179}
8180
8181static inline void
8182log_fetch(dns_name_t *name, dns_rdatatype_t type) {
8183	char namebuf[DNS_NAME_FORMATSIZE];
8184	char typebuf[DNS_RDATATYPE_FORMATSIZE];
8185	int level = ISC_LOG_DEBUG(1);
8186
8187	if (! isc_log_wouldlog(dns_lctx, level))
8188		return;
8189
8190	dns_name_format(name, namebuf, sizeof(namebuf));
8191	dns_rdatatype_format(type, typebuf, sizeof(typebuf));
8192
8193	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
8194		      DNS_LOGMODULE_RESOLVER, level,
8195		      "createfetch: %s %s", namebuf, typebuf);
8196}
8197
8198isc_result_t
8199dns_resolver_createfetch(dns_resolver_t *res, dns_name_t *name,
8200			 dns_rdatatype_t type,
8201			 dns_name_t *domain, dns_rdataset_t *nameservers,
8202			 dns_forwarders_t *forwarders,
8203			 unsigned int options, isc_task_t *task,
8204			 isc_taskaction_t action, void *arg,
8205			 dns_rdataset_t *rdataset,
8206			 dns_rdataset_t *sigrdataset,
8207			 dns_fetch_t **fetchp)
8208{
8209	return (dns_resolver_createfetch3(res, name, type, domain,
8210					  nameservers, forwarders, NULL, 0,
8211					  options, 0, task, action, arg,
8212					  rdataset, sigrdataset, fetchp));
8213}
8214
8215isc_result_t
8216dns_resolver_createfetch2(dns_resolver_t *res, dns_name_t *name,
8217			  dns_rdatatype_t type,
8218			  dns_name_t *domain, dns_rdataset_t *nameservers,
8219			  dns_forwarders_t *forwarders,
8220			  isc_sockaddr_t *client, dns_messageid_t id,
8221			  unsigned int options, isc_task_t *task,
8222			  isc_taskaction_t action, void *arg,
8223			  dns_rdataset_t *rdataset,
8224			  dns_rdataset_t *sigrdataset,
8225			  dns_fetch_t **fetchp)
8226{
8227	return (dns_resolver_createfetch3(res, name, type, domain,
8228					  nameservers, forwarders, client, id,
8229					  options, 0, task, action, arg,
8230					  rdataset, sigrdataset, fetchp));
8231}
8232
8233isc_result_t
8234dns_resolver_createfetch3(dns_resolver_t *res, dns_name_t *name,
8235			  dns_rdatatype_t type,
8236			  dns_name_t *domain, dns_rdataset_t *nameservers,
8237			  dns_forwarders_t *forwarders,
8238			  isc_sockaddr_t *client, dns_messageid_t id,
8239			  unsigned int options, unsigned int depth,
8240			  isc_task_t *task,
8241			  isc_taskaction_t action, void *arg,
8242			  dns_rdataset_t *rdataset,
8243			  dns_rdataset_t *sigrdataset,
8244			  dns_fetch_t **fetchp)
8245{
8246	dns_fetch_t *fetch;
8247	fetchctx_t *fctx = NULL;
8248	isc_result_t result = ISC_R_SUCCESS;
8249	unsigned int bucketnum;
8250	isc_boolean_t new_fctx = ISC_FALSE;
8251	isc_event_t *event;
8252	unsigned int count = 0;
8253	unsigned int spillat;
8254	unsigned int spillatmin;
8255	isc_boolean_t destroy = ISC_FALSE;
8256
8257	UNUSED(forwarders);
8258
8259	REQUIRE(VALID_RESOLVER(res));
8260	REQUIRE(res->frozen);
8261	/* XXXRTH  Check for meta type */
8262	if (domain != NULL) {
8263		REQUIRE(DNS_RDATASET_VALID(nameservers));
8264		REQUIRE(nameservers->type == dns_rdatatype_ns);
8265	} else
8266		REQUIRE(nameservers == NULL);
8267	REQUIRE(forwarders == NULL);
8268	REQUIRE(!dns_rdataset_isassociated(rdataset));
8269	REQUIRE(sigrdataset == NULL ||
8270		!dns_rdataset_isassociated(sigrdataset));
8271	REQUIRE(fetchp != NULL && *fetchp == NULL);
8272
8273	log_fetch(name, type);
8274
8275	/*
8276	 * XXXRTH  use a mempool?
8277	 */
8278	fetch = isc_mem_get(res->mctx, sizeof(*fetch));
8279	if (fetch == NULL)
8280		return (ISC_R_NOMEMORY);
8281
8282	bucketnum = dns_name_fullhash(name, ISC_FALSE) % res->nbuckets;
8283
8284	LOCK(&res->lock);
8285	spillat = res->spillat;
8286	spillatmin = res->spillatmin;
8287	UNLOCK(&res->lock);
8288	LOCK(&res->buckets[bucketnum].lock);
8289
8290	if (res->buckets[bucketnum].exiting) {
8291		result = ISC_R_SHUTTINGDOWN;
8292		goto unlock;
8293	}
8294
8295	if ((options & DNS_FETCHOPT_UNSHARED) == 0) {
8296		for (fctx = ISC_LIST_HEAD(res->buckets[bucketnum].fctxs);
8297		     fctx != NULL;
8298		     fctx = ISC_LIST_NEXT(fctx, link)) {
8299			if (fctx_match(fctx, name, type, options))
8300				break;
8301		}
8302	}
8303
8304	/*
8305	 * Is this a duplicate?
8306	 */
8307	if (fctx != NULL && client != NULL) {
8308		dns_fetchevent_t *fevent;
8309		for (fevent = ISC_LIST_HEAD(fctx->events);
8310		     fevent != NULL;
8311		     fevent = ISC_LIST_NEXT(fevent, ev_link)) {
8312			if (fevent->client != NULL && fevent->id == id &&
8313			    isc_sockaddr_equal(fevent->client, client)) {
8314				result = DNS_R_DUPLICATE;
8315				goto unlock;
8316			}
8317			count++;
8318		}
8319	}
8320	if (count >= spillatmin && spillatmin != 0) {
8321		INSIST(fctx != NULL);
8322		if (count >= spillat)
8323			fctx->spilled = ISC_TRUE;
8324		if (fctx->spilled) {
8325			result = DNS_R_DROP;
8326			goto unlock;
8327		}
8328	}
8329
8330	if (fctx == NULL) {
8331		result = fctx_create(res, name, type, domain, nameservers,
8332				     options, bucketnum, depth, &fctx);
8333		if (result != ISC_R_SUCCESS)
8334			goto unlock;
8335		new_fctx = ISC_TRUE;
8336	} else if (fctx->depth > depth)
8337		fctx->depth = depth;
8338
8339	result = fctx_join(fctx, task, client, id, action, arg,
8340			   rdataset, sigrdataset, fetch);
8341	if (new_fctx) {
8342		if (result == ISC_R_SUCCESS) {
8343			/*
8344			 * Launch this fctx.
8345			 */
8346			event = &fctx->control_event;
8347			ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
8348				       DNS_EVENT_FETCHCONTROL,
8349				       fctx_start, fctx, NULL,
8350				       NULL, NULL);
8351			isc_task_send(res->buckets[bucketnum].task, &event);
8352		} else {
8353			/*
8354			 * We don't care about the result of fctx_unlink()
8355			 * since we know we're not exiting.
8356			 */
8357			(void)fctx_unlink(fctx);
8358			destroy = ISC_TRUE;
8359		}
8360	}
8361
8362 unlock:
8363	UNLOCK(&res->buckets[bucketnum].lock);
8364
8365	if (destroy)
8366		fctx_destroy(fctx);
8367
8368	if (result == ISC_R_SUCCESS) {
8369		FTRACE("created");
8370		*fetchp = fetch;
8371	} else
8372		isc_mem_put(res->mctx, fetch, sizeof(*fetch));
8373
8374	return (result);
8375}
8376
8377void
8378dns_resolver_cancelfetch(dns_fetch_t *fetch) {
8379	fetchctx_t *fctx;
8380	dns_resolver_t *res;
8381	dns_fetchevent_t *event, *next_event;
8382	isc_task_t *etask;
8383
8384	REQUIRE(DNS_FETCH_VALID(fetch));
8385	fctx = fetch->private;
8386	REQUIRE(VALID_FCTX(fctx));
8387	res = fctx->res;
8388
8389	FTRACE("cancelfetch");
8390
8391	LOCK(&res->buckets[fctx->bucketnum].lock);
8392
8393	/*
8394	 * Find the completion event for this fetch (as opposed
8395	 * to those for other fetches that have joined the same
8396	 * fctx) and send it with result = ISC_R_CANCELED.
8397	 */
8398	event = NULL;
8399	if (fctx->state != fetchstate_done) {
8400		for (event = ISC_LIST_HEAD(fctx->events);
8401		     event != NULL;
8402		     event = next_event) {
8403			next_event = ISC_LIST_NEXT(event, ev_link);
8404			if (event->fetch == fetch) {
8405				ISC_LIST_UNLINK(fctx->events, event, ev_link);
8406				break;
8407			}
8408		}
8409	}
8410	if (event != NULL) {
8411		etask = event->ev_sender;
8412		event->ev_sender = fctx;
8413		event->result = ISC_R_CANCELED;
8414		isc_task_sendanddetach(&etask, ISC_EVENT_PTR(&event));
8415	}
8416	/*
8417	 * The fctx continues running even if no fetches remain;
8418	 * the answer is still cached.
8419	 */
8420
8421	UNLOCK(&res->buckets[fctx->bucketnum].lock);
8422}
8423
8424void
8425dns_resolver_destroyfetch(dns_fetch_t **fetchp) {
8426	dns_fetch_t *fetch;
8427	dns_resolver_t *res;
8428	dns_fetchevent_t *event, *next_event;
8429	fetchctx_t *fctx;
8430	unsigned int bucketnum;
8431	isc_boolean_t bucket_empty;
8432
8433	REQUIRE(fetchp != NULL);
8434	fetch = *fetchp;
8435	REQUIRE(DNS_FETCH_VALID(fetch));
8436	fctx = fetch->private;
8437	REQUIRE(VALID_FCTX(fctx));
8438	res = fctx->res;
8439
8440	FTRACE("destroyfetch");
8441
8442	bucketnum = fctx->bucketnum;
8443	LOCK(&res->buckets[bucketnum].lock);
8444
8445	/*
8446	 * Sanity check: the caller should have gotten its event before
8447	 * trying to destroy the fetch.
8448	 */
8449	event = NULL;
8450	if (fctx->state != fetchstate_done) {
8451		for (event = ISC_LIST_HEAD(fctx->events);
8452		     event != NULL;
8453		     event = next_event) {
8454			next_event = ISC_LIST_NEXT(event, ev_link);
8455			RUNTIME_CHECK(event->fetch != fetch);
8456		}
8457	}
8458
8459	bucket_empty = fctx_decreference(fctx);
8460
8461	UNLOCK(&res->buckets[bucketnum].lock);
8462
8463	isc_mem_put(res->mctx, fetch, sizeof(*fetch));
8464	*fetchp = NULL;
8465
8466	if (bucket_empty)
8467		empty_bucket(res);
8468}
8469
8470void
8471dns_resolver_logfetch(dns_fetch_t *fetch, isc_log_t *lctx,
8472		      isc_logcategory_t *category, isc_logmodule_t *module,
8473		      int level, isc_boolean_t duplicateok)
8474{
8475	fetchctx_t *fctx;
8476	dns_resolver_t *res;
8477	char domainbuf[DNS_NAME_FORMATSIZE];
8478
8479	REQUIRE(DNS_FETCH_VALID(fetch));
8480	fctx = fetch->private;
8481	REQUIRE(VALID_FCTX(fctx));
8482	res = fctx->res;
8483
8484	LOCK(&res->buckets[fctx->bucketnum].lock);
8485
8486	INSIST(fctx->exitline >= 0);
8487	if (!fctx->logged || duplicateok) {
8488		dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
8489		isc_log_write(lctx, category, module, level,
8490			      "fetch completed at %s:%d for %s in "
8491			      "%" ISC_PRINT_QUADFORMAT "u."
8492			      "%06" ISC_PRINT_QUADFORMAT "u: %s/%s "
8493			      "[domain:%s,referral:%u,restart:%u,qrysent:%u,"
8494			      "timeout:%u,lame:%u,neterr:%u,badresp:%u,"
8495			      "adberr:%u,findfail:%u,valfail:%u]",
8496			      __FILE__, fctx->exitline, fctx->info,
8497			      fctx->duration / US_PER_SEC,
8498			      fctx->duration % US_PER_SEC,
8499			      isc_result_totext(fctx->result),
8500			      isc_result_totext(fctx->vresult), domainbuf,
8501			      fctx->referrals, fctx->restarts,
8502			      fctx->querysent, fctx->timeouts, fctx->lamecount,
8503			      fctx->neterr, fctx->badresp, fctx->adberr,
8504			      fctx->findfail, fctx->valfail);
8505		fctx->logged = ISC_TRUE;
8506	}
8507
8508	UNLOCK(&res->buckets[fctx->bucketnum].lock);
8509}
8510
8511dns_dispatchmgr_t *
8512dns_resolver_dispatchmgr(dns_resolver_t *resolver) {
8513	REQUIRE(VALID_RESOLVER(resolver));
8514	return (resolver->dispatchmgr);
8515}
8516
8517dns_dispatch_t *
8518dns_resolver_dispatchv4(dns_resolver_t *resolver) {
8519	REQUIRE(VALID_RESOLVER(resolver));
8520	return (dns_dispatchset_get(resolver->dispatches4));
8521}
8522
8523dns_dispatch_t *
8524dns_resolver_dispatchv6(dns_resolver_t *resolver) {
8525	REQUIRE(VALID_RESOLVER(resolver));
8526	return (dns_dispatchset_get(resolver->dispatches6));
8527}
8528
8529isc_socketmgr_t *
8530dns_resolver_socketmgr(dns_resolver_t *resolver) {
8531	REQUIRE(VALID_RESOLVER(resolver));
8532	return (resolver->socketmgr);
8533}
8534
8535isc_taskmgr_t *
8536dns_resolver_taskmgr(dns_resolver_t *resolver) {
8537	REQUIRE(VALID_RESOLVER(resolver));
8538	return (resolver->taskmgr);
8539}
8540
8541isc_uint32_t
8542dns_resolver_getlamettl(dns_resolver_t *resolver) {
8543	REQUIRE(VALID_RESOLVER(resolver));
8544	return (resolver->lame_ttl);
8545}
8546
8547void
8548dns_resolver_setlamettl(dns_resolver_t *resolver, isc_uint32_t lame_ttl) {
8549	REQUIRE(VALID_RESOLVER(resolver));
8550	resolver->lame_ttl = lame_ttl;
8551}
8552
8553unsigned int
8554dns_resolver_nrunning(dns_resolver_t *resolver) {
8555	unsigned int n;
8556	LOCK(&resolver->nlock);
8557	n = resolver->nfctx;
8558	UNLOCK(&resolver->nlock);
8559	return (n);
8560}
8561
8562isc_result_t
8563dns_resolver_addalternate(dns_resolver_t *resolver, isc_sockaddr_t *alt,
8564			  dns_name_t *name, in_port_t port) {
8565	alternate_t *a;
8566	isc_result_t result;
8567
8568	REQUIRE(VALID_RESOLVER(resolver));
8569	REQUIRE(!resolver->frozen);
8570	REQUIRE((alt == NULL) ^ (name == NULL));
8571
8572	a = isc_mem_get(resolver->mctx, sizeof(*a));
8573	if (a == NULL)
8574		return (ISC_R_NOMEMORY);
8575	if (alt != NULL) {
8576		a->isaddress = ISC_TRUE;
8577		a->_u.addr = *alt;
8578	} else {
8579		a->isaddress = ISC_FALSE;
8580		a->_u._n.port = port;
8581		dns_name_init(&a->_u._n.name, NULL);
8582		result = dns_name_dup(name, resolver->mctx, &a->_u._n.name);
8583		if (result != ISC_R_SUCCESS) {
8584			isc_mem_put(resolver->mctx, a, sizeof(*a));
8585			return (result);
8586		}
8587	}
8588	ISC_LINK_INIT(a, link);
8589	ISC_LIST_APPEND(resolver->alternates, a, link);
8590
8591	return (ISC_R_SUCCESS);
8592}
8593
8594void
8595dns_resolver_setudpsize(dns_resolver_t *resolver, isc_uint16_t udpsize) {
8596	REQUIRE(VALID_RESOLVER(resolver));
8597	resolver->udpsize = udpsize;
8598}
8599
8600isc_uint16_t
8601dns_resolver_getudpsize(dns_resolver_t *resolver) {
8602	REQUIRE(VALID_RESOLVER(resolver));
8603	return (resolver->udpsize);
8604}
8605
8606void
8607dns_resolver_flushbadcache(dns_resolver_t *resolver, dns_name_t *name) {
8608	unsigned int i;
8609	dns_badcache_t *bad, *prev, *next;
8610
8611	REQUIRE(VALID_RESOLVER(resolver));
8612
8613	LOCK(&resolver->lock);
8614	if (resolver->badcache == NULL)
8615		goto unlock;
8616
8617	if (name != NULL) {
8618		isc_time_t now;
8619		isc_result_t result;
8620		result = isc_time_now(&now);
8621		if (result != ISC_R_SUCCESS)
8622			isc_time_settoepoch(&now);
8623		i = dns_name_hash(name, ISC_FALSE) % resolver->badhash;
8624		prev = NULL;
8625		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8626			int n;
8627			next = bad->next;
8628			n = isc_time_compare(&bad->expire, &now);
8629			if (n < 0 || dns_name_equal(name, &bad->name)) {
8630				if (prev == NULL)
8631					resolver->badcache[i] = bad->next;
8632				else
8633					prev->next = bad->next;
8634				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8635					    bad->name.length);
8636				resolver->badcount--;
8637			} else
8638				prev = bad;
8639		}
8640	} else
8641		destroy_badcache(resolver);
8642
8643 unlock:
8644	UNLOCK(&resolver->lock);
8645
8646}
8647
8648static void
8649resizehash(dns_resolver_t *resolver, isc_time_t *now, isc_boolean_t grow) {
8650	unsigned int newsize;
8651	dns_badcache_t **new, *bad, *next;
8652	unsigned int i;
8653
8654	if (grow)
8655		newsize = resolver->badhash * 2 + 1;
8656	else
8657		newsize = (resolver->badhash - 1) / 2;
8658
8659	new = isc_mem_get(resolver->mctx,
8660			  sizeof(*resolver->badcache) * newsize);
8661	if (new == NULL)
8662		return;
8663	memset(new, 0, sizeof(*resolver->badcache) * newsize);
8664	for (i = 0; i < resolver->badhash; i++) {
8665		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8666			next = bad->next;
8667			if (isc_time_compare(&bad->expire, now) < 0) {
8668				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8669					    bad->name.length);
8670				resolver->badcount--;
8671			} else {
8672				bad->next = new[bad->hashval % newsize];
8673				new[bad->hashval % newsize] = bad;
8674			}
8675		}
8676	}
8677	isc_mem_put(resolver->mctx, resolver->badcache,
8678		    sizeof(*resolver->badcache) * resolver->badhash);
8679	resolver->badhash = newsize;
8680	resolver->badcache = new;
8681}
8682
8683void
8684dns_resolver_addbadcache(dns_resolver_t *resolver, dns_name_t *name,
8685			 dns_rdatatype_t type, isc_time_t *expire)
8686{
8687	isc_time_t now;
8688	isc_result_t result = ISC_R_SUCCESS;
8689	unsigned int i, hashval;
8690	dns_badcache_t *bad, *prev, *next;
8691
8692	REQUIRE(VALID_RESOLVER(resolver));
8693
8694	LOCK(&resolver->lock);
8695	if (resolver->badcache == NULL) {
8696		resolver->badcache = isc_mem_get(resolver->mctx,
8697						 sizeof(*resolver->badcache) *
8698						 DNS_BADCACHE_SIZE);
8699		if (resolver->badcache == NULL)
8700			goto cleanup;
8701		resolver->badhash = DNS_BADCACHE_SIZE;
8702		memset(resolver->badcache, 0, sizeof(*resolver->badcache) *
8703		       resolver->badhash);
8704	}
8705
8706	result = isc_time_now(&now);
8707	if (result != ISC_R_SUCCESS)
8708		isc_time_settoepoch(&now);
8709	hashval = dns_name_hash(name, ISC_FALSE);
8710	i = hashval % resolver->badhash;
8711	prev = NULL;
8712	for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8713		next = bad->next;
8714		if (bad->type == type && dns_name_equal(name, &bad->name))
8715			break;
8716		if (isc_time_compare(&bad->expire, &now) < 0) {
8717			if (prev == NULL)
8718				resolver->badcache[i] = bad->next;
8719			else
8720				prev->next = bad->next;
8721			isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8722				    bad->name.length);
8723			resolver->badcount--;
8724		} else
8725			prev = bad;
8726	}
8727	if (bad == NULL) {
8728		isc_buffer_t buffer;
8729		bad = isc_mem_get(resolver->mctx, sizeof(*bad) + name->length);
8730		if (bad == NULL)
8731			goto cleanup;
8732		bad->type = type;
8733		bad->hashval = hashval;
8734		bad->expire = *expire;
8735		isc_buffer_init(&buffer, bad + 1, name->length);
8736		dns_name_init(&bad->name, NULL);
8737		dns_name_copy(name, &bad->name, &buffer);
8738		bad->next = resolver->badcache[i];
8739		resolver->badcache[i] = bad;
8740		resolver->badcount++;
8741		if (resolver->badcount > resolver->badhash * 8)
8742			resizehash(resolver, &now, ISC_TRUE);
8743		if (resolver->badcount < resolver->badhash * 2 &&
8744		    resolver->badhash > DNS_BADCACHE_SIZE)
8745			resizehash(resolver, &now, ISC_FALSE);
8746	} else
8747		bad->expire = *expire;
8748 cleanup:
8749	UNLOCK(&resolver->lock);
8750}
8751
8752isc_boolean_t
8753dns_resolver_getbadcache(dns_resolver_t *resolver, dns_name_t *name,
8754			 dns_rdatatype_t type, isc_time_t *now)
8755{
8756	dns_badcache_t *bad, *prev, *next;
8757	isc_boolean_t answer = ISC_FALSE;
8758	unsigned int i;
8759
8760	REQUIRE(VALID_RESOLVER(resolver));
8761
8762	LOCK(&resolver->lock);
8763	if (resolver->badcache == NULL)
8764		goto unlock;
8765
8766	i = dns_name_hash(name, ISC_FALSE) % resolver->badhash;
8767	prev = NULL;
8768	for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8769		next = bad->next;
8770		/*
8771		 * Search the hash list. Clean out expired records as we go.
8772		 */
8773		if (isc_time_compare(&bad->expire, now) < 0) {
8774			if (prev != NULL)
8775				prev->next = bad->next;
8776			else
8777				resolver->badcache[i] = bad->next;
8778			isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8779				    bad->name.length);
8780			resolver->badcount--;
8781			continue;
8782		}
8783		if (bad->type == type && dns_name_equal(name, &bad->name)) {
8784			answer = ISC_TRUE;
8785			break;
8786		}
8787		prev = bad;
8788	}
8789
8790	/*
8791	 * Slow sweep to clean out stale records.
8792	 */
8793	i = resolver->badsweep++ % resolver->badhash;
8794	bad = resolver->badcache[i];
8795	if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
8796		resolver->badcache[i] = bad->next;
8797		isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8798			    bad->name.length);
8799		resolver->badcount--;
8800	}
8801
8802 unlock:
8803	UNLOCK(&resolver->lock);
8804	return (answer);
8805}
8806
8807void
8808dns_resolver_printbadcache(dns_resolver_t *resolver, FILE *fp) {
8809	char namebuf[DNS_NAME_FORMATSIZE];
8810	char typebuf[DNS_RDATATYPE_FORMATSIZE];
8811	dns_badcache_t *bad, *next, *prev;
8812	isc_time_t now;
8813	unsigned int i;
8814	isc_uint64_t t;
8815
8816	LOCK(&resolver->lock);
8817	fprintf(fp, ";\n; Bad cache\n;\n");
8818
8819	if (resolver->badcache == NULL)
8820		goto unlock;
8821
8822	TIME_NOW(&now);
8823	for (i = 0; i < resolver->badhash; i++) {
8824		prev = NULL;
8825		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8826			next = bad->next;
8827			if (isc_time_compare(&bad->expire, &now) < 0) {
8828				if (prev != NULL)
8829					prev->next = bad->next;
8830				else
8831					resolver->badcache[i] = bad->next;
8832				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8833					    bad->name.length);
8834				resolver->badcount--;
8835				continue;
8836			}
8837			prev = bad;
8838			dns_name_format(&bad->name, namebuf, sizeof(namebuf));
8839			dns_rdatatype_format(bad->type, typebuf,
8840					     sizeof(typebuf));
8841			t = isc_time_microdiff(&bad->expire, &now);
8842			t /= 1000;
8843			fprintf(fp, "; %s/%s [ttl "
8844				"%" ISC_PLATFORM_QUADFORMAT "u]\n",
8845				namebuf, typebuf, t);
8846		}
8847	}
8848
8849 unlock:
8850	UNLOCK(&resolver->lock);
8851}
8852
8853static void
8854free_algorithm(void *node, void *arg) {
8855	unsigned char *algorithms = node;
8856	isc_mem_t *mctx = arg;
8857
8858	isc_mem_put(mctx, algorithms, *algorithms);
8859}
8860
8861void
8862dns_resolver_reset_algorithms(dns_resolver_t *resolver) {
8863
8864	REQUIRE(VALID_RESOLVER(resolver));
8865
8866#if USE_ALGLOCK
8867	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
8868#endif
8869	if (resolver->algorithms != NULL)
8870		dns_rbt_destroy(&resolver->algorithms);
8871#if USE_ALGLOCK
8872	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
8873#endif
8874}
8875
8876isc_result_t
8877dns_resolver_disable_algorithm(dns_resolver_t *resolver, dns_name_t *name,
8878			       unsigned int alg)
8879{
8880	unsigned int len, mask;
8881	unsigned char *new;
8882	unsigned char *algorithms;
8883	isc_result_t result;
8884	dns_rbtnode_t *node = NULL;
8885
8886	REQUIRE(VALID_RESOLVER(resolver));
8887	if (alg > 255)
8888		return (ISC_R_RANGE);
8889
8890#if USE_ALGLOCK
8891	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
8892#endif
8893	if (resolver->algorithms == NULL) {
8894		result = dns_rbt_create(resolver->mctx, free_algorithm,
8895					resolver->mctx, &resolver->algorithms);
8896		if (result != ISC_R_SUCCESS)
8897			goto cleanup;
8898	}
8899
8900	len = alg/8 + 2;
8901	mask = 1 << (alg%8);
8902
8903	result = dns_rbt_addnode(resolver->algorithms, name, &node);
8904
8905	if (result == ISC_R_SUCCESS || result == ISC_R_EXISTS) {
8906		algorithms = node->data;
8907		if (algorithms == NULL || len > *algorithms) {
8908			new = isc_mem_get(resolver->mctx, len);
8909			if (new == NULL) {
8910				result = ISC_R_NOMEMORY;
8911				goto cleanup;
8912			}
8913			memset(new, 0, len);
8914			if (algorithms != NULL)
8915				memmove(new, algorithms, *algorithms);
8916			new[len-1] |= mask;
8917			*new = len;
8918			node->data = new;
8919			if (algorithms != NULL)
8920				isc_mem_put(resolver->mctx, algorithms,
8921					    *algorithms);
8922		} else
8923			algorithms[len-1] |= mask;
8924	}
8925	result = ISC_R_SUCCESS;
8926 cleanup:
8927#if USE_ALGLOCK
8928	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
8929#endif
8930	return (result);
8931}
8932
8933isc_boolean_t
8934dns_resolver_algorithm_supported(dns_resolver_t *resolver, dns_name_t *name,
8935				 unsigned int alg)
8936{
8937	unsigned int len, mask;
8938	unsigned char *algorithms;
8939	void *data = NULL;
8940	isc_result_t result;
8941	isc_boolean_t found = ISC_FALSE;
8942
8943	REQUIRE(VALID_RESOLVER(resolver));
8944
8945	/*
8946	 * DH is unsupported for DNSKEYs, see RFC 4034 sec. A.1.
8947	 */
8948	if ((alg == DST_ALG_DH) || (alg == DST_ALG_INDIRECT))
8949		return (ISC_FALSE);
8950
8951#if USE_ALGLOCK
8952	RWLOCK(&resolver->alglock, isc_rwlocktype_read);
8953#endif
8954	if (resolver->algorithms == NULL)
8955		goto unlock;
8956	result = dns_rbt_findname(resolver->algorithms, name, 0, NULL, &data);
8957	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8958		len = alg/8 + 2;
8959		mask = 1 << (alg%8);
8960		algorithms = data;
8961		if (len <= *algorithms && (algorithms[len-1] & mask) != 0)
8962			found = ISC_TRUE;
8963	}
8964 unlock:
8965#if USE_ALGLOCK
8966	RWUNLOCK(&resolver->alglock, isc_rwlocktype_read);
8967#endif
8968	if (found)
8969		return (ISC_FALSE);
8970
8971	return (dst_algorithm_supported(alg));
8972}
8973
8974isc_boolean_t
8975dns_resolver_digest_supported(dns_resolver_t *resolver, unsigned int digest) {
8976
8977	UNUSED(resolver);
8978	return (dns_ds_digest_supported(digest));
8979}
8980
8981void
8982dns_resolver_resetmustbesecure(dns_resolver_t *resolver) {
8983
8984	REQUIRE(VALID_RESOLVER(resolver));
8985
8986#if USE_MBSLOCK
8987	RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
8988#endif
8989	if (resolver->mustbesecure != NULL)
8990		dns_rbt_destroy(&resolver->mustbesecure);
8991#if USE_MBSLOCK
8992	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
8993#endif
8994}
8995
8996static isc_boolean_t yes = ISC_TRUE, no = ISC_FALSE;
8997
8998isc_result_t
8999dns_resolver_setmustbesecure(dns_resolver_t *resolver, dns_name_t *name,
9000			     isc_boolean_t value)
9001{
9002	isc_result_t result;
9003
9004	REQUIRE(VALID_RESOLVER(resolver));
9005
9006#if USE_MBSLOCK
9007	RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
9008#endif
9009	if (resolver->mustbesecure == NULL) {
9010		result = dns_rbt_create(resolver->mctx, NULL, NULL,
9011					&resolver->mustbesecure);
9012		if (result != ISC_R_SUCCESS)
9013			goto cleanup;
9014	}
9015	result = dns_rbt_addname(resolver->mustbesecure, name,
9016				 value ? &yes : &no);
9017 cleanup:
9018#if USE_MBSLOCK
9019	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
9020#endif
9021	return (result);
9022}
9023
9024isc_boolean_t
9025dns_resolver_getmustbesecure(dns_resolver_t *resolver, dns_name_t *name) {
9026	void *data = NULL;
9027	isc_boolean_t value = ISC_FALSE;
9028	isc_result_t result;
9029
9030	REQUIRE(VALID_RESOLVER(resolver));
9031
9032#if USE_MBSLOCK
9033	RWLOCK(&resolver->mbslock, isc_rwlocktype_read);
9034#endif
9035	if (resolver->mustbesecure == NULL)
9036		goto unlock;
9037	result = dns_rbt_findname(resolver->mustbesecure, name, 0, NULL, &data);
9038	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
9039		value = *(isc_boolean_t*)data;
9040 unlock:
9041#if USE_MBSLOCK
9042	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_read);
9043#endif
9044	return (value);
9045}
9046
9047void
9048dns_resolver_getclientsperquery(dns_resolver_t *resolver, isc_uint32_t *cur,
9049				isc_uint32_t *min, isc_uint32_t *max)
9050{
9051	REQUIRE(VALID_RESOLVER(resolver));
9052
9053	LOCK(&resolver->lock);
9054	if (cur != NULL)
9055		*cur = resolver->spillat;
9056	if (min != NULL)
9057		*min = resolver->spillatmin;
9058	if (max != NULL)
9059		*max = resolver->spillatmax;
9060	UNLOCK(&resolver->lock);
9061}
9062
9063void
9064dns_resolver_setclientsperquery(dns_resolver_t *resolver, isc_uint32_t min,
9065				isc_uint32_t max)
9066{
9067	REQUIRE(VALID_RESOLVER(resolver));
9068
9069	LOCK(&resolver->lock);
9070	resolver->spillatmin = resolver->spillat = min;
9071	resolver->spillatmax = max;
9072	UNLOCK(&resolver->lock);
9073}
9074
9075isc_boolean_t
9076dns_resolver_getzeronosoattl(dns_resolver_t *resolver) {
9077	REQUIRE(VALID_RESOLVER(resolver));
9078
9079	return (resolver->zero_no_soa_ttl);
9080}
9081
9082void
9083dns_resolver_setzeronosoattl(dns_resolver_t *resolver, isc_boolean_t state) {
9084	REQUIRE(VALID_RESOLVER(resolver));
9085
9086	resolver->zero_no_soa_ttl = state;
9087}
9088
9089unsigned int
9090dns_resolver_getoptions(dns_resolver_t *resolver) {
9091	REQUIRE(VALID_RESOLVER(resolver));
9092
9093	return (resolver->options);
9094}
9095
9096unsigned int
9097dns_resolver_gettimeout(dns_resolver_t *resolver) {
9098	REQUIRE(VALID_RESOLVER(resolver));
9099
9100	return (resolver->query_timeout);
9101}
9102
9103void
9104dns_resolver_settimeout(dns_resolver_t *resolver, unsigned int seconds) {
9105	REQUIRE(VALID_RESOLVER(resolver));
9106
9107	if (seconds == 0)
9108		seconds = DEFAULT_QUERY_TIMEOUT;
9109	if (seconds > MAXIMUM_QUERY_TIMEOUT)
9110		seconds = MAXIMUM_QUERY_TIMEOUT;
9111	if (seconds < MINIMUM_QUERY_TIMEOUT)
9112		seconds =  MINIMUM_QUERY_TIMEOUT;
9113
9114	resolver->query_timeout = seconds;
9115}
9116
9117void
9118dns_resolver_setmaxdepth(dns_resolver_t *resolver, unsigned int maxdepth) {
9119	REQUIRE(VALID_RESOLVER(resolver));
9120	resolver->maxdepth = maxdepth;
9121}
9122
9123unsigned int
9124dns_resolver_getmaxdepth(dns_resolver_t *resolver) {
9125	REQUIRE(VALID_RESOLVER(resolver));
9126	return (resolver->maxdepth);
9127}
9128