1/*	$NetBSD: resolver.c,v 1.11.4.3 2012/12/15 05:39:59 riz Exp $	*/
2
3/*
4 * Copyright (C) 2004-2012  Internet Systems Consortium, Inc. ("ISC")
5 * Copyright (C) 1999-2003  Internet Software Consortium.
6 *
7 * Permission to use, copy, modify, and/or distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
13 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19
20/* Id */
21
22/*! \file */
23
24#include <config.h>
25
26#include <isc/platform.h>
27#include <isc/print.h>
28#include <isc/string.h>
29#include <isc/random.h>
30#include <isc/task.h>
31#include <isc/stats.h>
32#include <isc/timer.h>
33#include <isc/util.h>
34
35#include <dns/acl.h>
36#include <dns/adb.h>
37#include <dns/cache.h>
38#include <dns/db.h>
39#include <dns/dispatch.h>
40#include <dns/ds.h>
41#include <dns/events.h>
42#include <dns/forward.h>
43#include <dns/keytable.h>
44#include <dns/log.h>
45#include <dns/message.h>
46#include <dns/ncache.h>
47#include <dns/opcode.h>
48#include <dns/peer.h>
49#include <dns/rbt.h>
50#include <dns/rcode.h>
51#include <dns/rdata.h>
52#include <dns/rdataclass.h>
53#include <dns/rdatalist.h>
54#include <dns/rdataset.h>
55#include <dns/rdatastruct.h>
56#include <dns/rdatatype.h>
57#include <dns/resolver.h>
58#include <dns/result.h>
59#include <dns/rootns.h>
60#include <dns/stats.h>
61#include <dns/tsig.h>
62#include <dns/validator.h>
63
/*
 * Resolver trace logging.
 *
 * When DNS_RESOLVER_TRACE is defined, each macro below writes one message
 * at ISC_LOG_DEBUG(3) to the resolver category/module of dns_lctx.  The
 * macros pick up their subject from a variable of a fixed name that must
 * be in scope at the point of use:
 *
 *	RTRACE(m)		uses 'res'
 *	RRTRACE(r, m)		uses the resolver passed as 'r'
 *	FCTXTRACE(m)		uses 'fctx' (and fctx->info)
 *	FCTXTRACE2(m1, m2)	uses 'fctx' (and fctx->info)
 *	FTRACE(m)		uses 'fetch' (and fetch->private)
 *	QTRACE(m)		uses 'query' (and query->fctx)
 *
 * When DNS_RESOLVER_TRACE is not defined, every macro -- FCTXTRACE2
 * included -- expands to nothing, so call sites compile either way.
 */
#define DNS_RESOLVER_TRACE
#ifdef DNS_RESOLVER_TRACE
#define RTRACE(m)       isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "res %p: %s", res, (m))
#define RRTRACE(r, m)   isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "res %p: %s", (r), (m))
#define FCTXTRACE(m)    isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fctx %p(%s): %s", fctx, fctx->info, (m))
#define FCTXTRACE2(m1, m2) \
			isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fctx %p(%s): %s %s", \
				      fctx, fctx->info, (m1), (m2))
#define FTRACE(m)       isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "fetch %p (fctx %p(%s)): %s", \
				      fetch, fetch->private, \
				      fetch->private->info, (m))
#define QTRACE(m)       isc_log_write(dns_lctx, \
				      DNS_LOGCATEGORY_RESOLVER, \
				      DNS_LOGMODULE_RESOLVER, \
				      ISC_LOG_DEBUG(3), \
				      "resquery %p (fctx %p(%s)): %s", \
				      query, query->fctx, \
				      query->fctx->info, (m))
#else
#define RTRACE(m)
#define RRTRACE(r, m)
#define FCTXTRACE(m)
#define FCTXTRACE2(m1, m2)
#define FTRACE(m)
#define QTRACE(m)
#endif
109
/* Microseconds per second; used to convert the timeouts below. */
#define US_PER_SEC 1000000U

/*
 * The maximum time we will wait for a single query, in seconds and
 * (the _US variant) in microseconds.
 */
#define MAX_SINGLE_QUERY_TIMEOUT 9U
#define MAX_SINGLE_QUERY_TIMEOUT_US (MAX_SINGLE_QUERY_TIMEOUT * US_PER_SEC)

/*
 * We need to allow an individual query time to complete / timeout, so
 * the whole-query minimum is one second longer than the single-query
 * maximum.
 */
#define MINIMUM_QUERY_TIMEOUT (MAX_SINGLE_QUERY_TIMEOUT + 1U)

/*
 * The default time in seconds for the whole query to live.
 * May be overridden at build time.
 */
#ifndef DEFAULT_QUERY_TIMEOUT
#define DEFAULT_QUERY_TIMEOUT MINIMUM_QUERY_TIMEOUT
#endif

/*
 * The maximum time in seconds for the whole query to live.
 * May be overridden at build time.
 */
#ifndef MAXIMUM_QUERY_TIMEOUT
#define MAXIMUM_QUERY_TIMEOUT 30
#endif

/*%
 * Maximum EDNS0 input packet size.
 */
#define RECV_BUFFER_SIZE 4096 /* XXXRTH  Constant. */

/*%
 * This defines the maximum number of timeouts we will permit before we
 * disable EDNS0 on the query.
 */
#define MAX_EDNS0_TIMEOUTS 3
141
142typedef struct fetchctx fetchctx_t;
143
144typedef struct query {
145	/* Locked by task event serialization. */
146	unsigned int			magic;
147	fetchctx_t *			fctx;
148	isc_mem_t *			mctx;
149	dns_dispatchmgr_t *		dispatchmgr;
150	dns_dispatch_t *		dispatch;
151	isc_boolean_t			exclusivesocket;
152	dns_adbaddrinfo_t *		addrinfo;
153	isc_socket_t *			tcpsocket;
154	isc_time_t			start;
155	dns_messageid_t			id;
156	dns_dispentry_t *		dispentry;
157	ISC_LINK(struct query)		link;
158	isc_buffer_t			buffer;
159	isc_buffer_t			*tsig;
160	dns_tsigkey_t			*tsigkey;
161	unsigned int			options;
162	unsigned int			attributes;
163	unsigned int			sends;
164	unsigned int			connects;
165	unsigned char			data[512];
166} resquery_t;
167
/* Magic number identifying a valid resquery_t, and the matching check. */
#define QUERY_MAGIC ISC_MAGIC('Q', '!', '!', '!')
#define VALID_QUERY(query) ISC_MAGIC_VALID(query, QUERY_MAGIC)

/* Bit in resquery_t.attributes: the query has been canceled. */
#define RESQUERY_ATTR_CANCELED 0x02

/* State predicates on a resquery_t pointer 'q'. */
#define RESQUERY_CONNECTING(q) ((q)->connects > 0)
#define RESQUERY_CANCELED(q) \
	(((q)->attributes & RESQUERY_ATTR_CANCELED) != 0)
#define RESQUERY_SENDING(q) ((q)->sends > 0)
177
/*%
 * Lifecycle state of a fetch context.
 */
typedef enum {
	fetchstate_init = 0,	/*%< Start event has not run yet. */
	fetchstate_active = 1,
	fetchstate_done = 2	/*%< FETCHDONE events posted. */
} fetchstate;
183
/*%
 * Classification passed to add_bad() when a server is marked bad.
 */
typedef enum {
	badns_unreachable = 0,
	badns_response = 1,
	badns_validation = 2
} badnstype_t;
189
190struct fetchctx {
191	/*% Not locked. */
192	unsigned int			magic;
193	dns_resolver_t *		res;
194	dns_name_t			name;
195	dns_rdatatype_t			type;
196	unsigned int			options;
197	unsigned int			bucketnum;
198	char *				info;
199	isc_mem_t *			mctx;
200
201	/*% Locked by appropriate bucket lock. */
202	fetchstate			state;
203	isc_boolean_t			want_shutdown;
204	isc_boolean_t			cloned;
205	isc_boolean_t			spilled;
206	unsigned int			references;
207	isc_event_t			control_event;
208	ISC_LINK(struct fetchctx)       link;
209	ISC_LIST(dns_fetchevent_t)      events;
210	/*% Locked by task event serialization. */
211	dns_name_t			domain;
212	dns_rdataset_t			nameservers;
213	unsigned int			attributes;
214	isc_timer_t *			timer;
215	isc_time_t			expires;
216	isc_interval_t			interval;
217	dns_message_t *			qmessage;
218	dns_message_t *			rmessage;
219	ISC_LIST(resquery_t)		queries;
220	dns_adbfindlist_t		finds;
221	dns_adbfind_t *			find;
222	dns_adbfindlist_t		altfinds;
223	dns_adbfind_t *			altfind;
224	dns_adbaddrinfolist_t		forwaddrs;
225	dns_adbaddrinfolist_t		altaddrs;
226	isc_sockaddrlist_t		forwarders;
227	dns_fwdpolicy_t			fwdpolicy;
228	isc_sockaddrlist_t		bad;
229	isc_sockaddrlist_t		edns;
230	isc_sockaddrlist_t		edns512;
231	isc_sockaddrlist_t		bad_edns;
232	dns_validator_t			*validator;
233	ISC_LIST(dns_validator_t)       validators;
234	dns_db_t *			cache;
235	dns_adb_t *			adb;
236	isc_boolean_t			ns_ttl_ok;
237	isc_uint32_t			ns_ttl;
238
239	/*%
240	 * The number of events we're waiting for.
241	 */
242	unsigned int			pending;
243
244	/*%
245	 * The number of times we've "restarted" the current
246	 * nameserver set.  This acts as a failsafe to prevent
247	 * us from pounding constantly on a particular set of
248	 * servers that, for whatever reason, are not giving
249	 * us useful responses, but are responding in such a
250	 * way that they are not marked "bad".
251	 */
252	unsigned int			restarts;
253
254	/*%
255	 * The number of timeouts that have occurred since we
256	 * last successfully received a response packet.  This
257	 * is used for EDNS0 black hole detection.
258	 */
259	unsigned int			timeouts;
260
261	/*%
262	 * Look aside state for DS lookups.
263	 */
264	dns_name_t 			nsname;
265	dns_fetch_t *			nsfetch;
266	dns_rdataset_t			nsrrset;
267
268	/*%
269	 * Number of queries that reference this context.
270	 */
271	unsigned int			nqueries;
272
273	/*%
274	 * The reason to print when logging a successful
275	 * response to a query.
276	 */
277	const char *			reason;
278
279	/*%
280	 * Random numbers to use for mixing up server addresses.
281	 */
282	isc_uint32_t                    rand_buf;
283	isc_uint32_t                    rand_bits;
284
285	/*%
286	 * Fetch-local statistics for detailed logging.
287	 */
288	isc_result_t			result; /*%< fetch result  */
289	isc_result_t			vresult; /*%< validation result  */
290	int				exitline;
291	isc_time_t			start;
292	isc_uint64_t			duration;
293	isc_boolean_t			logged;
294	unsigned int			querysent;
295	unsigned int			referrals;
296	unsigned int			lamecount;
297	unsigned int			neterr;
298	unsigned int			badresp;
299	unsigned int			adberr;
300	unsigned int			findfail;
301	unsigned int			valfail;
302	isc_boolean_t			timeout;
303	dns_adbaddrinfo_t 		*addrinfo;
304	isc_sockaddr_t			*client;
305};
306
/* Magic number identifying a valid fetchctx_t, and the matching check. */
#define FCTX_MAGIC ISC_MAGIC('F', '!', '!', '!')
#define VALID_FCTX(fctx) ISC_MAGIC_VALID(fctx, FCTX_MAGIC)

/* Bits stored in fetchctx_t.attributes. */
#define FCTX_ATTR_HAVEANSWER	0x0001
#define FCTX_ATTR_GLUING	0x0002
#define FCTX_ATTR_ADDRWAIT	0x0004
#define FCTX_ATTR_SHUTTINGDOWN	0x0008
#define FCTX_ATTR_WANTCACHE	0x0010
#define FCTX_ATTR_WANTNCACHE	0x0020
#define FCTX_ATTR_NEEDEDNS0	0x0040
#define FCTX_ATTR_TRIEDFIND	0x0080
#define FCTX_ATTR_TRIEDALT	0x0100

/* One predicate per attribute bit; 'f' is a fetchctx_t pointer. */
#define HAVE_ANSWER(f)	(((f)->attributes & FCTX_ATTR_HAVEANSWER) != 0)
#define GLUING(f)	(((f)->attributes & FCTX_ATTR_GLUING) != 0)
#define ADDRWAIT(f)	(((f)->attributes & FCTX_ATTR_ADDRWAIT) != 0)
#define SHUTTINGDOWN(f)	(((f)->attributes & FCTX_ATTR_SHUTTINGDOWN) != 0)
#define WANTCACHE(f)	(((f)->attributes & FCTX_ATTR_WANTCACHE) != 0)
#define WANTNCACHE(f)	(((f)->attributes & FCTX_ATTR_WANTNCACHE) != 0)
#define NEEDEDNS0(f)	(((f)->attributes & FCTX_ATTR_NEEDEDNS0) != 0)
#define TRIEDFIND(f)	(((f)->attributes & FCTX_ATTR_TRIEDFIND) != 0)
#define TRIEDALT(f)	(((f)->attributes & FCTX_ATTR_TRIEDALT) != 0)
333
334typedef struct {
335	dns_adbaddrinfo_t *		addrinfo;
336	fetchctx_t *			fctx;
337} dns_valarg_t;
338
339struct dns_fetch {
340	unsigned int			magic;
341	fetchctx_t *			private;
342};
343
344#define DNS_FETCH_MAGIC			ISC_MAGIC('F', 't', 'c', 'h')
345#define DNS_FETCH_VALID(fetch)		ISC_MAGIC_VALID(fetch, DNS_FETCH_MAGIC)
346
347typedef struct fctxbucket {
348	isc_task_t *			task;
349	isc_mutex_t			lock;
350	ISC_LIST(fetchctx_t)		fctxs;
351	isc_boolean_t			exiting;
352	isc_mem_t *			mctx;
353} fctxbucket_t;
354
355typedef struct alternate {
356	isc_boolean_t			isaddress;
357	union   {
358		isc_sockaddr_t		addr;
359		struct {
360			dns_name_t      name;
361			in_port_t       port;
362		} _n;
363	} _u;
364	ISC_LINK(struct alternate)      link;
365} alternate_t;
366
367typedef struct dns_badcache dns_badcache_t;
368struct dns_badcache {
369	dns_badcache_t *	next;
370	dns_rdatatype_t 	type;
371	isc_time_t		expire;
372	unsigned int		hashval;
373	dns_name_t		name;
374};
375#define DNS_BADCACHE_SIZE 1021
376#define DNS_BADCACHE_TTL(fctx) \
377	(((fctx)->res->lame_ttl > 30 ) ? (fctx)->res->lame_ttl : 30)
378
379struct dns_resolver {
380	/* Unlocked. */
381	unsigned int			magic;
382	isc_mem_t *			mctx;
383	isc_mutex_t			lock;
384	isc_mutex_t			nlock;
385	isc_mutex_t			primelock;
386	dns_rdataclass_t		rdclass;
387	isc_socketmgr_t *		socketmgr;
388	isc_timermgr_t *		timermgr;
389	isc_taskmgr_t *			taskmgr;
390	dns_view_t *			view;
391	isc_boolean_t			frozen;
392	unsigned int			options;
393	dns_dispatchmgr_t *		dispatchmgr;
394	dns_dispatch_t *		dispatchv4;
395	isc_boolean_t			exclusivev4;
396	dns_dispatch_t *		dispatchv6;
397	isc_boolean_t			exclusivev6;
398	unsigned int			ndisps;
399	unsigned int			nbuckets;
400	fctxbucket_t *			buckets;
401	isc_uint32_t			lame_ttl;
402	ISC_LIST(alternate_t)		alternates;
403	isc_uint16_t			udpsize;
404#if USE_ALGLOCK
405	isc_rwlock_t			alglock;
406#endif
407	dns_rbt_t *			algorithms;
408#if USE_MBSLOCK
409	isc_rwlock_t			mbslock;
410#endif
411	dns_rbt_t *			mustbesecure;
412	unsigned int			spillatmax;
413	unsigned int			spillatmin;
414	isc_timer_t *			spillattimer;
415	isc_boolean_t			zero_no_soa_ttl;
416	unsigned int			query_timeout;
417
418	/* Locked by lock. */
419	unsigned int			references;
420	isc_boolean_t			exiting;
421	isc_eventlist_t			whenshutdown;
422	unsigned int			activebuckets;
423	isc_boolean_t			priming;
424	unsigned int			spillat;	/* clients-per-query */
425	unsigned int			nextdisp;
426
427	/* Bad cache. */
428	dns_badcache_t  ** 		badcache;
429	unsigned int 			badcount;
430	unsigned int 			badhash;
431	unsigned int 			badsweep;
432
433	/* Locked by primelock. */
434	dns_fetch_t *			primefetch;
435	/* Locked by nlock. */
436	unsigned int			nfctx;
437};
438
439#define RES_MAGIC			ISC_MAGIC('R', 'e', 's', '!')
440#define VALID_RESOLVER(res)		ISC_MAGIC_VALID(res, RES_MAGIC)
441
/*%
 * Private addrinfo flags.  These must not conflict with DNS_FETCHOPT_NOEDNS0,
 * which we also use as an addrinfo flag.
 */
#define FCTX_ADDRINFO_MARK	0x0001
#define FCTX_ADDRINFO_FORWARDER	0x1000
#define FCTX_ADDRINFO_TRIED	0x2000

/* Predicates on an addrinfo pointer 'a', testing a->flags. */
#define UNMARKED(a)	(((a)->flags & FCTX_ADDRINFO_MARK) == 0)
#define ISFORWARDER(a)	(((a)->flags & FCTX_ADDRINFO_FORWARDER) != 0)
#define TRIED(a)	(((a)->flags & FCTX_ADDRINFO_TRIED) != 0)

/* Predicates on an rdataset pointer 'r', testing r->attributes. */
#define NXDOMAIN(r) (((r)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
#define NEGATIVE(r) (((r)->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
458
459static void destroy(dns_resolver_t *res);
460static void empty_bucket(dns_resolver_t *res);
461static isc_result_t resquery_send(resquery_t *query);
462static void resquery_response(isc_task_t *task, isc_event_t *event);
463static void resquery_connected(isc_task_t *task, isc_event_t *event);
464static void fctx_try(fetchctx_t *fctx, isc_boolean_t retrying,
465		     isc_boolean_t badcache);
466static void fctx_destroy(fetchctx_t *fctx);
467static isc_boolean_t fctx_unlink(fetchctx_t *fctx);
468static isc_result_t ncache_adderesult(dns_message_t *message,
469				      dns_db_t *cache, dns_dbnode_t *node,
470				      dns_rdatatype_t covers,
471				      isc_stdtime_t now, dns_ttl_t maxttl,
472				      isc_boolean_t optout,
473				      dns_rdataset_t *ardataset,
474				      isc_result_t *eresultp);
475static void validated(isc_task_t *task, isc_event_t *event);
476static isc_boolean_t maybe_destroy(fetchctx_t *fctx, isc_boolean_t locked);
477static void add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
478		    isc_result_t reason, badnstype_t badtype);
479
480/*%
481 * Increment resolver-related statistics counters.
482 */
483static inline void
484inc_stats(dns_resolver_t *res, isc_statscounter_t counter) {
485	if (res->view->resstats != NULL)
486		isc_stats_increment(res->view->resstats, counter);
487}
488
489static isc_result_t
490valcreate(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, dns_name_t *name,
491	  dns_rdatatype_t type, dns_rdataset_t *rdataset,
492	  dns_rdataset_t *sigrdataset, unsigned int valoptions,
493	  isc_task_t *task)
494{
495	dns_validator_t *validator = NULL;
496	dns_valarg_t *valarg;
497	isc_result_t result;
498
499	valarg = isc_mem_get(fctx->mctx, sizeof(*valarg));
500	if (valarg == NULL)
501		return (ISC_R_NOMEMORY);
502
503	valarg->fctx = fctx;
504	valarg->addrinfo = addrinfo;
505
506	if (!ISC_LIST_EMPTY(fctx->validators))
507		INSIST((valoptions & DNS_VALIDATOR_DEFER) != 0);
508
509	result = dns_validator_create(fctx->res->view, name, type, rdataset,
510				      sigrdataset, fctx->rmessage,
511				      valoptions, task, validated, valarg,
512				      &validator);
513	if (result == ISC_R_SUCCESS) {
514		inc_stats(fctx->res, dns_resstatscounter_val);
515		if ((valoptions & DNS_VALIDATOR_DEFER) == 0) {
516			INSIST(fctx->validator == NULL);
517			fctx->validator = validator;
518		}
519		ISC_LIST_APPEND(fctx->validators, validator, link);
520	} else
521		isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
522	return (result);
523}
524
525static isc_boolean_t
526rrsig_fromchildzone(fetchctx_t *fctx, dns_rdataset_t *rdataset) {
527	dns_namereln_t namereln;
528	dns_rdata_rrsig_t rrsig;
529	dns_rdata_t rdata = DNS_RDATA_INIT;
530	int order;
531	isc_result_t result;
532	unsigned int labels;
533
534	for (result = dns_rdataset_first(rdataset);
535	     result == ISC_R_SUCCESS;
536	     result = dns_rdataset_next(rdataset)) {
537		dns_rdataset_current(rdataset, &rdata);
538		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
539		RUNTIME_CHECK(result == ISC_R_SUCCESS);
540		namereln = dns_name_fullcompare(&rrsig.signer, &fctx->domain,
541						&order, &labels);
542		if (namereln == dns_namereln_subdomain)
543			return (ISC_TRUE);
544		dns_rdata_reset(&rdata);
545	}
546	return (ISC_FALSE);
547}
548
549static isc_boolean_t
550fix_mustbedelegationornxdomain(dns_message_t *message, fetchctx_t *fctx) {
551	dns_name_t *name;
552	dns_name_t *domain = &fctx->domain;
553	dns_rdataset_t *rdataset;
554	dns_rdatatype_t type;
555	isc_result_t result;
556	isc_boolean_t keep_auth = ISC_FALSE;
557
558	if (message->rcode == dns_rcode_nxdomain)
559		return (ISC_FALSE);
560
561	/*
562	 * A DS RRset can appear anywhere in a zone, even for a delegation-only
563	 * zone.  So a response to an explicit query for this type should be
564	 * excluded from delegation-only fixup.
565	 *
566	 * SOA, NS, and DNSKEY can only exist at a zone apex, so a postive
567	 * response to a query for these types can never violate the
568	 * delegation-only assumption: if the query name is below a
569	 * zone cut, the response should normally be a referral, which should
570	 * be accepted; if the query name is below a zone cut but the server
571	 * happens to have authority for the zone of the query name, the
572	 * response is a (non-referral) answer.  But this does not violate
573	 * delegation-only because the query name must be in a different zone
574	 * due to the "apex-only" nature of these types.  Note that if the
575	 * remote server happens to have authority for a child zone of a
576	 * delegation-only zone, we may still incorrectly "fix" the response
577	 * with NXDOMAIN for queries for other types.  Unfortunately it's
578	 * generally impossible to differentiate this case from violation of
579	 * the delegation-only assumption.  Once the resolver learns the
580	 * correct zone cut, possibly via a separate query for an "apex-only"
581	 * type, queries for other types will be resolved correctly.
582	 *
583	 * A query for type ANY will be accepted if it hits an exceptional
584	 * type above in the answer section as it should be from a child
585	 * zone.
586	 *
587	 * Also accept answers with RRSIG records from the child zone.
588	 * Direct queries for RRSIG records should not be answered from
589	 * the parent zone.
590	 */
591
592	if (message->counts[DNS_SECTION_ANSWER] != 0 &&
593	    (fctx->type == dns_rdatatype_ns ||
594	     fctx->type == dns_rdatatype_ds ||
595	     fctx->type == dns_rdatatype_soa ||
596	     fctx->type == dns_rdatatype_any ||
597	     fctx->type == dns_rdatatype_rrsig ||
598	     fctx->type == dns_rdatatype_dnskey)) {
599		result = dns_message_firstname(message, DNS_SECTION_ANSWER);
600		while (result == ISC_R_SUCCESS) {
601			name = NULL;
602			dns_message_currentname(message, DNS_SECTION_ANSWER,
603						&name);
604			for (rdataset = ISC_LIST_HEAD(name->list);
605			     rdataset != NULL;
606			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
607				if (!dns_name_equal(name, &fctx->name))
608					continue;
609				type = rdataset->type;
610				/*
611				 * RRsig from child?
612				 */
613				if (type == dns_rdatatype_rrsig &&
614				    rrsig_fromchildzone(fctx, rdataset))
615					return (ISC_FALSE);
616				/*
617				 * Direct query for apex records or DS.
618				 */
619				if (fctx->type == type &&
620				    (type == dns_rdatatype_ds ||
621				     type == dns_rdatatype_ns ||
622				     type == dns_rdatatype_soa ||
623				     type == dns_rdatatype_dnskey))
624					return (ISC_FALSE);
625				/*
626				 * Indirect query for apex records or DS.
627				 */
628				if (fctx->type == dns_rdatatype_any &&
629				    (type == dns_rdatatype_ns ||
630				     type == dns_rdatatype_ds ||
631				     type == dns_rdatatype_soa ||
632				     type == dns_rdatatype_dnskey))
633					return (ISC_FALSE);
634			}
635			result = dns_message_nextname(message,
636						      DNS_SECTION_ANSWER);
637		}
638	}
639
640	/*
641	 * A NODATA response to a DS query?
642	 */
643	if (fctx->type == dns_rdatatype_ds &&
644	    message->counts[DNS_SECTION_ANSWER] == 0)
645		return (ISC_FALSE);
646
647	/* Look for referral or indication of answer from child zone? */
648	if (message->counts[DNS_SECTION_AUTHORITY] == 0)
649		goto munge;
650
651	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
652	while (result == ISC_R_SUCCESS) {
653		name = NULL;
654		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
655		for (rdataset = ISC_LIST_HEAD(name->list);
656		     rdataset != NULL;
657		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
658			type = rdataset->type;
659			if (type == dns_rdatatype_soa &&
660			    dns_name_equal(name, domain))
661				keep_auth = ISC_TRUE;
662
663			if (type != dns_rdatatype_ns &&
664			    type != dns_rdatatype_soa &&
665			    type != dns_rdatatype_rrsig)
666				continue;
667
668			if (type == dns_rdatatype_rrsig) {
669				if (rrsig_fromchildzone(fctx, rdataset))
670					return (ISC_FALSE);
671				else
672					continue;
673			}
674
675			/* NS or SOA records. */
676			if (dns_name_equal(name, domain)) {
677				/*
678				 * If a query for ANY causes a negative
679				 * response, we can be sure that this is
680				 * an empty node.  For other type of queries
681				 * we cannot differentiate an empty node
682				 * from a node that just doesn't have that
683				 * type of record.  We only accept the former
684				 * case.
685				 */
686				if (message->counts[DNS_SECTION_ANSWER] == 0 &&
687				    fctx->type == dns_rdatatype_any)
688					return (ISC_FALSE);
689			} else if (dns_name_issubdomain(name, domain)) {
690				/* Referral or answer from child zone. */
691				return (ISC_FALSE);
692			}
693		}
694		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
695	}
696
697 munge:
698	message->rcode = dns_rcode_nxdomain;
699	message->counts[DNS_SECTION_ANSWER] = 0;
700	if (!keep_auth)
701		message->counts[DNS_SECTION_AUTHORITY] = 0;
702	message->counts[DNS_SECTION_ADDITIONAL] = 0;
703	return (ISC_TRUE);
704}
705
706static inline isc_result_t
707fctx_starttimer(fetchctx_t *fctx) {
708	/*
709	 * Start the lifetime timer for fctx.
710	 *
711	 * This is also used for stopping the idle timer; in that
712	 * case we must purge events already posted to ensure that
713	 * no further idle events are delivered.
714	 */
715	return (isc_timer_reset(fctx->timer, isc_timertype_once,
716				&fctx->expires, NULL, ISC_TRUE));
717}
718
719static inline void
720fctx_stoptimer(fetchctx_t *fctx) {
721	isc_result_t result;
722
723	/*
724	 * We don't return a result if resetting the timer to inactive fails
725	 * since there's nothing to be done about it.  Resetting to inactive
726	 * should never fail anyway, since the code as currently written
727	 * cannot fail in that case.
728	 */
729	result = isc_timer_reset(fctx->timer, isc_timertype_inactive,
730				  NULL, NULL, ISC_TRUE);
731	if (result != ISC_R_SUCCESS) {
732		UNEXPECTED_ERROR(__FILE__, __LINE__,
733				 "isc_timer_reset(): %s",
734				 isc_result_totext(result));
735	}
736}
737
738
739static inline isc_result_t
740fctx_startidletimer(fetchctx_t *fctx, isc_interval_t *interval) {
741	/*
742	 * Start the idle timer for fctx.  The lifetime timer continues
743	 * to be in effect.
744	 */
745	return (isc_timer_reset(fctx->timer, isc_timertype_once,
746				&fctx->expires, interval, ISC_FALSE));
747}
748
749/*
750 * Stopping the idle timer is equivalent to calling fctx_starttimer(), but
751 * we use fctx_stopidletimer for readability in the code below.
752 */
753#define fctx_stopidletimer      fctx_starttimer
754
755
756static inline void
757resquery_destroy(resquery_t **queryp) {
758	resquery_t *query;
759
760	REQUIRE(queryp != NULL);
761	query = *queryp;
762	REQUIRE(!ISC_LINK_LINKED(query, link));
763
764	INSIST(query->tcpsocket == NULL);
765
766	query->fctx->nqueries--;
767	if (SHUTTINGDOWN(query->fctx)) {
768		dns_resolver_t *res = query->fctx->res;
769		if (maybe_destroy(query->fctx, ISC_FALSE))
770			empty_bucket(res);
771	}
772	query->magic = 0;
773	isc_mem_put(query->mctx, query, sizeof(*query));
774	*queryp = NULL;
775}
776
777static void
778fctx_cancelquery(resquery_t **queryp, dns_dispatchevent_t **deventp,
779		 isc_time_t *finish, isc_boolean_t no_response)
780{
781	fetchctx_t *fctx;
782	resquery_t *query;
783	unsigned int rtt, rttms;
784	unsigned int factor;
785	dns_adbfind_t *find;
786	dns_adbaddrinfo_t *addrinfo;
787	isc_socket_t *socket;
788
789	query = *queryp;
790	fctx = query->fctx;
791
792	FCTXTRACE("cancelquery");
793
794	REQUIRE(!RESQUERY_CANCELED(query));
795
796	query->attributes |= RESQUERY_ATTR_CANCELED;
797
798	/*
799	 * Should we update the RTT?
800	 */
801	if (finish != NULL || no_response) {
802		if (finish != NULL) {
803			/*
804			 * We have both the start and finish times for this
805			 * packet, so we can compute a real RTT.
806			 */
807			rtt = (unsigned int)isc_time_microdiff(finish,
808							       &query->start);
809			factor = DNS_ADB_RTTADJDEFAULT;
810
811			rttms = rtt / 1000;
812			if (rttms < DNS_RESOLVER_QRYRTTCLASS0) {
813				inc_stats(fctx->res,
814					  dns_resstatscounter_queryrtt0);
815			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS1) {
816				inc_stats(fctx->res,
817					  dns_resstatscounter_queryrtt1);
818			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS2) {
819				inc_stats(fctx->res,
820					  dns_resstatscounter_queryrtt2);
821			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS3) {
822				inc_stats(fctx->res,
823					  dns_resstatscounter_queryrtt3);
824			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS4) {
825				inc_stats(fctx->res,
826					  dns_resstatscounter_queryrtt4);
827			} else {
828				inc_stats(fctx->res,
829					  dns_resstatscounter_queryrtt5);
830			}
831		} else {
832			/*
833			 * We don't have an RTT for this query.  Maybe the
834			 * packet was lost, or maybe this server is very
835			 * slow.  We don't know.  Increase the RTT.
836			 */
837			INSIST(no_response);
838			rtt = query->addrinfo->srtt + 200000;
839			if (rtt > MAX_SINGLE_QUERY_TIMEOUT_US)
840				rtt = MAX_SINGLE_QUERY_TIMEOUT_US;
841			/*
842			 * Replace the current RTT with our value.
843			 */
844			factor = DNS_ADB_RTTADJREPLACE;
845		}
846		dns_adb_adjustsrtt(fctx->adb, query->addrinfo, rtt, factor);
847	}
848
849	/* Remember that the server has been tried. */
850	if (!TRIED(query->addrinfo)) {
851		dns_adb_changeflags(fctx->adb, query->addrinfo,
852				    FCTX_ADDRINFO_TRIED, FCTX_ADDRINFO_TRIED);
853	}
854
855	/*
856	 * Age RTTs of servers not tried.
857	 */
858	factor = DNS_ADB_RTTADJAGE;
859	if (finish != NULL)
860		for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
861		     addrinfo != NULL;
862		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
863			if (UNMARKED(addrinfo))
864				dns_adb_adjustsrtt(fctx->adb, addrinfo,
865						   0, factor);
866
867	if (finish != NULL && TRIEDFIND(fctx))
868		for (find = ISC_LIST_HEAD(fctx->finds);
869		     find != NULL;
870		     find = ISC_LIST_NEXT(find, publink))
871			for (addrinfo = ISC_LIST_HEAD(find->list);
872			     addrinfo != NULL;
873			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
874				if (UNMARKED(addrinfo))
875					dns_adb_adjustsrtt(fctx->adb, addrinfo,
876							   0, factor);
877
878	if (finish != NULL && TRIEDALT(fctx)) {
879		for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
880		     addrinfo != NULL;
881		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
882			if (UNMARKED(addrinfo))
883				dns_adb_adjustsrtt(fctx->adb, addrinfo,
884						   0, factor);
885		for (find = ISC_LIST_HEAD(fctx->altfinds);
886		     find != NULL;
887		     find = ISC_LIST_NEXT(find, publink))
888			for (addrinfo = ISC_LIST_HEAD(find->list);
889			     addrinfo != NULL;
890			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
891				if (UNMARKED(addrinfo))
892					dns_adb_adjustsrtt(fctx->adb, addrinfo,
893							   0, factor);
894	}
895
896	/*
897	 * Check for any outstanding socket events.  If they exist, cancel
898	 * them and let the event handlers finish the cleanup.  The resolver
899	 * only needs to worry about managing the connect and send events;
900	 * the dispatcher manages the recv events.
901	 */
902	if (RESQUERY_CONNECTING(query)) {
903		/*
904		 * Cancel the connect.
905		 */
906		if (query->tcpsocket != NULL) {
907			isc_socket_cancel(query->tcpsocket, NULL,
908					  ISC_SOCKCANCEL_CONNECT);
909		} else if (query->dispentry != NULL) {
910			INSIST(query->exclusivesocket);
911			socket = dns_dispatch_getentrysocket(query->dispentry);
912			if (socket != NULL)
913				isc_socket_cancel(socket, NULL,
914						  ISC_SOCKCANCEL_CONNECT);
915		}
916	} else if (RESQUERY_SENDING(query)) {
917		/*
918		 * Cancel the pending send.
919		 */
920		if (query->exclusivesocket && query->dispentry != NULL)
921			socket = dns_dispatch_getentrysocket(query->dispentry);
922		else
923			socket = dns_dispatch_getsocket(query->dispatch);
924		if (socket != NULL)
925			isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_SEND);
926	}
927
928	if (query->dispentry != NULL)
929		dns_dispatch_removeresponse(&query->dispentry, deventp);
930
931	ISC_LIST_UNLINK(fctx->queries, query, link);
932
933	if (query->tsig != NULL)
934		isc_buffer_free(&query->tsig);
935
936	if (query->tsigkey != NULL)
937		dns_tsigkey_detach(&query->tsigkey);
938
939	if (query->dispatch != NULL)
940		dns_dispatch_detach(&query->dispatch);
941
942	if (! (RESQUERY_CONNECTING(query) || RESQUERY_SENDING(query)))
943		/*
944		 * It's safe to destroy the query now.
945		 */
946		resquery_destroy(&query);
947}
948
949static void
950fctx_cancelqueries(fetchctx_t *fctx, isc_boolean_t no_response) {
951	resquery_t *query, *next_query;
952
953	FCTXTRACE("cancelqueries");
954
955	for (query = ISC_LIST_HEAD(fctx->queries);
956	     query != NULL;
957	     query = next_query) {
958		next_query = ISC_LIST_NEXT(query, link);
959		fctx_cancelquery(&query, NULL, NULL, no_response);
960	}
961}
962
963static void
964fctx_cleanupfinds(fetchctx_t *fctx) {
965	dns_adbfind_t *find, *next_find;
966
967	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
968
969	for (find = ISC_LIST_HEAD(fctx->finds);
970	     find != NULL;
971	     find = next_find) {
972		next_find = ISC_LIST_NEXT(find, publink);
973		ISC_LIST_UNLINK(fctx->finds, find, publink);
974		dns_adb_destroyfind(&find);
975	}
976	fctx->find = NULL;
977}
978
979static void
980fctx_cleanupaltfinds(fetchctx_t *fctx) {
981	dns_adbfind_t *find, *next_find;
982
983	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
984
985	for (find = ISC_LIST_HEAD(fctx->altfinds);
986	     find != NULL;
987	     find = next_find) {
988		next_find = ISC_LIST_NEXT(find, publink);
989		ISC_LIST_UNLINK(fctx->altfinds, find, publink);
990		dns_adb_destroyfind(&find);
991	}
992	fctx->altfind = NULL;
993}
994
995static void
996fctx_cleanupforwaddrs(fetchctx_t *fctx) {
997	dns_adbaddrinfo_t *addr, *next_addr;
998
999	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1000
1001	for (addr = ISC_LIST_HEAD(fctx->forwaddrs);
1002	     addr != NULL;
1003	     addr = next_addr) {
1004		next_addr = ISC_LIST_NEXT(addr, publink);
1005		ISC_LIST_UNLINK(fctx->forwaddrs, addr, publink);
1006		dns_adb_freeaddrinfo(fctx->adb, &addr);
1007	}
1008}
1009
1010static void
1011fctx_cleanupaltaddrs(fetchctx_t *fctx) {
1012	dns_adbaddrinfo_t *addr, *next_addr;
1013
1014	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1015
1016	for (addr = ISC_LIST_HEAD(fctx->altaddrs);
1017	     addr != NULL;
1018	     addr = next_addr) {
1019		next_addr = ISC_LIST_NEXT(addr, publink);
1020		ISC_LIST_UNLINK(fctx->altaddrs, addr, publink);
1021		dns_adb_freeaddrinfo(fctx->adb, &addr);
1022	}
1023}
1024
/*
 * Halt all activity for 'fctx': cancel its outstanding queries (passing
 * 'no_response' through), release its finds and address lists, and stop
 * its timer.
 *
 * The queries are canceled first because each of the cleanup helpers
 * REQUIREs that fctx->queries is empty.
 */
static inline void
fctx_stopeverything(fetchctx_t *fctx, isc_boolean_t no_response) {
	FCTXTRACE("stopeverything");
	fctx_cancelqueries(fctx, no_response);
	fctx_cleanupfinds(fctx);
	fctx_cleanupaltfinds(fctx);
	fctx_cleanupforwaddrs(fctx);
	fctx_cleanupaltaddrs(fctx);
	fctx_stoptimer(fctx);
}
1035
1036static inline void
1037fctx_sendevents(fetchctx_t *fctx, isc_result_t result, int line) {
1038	dns_fetchevent_t *event, *next_event;
1039	isc_task_t *task;
1040	unsigned int count = 0;
1041	isc_interval_t i;
1042	isc_boolean_t logit = ISC_FALSE;
1043	isc_time_t now;
1044	unsigned int old_spillat;
1045	unsigned int new_spillat = 0;	/* initialized to silence
1046					   compiler warnings */
1047
1048	/*
1049	 * Caller must be holding the appropriate bucket lock.
1050	 */
1051	REQUIRE(fctx->state == fetchstate_done);
1052
1053	FCTXTRACE("sendevents");
1054
1055	/*
1056	 * Keep some record of fetch result for logging later (if required).
1057	 */
1058	fctx->result = result;
1059	fctx->exitline = line;
1060	TIME_NOW(&now);
1061	fctx->duration = isc_time_microdiff(&now, &fctx->start);
1062
1063	for (event = ISC_LIST_HEAD(fctx->events);
1064	     event != NULL;
1065	     event = next_event) {
1066		next_event = ISC_LIST_NEXT(event, ev_link);
1067		ISC_LIST_UNLINK(fctx->events, event, ev_link);
1068		task = event->ev_sender;
1069		event->ev_sender = fctx;
1070		event->vresult = fctx->vresult;
1071		if (!HAVE_ANSWER(fctx))
1072			event->result = result;
1073
1074		INSIST(result != ISC_R_SUCCESS ||
1075		       dns_rdataset_isassociated(event->rdataset) ||
1076		       fctx->type == dns_rdatatype_any ||
1077		       fctx->type == dns_rdatatype_rrsig ||
1078		       fctx->type == dns_rdatatype_sig);
1079
1080		/*
1081		 * Negative results must be indicated in event->result.
1082		 */
1083		if (dns_rdataset_isassociated(event->rdataset) &&
1084		    NEGATIVE(event->rdataset)) {
1085			INSIST(event->result == DNS_R_NCACHENXDOMAIN ||
1086			       event->result == DNS_R_NCACHENXRRSET);
1087		}
1088
1089		isc_task_sendanddetach(&task, ISC_EVENT_PTR(&event));
1090		count++;
1091	}
1092
1093	if ((fctx->attributes & FCTX_ATTR_HAVEANSWER) != 0 &&
1094	    fctx->spilled &&
1095	    (count < fctx->res->spillatmax || fctx->res->spillatmax == 0)) {
1096		LOCK(&fctx->res->lock);
1097		if (count == fctx->res->spillat && !fctx->res->exiting) {
1098			old_spillat = fctx->res->spillat;
1099			fctx->res->spillat += 5;
1100			if (fctx->res->spillat > fctx->res->spillatmax &&
1101			    fctx->res->spillatmax != 0)
1102				fctx->res->spillat = fctx->res->spillatmax;
1103			new_spillat = fctx->res->spillat;
1104			if (new_spillat != old_spillat) {
1105				logit = ISC_TRUE;
1106			}
1107			isc_interval_set(&i, 20 * 60, 0);
1108			result = isc_timer_reset(fctx->res->spillattimer,
1109						 isc_timertype_ticker, NULL,
1110						 &i, ISC_TRUE);
1111			RUNTIME_CHECK(result == ISC_R_SUCCESS);
1112		}
1113		UNLOCK(&fctx->res->lock);
1114		if (logit)
1115			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
1116				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
1117				      "clients-per-query increased to %u",
1118				      new_spillat);
1119	}
1120}
1121
1122static inline void
1123log_edns(fetchctx_t *fctx) {
1124	char domainbuf[DNS_NAME_FORMATSIZE];
1125
1126	if (fctx->reason == NULL)
1127		return;
1128
1129	if (!isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(3)))
1130		return;
1131	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
1132	isc_log_write(dns_lctx, DNS_LOGCATEGORY_EDNS_DISABLED,
1133		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1134		      "success resolving '%s' (in '%s'?) after %s",
1135		      fctx->info, domainbuf, fctx->reason);
1136
1137	fctx->reason = NULL;
1138}
1139
1140static void
1141fctx_done(fetchctx_t *fctx, isc_result_t result, int line) {
1142	dns_resolver_t *res;
1143	isc_boolean_t no_response;
1144
1145	REQUIRE(line >= 0);
1146
1147	FCTXTRACE("done");
1148
1149	res = fctx->res;
1150
1151	if (result == ISC_R_SUCCESS) {
1152		/*%
1153		 * Log any deferred EDNS timeout messages.
1154		 */
1155		log_edns(fctx);
1156		no_response = ISC_TRUE;
1157	 } else
1158		no_response = ISC_FALSE;
1159
1160	fctx->reason = NULL;
1161	fctx_stopeverything(fctx, no_response);
1162
1163	LOCK(&res->buckets[fctx->bucketnum].lock);
1164
1165	fctx->state = fetchstate_done;
1166	fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1167	fctx_sendevents(fctx, result, line);
1168
1169	UNLOCK(&res->buckets[fctx->bucketnum].lock);
1170}
1171
1172static void
1173process_sendevent(resquery_t *query, isc_event_t *event) {
1174	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
1175	isc_boolean_t retry = ISC_FALSE;
1176	isc_result_t result;
1177	fetchctx_t *fctx;
1178
1179	fctx = query->fctx;
1180
1181	if (RESQUERY_CANCELED(query)) {
1182		if (query->sends == 0 && query->connects == 0) {
1183			/*
1184			 * This query was canceled while the
1185			 * isc_socket_sendto/connect() was in progress.
1186			 */
1187			if (query->tcpsocket != NULL)
1188				isc_socket_detach(&query->tcpsocket);
1189			resquery_destroy(&query);
1190		}
1191	} else {
1192		switch (sevent->result) {
1193		case ISC_R_SUCCESS:
1194			break;
1195
1196		case ISC_R_HOSTUNREACH:
1197		case ISC_R_NETUNREACH:
1198		case ISC_R_NOPERM:
1199		case ISC_R_ADDRNOTAVAIL:
1200		case ISC_R_CONNREFUSED:
1201
1202			/*
1203			 * No route to remote.
1204			 */
1205			add_bad(fctx, query->addrinfo, sevent->result,
1206				badns_unreachable);
1207			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
1208			retry = ISC_TRUE;
1209			break;
1210
1211		default:
1212			fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
1213			break;
1214		}
1215	}
1216
1217	isc_event_free(&event);
1218
1219	if (retry) {
1220		/*
1221		 * Behave as if the idle timer has expired.  For TCP
1222		 * this may not actually reflect the latest timer.
1223		 */
1224		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
1225		result = fctx_stopidletimer(fctx);
1226		if (result != ISC_R_SUCCESS)
1227			fctx_done(fctx, result, __LINE__);
1228		else
1229			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
1230	}
1231}
1232
1233static void
1234resquery_udpconnected(isc_task_t *task, isc_event_t *event) {
1235	resquery_t *query = event->ev_arg;
1236
1237	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
1238
1239	QTRACE("udpconnected");
1240
1241	UNUSED(task);
1242
1243	INSIST(RESQUERY_CONNECTING(query));
1244
1245	query->connects--;
1246
1247	process_sendevent(query, event);
1248}
1249
1250static void
1251resquery_senddone(isc_task_t *task, isc_event_t *event) {
1252	resquery_t *query = event->ev_arg;
1253
1254	REQUIRE(event->ev_type == ISC_SOCKEVENT_SENDDONE);
1255
1256	QTRACE("senddone");
1257
1258	/*
1259	 * XXXRTH
1260	 *
1261	 * Currently we don't wait for the senddone event before retrying
1262	 * a query.  This means that if we get really behind, we may end
1263	 * up doing extra work!
1264	 */
1265
1266	UNUSED(task);
1267
1268	INSIST(RESQUERY_SENDING(query));
1269
1270	query->sends--;
1271
1272	process_sendevent(query, event);
1273}
1274
1275static inline isc_result_t
1276fctx_addopt(dns_message_t *message, unsigned int version,
1277	    isc_uint16_t udpsize, isc_boolean_t request_nsid)
1278{
1279	dns_rdataset_t *rdataset;
1280	dns_rdatalist_t *rdatalist;
1281	dns_rdata_t *rdata;
1282	isc_result_t result;
1283
1284	rdatalist = NULL;
1285	result = dns_message_gettemprdatalist(message, &rdatalist);
1286	if (result != ISC_R_SUCCESS)
1287		return (result);
1288	rdata = NULL;
1289	result = dns_message_gettemprdata(message, &rdata);
1290	if (result != ISC_R_SUCCESS)
1291		return (result);
1292	rdataset = NULL;
1293	result = dns_message_gettemprdataset(message, &rdataset);
1294	if (result != ISC_R_SUCCESS)
1295		return (result);
1296	dns_rdataset_init(rdataset);
1297
1298	rdatalist->type = dns_rdatatype_opt;
1299	rdatalist->covers = 0;
1300
1301	/*
1302	 * Set Maximum UDP buffer size.
1303	 */
1304	rdatalist->rdclass = udpsize;
1305
1306	/*
1307	 * Set EXTENDED-RCODE and Z to 0, DO to 1.
1308	 */
1309	rdatalist->ttl = (version << 16);
1310	rdatalist->ttl |= DNS_MESSAGEEXTFLAG_DO;
1311
1312	/*
1313	 * Set EDNS options if applicable
1314	 */
1315	if (request_nsid) {
1316		/* Send empty NSID option (RFC5001) */
1317		unsigned char data[4];
1318		isc_buffer_t buf;
1319
1320		isc_buffer_init(&buf, data, sizeof(data));
1321		isc_buffer_putuint16(&buf, DNS_OPT_NSID);
1322		isc_buffer_putuint16(&buf, 0);
1323		rdata->data = data;
1324		rdata->length = sizeof(data);
1325	} else {
1326		rdata->data = NULL;
1327		rdata->length = 0;
1328	}
1329
1330	rdata->rdclass = rdatalist->rdclass;
1331	rdata->type = rdatalist->type;
1332	rdata->flags = 0;
1333
1334	ISC_LIST_INIT(rdatalist->rdata);
1335	ISC_LIST_APPEND(rdatalist->rdata, rdata, link);
1336	RUNTIME_CHECK(dns_rdatalist_tordataset(rdatalist, rdataset) == ISC_R_SUCCESS);
1337
1338	return (dns_message_setopt(message, rdataset));
1339}
1340
1341static inline void
1342fctx_setretryinterval(fetchctx_t *fctx, unsigned int rtt) {
1343	unsigned int seconds;
1344	unsigned int us;
1345
1346	/*
1347	 * We retry every .8 seconds the first two times through the address
1348	 * list, and then we do exponential back-off.
1349	 */
1350	if (fctx->restarts < 3)
1351		us = 800000;
1352	else
1353		us = (800000 << (fctx->restarts - 2));
1354
1355	/*
1356	 * Add a fudge factor to the expected rtt based on the current
1357	 * estimate.
1358	 */
1359	if (rtt < 50000)
1360		rtt += 50000;
1361	else if (rtt < 100000)
1362		rtt += 100000;
1363	else
1364		rtt += 200000;
1365
1366	/*
1367	 * Always wait for at least the expected rtt.
1368	 */
1369	if (us < rtt)
1370		us = rtt;
1371
1372	/*
1373	 * But don't ever wait for more than 10 seconds.
1374	 */
1375	if (us > MAX_SINGLE_QUERY_TIMEOUT_US)
1376		us = MAX_SINGLE_QUERY_TIMEOUT_US;
1377
1378	seconds = us / US_PER_SEC;
1379	us -= seconds * US_PER_SEC;
1380	isc_interval_set(&fctx->interval, seconds, us * 1000);
1381}
1382
/*
 * Start one query for 'fctx' to the server described by 'addrinfo', using
 * the DNS_FETCHOPT_* bits in 'options'.
 *
 * The idle timer is started from a retry interval derived from
 * addrinfo->srtt, a resquery_t is allocated and initialized, and then
 * either a TCP connect is started (DNS_FETCHOPT_TCP set) or the query is
 * sent right away through a UDP dispatch by resquery_send().
 *
 * Returns ISC_R_SUCCESS once the query has been linked onto
 * fctx->queries.  On any failure after the idle timer was started, the
 * timer is stopped again before the error is returned.
 */
static isc_result_t
fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
	   unsigned int options)
{
	dns_resolver_t *res;
	isc_task_t *task;
	isc_result_t result;
	resquery_t *query;
	isc_sockaddr_t addr;
	isc_boolean_t have_addr = ISC_FALSE;
	unsigned int srtt;

	FCTXTRACE("query");

	res = fctx->res;
	task = res->buckets[fctx->bucketnum].task;

	srtt = addrinfo->srtt;

	/*
	 * A forwarder needs to make multiple queries. Give it at least
	 * a second to do these in.
	 */
	if (ISFORWARDER(addrinfo) && srtt < 1000000)
		srtt = 1000000;

	fctx_setretryinterval(fctx, srtt);
	result = fctx_startidletimer(fctx, &fctx->interval);
	if (result != ISC_R_SUCCESS)
		return (result);

	INSIST(ISC_LIST_EMPTY(fctx->validators));

	dns_message_reset(fctx->rmessage, DNS_MESSAGE_INTENTPARSE);

	query = isc_mem_get(fctx->mctx, sizeof(*query));
	if (query == NULL) {
		result = ISC_R_NOMEMORY;
		goto stop_idle_timer;
	}
	query->mctx = fctx->mctx;
	query->options = options;
	query->attributes = 0;
	query->sends = 0;
	query->connects = 0;
	/*
	 * Note that the caller MUST guarantee that 'addrinfo' will remain
	 * valid until this query is canceled.
	 */
	query->addrinfo = addrinfo;
	TIME_NOW(&query->start);

	/*
	 * If this is a TCP query, then we need to make a socket and
	 * a dispatch for it here.  Otherwise we use the resolver's
	 * shared dispatch.
	 */
	query->dispatchmgr = res->dispatchmgr;
	query->dispatch = NULL;
	query->exclusivesocket = ISC_FALSE;
	query->tcpsocket = NULL;
	/*
	 * If a peer entry exists for the destination and it supplies a
	 * query source address, remember that address in 'addr'.
	 */
	if (res->view->peers != NULL) {
		dns_peer_t *peer = NULL;
		isc_netaddr_t dstip;
		isc_netaddr_fromsockaddr(&dstip, &addrinfo->sockaddr);
		result = dns_peerlist_peerbyaddr(res->view->peers,
						 &dstip, &peer);
		if (result == ISC_R_SUCCESS) {
			result = dns_peer_getquerysource(peer, &addr);
			if (result == ISC_R_SUCCESS)
				have_addr = ISC_TRUE;
		}
	}

	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
		int pf;

		pf = isc_sockaddr_pf(&addrinfo->sockaddr);
		/*
		 * Without a peer-specific source, take the local address
		 * of the resolver dispatch for the destination's family.
		 */
		if (!have_addr) {
			switch (pf) {
			case PF_INET:
				result =
				  dns_dispatch_getlocaladdress(res->dispatchv4,
							       &addr);
				break;
			case PF_INET6:
				result =
				  dns_dispatch_getlocaladdress(res->dispatchv6,
							       &addr);
				break;
			default:
				result = ISC_R_NOTIMPLEMENTED;
				break;
			}
			if (result != ISC_R_SUCCESS)
				goto cleanup_query;
		}
		isc_sockaddr_setport(&addr, 0);

		result = isc_socket_create(res->socketmgr, pf,
					   isc_sockettype_tcp,
					   &query->tcpsocket);
		if (result != ISC_R_SUCCESS)
			goto cleanup_query;

#ifndef BROKEN_TCP_BIND_BEFORE_CONNECT
		result = isc_socket_bind(query->tcpsocket, &addr, 0);
		if (result != ISC_R_SUCCESS)
			goto cleanup_socket;
#endif

		/*
		 * A dispatch will be created once the connect succeeds.
		 */
	} else {
		if (have_addr) {
			/*
			 * A peer-specific source address was found: get a
			 * UDP dispatch bound to it.
			 */
			unsigned int attrs, attrmask;
			attrs = DNS_DISPATCHATTR_UDP;
			switch (isc_sockaddr_pf(&addr)) {
			case AF_INET:
				attrs |= DNS_DISPATCHATTR_IPV4;
				break;
			case AF_INET6:
				attrs |= DNS_DISPATCHATTR_IPV6;
				break;
			default:
				result = ISC_R_NOTIMPLEMENTED;
				goto cleanup_query;
			}
			attrmask = DNS_DISPATCHATTR_UDP;
			attrmask |= DNS_DISPATCHATTR_TCP;
			attrmask |= DNS_DISPATCHATTR_IPV4;
			attrmask |= DNS_DISPATCHATTR_IPV6;
			result = dns_dispatch_getudp(res->dispatchmgr,
						     res->socketmgr,
						     res->taskmgr, &addr,
						     4096, 1000, 32768, 16411,
						     16433, attrs, attrmask,
						     &query->dispatch);
			if (result != ISC_R_SUCCESS)
				goto cleanup_query;
		} else {
			/*
			 * Otherwise attach to the resolver's dispatch for
			 * the destination's address family.
			 */
			switch (isc_sockaddr_pf(&addrinfo->sockaddr)) {
			case PF_INET:
				dns_dispatch_attach(res->dispatchv4,
						    &query->dispatch);
				query->exclusivesocket = res->exclusivev4;
				break;
			case PF_INET6:
				dns_dispatch_attach(res->dispatchv6,
						    &query->dispatch);
				query->exclusivesocket = res->exclusivev6;
				break;
			default:
				result = ISC_R_NOTIMPLEMENTED;
				goto cleanup_query;
			}
		}
		/*
		 * We should always have a valid dispatcher here.  If we
		 * don't support a protocol family, then its dispatcher
		 * will be NULL, but we shouldn't be finding addresses for
		 * protocol types we don't support, so the dispatcher
		 * we found should never be NULL.
		 */
		INSIST(query->dispatch != NULL);
	}

	query->dispentry = NULL;
	query->fctx = fctx;
	query->tsig = NULL;
	query->tsigkey = NULL;
	ISC_LINK_INIT(query, link);
	query->magic = QUERY_MAGIC;

	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
		/*
		 * Connect to the remote server.
		 *
		 * XXXRTH  Should we attach to the socket?
		 */
		result = isc_socket_connect(query->tcpsocket,
					    &addrinfo->sockaddr, task,
					    resquery_connected, query);
		if (result != ISC_R_SUCCESS)
			goto cleanup_socket;
		query->connects++;
		QTRACE("connecting via TCP");
	} else {
		result = resquery_send(query);
		if (result != ISC_R_SUCCESS)
			goto cleanup_dispatch;
	}
	fctx->querysent++;

	/*
	 * The query is under way: link it onto the fetch and update the
	 * statistics counters.
	 */
	ISC_LIST_APPEND(fctx->queries, query, link);
	query->fctx->nqueries++;
	if (isc_sockaddr_pf(&addrinfo->sockaddr) == PF_INET)
		inc_stats(res, dns_resstatscounter_queryv4);
	else
		inc_stats(res, dns_resstatscounter_queryv6);
	if (res->view->resquerystats != NULL)
		dns_rdatatypestats_increment(res->view->resquerystats,
					     fctx->type);

	return (ISC_R_SUCCESS);

	/*
	 * Error unwinding: each label releases one resource and falls
	 * through to the labels below it.
	 */
 cleanup_socket:
	isc_socket_detach(&query->tcpsocket);

 cleanup_dispatch:
	if (query->dispatch != NULL)
		dns_dispatch_detach(&query->dispatch);

 cleanup_query:
	/*
	 * The query is only freed here when no connect is outstanding.
	 * resquery_send() can fail while a UDP connect is still pending;
	 * it then marks the query canceled so that it is cleaned up when
	 * the connect event arrives.
	 */
	if (query->connects == 0) {
		query->magic = 0;
		isc_mem_put(fctx->mctx, query, sizeof(*query));
	}

 stop_idle_timer:
	RUNTIME_CHECK(fctx_stopidletimer(fctx) == ISC_R_SUCCESS);

	return (result);
}
1608
1609static isc_boolean_t
1610bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1611	isc_sockaddr_t *sa;
1612
1613	for (sa = ISC_LIST_HEAD(fctx->bad_edns);
1614	     sa != NULL;
1615	     sa = ISC_LIST_NEXT(sa, link)) {
1616		if (isc_sockaddr_equal(sa, address))
1617			return (ISC_TRUE);
1618	}
1619
1620	return (ISC_FALSE);
1621}
1622
1623static void
1624add_bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1625	isc_sockaddr_t *sa;
1626
1627	if (bad_edns(fctx, address))
1628		return;
1629
1630	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1631	if (sa == NULL)
1632		return;
1633
1634	*sa = *address;
1635	ISC_LIST_INITANDAPPEND(fctx->bad_edns, sa, link);
1636}
1637
1638static isc_boolean_t
1639triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1640	isc_sockaddr_t *sa;
1641
1642	for (sa = ISC_LIST_HEAD(fctx->edns);
1643	     sa != NULL;
1644	     sa = ISC_LIST_NEXT(sa, link)) {
1645		if (isc_sockaddr_equal(sa, address))
1646			return (ISC_TRUE);
1647	}
1648
1649	return (ISC_FALSE);
1650}
1651
1652static void
1653add_triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
1654	isc_sockaddr_t *sa;
1655
1656	if (triededns(fctx, address))
1657		return;
1658
1659	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1660	if (sa == NULL)
1661		return;
1662
1663	*sa = *address;
1664	ISC_LIST_INITANDAPPEND(fctx->edns, sa, link);
1665}
1666
1667static isc_boolean_t
1668triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1669	isc_sockaddr_t *sa;
1670
1671	for (sa = ISC_LIST_HEAD(fctx->edns512);
1672	     sa != NULL;
1673	     sa = ISC_LIST_NEXT(sa, link)) {
1674		if (isc_sockaddr_equal(sa, address))
1675			return (ISC_TRUE);
1676	}
1677
1678	return (ISC_FALSE);
1679}
1680
1681static void
1682add_triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
1683	isc_sockaddr_t *sa;
1684
1685	if (triededns512(fctx, address))
1686		return;
1687
1688	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
1689	if (sa == NULL)
1690		return;
1691
1692	*sa = *address;
1693	ISC_LIST_INITANDAPPEND(fctx->edns512, sa, link);
1694}
1695
/*
 * Render the question for 'query' into query->data and hand it to the
 * socket.
 *
 * In order, this registers for the response with the dispatch (which
 * also supplies the query id), builds the question in fctx->qmessage,
 * sets the RD and CD flags, decides whether and how to use EDNS0, adds
 * any peer TSIG key, renders the message, and finally starts the
 * asynchronous send (preceded by a connect when the query has an
 * exclusive UDP socket).
 *
 * Returns ISC_R_SUCCESS when the send has been started, in which case
 * query->sends has been incremented.  On failure fctx->qmessage is reset
 * and, once a dispatch entry had been obtained, that entry is removed.
 */
static isc_result_t
resquery_send(resquery_t *query) {
	fetchctx_t *fctx;
	isc_result_t result;
	dns_name_t *qname = NULL;
	dns_rdataset_t *qrdataset = NULL;
	isc_region_t r;
	dns_resolver_t *res;
	isc_task_t *task;
	isc_socket_t *socket;
	isc_buffer_t tcpbuffer;
	isc_sockaddr_t *address;
	isc_buffer_t *buffer;
	isc_netaddr_t ipaddr;
	dns_tsigkey_t *tsigkey = NULL;
	dns_peer_t *peer = NULL;
	isc_boolean_t useedns;
	dns_compress_t cctx;
	isc_boolean_t cleanup_cctx = ISC_FALSE;
	isc_boolean_t secure_domain;
	isc_boolean_t connecting = ISC_FALSE;

	fctx = query->fctx;
	QTRACE("send");

	res = fctx->res;
	task = res->buckets[fctx->bucketnum].task;
	address = NULL;

	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
		/*
		 * Reserve space for the TCP message length.
		 */
		isc_buffer_init(&tcpbuffer, query->data, sizeof(query->data));
		isc_buffer_init(&query->buffer, query->data + 2,
				sizeof(query->data) - 2);
		buffer = &tcpbuffer;
	} else {
		isc_buffer_init(&query->buffer, query->data,
				sizeof(query->data));
		buffer = &query->buffer;
	}

	result = dns_message_gettempname(fctx->qmessage, &qname);
	if (result != ISC_R_SUCCESS)
		goto cleanup_temps;
	result = dns_message_gettemprdataset(fctx->qmessage, &qrdataset);
	if (result != ISC_R_SUCCESS)
		goto cleanup_temps;

	/*
	 * Get a query id from the dispatch.
	 */
	result = dns_dispatch_addresponse2(query->dispatch,
					   &query->addrinfo->sockaddr,
					   task,
					   resquery_response,
					   query,
					   &query->id,
					   &query->dispentry,
					   res->socketmgr);
	if (result != ISC_R_SUCCESS)
		goto cleanup_temps;

	fctx->qmessage->opcode = dns_opcode_query;

	/*
	 * Set up question.
	 */
	dns_name_init(qname, NULL);
	dns_name_clone(&fctx->name, qname);
	dns_rdataset_init(qrdataset);
	dns_rdataset_makequestion(qrdataset, res->rdclass, fctx->type);
	ISC_LIST_APPEND(qname->list, qrdataset, link);
	dns_message_addname(fctx->qmessage, qname, DNS_SECTION_QUESTION);
	/*
	 * The name and rdataset now belong to the message; clear the
	 * local pointers so that cleanup_temps does not return them.
	 */
	qname = NULL;
	qrdataset = NULL;

	/*
	 * Set RD if the client has requested that we do a recursive query,
	 * or if we're sending to a forwarder.
	 */
	if ((query->options & DNS_FETCHOPT_RECURSIVE) != 0 ||
	    ISFORWARDER(query->addrinfo))
		fctx->qmessage->flags |= DNS_MESSAGEFLAG_RD;

	/*
	 * Set CD if the client says don't validate or the question is
	 * under a secure entry point.
	 */
	if ((query->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
		fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
	} else if (res->view->enablevalidation) {
		result = dns_view_issecuredomain(res->view, &fctx->name,
						 &secure_domain);
		if (result != ISC_R_SUCCESS)
			secure_domain = ISC_FALSE;
		/* A view with DLV configured is always treated as secure. */
		if (res->view->dlv != NULL)
			secure_domain = ISC_TRUE;
		if (secure_domain)
			fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
	}

	/*
	 * We don't have to set opcode because it defaults to query.
	 */
	fctx->qmessage->id = query->id;

	/*
	 * Convert the question to wire format.
	 */
	result = dns_compress_init(&cctx, -1, fctx->res->mctx);
	if (result != ISC_R_SUCCESS)
		goto cleanup_message;
	cleanup_cctx = ISC_TRUE;

	result = dns_message_renderbegin(fctx->qmessage, &cctx,
					 &query->buffer);
	if (result != ISC_R_SUCCESS)
		goto cleanup_message;

	result = dns_message_rendersection(fctx->qmessage,
					   DNS_SECTION_QUESTION, 0);
	if (result != ISC_R_SUCCESS)
		goto cleanup_message;

	/*
	 * Look up any peer entry for the destination.  The result is
	 * deliberately ignored; 'peer' stays NULL when none is found.
	 */
	peer = NULL;
	isc_netaddr_fromsockaddr(&ipaddr, &query->addrinfo->sockaddr);
	(void) dns_peerlist_peerbyaddr(fctx->res->view->peers, &ipaddr, &peer);

	/*
	 * The ADB does not know about servers with "edns no".  Check this,
	 * and then inform the ADB for future use.
	 */
	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0 &&
	    peer != NULL &&
	    dns_peer_getsupportedns(peer, &useedns) == ISC_R_SUCCESS &&
	    !useedns)
	{
		query->options |= DNS_FETCHOPT_NOEDNS0;
		dns_adb_changeflags(fctx->adb, query->addrinfo,
				    DNS_FETCHOPT_NOEDNS0,
				    DNS_FETCHOPT_NOEDNS0);
	}

	/* Sync NOEDNS0 flag in addrinfo->flags and options now. */
	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) != 0)
		query->options |= DNS_FETCHOPT_NOEDNS0;

	/*
	 * Handle timeouts by reducing the UDP response size to 512 bytes
	 * then if that doesn't work disabling EDNS (includes DO) and CD.
	 *
	 * These timeout can be due to:
	 *	* broken nameservers that don't respond to EDNS queries.
	 *	* broken/misconfigured firewalls and NAT implementations
	 *	  that don't handle IP fragmentation.
	 *	* broken/misconfigured firewalls that don't handle responses
	 *	  greater than 512 bytes.
	 *	* broken/misconfigured firewalls that don't handle EDNS, DO
	 *	  or CD.
	 *	* packet loss / link outage.
	 */
	if (fctx->timeout) {
		if ((triededns512(fctx, &query->addrinfo->sockaddr) ||
		     fctx->timeouts >= (MAX_EDNS0_TIMEOUTS * 2)) &&
		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
			query->options |= DNS_FETCHOPT_NOEDNS0;
			fctx->reason = "disabling EDNS";
		} else if ((triededns(fctx, &query->addrinfo->sockaddr) ||
			    fctx->timeouts >= MAX_EDNS0_TIMEOUTS) &&
			   (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
			query->options |= DNS_FETCHOPT_EDNS512;
			fctx->reason = "reducing the advertised EDNS UDP "
				       "packet size to 512 octets";
		}
		fctx->timeout = ISC_FALSE;
	}

	/*
	 * Use EDNS0, unless the caller doesn't want it, or we know that
	 * the remote server doesn't like it.
	 */
	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
		if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0) {
			unsigned int version = 0;       /* Default version. */
			unsigned int flags;
			isc_uint16_t udpsize = res->udpsize;
			isc_boolean_t reqnsid = res->view->requestnsid;

			/*
			 * Use the EDNS version stored in the address
			 * flags, if one has been set.
			 */
			flags = query->addrinfo->flags;
			if ((flags & DNS_FETCHOPT_EDNSVERSIONSET) != 0) {
				version = flags & DNS_FETCHOPT_EDNSVERSIONMASK;
				version >>= DNS_FETCHOPT_EDNSVERSIONSHIFT;
			}
			if ((query->options & DNS_FETCHOPT_EDNS512) != 0)
				udpsize = 512;
			else if (peer != NULL)
				(void)dns_peer_getudpsize(peer, &udpsize);

			/* request NSID for current view or peer? */
			if (peer != NULL)
				(void) dns_peer_getrequestnsid(peer, &reqnsid);
			result = fctx_addopt(fctx->qmessage, version,
					     udpsize, reqnsid);
			if (reqnsid && result == ISC_R_SUCCESS) {
				query->options |= DNS_FETCHOPT_WANTNSID;
			} else if (result != ISC_R_SUCCESS) {
				/*
				 * We couldn't add the OPT, but we'll press on.
				 * We're not using EDNS0, so set the NOEDNS0
				 * bit.
				 */
				query->options |= DNS_FETCHOPT_NOEDNS0;
			}
		} else {
			/*
			 * We know this server doesn't like EDNS0, so we
			 * won't use it.  Set the NOEDNS0 bit since we're
			 * not using EDNS0.
			 */
			query->options |= DNS_FETCHOPT_NOEDNS0;
		}
	}

	/*
	 * If we need EDNS0 to do this query and aren't using it, we lose.
	 */
	if (NEEDEDNS0(fctx) && (query->options & DNS_FETCHOPT_NOEDNS0) != 0) {
		result = DNS_R_SERVFAIL;
		goto cleanup_message;
	}

	/*
	 * Remember which EDNS variants have been tried against this
	 * server; the timeout handling above consults these lists.
	 */
	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0)
		add_triededns(fctx, &query->addrinfo->sockaddr);

	if ((query->options & DNS_FETCHOPT_EDNS512) != 0)
		add_triededns512(fctx, &query->addrinfo->sockaddr);

	/*
	 * Clear CD if EDNS is not in use.
	 */
	if ((query->options & DNS_FETCHOPT_NOEDNS0) != 0)
		fctx->qmessage->flags &= ~DNS_MESSAGEFLAG_CD;

	/*
	 * Add TSIG record tailored to the current recipient.
	 */
	result = dns_view_getpeertsig(fctx->res->view, &ipaddr, &tsigkey);
	if (result != ISC_R_SUCCESS && result != ISC_R_NOTFOUND)
		goto cleanup_message;

	if (tsigkey != NULL) {
		result = dns_message_settsigkey(fctx->qmessage, tsigkey);
		dns_tsigkey_detach(&tsigkey);
		if (result != ISC_R_SUCCESS)
			goto cleanup_message;
	}

	result = dns_message_rendersection(fctx->qmessage,
					   DNS_SECTION_ADDITIONAL, 0);
	if (result != ISC_R_SUCCESS)
		goto cleanup_message;

	result = dns_message_renderend(fctx->qmessage);
	if (result != ISC_R_SUCCESS)
		goto cleanup_message;

	dns_compress_invalidate(&cctx);
	cleanup_cctx = ISC_FALSE;

	/*
	 * If the message was signed, keep the key and the query TSIG on
	 * the query.
	 */
	if (dns_message_gettsigkey(fctx->qmessage) != NULL) {
		dns_tsigkey_attach(dns_message_gettsigkey(fctx->qmessage),
				   &query->tsigkey);
		result = dns_message_getquerytsig(fctx->qmessage,
						  fctx->res->mctx,
						  &query->tsig);
		if (result != ISC_R_SUCCESS)
			goto cleanup_message;
	}

	/*
	 * If using TCP, write the length of the message at the beginning
	 * of the buffer.
	 */
	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
		isc_buffer_usedregion(&query->buffer, &r);
		isc_buffer_putuint16(&tcpbuffer, (isc_uint16_t)r.length);
		isc_buffer_add(&tcpbuffer, r.length);
	}

	/*
	 * We're now done with the query message.
	 */
	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);

	if (query->exclusivesocket)
		socket = dns_dispatch_getentrysocket(query->dispentry);
	else
		socket = dns_dispatch_getsocket(query->dispatch);
	/*
	 * Send the query!
	 */
	if ((query->options & DNS_FETCHOPT_TCP) == 0) {
		address = &query->addrinfo->sockaddr;
		/*
		 * An exclusive UDP socket is connected to the server
		 * first; completion arrives in resquery_udpconnected().
		 */
		if (query->exclusivesocket) {
			result = isc_socket_connect(socket, address, task,
						    resquery_udpconnected,
						    query);
			if (result != ISC_R_SUCCESS)
				goto cleanup_message;
			connecting = ISC_TRUE;
			query->connects++;
		}
	}
	/*
	 * For TCP 'buffer' is tcpbuffer, so the region sent includes the
	 * two byte length prefix; 'address' stays NULL in that case.
	 */
	isc_buffer_usedregion(buffer, &r);

	/*
	 * XXXRTH  Make sure we don't send to ourselves!  We should probably
	 *		prune out these addresses when we get them from the ADB.
	 */
	result = isc_socket_sendto(socket, &r, task, resquery_senddone,
				   query, address, NULL);
	if (result != ISC_R_SUCCESS) {
		if (connecting) {
			/*
			 * This query is still connecting.
			 * Mark it as canceled so that it will just be
			 * cleaned up when the connected event is received.
			 * Keep fctx around until the event is processed.
			 */
			query->fctx->nqueries++;
			query->attributes |= RESQUERY_ATTR_CANCELED;
		}
		goto cleanup_message;
	}

	query->sends++;

	QTRACE("sent");

	return (ISC_R_SUCCESS);

 cleanup_message:
	if (cleanup_cctx)
		dns_compress_invalidate(&cctx);

	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);

	/*
	 * Stop the dispatcher from listening.
	 */
	dns_dispatch_removeresponse(&query->dispentry, NULL);

 cleanup_temps:
	/*
	 * Return whichever temporaries were not handed to the message.
	 */
	if (qname != NULL)
		dns_message_puttempname(fctx->qmessage, &qname);
	if (qrdataset != NULL)
		dns_message_puttemprdataset(fctx->qmessage, &qrdataset);

	return (result);
}
2058
2059static void
2060resquery_connected(isc_task_t *task, isc_event_t *event) {
2061	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
2062	resquery_t *query = event->ev_arg;
2063	isc_boolean_t retry = ISC_FALSE;
2064	isc_interval_t interval;
2065	isc_result_t result;
2066	unsigned int attrs;
2067	fetchctx_t *fctx;
2068
2069	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
2070	REQUIRE(VALID_QUERY(query));
2071
2072	QTRACE("connected");
2073
2074	UNUSED(task);
2075
2076	/*
2077	 * XXXRTH
2078	 *
2079	 * Currently we don't wait for the connect event before retrying
2080	 * a query.  This means that if we get really behind, we may end
2081	 * up doing extra work!
2082	 */
2083
2084	query->connects--;
2085	fctx = query->fctx;
2086
2087	if (RESQUERY_CANCELED(query)) {
2088		/*
2089		 * This query was canceled while the connect() was in
2090		 * progress.
2091		 */
2092		isc_socket_detach(&query->tcpsocket);
2093		resquery_destroy(&query);
2094	} else {
2095		switch (sevent->result) {
2096		case ISC_R_SUCCESS:
2097
2098			/*
2099			 * Extend the idle timer for TCP.  20 seconds
2100			 * should be long enough for a TCP connection to be
2101			 * established, a single DNS request to be sent,
2102			 * and the response received.
2103			 */
2104			isc_interval_set(&interval, 20, 0);
2105			result = fctx_startidletimer(query->fctx, &interval);
2106			if (result != ISC_R_SUCCESS) {
2107				fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
2108				fctx_done(fctx, result, __LINE__);
2109				break;
2110			}
2111			/*
2112			 * We are connected.  Create a dispatcher and
2113			 * send the query.
2114			 */
2115			attrs = 0;
2116			attrs |= DNS_DISPATCHATTR_TCP;
2117			attrs |= DNS_DISPATCHATTR_PRIVATE;
2118			attrs |= DNS_DISPATCHATTR_CONNECTED;
2119			if (isc_sockaddr_pf(&query->addrinfo->sockaddr) ==
2120			    AF_INET)
2121				attrs |= DNS_DISPATCHATTR_IPV4;
2122			else
2123				attrs |= DNS_DISPATCHATTR_IPV6;
2124			attrs |= DNS_DISPATCHATTR_MAKEQUERY;
2125
2126			result = dns_dispatch_createtcp(query->dispatchmgr,
2127						     query->tcpsocket,
2128						     query->fctx->res->taskmgr,
2129						     4096, 2, 1, 1, 3, attrs,
2130						     &query->dispatch);
2131
2132			/*
2133			 * Regardless of whether dns_dispatch_create()
2134			 * succeeded or not, we don't need our reference
2135			 * to the socket anymore.
2136			 */
2137			isc_socket_detach(&query->tcpsocket);
2138
2139			if (result == ISC_R_SUCCESS)
2140				result = resquery_send(query);
2141
2142			if (result != ISC_R_SUCCESS) {
2143				fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
2144				fctx_done(fctx, result, __LINE__);
2145			}
2146			break;
2147
2148		case ISC_R_NETUNREACH:
2149		case ISC_R_HOSTUNREACH:
2150		case ISC_R_CONNREFUSED:
2151		case ISC_R_NOPERM:
2152		case ISC_R_ADDRNOTAVAIL:
2153		case ISC_R_CONNECTIONRESET:
2154			/*
2155			 * No route to remote.
2156			 */
2157			isc_socket_detach(&query->tcpsocket);
2158			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
2159			retry = ISC_TRUE;
2160			break;
2161
2162		default:
2163			isc_socket_detach(&query->tcpsocket);
2164			fctx_cancelquery(&query, NULL, NULL, ISC_FALSE);
2165			break;
2166		}
2167	}
2168
2169	isc_event_free(&event);
2170
2171	if (retry) {
2172		/*
2173		 * Behave as if the idle timer has expired.  For TCP
2174		 * connections this may not actually reflect the latest timer.
2175		 */
2176		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
2177		result = fctx_stopidletimer(fctx);
2178		if (result != ISC_R_SUCCESS)
2179			fctx_done(fctx, result, __LINE__);
2180		else
2181			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
2182	}
2183}
2184
2185static void
2186fctx_finddone(isc_task_t *task, isc_event_t *event) {
2187	fetchctx_t *fctx;
2188	dns_adbfind_t *find;
2189	dns_resolver_t *res;
2190	isc_boolean_t want_try = ISC_FALSE;
2191	isc_boolean_t want_done = ISC_FALSE;
2192	isc_boolean_t bucket_empty = ISC_FALSE;
2193	unsigned int bucketnum;
2194	isc_boolean_t destroy = ISC_FALSE;
2195
2196	find = event->ev_sender;
2197	fctx = event->ev_arg;
2198	REQUIRE(VALID_FCTX(fctx));
2199	res = fctx->res;
2200
2201	UNUSED(task);
2202
2203	FCTXTRACE("finddone");
2204
2205	bucketnum = fctx->bucketnum;
2206	LOCK(&res->buckets[bucketnum].lock);
2207
2208	INSIST(fctx->pending > 0);
2209	fctx->pending--;
2210
2211	if (ADDRWAIT(fctx)) {
2212		/*
2213		 * The fetch is waiting for a name to be found.
2214		 */
2215		INSIST(!SHUTTINGDOWN(fctx));
2216		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
2217		if (event->ev_type == DNS_EVENT_ADBMOREADDRESSES)
2218			want_try = ISC_TRUE;
2219		else {
2220			fctx->findfail++;
2221			if (fctx->pending == 0) {
2222				/*
2223				 * We've got nothing else to wait for and don't
2224				 * know the answer.  There's nothing to do but
2225				 * fail the fctx.
2226				 */
2227				want_done = ISC_TRUE;
2228			}
2229		}
2230	} else if (SHUTTINGDOWN(fctx) && fctx->pending == 0 &&
2231		   fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators)) {
2232
2233		if (fctx->references == 0) {
2234			bucket_empty = fctx_unlink(fctx);
2235			destroy = ISC_TRUE;
2236		}
2237	}
2238	UNLOCK(&res->buckets[bucketnum].lock);
2239
2240	isc_event_free(&event);
2241	dns_adb_destroyfind(&find);
2242
2243	if (want_try)
2244		fctx_try(fctx, ISC_TRUE, ISC_FALSE);
2245	else if (want_done)
2246		fctx_done(fctx, ISC_R_FAILURE, __LINE__);
2247	else if (destroy) {
2248			fctx_destroy(fctx);
2249		if (bucket_empty)
2250			empty_bucket(res);
2251	}
2252}
2253
2254
2255static inline isc_boolean_t
2256bad_server(fetchctx_t *fctx, isc_sockaddr_t *address) {
2257	isc_sockaddr_t *sa;
2258
2259	for (sa = ISC_LIST_HEAD(fctx->bad);
2260	     sa != NULL;
2261	     sa = ISC_LIST_NEXT(sa, link)) {
2262		if (isc_sockaddr_equal(sa, address))
2263			return (ISC_TRUE);
2264	}
2265
2266	return (ISC_FALSE);
2267}
2268
2269static inline isc_boolean_t
2270mark_bad(fetchctx_t *fctx) {
2271	dns_adbfind_t *curr;
2272	dns_adbaddrinfo_t *addrinfo;
2273	isc_boolean_t all_bad = ISC_TRUE;
2274
2275	/*
2276	 * Mark all known bad servers, so we don't try to talk to them
2277	 * again.
2278	 */
2279
2280	/*
2281	 * Mark any bad nameservers.
2282	 */
2283	for (curr = ISC_LIST_HEAD(fctx->finds);
2284	     curr != NULL;
2285	     curr = ISC_LIST_NEXT(curr, publink)) {
2286		for (addrinfo = ISC_LIST_HEAD(curr->list);
2287		     addrinfo != NULL;
2288		     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2289			if (bad_server(fctx, &addrinfo->sockaddr))
2290				addrinfo->flags |= FCTX_ADDRINFO_MARK;
2291			else
2292				all_bad = ISC_FALSE;
2293		}
2294	}
2295
2296	/*
2297	 * Mark any bad forwarders.
2298	 */
2299	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
2300	     addrinfo != NULL;
2301	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2302		if (bad_server(fctx, &addrinfo->sockaddr))
2303			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2304		else
2305			all_bad = ISC_FALSE;
2306	}
2307
2308	/*
2309	 * Mark any bad alternates.
2310	 */
2311	for (curr = ISC_LIST_HEAD(fctx->altfinds);
2312	     curr != NULL;
2313	     curr = ISC_LIST_NEXT(curr, publink)) {
2314		for (addrinfo = ISC_LIST_HEAD(curr->list);
2315		     addrinfo != NULL;
2316		     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2317			if (bad_server(fctx, &addrinfo->sockaddr))
2318				addrinfo->flags |= FCTX_ADDRINFO_MARK;
2319			else
2320				all_bad = ISC_FALSE;
2321		}
2322	}
2323
2324	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
2325	     addrinfo != NULL;
2326	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2327		if (bad_server(fctx, &addrinfo->sockaddr))
2328			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2329		else
2330			all_bad = ISC_FALSE;
2331	}
2332
2333	return (all_bad);
2334}
2335
2336static void
2337add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_result_t reason,
2338	badnstype_t badtype)
2339{
2340	char namebuf[DNS_NAME_FORMATSIZE];
2341	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
2342	char classbuf[64];
2343	char typebuf[64];
2344	char code[64];
2345	isc_buffer_t b;
2346	isc_sockaddr_t *sa;
2347	const char *spc = "";
2348	isc_sockaddr_t *address = &addrinfo->sockaddr;
2349
2350	if (reason == DNS_R_LAME)
2351		fctx->lamecount++;
2352	else {
2353		switch (badtype) {
2354		case badns_unreachable:
2355			fctx->neterr++;
2356			break;
2357		case badns_response:
2358			fctx->badresp++;
2359			break;
2360		case badns_validation:
2361			break;	/* counted as 'valfail' */
2362		}
2363	}
2364
2365	if (bad_server(fctx, address)) {
2366		/*
2367		 * We already know this server is bad.
2368		 */
2369		return;
2370	}
2371
2372	FCTXTRACE("add_bad");
2373
2374	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
2375	if (sa == NULL)
2376		return;
2377	*sa = *address;
2378	ISC_LIST_INITANDAPPEND(fctx->bad, sa, link);
2379
2380	if (reason == DNS_R_LAME)       /* already logged */
2381		return;
2382
2383	if (reason == DNS_R_UNEXPECTEDRCODE &&
2384	    fctx->rmessage->rcode == dns_rcode_servfail &&
2385	    ISFORWARDER(addrinfo))
2386		return;
2387
2388	if (reason == DNS_R_UNEXPECTEDRCODE) {
2389		isc_buffer_init(&b, code, sizeof(code) - 1);
2390		dns_rcode_totext(fctx->rmessage->rcode, &b);
2391		code[isc_buffer_usedlength(&b)] = '\0';
2392		spc = " ";
2393	} else if (reason == DNS_R_UNEXPECTEDOPCODE) {
2394		isc_buffer_init(&b, code, sizeof(code) - 1);
2395		dns_opcode_totext((dns_opcode_t)fctx->rmessage->opcode, &b);
2396		code[isc_buffer_usedlength(&b)] = '\0';
2397		spc = " ";
2398	} else {
2399		code[0] = '\0';
2400	}
2401	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
2402	dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
2403	dns_rdataclass_format(fctx->res->rdclass, classbuf, sizeof(classbuf));
2404	isc_sockaddr_format(address, addrbuf, sizeof(addrbuf));
2405	isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
2406		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
2407		      "error (%s%s%s) resolving '%s/%s/%s': %s",
2408		      dns_result_totext(reason), spc, code,
2409		      namebuf, typebuf, classbuf, addrbuf);
2410}
2411
2412/*
2413 * Sort addrinfo list by RTT.
2414 */
2415static void
2416sort_adbfind(dns_adbfind_t *find) {
2417	dns_adbaddrinfo_t *best, *curr;
2418	dns_adbaddrinfolist_t sorted;
2419
2420	/* Lame N^2 bubble sort. */
2421	ISC_LIST_INIT(sorted);
2422	while (!ISC_LIST_EMPTY(find->list)) {
2423		best = ISC_LIST_HEAD(find->list);
2424		curr = ISC_LIST_NEXT(best, publink);
2425		while (curr != NULL) {
2426			if (curr->srtt < best->srtt)
2427				best = curr;
2428			curr = ISC_LIST_NEXT(curr, publink);
2429		}
2430		ISC_LIST_UNLINK(find->list, best, publink);
2431		ISC_LIST_APPEND(sorted, best, publink);
2432	}
2433	find->list = sorted;
2434}
2435
2436/*
2437 * Sort a list of finds by server RTT.
2438 */
2439static void
2440sort_finds(dns_adbfindlist_t *findlist) {
2441	dns_adbfind_t *best, *curr;
2442	dns_adbfindlist_t sorted;
2443	dns_adbaddrinfo_t *addrinfo, *bestaddrinfo;
2444
2445	/* Sort each find's addrinfo list by SRTT. */
2446	for (curr = ISC_LIST_HEAD(*findlist);
2447	     curr != NULL;
2448	     curr = ISC_LIST_NEXT(curr, publink))
2449		sort_adbfind(curr);
2450
2451	/* Lame N^2 bubble sort. */
2452	ISC_LIST_INIT(sorted);
2453	while (!ISC_LIST_EMPTY(*findlist)) {
2454		best = ISC_LIST_HEAD(*findlist);
2455		bestaddrinfo = ISC_LIST_HEAD(best->list);
2456		INSIST(bestaddrinfo != NULL);
2457		curr = ISC_LIST_NEXT(best, publink);
2458		while (curr != NULL) {
2459			addrinfo = ISC_LIST_HEAD(curr->list);
2460			INSIST(addrinfo != NULL);
2461			if (addrinfo->srtt < bestaddrinfo->srtt) {
2462				best = curr;
2463				bestaddrinfo = addrinfo;
2464			}
2465			curr = ISC_LIST_NEXT(curr, publink);
2466		}
2467		ISC_LIST_UNLINK(*findlist, best, publink);
2468		ISC_LIST_APPEND(sorted, best, publink);
2469	}
2470	*findlist = sorted;
2471}
2472
2473static void
2474findname(fetchctx_t *fctx, dns_name_t *name, in_port_t port,
2475	 unsigned int options, unsigned int flags, isc_stdtime_t now,
2476	 isc_boolean_t *need_alternate)
2477{
2478	dns_adbaddrinfo_t *ai;
2479	dns_adbfind_t *find;
2480	dns_resolver_t *res;
2481	isc_boolean_t unshared;
2482	isc_result_t result;
2483
2484	res = fctx->res;
2485	unshared = ISC_TF((fctx->options | DNS_FETCHOPT_UNSHARED) != 0);
2486	/*
2487	 * If this name is a subdomain of the query domain, tell
2488	 * the ADB to start looking using zone/hint data. This keeps us
2489	 * from getting stuck if the nameserver is beneath the zone cut
2490	 * and we don't know its address (e.g. because the A record has
2491	 * expired).
2492	 */
2493	if (dns_name_issubdomain(name, &fctx->domain))
2494		options |= DNS_ADBFIND_STARTATZONE;
2495	options |= DNS_ADBFIND_GLUEOK;
2496	options |= DNS_ADBFIND_HINTOK;
2497
2498	/*
2499	 * See what we know about this address.
2500	 */
2501	find = NULL;
2502	result = dns_adb_createfind(fctx->adb,
2503				    res->buckets[fctx->bucketnum].task,
2504				    fctx_finddone, fctx, name,
2505				    &fctx->name, fctx->type,
2506				    options, now, NULL,
2507				    res->view->dstport, &find);
2508	if (result != ISC_R_SUCCESS) {
2509		if (result == DNS_R_ALIAS) {
2510			/*
2511			 * XXXRTH  Follow the CNAME/DNAME chain?
2512			 */
2513			dns_adb_destroyfind(&find);
2514			fctx->adberr++;
2515		}
2516	} else if (!ISC_LIST_EMPTY(find->list)) {
2517		/*
2518		 * We have at least some of the addresses for the
2519		 * name.
2520		 */
2521		INSIST((find->options & DNS_ADBFIND_WANTEVENT) == 0);
2522		if (flags != 0 || port != 0) {
2523			for (ai = ISC_LIST_HEAD(find->list);
2524			     ai != NULL;
2525			     ai = ISC_LIST_NEXT(ai, publink)) {
2526				ai->flags |= flags;
2527				if (port != 0)
2528					isc_sockaddr_setport(&ai->sockaddr,
2529							     port);
2530			}
2531		}
2532		if ((flags & FCTX_ADDRINFO_FORWARDER) != 0)
2533			ISC_LIST_APPEND(fctx->altfinds, find, publink);
2534		else
2535			ISC_LIST_APPEND(fctx->finds, find, publink);
2536	} else {
2537		/*
2538		 * We don't know any of the addresses for this
2539		 * name.
2540		 */
2541		if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) {
2542			/*
2543			 * We're looking for them and will get an
2544			 * event about it later.
2545			 */
2546			fctx->pending++;
2547			/*
2548			 * Bootstrap.
2549			 */
2550			if (need_alternate != NULL &&
2551			    !*need_alternate && unshared &&
2552			    ((res->dispatchv4 == NULL &&
2553			      find->result_v6 != DNS_R_NXDOMAIN) ||
2554			     (res->dispatchv6 == NULL &&
2555			      find->result_v4 != DNS_R_NXDOMAIN)))
2556				*need_alternate = ISC_TRUE;
2557		} else {
2558			if ((find->options & DNS_ADBFIND_LAMEPRUNED) != 0)
2559				fctx->lamecount++; /* cached lame server */
2560			else
2561				fctx->adberr++; /* unreachable server, etc. */
2562
2563			/*
2564			 * If we know there are no addresses for
2565			 * the family we are using then try to add
2566			 * an alternative server.
2567			 */
2568			if (need_alternate != NULL && !*need_alternate &&
2569			    ((res->dispatchv4 == NULL &&
2570			      find->result_v6 == DNS_R_NXRRSET) ||
2571			     (res->dispatchv6 == NULL &&
2572			      find->result_v4 == DNS_R_NXRRSET)))
2573				*need_alternate = ISC_TRUE;
2574			dns_adb_destroyfind(&find);
2575		}
2576	}
2577}
2578
2579static isc_boolean_t
2580isstrictsubdomain(dns_name_t *name1, dns_name_t *name2) {
2581	int order;
2582	unsigned int nlabels;
2583	dns_namereln_t namereln;
2584
2585	namereln = dns_name_fullcompare(name1, name2, &order, &nlabels);
2586	return (ISC_TF(namereln == dns_namereln_subdomain));
2587}
2588
/*
 * Build the set of server addresses the fetch can query next.
 *
 * In order, this:
 *  1. bumps the restart counter and gives up with DNS_R_SERVFAIL after
 *     more than 10 restarts;
 *  2. selects forwarders (the fctx's own list, else a match from the
 *     view's forwarding table) and fills fctx->forwaddrs, kept ordered
 *     by srtt;
 *  3. unless the forwarding policy is "only", starts ADB lookups for
 *     every NS name in fctx->nameservers via findname();
 *  4. if findname() asked for it, adds the resolver's alternate
 *     servers (by name via findname(), by address into fctx->altaddrs);
 *  5. marks every address already known to be bad.
 *
 * 'badcache' controls whether a total failure for a DNSKEY, DLV or DS
 * fetch is recorded with dns_resolver_addbadcache().
 *
 * Returns:
 *  ISC_R_SUCCESS   at least one address that is not known bad exists;
 *                  the find lists have been sorted.
 *  DNS_R_WAIT      no usable address yet, but ADB lookups are pending.
 *  ISC_R_FAILURE   no usable address and nothing pending.
 *  DNS_R_SERVFAIL  too many restarts.
 *  other           failure from dns_name_dup() or from iterating the
 *                  nameserver rdataset.
 */
static isc_result_t
fctx_getaddresses(fetchctx_t *fctx, isc_boolean_t badcache) {
	dns_rdata_t rdata = DNS_RDATA_INIT;
	isc_result_t result;
	dns_resolver_t *res;
	isc_stdtime_t now;
	unsigned int stdoptions = 0;
	isc_sockaddr_t *sa;
	dns_adbaddrinfo_t *ai;
	isc_boolean_t all_bad;
	dns_rdata_ns_t ns;
	isc_boolean_t need_alternate = ISC_FALSE;

	FCTXTRACE("getaddresses");

	/*
	 * Don't pound on remote servers.  (Failsafe!)
	 */
	fctx->restarts++;
	if (fctx->restarts > 10) {
		FCTXTRACE("too many restarts");
		return (DNS_R_SERVFAIL);
	}

	res = fctx->res;

	/*
	 * Forwarders.
	 */

	/* The caller is expected to have emptied these lists already. */
	INSIST(ISC_LIST_EMPTY(fctx->forwaddrs));
	INSIST(ISC_LIST_EMPTY(fctx->altaddrs));

	/*
	 * If this fctx has forwarders, use them; otherwise use any
	 * selective forwarders specified in the view; otherwise use the
	 * resolver's forwarders (if any).
	 */
	sa = ISC_LIST_HEAD(fctx->forwarders);
	if (sa == NULL) {
		dns_forwarders_t *forwarders = NULL;
		dns_name_t *name = &fctx->name;
		dns_name_t suffix;
		unsigned int labels;
		dns_fixedname_t fixed;
		dns_name_t *domain;

		/*
		 * DS records are found in the parent server.
		 * Strip label to get the correct forwarder (if any).
		 */
		if (dns_rdatatype_atparent(fctx->type) &&
		    dns_name_countlabels(name) > 1) {
			dns_name_init(&suffix, NULL);
			labels = dns_name_countlabels(name);
			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
			name = &suffix;
		}

		dns_fixedname_init(&fixed);
		domain = dns_fixedname_name(&fixed);
		result = dns_fwdtable_find2(fctx->res->view->fwdtable, name,
					    domain, &forwarders);
		if (result == ISC_R_SUCCESS) {
			sa = ISC_LIST_HEAD(forwarders->addrs);
			fctx->fwdpolicy = forwarders->fwdpolicy;
			/*
			 * With "forward only" and a forwarding domain
			 * strictly below the fetch's current domain,
			 * replace the fetch's domain with the forwarding
			 * domain.
			 */
			if (fctx->fwdpolicy == dns_fwdpolicy_only &&
			    isstrictsubdomain(domain, &fctx->domain)) {
				dns_name_free(&fctx->domain, fctx->mctx);
				dns_name_init(&fctx->domain, NULL);
				result = dns_name_dup(domain, fctx->mctx,
						      &fctx->domain);
				if (result != ISC_R_SUCCESS)
					return (result);
			}
		}
	}

	/*
	 * Turn each forwarder address into an ADB addrinfo, skipping
	 * address families we have no dispatch for, and insert it into
	 * fctx->forwaddrs in ascending srtt order.
	 */
	while (sa != NULL) {
		if ((isc_sockaddr_pf(sa) == AF_INET &&
			 fctx->res->dispatchv4 == NULL) ||
		    (isc_sockaddr_pf(sa) == AF_INET6 &&
			fctx->res->dispatchv6 == NULL)) {
				sa = ISC_LIST_NEXT(sa, link);
				continue;
		}
		ai = NULL;
		result = dns_adb_findaddrinfo(fctx->adb,
					      sa, &ai, 0);  /* XXXMLG */
		if (result == ISC_R_SUCCESS) {
			dns_adbaddrinfo_t *cur;
			ai->flags |= FCTX_ADDRINFO_FORWARDER;
			cur = ISC_LIST_HEAD(fctx->forwaddrs);
			while (cur != NULL && cur->srtt < ai->srtt)
				cur = ISC_LIST_NEXT(cur, publink);
			if (cur != NULL)
				ISC_LIST_INSERTBEFORE(fctx->forwaddrs, cur,
						      ai, publink);
			else
				ISC_LIST_APPEND(fctx->forwaddrs, ai, publink);
		}
		sa = ISC_LIST_NEXT(sa, link);
	}

	/*
	 * If the forwarding policy is "only", we don't need the addresses
	 * of the nameservers.
	 */
	if (fctx->fwdpolicy == dns_fwdpolicy_only)
		goto out;

	/*
	 * Normal nameservers.
	 */

	stdoptions = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_EMPTYEVENT;
	if (fctx->restarts == 1) {
		/*
		 * To avoid sending out a flood of queries likely to
		 * result in NXRRSET, we suppress fetches for address
		 * families we don't have the first time through,
		 * provided that we have addresses in some family we
		 * can use.
		 *
		 * We don't want to set this option all the time, since
		 * if fctx->restarts > 1, we've clearly been having trouble
		 * with the addresses we had, so getting more could help.
		 */
		stdoptions |= DNS_ADBFIND_AVOIDFETCHES;
	}
	/* Only ask for address families we can actually send from. */
	if (res->dispatchv4 != NULL)
		stdoptions |= DNS_ADBFIND_INET;
	if (res->dispatchv6 != NULL)
		stdoptions |= DNS_ADBFIND_INET6;
	isc_stdtime_get(&now);

	INSIST(ISC_LIST_EMPTY(fctx->finds));
	INSIST(ISC_LIST_EMPTY(fctx->altfinds));

	for (result = dns_rdataset_first(&fctx->nameservers);
	     result == ISC_R_SUCCESS;
	     result = dns_rdataset_next(&fctx->nameservers))
	{
		dns_rdataset_current(&fctx->nameservers, &rdata);
		/*
		 * Extract the name from the NS record.
		 */
		result = dns_rdata_tostruct(&rdata, &ns, NULL);
		/*
		 * NOTE(review): on this path 'rdata' is not reset before
		 * the next dns_rdataset_current() call -- confirm that is
		 * acceptable for dns_rdataset_current().
		 */
		if (result != ISC_R_SUCCESS)
			continue;

		findname(fctx, &ns.name, 0, stdoptions, 0, now,
			 &need_alternate);
		dns_rdata_reset(&rdata);
		dns_rdata_freestruct(&ns);
	}
	/* Anything other than a clean end of iteration is an error. */
	if (result != ISC_R_NOMORE)
		return (result);

	/*
	 * Do we need to use 6 to 4?
	 */
	if (need_alternate) {
		int family;
		alternate_t *a;
		family = (res->dispatchv6 != NULL) ? AF_INET6 : AF_INET;
		for (a = ISC_LIST_HEAD(fctx->res->alternates);
		     a != NULL;
		     a = ISC_LIST_NEXT(a, link)) {
			if (!a->isaddress) {
				/*
				 * Alternate given by name: look it up
				 * like a nameserver, flagged as a
				 * forwarder so it lands on altfinds.
				 */
				findname(fctx, &a->_u._n.name, a->_u._n.port,
					 stdoptions, FCTX_ADDRINFO_FORWARDER,
					 now, NULL);
				continue;
			}
			if (isc_sockaddr_pf(&a->_u.addr) != family)
				continue;
			ai = NULL;
			result = dns_adb_findaddrinfo(fctx->adb, &a->_u.addr,
						      &ai, 0);
			if (result == ISC_R_SUCCESS) {
				dns_adbaddrinfo_t *cur;
				/* Keep altaddrs ordered by ascending srtt. */
				ai->flags |= FCTX_ADDRINFO_FORWARDER;
				cur = ISC_LIST_HEAD(fctx->altaddrs);
				while (cur != NULL && cur->srtt < ai->srtt)
					cur = ISC_LIST_NEXT(cur, publink);
				if (cur != NULL)
					ISC_LIST_INSERTBEFORE(fctx->altaddrs,
							      cur, ai, publink);
				else
					ISC_LIST_APPEND(fctx->altaddrs, ai,
							publink);
			}
		}
	}

 out:
	/*
	 * Mark all known bad servers.
	 */
	all_bad = mark_bad(fctx);

	/*
	 * How are we doing?
	 */
	if (all_bad) {
		/*
		 * We've got no addresses.
		 */
		if (fctx->pending > 0) {
			/*
			 * We're fetching the addresses, but don't have any
			 * yet.   Tell the caller to wait for an answer.
			 */
			result = DNS_R_WAIT;
		} else {
			isc_time_t expire;
			isc_interval_t i;
			/*
			 * We've lost completely.  We don't know any
			 * addresses, and the ADB has told us it can't get
			 * them.
			 */
			FCTXTRACE("no addresses");
			isc_interval_set(&i, DNS_BADCACHE_TTL(fctx), 0);
			result = isc_time_nowplusinterval(&expire, &i);
			/*
			 * Only DNSKEY, DLV and DS fetches are added to the
			 * bad cache, and only if the caller asked for it
			 * and the expiry time could be computed.
			 */
			if (badcache &&
			    (fctx->type == dns_rdatatype_dnskey ||
			     fctx->type == dns_rdatatype_dlv ||
			     fctx->type == dns_rdatatype_ds) &&
			     result == ISC_R_SUCCESS)
				dns_resolver_addbadcache(fctx->res,
							 &fctx->name,
							 fctx->type, &expire);
			result = ISC_R_FAILURE;
		}
	} else {
		/*
		 * We've found some addresses.  We might still be looking
		 * for more addresses.
		 */
		sort_finds(&fctx->finds);
		sort_finds(&fctx->altfinds);
		result = ISC_R_SUCCESS;
	}

	return (result);
}
2837
2838static inline void
2839possibly_mark(fetchctx_t *fctx, dns_adbaddrinfo_t *addr)
2840{
2841	isc_netaddr_t na;
2842	char buf[ISC_NETADDR_FORMATSIZE];
2843	isc_sockaddr_t *sa;
2844	isc_boolean_t aborted = ISC_FALSE;
2845	isc_boolean_t bogus;
2846	dns_acl_t *blackhole;
2847	isc_netaddr_t ipaddr;
2848	dns_peer_t *peer = NULL;
2849	dns_resolver_t *res;
2850	const char *msg = NULL;
2851
2852	sa = &addr->sockaddr;
2853
2854	res = fctx->res;
2855	isc_netaddr_fromsockaddr(&ipaddr, sa);
2856	blackhole = dns_dispatchmgr_getblackhole(res->dispatchmgr);
2857	(void) dns_peerlist_peerbyaddr(res->view->peers, &ipaddr, &peer);
2858
2859	if (blackhole != NULL) {
2860		int match;
2861
2862		if (dns_acl_match(&ipaddr, NULL, blackhole,
2863				  &res->view->aclenv,
2864				  &match, NULL) == ISC_R_SUCCESS &&
2865		    match > 0)
2866			aborted = ISC_TRUE;
2867	}
2868
2869	if (peer != NULL &&
2870	    dns_peer_getbogus(peer, &bogus) == ISC_R_SUCCESS &&
2871	    bogus)
2872		aborted = ISC_TRUE;
2873
2874	if (aborted) {
2875		addr->flags |= FCTX_ADDRINFO_MARK;
2876		msg = "ignoring blackholed / bogus server: ";
2877	} else if (isc_sockaddr_ismulticast(sa)) {
2878		addr->flags |= FCTX_ADDRINFO_MARK;
2879		msg = "ignoring multicast address: ";
2880	} else if (isc_sockaddr_isexperimental(sa)) {
2881		addr->flags |= FCTX_ADDRINFO_MARK;
2882		msg = "ignoring experimental address: ";
2883	} else if (sa->type.sa.sa_family != AF_INET6) {
2884		return;
2885	} else if (IN6_IS_ADDR_V4MAPPED(&sa->type.sin6.sin6_addr)) {
2886		addr->flags |= FCTX_ADDRINFO_MARK;
2887		msg = "ignoring IPv6 mapped IPV4 address: ";
2888	} else if (IN6_IS_ADDR_V4COMPAT(&sa->type.sin6.sin6_addr)) {
2889		addr->flags |= FCTX_ADDRINFO_MARK;
2890		msg = "ignoring IPv6 compatibility IPV4 address: ";
2891	} else
2892		return;
2893
2894	if (!isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(3)))
2895		return;
2896
2897	isc_netaddr_fromsockaddr(&na, sa);
2898	isc_netaddr_format(&na, buf, sizeof(buf));
2899	FCTXTRACE2(msg, buf);
2900}
2901
2902static inline dns_adbaddrinfo_t *
2903fctx_nextaddress(fetchctx_t *fctx) {
2904	dns_adbfind_t *find, *start;
2905	dns_adbaddrinfo_t *addrinfo;
2906	dns_adbaddrinfo_t *faddrinfo;
2907
2908	/*
2909	 * Return the next untried address, if any.
2910	 */
2911
2912	/*
2913	 * Find the first unmarked forwarder (if any).
2914	 */
2915	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
2916	     addrinfo != NULL;
2917	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2918		if (!UNMARKED(addrinfo))
2919			continue;
2920		possibly_mark(fctx, addrinfo);
2921		if (UNMARKED(addrinfo)) {
2922			addrinfo->flags |= FCTX_ADDRINFO_MARK;
2923			fctx->find = NULL;
2924			return (addrinfo);
2925		}
2926	}
2927
2928	/*
2929	 * No forwarders.  Move to the next find.
2930	 */
2931
2932	fctx->attributes |= FCTX_ATTR_TRIEDFIND;
2933
2934	find = fctx->find;
2935	if (find == NULL)
2936		find = ISC_LIST_HEAD(fctx->finds);
2937	else {
2938		find = ISC_LIST_NEXT(find, publink);
2939		if (find == NULL)
2940			find = ISC_LIST_HEAD(fctx->finds);
2941	}
2942
2943	/*
2944	 * Find the first unmarked addrinfo.
2945	 */
2946	addrinfo = NULL;
2947	if (find != NULL) {
2948		start = find;
2949		do {
2950			for (addrinfo = ISC_LIST_HEAD(find->list);
2951			     addrinfo != NULL;
2952			     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2953				if (!UNMARKED(addrinfo))
2954					continue;
2955				possibly_mark(fctx, addrinfo);
2956				if (UNMARKED(addrinfo)) {
2957					addrinfo->flags |= FCTX_ADDRINFO_MARK;
2958					break;
2959				}
2960			}
2961			if (addrinfo != NULL)
2962				break;
2963			find = ISC_LIST_NEXT(find, publink);
2964			if (find == NULL)
2965				find = ISC_LIST_HEAD(fctx->finds);
2966		} while (find != start);
2967	}
2968
2969	fctx->find = find;
2970	if (addrinfo != NULL)
2971		return (addrinfo);
2972
2973	/*
2974	 * No nameservers left.  Try alternates.
2975	 */
2976
2977	fctx->attributes |= FCTX_ATTR_TRIEDALT;
2978
2979	find = fctx->altfind;
2980	if (find == NULL)
2981		find = ISC_LIST_HEAD(fctx->altfinds);
2982	else {
2983		find = ISC_LIST_NEXT(find, publink);
2984		if (find == NULL)
2985			find = ISC_LIST_HEAD(fctx->altfinds);
2986	}
2987
2988	/*
2989	 * Find the first unmarked addrinfo.
2990	 */
2991	addrinfo = NULL;
2992	if (find != NULL) {
2993		start = find;
2994		do {
2995			for (addrinfo = ISC_LIST_HEAD(find->list);
2996			     addrinfo != NULL;
2997			     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
2998				if (!UNMARKED(addrinfo))
2999					continue;
3000				possibly_mark(fctx, addrinfo);
3001				if (UNMARKED(addrinfo)) {
3002					addrinfo->flags |= FCTX_ADDRINFO_MARK;
3003					break;
3004				}
3005			}
3006			if (addrinfo != NULL)
3007				break;
3008			find = ISC_LIST_NEXT(find, publink);
3009			if (find == NULL)
3010				find = ISC_LIST_HEAD(fctx->altfinds);
3011		} while (find != start);
3012	}
3013
3014	faddrinfo = addrinfo;
3015
3016	/*
3017	 * See if we have a better alternate server by address.
3018	 */
3019
3020	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs);
3021	     addrinfo != NULL;
3022	     addrinfo = ISC_LIST_NEXT(addrinfo, publink)) {
3023		if (!UNMARKED(addrinfo))
3024			continue;
3025		possibly_mark(fctx, addrinfo);
3026		if (UNMARKED(addrinfo) &&
3027		    (faddrinfo == NULL ||
3028		     addrinfo->srtt < faddrinfo->srtt)) {
3029			if (faddrinfo != NULL)
3030				faddrinfo->flags &= ~FCTX_ADDRINFO_MARK;
3031			addrinfo->flags |= FCTX_ADDRINFO_MARK;
3032			break;
3033		}
3034	}
3035
3036	if (addrinfo == NULL) {
3037		addrinfo = faddrinfo;
3038		fctx->altfind = find;
3039	}
3040
3041	return (addrinfo);
3042}
3043
3044static void
3045fctx_try(fetchctx_t *fctx, isc_boolean_t retrying, isc_boolean_t badcache) {
3046	isc_result_t result;
3047	dns_adbaddrinfo_t *addrinfo;
3048
3049	FCTXTRACE("try");
3050
3051	REQUIRE(!ADDRWAIT(fctx));
3052
3053	addrinfo = fctx_nextaddress(fctx);
3054	if (addrinfo == NULL) {
3055		/*
3056		 * We have no more addresses.  Start over.
3057		 */
3058		fctx_cancelqueries(fctx, ISC_TRUE);
3059		fctx_cleanupfinds(fctx);
3060		fctx_cleanupaltfinds(fctx);
3061		fctx_cleanupforwaddrs(fctx);
3062		fctx_cleanupaltaddrs(fctx);
3063		result = fctx_getaddresses(fctx, badcache);
3064		if (result == DNS_R_WAIT) {
3065			/*
3066			 * Sleep waiting for addresses.
3067			 */
3068			FCTXTRACE("addrwait");
3069			fctx->attributes |= FCTX_ATTR_ADDRWAIT;
3070			return;
3071		} else if (result != ISC_R_SUCCESS) {
3072			/*
3073			 * Something bad happened.
3074			 */
3075			fctx_done(fctx, result, __LINE__);
3076			return;
3077		}
3078
3079		addrinfo = fctx_nextaddress(fctx);
3080		/*
3081		 * While we may have addresses from the ADB, they
3082		 * might be bad ones.  In this case, return SERVFAIL.
3083		 */
3084		if (addrinfo == NULL) {
3085			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
3086			return;
3087		}
3088	}
3089
3090	result = fctx_query(fctx, addrinfo, fctx->options);
3091	if (result != ISC_R_SUCCESS)
3092		fctx_done(fctx, result, __LINE__);
3093	else if (retrying)
3094		inc_stats(fctx->res, dns_resstatscounter_retry);
3095}
3096
3097static isc_boolean_t
3098fctx_unlink(fetchctx_t *fctx) {
3099	dns_resolver_t *res;
3100	unsigned int bucketnum;
3101
3102	/*
3103	 * Caller must be holding the bucket lock.
3104	 */
3105
3106	REQUIRE(VALID_FCTX(fctx));
3107	REQUIRE(fctx->state == fetchstate_done ||
3108		fctx->state == fetchstate_init);
3109	REQUIRE(ISC_LIST_EMPTY(fctx->events));
3110	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
3111	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
3112	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
3113	REQUIRE(fctx->pending == 0);
3114	REQUIRE(fctx->references == 0);
3115	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
3116
3117	FCTXTRACE("unlink");
3118
3119	res = fctx->res;
3120	bucketnum = fctx->bucketnum;
3121
3122	ISC_LIST_UNLINK(res->buckets[bucketnum].fctxs, fctx, link);
3123
3124	LOCK(&res->nlock);
3125	res->nfctx--;
3126	UNLOCK(&res->nlock);
3127
3128	if (res->buckets[bucketnum].exiting &&
3129	    ISC_LIST_EMPTY(res->buckets[bucketnum].fctxs))
3130		return (ISC_TRUE);
3131
3132	return (ISC_FALSE);
3133}
3134
3135static void
3136fctx_destroy(fetchctx_t *fctx) {
3137	isc_sockaddr_t *sa, *next_sa;
3138
3139	REQUIRE(VALID_FCTX(fctx));
3140	REQUIRE(fctx->state == fetchstate_done ||
3141		fctx->state == fetchstate_init);
3142	REQUIRE(ISC_LIST_EMPTY(fctx->events));
3143	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
3144	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
3145	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
3146	REQUIRE(fctx->pending == 0);
3147	REQUIRE(fctx->references == 0);
3148	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
3149	REQUIRE(!ISC_LINK_LINKED(fctx, link));
3150
3151	FCTXTRACE("destroy");
3152
3153	/*
3154	 * Free bad.
3155	 */
3156	for (sa = ISC_LIST_HEAD(fctx->bad);
3157	     sa != NULL;
3158	     sa = next_sa) {
3159		next_sa = ISC_LIST_NEXT(sa, link);
3160		ISC_LIST_UNLINK(fctx->bad, sa, link);
3161		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3162	}
3163
3164	for (sa = ISC_LIST_HEAD(fctx->edns);
3165	     sa != NULL;
3166	     sa = next_sa) {
3167		next_sa = ISC_LIST_NEXT(sa, link);
3168		ISC_LIST_UNLINK(fctx->edns, sa, link);
3169		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3170	}
3171
3172	for (sa = ISC_LIST_HEAD(fctx->edns512);
3173	     sa != NULL;
3174	     sa = next_sa) {
3175		next_sa = ISC_LIST_NEXT(sa, link);
3176		ISC_LIST_UNLINK(fctx->edns512, sa, link);
3177		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3178	}
3179
3180	for (sa = ISC_LIST_HEAD(fctx->bad_edns);
3181	     sa != NULL;
3182	     sa = next_sa) {
3183		next_sa = ISC_LIST_NEXT(sa, link);
3184		ISC_LIST_UNLINK(fctx->bad_edns, sa, link);
3185		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
3186	}
3187
3188	isc_timer_detach(&fctx->timer);
3189	dns_message_destroy(&fctx->rmessage);
3190	dns_message_destroy(&fctx->qmessage);
3191	if (dns_name_countlabels(&fctx->domain) > 0)
3192		dns_name_free(&fctx->domain, fctx->mctx);
3193	if (dns_rdataset_isassociated(&fctx->nameservers))
3194		dns_rdataset_disassociate(&fctx->nameservers);
3195	dns_name_free(&fctx->name, fctx->mctx);
3196	dns_db_detach(&fctx->cache);
3197	dns_adb_detach(&fctx->adb);
3198	isc_mem_free(fctx->mctx, fctx->info);
3199	isc_mem_putanddetach(&fctx->mctx, fctx, sizeof(*fctx));
3200}
3201
3202/*
3203 * Fetch event handlers.
3204 */
3205
3206static void
3207fctx_timeout(isc_task_t *task, isc_event_t *event) {
3208	fetchctx_t *fctx = event->ev_arg;
3209	isc_timerevent_t *tevent = (isc_timerevent_t *)event;
3210	resquery_t *query;
3211
3212	REQUIRE(VALID_FCTX(fctx));
3213
3214	UNUSED(task);
3215
3216	FCTXTRACE("timeout");
3217
3218	inc_stats(fctx->res, dns_resstatscounter_querytimeout);
3219
3220	if (event->ev_type == ISC_TIMEREVENT_LIFE) {
3221		fctx->reason = NULL;
3222		fctx_done(fctx, ISC_R_TIMEDOUT, __LINE__);
3223	} else {
3224		isc_result_t result;
3225
3226		fctx->timeouts++;
3227		fctx->timeout = ISC_TRUE;
3228		/*
3229		 * We could cancel the running queries here, or we could let
3230		 * them keep going.  Since we normally use separate sockets for
3231		 * different queries, we adopt the former approach to reduce
3232		 * the number of open sockets: cancel the oldest query if it
3233		 * expired after the query had started (this is usually the
3234		 * case but is not always so, depending on the task schedule
3235		 * timing).
3236		 */
3237		query = ISC_LIST_HEAD(fctx->queries);
3238		if (query != NULL &&
3239		    isc_time_compare(&tevent->due, &query->start) >= 0) {
3240			fctx_cancelquery(&query, NULL, NULL, ISC_TRUE);
3241		}
3242		fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
3243		/*
3244		 * Our timer has triggered.  Reestablish the fctx lifetime
3245		 * timer.
3246		 */
3247		result = fctx_starttimer(fctx);
3248		if (result != ISC_R_SUCCESS)
3249			fctx_done(fctx, result, __LINE__);
3250		else
3251			/*
3252			 * Keep trying.
3253			 */
3254			fctx_try(fctx, ISC_TRUE, ISC_FALSE);
3255	}
3256
3257	isc_event_free(&event);
3258}
3259
3260static void
3261fctx_shutdown(fetchctx_t *fctx) {
3262	isc_event_t *cevent;
3263
3264	/*
3265	 * Start the shutdown process for fctx, if it isn't already underway.
3266	 */
3267
3268	FCTXTRACE("shutdown");
3269
3270	/*
3271	 * The caller must be holding the appropriate bucket lock.
3272	 */
3273
3274	if (fctx->want_shutdown)
3275		return;
3276
3277	fctx->want_shutdown = ISC_TRUE;
3278
3279	/*
3280	 * Unless we're still initializing (in which case the
3281	 * control event is still outstanding), we need to post
3282	 * the control event to tell the fetch we want it to
3283	 * exit.
3284	 */
3285	if (fctx->state != fetchstate_init) {
3286		cevent = &fctx->control_event;
3287		isc_task_send(fctx->res->buckets[fctx->bucketnum].task,
3288			      &cevent);
3289	}
3290}
3291
3292static void
3293fctx_doshutdown(isc_task_t *task, isc_event_t *event) {
3294	fetchctx_t *fctx = event->ev_arg;
3295	isc_boolean_t bucket_empty = ISC_FALSE;
3296	dns_resolver_t *res;
3297	unsigned int bucketnum;
3298	dns_validator_t *validator;
3299	isc_boolean_t destroy = ISC_FALSE;
3300
3301	REQUIRE(VALID_FCTX(fctx));
3302
3303	UNUSED(task);
3304
3305	res = fctx->res;
3306	bucketnum = fctx->bucketnum;
3307
3308	FCTXTRACE("doshutdown");
3309
3310	/*
3311	 * An fctx that is shutting down is no longer in ADDRWAIT mode.
3312	 */
3313	fctx->attributes &= ~FCTX_ATTR_ADDRWAIT;
3314
3315	/*
3316	 * Cancel all pending validators.  Note that this must be done
3317	 * without the bucket lock held, since that could cause deadlock.
3318	 */
3319	validator = ISC_LIST_HEAD(fctx->validators);
3320	while (validator != NULL) {
3321		dns_validator_cancel(validator);
3322		validator = ISC_LIST_NEXT(validator, link);
3323	}
3324
3325	if (fctx->nsfetch != NULL)
3326		dns_resolver_cancelfetch(fctx->nsfetch);
3327
3328	/*
3329	 * Shut down anything that is still running on behalf of this
3330	 * fetch.  To avoid deadlock with the ADB, we must do this
3331	 * before we lock the bucket lock.
3332	 */
3333	fctx_stopeverything(fctx, ISC_FALSE);
3334
3335	LOCK(&res->buckets[bucketnum].lock);
3336
3337	fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN;
3338
3339	INSIST(fctx->state == fetchstate_active ||
3340	       fctx->state == fetchstate_done);
3341	INSIST(fctx->want_shutdown);
3342
3343	if (fctx->state != fetchstate_done) {
3344		fctx->state = fetchstate_done;
3345		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
3346	}
3347
3348	if (fctx->references == 0 && fctx->pending == 0 &&
3349	    fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators)) {
3350		bucket_empty = fctx_unlink(fctx);
3351		destroy = ISC_TRUE;
3352	}
3353
3354	UNLOCK(&res->buckets[bucketnum].lock);
3355
3356	if (destroy) {
3357		fctx_destroy(fctx);
3358		if (bucket_empty)
3359			empty_bucket(res);
3360	}
3361}
3362
3363static void
3364fctx_start(isc_task_t *task, isc_event_t *event) {
3365	fetchctx_t *fctx = event->ev_arg;
3366	isc_boolean_t done = ISC_FALSE, bucket_empty = ISC_FALSE;
3367	dns_resolver_t *res;
3368	unsigned int bucketnum;
3369	isc_boolean_t destroy = ISC_FALSE;
3370
3371	REQUIRE(VALID_FCTX(fctx));
3372
3373	UNUSED(task);
3374
3375	res = fctx->res;
3376	bucketnum = fctx->bucketnum;
3377
3378	FCTXTRACE("start");
3379
3380	LOCK(&res->buckets[bucketnum].lock);
3381
3382	INSIST(fctx->state == fetchstate_init);
3383	if (fctx->want_shutdown) {
3384		/*
3385		 * We haven't started this fctx yet, and we've been requested
3386		 * to shut it down.
3387		 */
3388		fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN;
3389		fctx->state = fetchstate_done;
3390		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
3391		/*
3392		 * Since we haven't started, we INSIST that we have no
3393		 * pending ADB finds and no pending validations.
3394		 */
3395		INSIST(fctx->pending == 0);
3396		INSIST(fctx->nqueries == 0);
3397		INSIST(ISC_LIST_EMPTY(fctx->validators));
3398		if (fctx->references == 0) {
3399			/*
3400			 * It's now safe to destroy this fctx.
3401			 */
3402			bucket_empty = fctx_unlink(fctx);
3403			destroy = ISC_TRUE;
3404		}
3405		done = ISC_TRUE;
3406	} else {
3407		/*
3408		 * Normal fctx startup.
3409		 */
3410		fctx->state = fetchstate_active;
3411		/*
3412		 * Reset the control event for later use in shutting down
3413		 * the fctx.
3414		 */
3415		ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
3416			       DNS_EVENT_FETCHCONTROL, fctx_doshutdown, fctx,
3417			       NULL, NULL, NULL);
3418	}
3419
3420	UNLOCK(&res->buckets[bucketnum].lock);
3421
3422	if (!done) {
3423		isc_result_t result;
3424
3425		INSIST(!destroy);
3426
3427		/*
3428		 * All is well.  Start working on the fetch.
3429		 */
3430		result = fctx_starttimer(fctx);
3431		if (result != ISC_R_SUCCESS)
3432			fctx_done(fctx, result, __LINE__);
3433		else
3434			fctx_try(fctx, ISC_FALSE, ISC_FALSE);
3435	} else if (destroy) {
3436			fctx_destroy(fctx);
3437		if (bucket_empty)
3438			empty_bucket(res);
3439	}
3440}
3441
3442/*
3443 * Fetch Creation, Joining, and Cancelation.
3444 */
3445
3446static inline isc_result_t
3447fctx_join(fetchctx_t *fctx, isc_task_t *task, isc_sockaddr_t *client,
3448	  dns_messageid_t id, isc_taskaction_t action, void *arg,
3449	  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
3450	  dns_fetch_t *fetch)
3451{
3452	isc_task_t *clone;
3453	dns_fetchevent_t *event;
3454
3455	FCTXTRACE("join");
3456
3457	/*
3458	 * We store the task we're going to send this event to in the
3459	 * sender field.  We'll make the fetch the sender when we actually
3460	 * send the event.
3461	 */
3462	clone = NULL;
3463	isc_task_attach(task, &clone);
3464	event = (dns_fetchevent_t *)
3465		isc_event_allocate(fctx->res->mctx, clone, DNS_EVENT_FETCHDONE,
3466				   action, arg, sizeof(*event));
3467	if (event == NULL) {
3468		isc_task_detach(&clone);
3469		return (ISC_R_NOMEMORY);
3470	}
3471	event->result = DNS_R_SERVFAIL;
3472	event->qtype = fctx->type;
3473	event->db = NULL;
3474	event->node = NULL;
3475	event->rdataset = rdataset;
3476	event->sigrdataset = sigrdataset;
3477	event->fetch = fetch;
3478	event->client = client;
3479	event->id = id;
3480	dns_fixedname_init(&event->foundname);
3481
3482	/*
3483	 * Make sure that we can store the sigrdataset in the
3484	 * first event if it is needed by any of the events.
3485	 */
3486	if (event->sigrdataset != NULL)
3487		ISC_LIST_PREPEND(fctx->events, event, ev_link);
3488	else
3489		ISC_LIST_APPEND(fctx->events, event, ev_link);
3490	fctx->references++;
3491	fctx->client = client;
3492
3493	fetch->magic = DNS_FETCH_MAGIC;
3494	fetch->private = fctx;
3495
3496	return (ISC_R_SUCCESS);
3497}
3498
3499static inline void
3500log_ns_ttl(fetchctx_t *fctx, const char *where) {
3501	char namebuf[DNS_NAME_FORMATSIZE];
3502	char domainbuf[DNS_NAME_FORMATSIZE];
3503
3504	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
3505	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
3506	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3507		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
3508		      "log_ns_ttl: fctx %p: %s: %s (in '%s'?): %u %u",
3509		      fctx, where, namebuf, domainbuf,
3510		      fctx->ns_ttl_ok, fctx->ns_ttl);
3511}
3512
3513static isc_result_t
3514fctx_create(dns_resolver_t *res, dns_name_t *name, dns_rdatatype_t type,
3515	    dns_name_t *domain, dns_rdataset_t *nameservers,
3516	    unsigned int options, unsigned int bucketnum, fetchctx_t **fctxp)
3517{
3518	fetchctx_t *fctx;
3519	isc_result_t result;
3520	isc_result_t iresult;
3521	isc_interval_t interval;
3522	dns_fixedname_t fixed;
3523	unsigned int findoptions = 0;
3524	char buf[DNS_NAME_FORMATSIZE + DNS_RDATATYPE_FORMATSIZE];
3525	char typebuf[DNS_RDATATYPE_FORMATSIZE];
3526	dns_name_t suffix;
3527	isc_mem_t *mctx;
3528
3529	/*
3530	 * Caller must be holding the lock for bucket number 'bucketnum'.
3531	 */
3532	REQUIRE(fctxp != NULL && *fctxp == NULL);
3533
3534	mctx = res->buckets[bucketnum].mctx;
3535	fctx = isc_mem_get(mctx, sizeof(*fctx));
3536	if (fctx == NULL)
3537		return (ISC_R_NOMEMORY);
3538	dns_name_format(name, buf, sizeof(buf));
3539	dns_rdatatype_format(type, typebuf, sizeof(typebuf));
3540	strcat(buf, "/");       /* checked */
3541	strcat(buf, typebuf);   /* checked */
3542	fctx->info = isc_mem_strdup(mctx, buf);
3543	if (fctx->info == NULL) {
3544		result = ISC_R_NOMEMORY;
3545		goto cleanup_fetch;
3546	}
3547	FCTXTRACE("create");
3548	dns_name_init(&fctx->name, NULL);
3549	result = dns_name_dup(name, mctx, &fctx->name);
3550	if (result != ISC_R_SUCCESS)
3551		goto cleanup_info;
3552	dns_name_init(&fctx->domain, NULL);
3553	dns_rdataset_init(&fctx->nameservers);
3554
3555	fctx->type = type;
3556	fctx->options = options;
3557	/*
3558	 * Note!  We do not attach to the task.  We are relying on the
3559	 * resolver to ensure that this task doesn't go away while we are
3560	 * using it.
3561	 */
3562	fctx->res = res;
3563	fctx->references = 0;
3564	fctx->bucketnum = bucketnum;
3565	fctx->state = fetchstate_init;
3566	fctx->want_shutdown = ISC_FALSE;
3567	fctx->cloned = ISC_FALSE;
3568	ISC_LIST_INIT(fctx->queries);
3569	ISC_LIST_INIT(fctx->finds);
3570	ISC_LIST_INIT(fctx->altfinds);
3571	ISC_LIST_INIT(fctx->forwaddrs);
3572	ISC_LIST_INIT(fctx->altaddrs);
3573	ISC_LIST_INIT(fctx->forwarders);
3574	fctx->fwdpolicy = dns_fwdpolicy_none;
3575	ISC_LIST_INIT(fctx->bad);
3576	ISC_LIST_INIT(fctx->edns);
3577	ISC_LIST_INIT(fctx->edns512);
3578	ISC_LIST_INIT(fctx->bad_edns);
3579	ISC_LIST_INIT(fctx->validators);
3580	fctx->validator = NULL;
3581	fctx->find = NULL;
3582	fctx->altfind = NULL;
3583	fctx->pending = 0;
3584	fctx->restarts = 0;
3585	fctx->querysent = 0;
3586	fctx->referrals = 0;
3587	TIME_NOW(&fctx->start);
3588	fctx->timeouts = 0;
3589	fctx->lamecount = 0;
3590	fctx->adberr = 0;
3591	fctx->neterr = 0;
3592	fctx->badresp = 0;
3593	fctx->findfail = 0;
3594	fctx->valfail = 0;
3595	fctx->result = ISC_R_FAILURE;
3596	fctx->vresult = ISC_R_SUCCESS;
3597	fctx->exitline = -1;	/* sentinel */
3598	fctx->logged = ISC_FALSE;
3599	fctx->attributes = 0;
3600	fctx->spilled = ISC_FALSE;
3601	fctx->nqueries = 0;
3602	fctx->reason = NULL;
3603	fctx->rand_buf = 0;
3604	fctx->rand_bits = 0;
3605	fctx->timeout = ISC_FALSE;
3606	fctx->addrinfo = NULL;
3607	fctx->client = NULL;
3608	fctx->ns_ttl = 0;
3609	fctx->ns_ttl_ok = ISC_FALSE;
3610
3611	dns_name_init(&fctx->nsname, NULL);
3612	fctx->nsfetch = NULL;
3613	dns_rdataset_init(&fctx->nsrrset);
3614
3615	if (domain == NULL) {
3616		dns_forwarders_t *forwarders = NULL;
3617		unsigned int labels;
3618		dns_name_t *fwdname = name;
3619
3620		/*
3621		 * DS records are found in the parent server.
3622		 * Strip label to get the correct forwarder (if any).
3623		 */
3624		if (dns_rdatatype_atparent(fctx->type) &&
3625		    dns_name_countlabels(name) > 1) {
3626			dns_name_init(&suffix, NULL);
3627			labels = dns_name_countlabels(name);
3628			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
3629			fwdname = &suffix;
3630		}
3631		dns_fixedname_init(&fixed);
3632		domain = dns_fixedname_name(&fixed);
3633		result = dns_fwdtable_find2(fctx->res->view->fwdtable, fwdname,
3634					    domain, &forwarders);
3635		if (result == ISC_R_SUCCESS)
3636			fctx->fwdpolicy = forwarders->fwdpolicy;
3637
3638		if (fctx->fwdpolicy != dns_fwdpolicy_only) {
3639			/*
3640			 * The caller didn't supply a query domain and
3641			 * nameservers, and we're not in forward-only mode,
3642			 * so find the best nameservers to use.
3643			 */
3644			if (dns_rdatatype_atparent(fctx->type))
3645				findoptions |= DNS_DBFIND_NOEXACT;
3646			result = dns_view_findzonecut(res->view, name, domain,
3647						      0, findoptions, ISC_TRUE,
3648						      &fctx->nameservers,
3649						      NULL);
3650			if (result != ISC_R_SUCCESS)
3651				goto cleanup_name;
3652			result = dns_name_dup(domain, mctx, &fctx->domain);
3653			if (result != ISC_R_SUCCESS) {
3654				dns_rdataset_disassociate(&fctx->nameservers);
3655				goto cleanup_name;
3656			}
3657			fctx->ns_ttl = fctx->nameservers.ttl;
3658			fctx->ns_ttl_ok = ISC_TRUE;
3659		} else {
3660			/*
3661			 * We're in forward-only mode.  Set the query domain.
3662			 */
3663			result = dns_name_dup(domain, mctx, &fctx->domain);
3664			if (result != ISC_R_SUCCESS)
3665				goto cleanup_name;
3666		}
3667	} else {
3668		result = dns_name_dup(domain, mctx, &fctx->domain);
3669		if (result != ISC_R_SUCCESS)
3670			goto cleanup_name;
3671		dns_rdataset_clone(nameservers, &fctx->nameservers);
3672		fctx->ns_ttl = fctx->nameservers.ttl;
3673		fctx->ns_ttl_ok = ISC_TRUE;
3674	}
3675
3676	log_ns_ttl(fctx, "fctx_create");
3677
3678	INSIST(dns_name_issubdomain(&fctx->name, &fctx->domain));
3679
3680	fctx->qmessage = NULL;
3681	result = dns_message_create(mctx, DNS_MESSAGE_INTENTRENDER,
3682				    &fctx->qmessage);
3683
3684	if (result != ISC_R_SUCCESS)
3685		goto cleanup_domain;
3686
3687	fctx->rmessage = NULL;
3688	result = dns_message_create(mctx, DNS_MESSAGE_INTENTPARSE,
3689				    &fctx->rmessage);
3690
3691	if (result != ISC_R_SUCCESS)
3692		goto cleanup_qmessage;
3693
3694	/*
3695	 * Compute an expiration time for the entire fetch.
3696	 */
3697	isc_interval_set(&interval, res->query_timeout, 0);
3698	iresult = isc_time_nowplusinterval(&fctx->expires, &interval);
3699	if (iresult != ISC_R_SUCCESS) {
3700		UNEXPECTED_ERROR(__FILE__, __LINE__,
3701				 "isc_time_nowplusinterval: %s",
3702				 isc_result_totext(iresult));
3703		result = ISC_R_UNEXPECTED;
3704		goto cleanup_rmessage;
3705	}
3706
3707	/*
3708	 * Default retry interval initialization.  We set the interval now
3709	 * mostly so it won't be uninitialized.  It will be set to the
3710	 * correct value before a query is issued.
3711	 */
3712	isc_interval_set(&fctx->interval, 2, 0);
3713
3714	/*
3715	 * Create an inactive timer.  It will be made active when the fetch
3716	 * is actually started.
3717	 */
3718	fctx->timer = NULL;
3719	iresult = isc_timer_create(res->timermgr, isc_timertype_inactive,
3720				   NULL, NULL,
3721				   res->buckets[bucketnum].task, fctx_timeout,
3722				   fctx, &fctx->timer);
3723	if (iresult != ISC_R_SUCCESS) {
3724		UNEXPECTED_ERROR(__FILE__, __LINE__,
3725				 "isc_timer_create: %s",
3726				 isc_result_totext(iresult));
3727		result = ISC_R_UNEXPECTED;
3728		goto cleanup_rmessage;
3729	}
3730
3731	/*
3732	 * Attach to the view's cache and adb.
3733	 */
3734	fctx->cache = NULL;
3735	dns_db_attach(res->view->cachedb, &fctx->cache);
3736	fctx->adb = NULL;
3737	dns_adb_attach(res->view->adb, &fctx->adb);
3738	fctx->mctx = NULL;
3739	isc_mem_attach(mctx, &fctx->mctx);
3740
3741	ISC_LIST_INIT(fctx->events);
3742	ISC_LINK_INIT(fctx, link);
3743	fctx->magic = FCTX_MAGIC;
3744
3745	ISC_LIST_APPEND(res->buckets[bucketnum].fctxs, fctx, link);
3746
3747	LOCK(&res->nlock);
3748	res->nfctx++;
3749	UNLOCK(&res->nlock);
3750
3751	*fctxp = fctx;
3752
3753	return (ISC_R_SUCCESS);
3754
3755 cleanup_rmessage:
3756	dns_message_destroy(&fctx->rmessage);
3757
3758 cleanup_qmessage:
3759	dns_message_destroy(&fctx->qmessage);
3760
3761 cleanup_domain:
3762	if (dns_name_countlabels(&fctx->domain) > 0)
3763		dns_name_free(&fctx->domain, mctx);
3764	if (dns_rdataset_isassociated(&fctx->nameservers))
3765		dns_rdataset_disassociate(&fctx->nameservers);
3766
3767 cleanup_name:
3768	dns_name_free(&fctx->name, mctx);
3769
3770 cleanup_info:
3771	isc_mem_free(mctx, fctx->info);
3772
3773 cleanup_fetch:
3774	isc_mem_put(mctx, fctx, sizeof(*fctx));
3775
3776	return (result);
3777}
3778
3779/*
3780 * Handle Responses
3781 */
3782static inline isc_boolean_t
3783is_lame(fetchctx_t *fctx) {
3784	dns_message_t *message = fctx->rmessage;
3785	dns_name_t *name;
3786	dns_rdataset_t *rdataset;
3787	isc_result_t result;
3788
3789	if (message->rcode != dns_rcode_noerror &&
3790	    message->rcode != dns_rcode_nxdomain)
3791		return (ISC_FALSE);
3792
3793	if (message->counts[DNS_SECTION_ANSWER] != 0)
3794		return (ISC_FALSE);
3795
3796	if (message->counts[DNS_SECTION_AUTHORITY] == 0)
3797		return (ISC_FALSE);
3798
3799	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
3800	while (result == ISC_R_SUCCESS) {
3801		name = NULL;
3802		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
3803		for (rdataset = ISC_LIST_HEAD(name->list);
3804		     rdataset != NULL;
3805		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
3806			dns_namereln_t namereln;
3807			int order;
3808			unsigned int labels;
3809			if (rdataset->type != dns_rdatatype_ns)
3810				continue;
3811			namereln = dns_name_fullcompare(name, &fctx->domain,
3812							&order, &labels);
3813			if (namereln == dns_namereln_equal &&
3814			    (message->flags & DNS_MESSAGEFLAG_AA) != 0)
3815				return (ISC_FALSE);
3816			if (namereln == dns_namereln_subdomain)
3817				return (ISC_FALSE);
3818			return (ISC_TRUE);
3819		}
3820		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
3821	}
3822
3823	return (ISC_FALSE);
3824}
3825
3826static inline void
3827log_lame(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo) {
3828	char namebuf[DNS_NAME_FORMATSIZE];
3829	char domainbuf[DNS_NAME_FORMATSIZE];
3830	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3831
3832	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
3833	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
3834	isc_sockaddr_format(&addrinfo->sockaddr, addrbuf, sizeof(addrbuf));
3835	isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
3836		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
3837		      "lame server resolving '%s' (in '%s'?): %s",
3838		      namebuf, domainbuf, addrbuf);
3839}
3840
3841static inline void
3842log_formerr(fetchctx_t *fctx, const char *format, ...) {
3843	char nsbuf[ISC_SOCKADDR_FORMATSIZE];
3844	char clbuf[ISC_SOCKADDR_FORMATSIZE];
3845	const char *clmsg = "";
3846	char msgbuf[2048];
3847	va_list args;
3848
3849	va_start(args, format);
3850	vsnprintf(msgbuf, sizeof(msgbuf), format, args);
3851	va_end(args);
3852
3853	isc_sockaddr_format(&fctx->addrinfo->sockaddr, nsbuf, sizeof(nsbuf));
3854
3855	if (fctx->client != NULL) {
3856		clmsg = " for client ";
3857		isc_sockaddr_format(fctx->client, clbuf, sizeof(clbuf));
3858	} else {
3859		clbuf[0] = '\0';
3860	}
3861
3862	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3863		      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
3864		      "DNS format error from %s resolving %s%s%s: %s",
3865		      nsbuf, fctx->info, clmsg, clbuf, msgbuf);
3866}
3867
3868static inline isc_result_t
3869same_question(fetchctx_t *fctx) {
3870	isc_result_t result;
3871	dns_message_t *message = fctx->rmessage;
3872	dns_name_t *name;
3873	dns_rdataset_t *rdataset;
3874
3875	/*
3876	 * Caller must be holding the fctx lock.
3877	 */
3878
3879	/*
3880	 * XXXRTH  Currently we support only one question.
3881	 */
3882	if (message->counts[DNS_SECTION_QUESTION] != 1) {
3883		log_formerr(fctx, "too many questions");
3884		return (DNS_R_FORMERR);
3885	}
3886
3887	result = dns_message_firstname(message, DNS_SECTION_QUESTION);
3888	if (result != ISC_R_SUCCESS)
3889		return (result);
3890	name = NULL;
3891	dns_message_currentname(message, DNS_SECTION_QUESTION, &name);
3892	rdataset = ISC_LIST_HEAD(name->list);
3893	INSIST(rdataset != NULL);
3894	INSIST(ISC_LIST_NEXT(rdataset, link) == NULL);
3895
3896	if (fctx->type != rdataset->type ||
3897	    fctx->res->rdclass != rdataset->rdclass ||
3898	    !dns_name_equal(&fctx->name, name)) {
3899		char namebuf[DNS_NAME_FORMATSIZE];
3900		char class[DNS_RDATACLASS_FORMATSIZE];
3901		char type[DNS_RDATATYPE_FORMATSIZE];
3902
3903		dns_name_format(name, namebuf, sizeof(namebuf));
3904		dns_rdataclass_format(rdataset->rdclass, class, sizeof(class));
3905		dns_rdatatype_format(rdataset->type, type, sizeof(type));
3906		log_formerr(fctx, "question section mismatch: got %s/%s/%s",
3907			    namebuf, class, type);
3908		return (DNS_R_FORMERR);
3909	}
3910
3911	return (ISC_R_SUCCESS);
3912}
3913
3914static void
3915clone_results(fetchctx_t *fctx) {
3916	dns_fetchevent_t *event, *hevent;
3917	isc_result_t result;
3918	dns_name_t *name, *hname;
3919
3920	FCTXTRACE("clone_results");
3921
3922	/*
3923	 * Set up any other events to have the same data as the first
3924	 * event.
3925	 *
3926	 * Caller must be holding the appropriate lock.
3927	 */
3928
3929	fctx->cloned = ISC_TRUE;
3930	hevent = ISC_LIST_HEAD(fctx->events);
3931	if (hevent == NULL)
3932		return;
3933	hname = dns_fixedname_name(&hevent->foundname);
3934	for (event = ISC_LIST_NEXT(hevent, ev_link);
3935	     event != NULL;
3936	     event = ISC_LIST_NEXT(event, ev_link)) {
3937		name = dns_fixedname_name(&event->foundname);
3938		result = dns_name_copy(hname, name, NULL);
3939		if (result != ISC_R_SUCCESS)
3940			event->result = result;
3941		else
3942			event->result = hevent->result;
3943		dns_db_attach(hevent->db, &event->db);
3944		dns_db_attachnode(hevent->db, hevent->node, &event->node);
3945		INSIST(hevent->rdataset != NULL);
3946		INSIST(event->rdataset != NULL);
3947		if (dns_rdataset_isassociated(hevent->rdataset))
3948			dns_rdataset_clone(hevent->rdataset, event->rdataset);
3949		INSIST(! (hevent->sigrdataset == NULL &&
3950			  event->sigrdataset != NULL));
3951		if (hevent->sigrdataset != NULL &&
3952		    dns_rdataset_isassociated(hevent->sigrdataset) &&
3953		    event->sigrdataset != NULL)
3954			dns_rdataset_clone(hevent->sigrdataset,
3955					   event->sigrdataset);
3956	}
3957}
3958
/*
 * Attribute tests for an rdataset pointer 'r'.  Each macro evaluates
 * to 1 when the named DNS_RDATASETATTR_* bit is set in r->attributes
 * and to 0 otherwise.
 */
#define RDATASET_HASATTR(r, a)	(((r)->attributes & (a)) != 0)

#define CACHE(r)	RDATASET_HASATTR(r, DNS_RDATASETATTR_CACHE)
#define ANSWER(r)	RDATASET_HASATTR(r, DNS_RDATASETATTR_ANSWER)
#define ANSWERSIG(r)	RDATASET_HASATTR(r, DNS_RDATASETATTR_ANSWERSIG)
#define EXTERNAL(r)	RDATASET_HASATTR(r, DNS_RDATASETATTR_EXTERNAL)
#define CHAINING(r)	RDATASET_HASATTR(r, DNS_RDATASETATTR_CHAINING)
#define CHASE(r)	RDATASET_HASATTR(r, DNS_RDATASETATTR_CHASE)
#define CHECKNAMES(r)	RDATASET_HASATTR(r, DNS_RDATASETATTR_CHECKNAMES)
3966
3967
3968/*
3969 * Destroy '*fctx' if it is ready to be destroyed (i.e., if it has
3970 * no references and is no longer waiting for any events).
3971 *
3972 * Requires:
3973 *      '*fctx' is shutting down.
3974 *
3975 * Returns:
3976 *	true if the resolver is exiting and this is the last fctx in the bucket.
3977 */
3978static isc_boolean_t
3979maybe_destroy(fetchctx_t *fctx, isc_boolean_t locked) {
3980	unsigned int bucketnum;
3981	isc_boolean_t bucket_empty = ISC_FALSE;
3982	dns_resolver_t *res = fctx->res;
3983	dns_validator_t *validator, *next_validator;
3984	isc_boolean_t destroy = ISC_FALSE;
3985
3986	REQUIRE(SHUTTINGDOWN(fctx));
3987
3988	bucketnum = fctx->bucketnum;
3989	if (!locked)
3990		LOCK(&res->buckets[bucketnum].lock);
3991	if (fctx->pending != 0 || fctx->nqueries != 0)
3992		goto unlock;
3993
3994	for (validator = ISC_LIST_HEAD(fctx->validators);
3995	     validator != NULL; validator = next_validator) {
3996		next_validator = ISC_LIST_NEXT(validator, link);
3997		dns_validator_cancel(validator);
3998	}
3999
4000	if (fctx->references == 0 && ISC_LIST_EMPTY(fctx->validators)) {
4001		bucket_empty = fctx_unlink(fctx);
4002		destroy = ISC_TRUE;
4003	}
4004 unlock:
4005	if (!locked)
4006		UNLOCK(&res->buckets[bucketnum].lock);
4007	if (destroy)
4008		fctx_destroy(fctx);
4009	return (bucket_empty);
4010}
4011
4012/*
4013 * The validator has finished.
4014 */
4015static void
4016validated(isc_task_t *task, isc_event_t *event) {
4017	dns_adbaddrinfo_t *addrinfo;
4018	dns_dbnode_t *node = NULL;
4019	dns_dbnode_t *nsnode = NULL;
4020	dns_fetchevent_t *hevent;
4021	dns_name_t *name;
4022	dns_rdataset_t *ardataset = NULL;
4023	dns_rdataset_t *asigrdataset = NULL;
4024	dns_rdataset_t *rdataset;
4025	dns_rdataset_t *sigrdataset;
4026	dns_resolver_t *res;
4027	dns_valarg_t *valarg;
4028	dns_validatorevent_t *vevent;
4029	fetchctx_t *fctx;
4030	isc_boolean_t chaining;
4031	isc_boolean_t negative;
4032	isc_boolean_t sentresponse;
4033	isc_result_t eresult = ISC_R_SUCCESS;
4034	isc_result_t result = ISC_R_SUCCESS;
4035	isc_stdtime_t now;
4036	isc_uint32_t ttl;
4037
4038	UNUSED(task); /* for now */
4039
4040	REQUIRE(event->ev_type == DNS_EVENT_VALIDATORDONE);
4041	valarg = event->ev_arg;
4042	fctx = valarg->fctx;
4043	res = fctx->res;
4044	addrinfo = valarg->addrinfo;
4045	REQUIRE(VALID_FCTX(fctx));
4046	REQUIRE(!ISC_LIST_EMPTY(fctx->validators));
4047
4048	vevent = (dns_validatorevent_t *)event;
4049	fctx->vresult = vevent->result;
4050
4051	FCTXTRACE("received validation completion event");
4052
4053	LOCK(&res->buckets[fctx->bucketnum].lock);
4054
4055	ISC_LIST_UNLINK(fctx->validators, vevent->validator, link);
4056	fctx->validator = NULL;
4057
4058	/*
4059	 * Destroy the validator early so that we can
4060	 * destroy the fctx if necessary.
4061	 */
4062	dns_validator_destroy(&vevent->validator);
4063	isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
4064
4065	negative = ISC_TF(vevent->rdataset == NULL);
4066
4067	sentresponse = ISC_TF((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0);
4068
4069	/*
4070	 * If shutting down, ignore the results.  Check to see if we're
4071	 * done waiting for validator completions and ADB pending events; if
4072	 * so, destroy the fctx.
4073	 */
4074	if (SHUTTINGDOWN(fctx) && !sentresponse) {
4075		isc_uint32_t bucketnum = fctx->bucketnum;
4076		isc_boolean_t bucket_empty;
4077		bucket_empty = maybe_destroy(fctx, ISC_TRUE);
4078		UNLOCK(&res->buckets[bucketnum].lock);
4079		if (bucket_empty)
4080			empty_bucket(res);
4081		goto cleanup_event;
4082	}
4083
4084	isc_stdtime_get(&now);
4085
4086	/*
4087	 * If chaining, we need to make sure that the right result code is
4088	 * returned, and that the rdatasets are bound.
4089	 */
4090	if (vevent->result == ISC_R_SUCCESS &&
4091	    !negative &&
4092	    vevent->rdataset != NULL &&
4093	    CHAINING(vevent->rdataset))
4094	{
4095		if (vevent->rdataset->type == dns_rdatatype_cname)
4096			eresult = DNS_R_CNAME;
4097		else {
4098			INSIST(vevent->rdataset->type == dns_rdatatype_dname);
4099			eresult = DNS_R_DNAME;
4100		}
4101		chaining = ISC_TRUE;
4102	} else
4103		chaining = ISC_FALSE;
4104
4105	/*
4106	 * Either we're not shutting down, or we are shutting down but want
4107	 * to cache the result anyway (if this was a validation started by
4108	 * a query with cd set)
4109	 */
4110
4111	hevent = ISC_LIST_HEAD(fctx->events);
4112	if (hevent != NULL) {
4113		if (!negative && !chaining &&
4114		    (fctx->type == dns_rdatatype_any ||
4115		     fctx->type == dns_rdatatype_rrsig ||
4116		     fctx->type == dns_rdatatype_sig)) {
4117			/*
4118			 * Don't bind rdatasets; the caller
4119			 * will iterate the node.
4120			 */
4121		} else {
4122			ardataset = hevent->rdataset;
4123			asigrdataset = hevent->sigrdataset;
4124		}
4125	}
4126
4127	if (vevent->result != ISC_R_SUCCESS) {
4128		FCTXTRACE("validation failed");
4129		inc_stats(res, dns_resstatscounter_valfail);
4130		fctx->valfail++;
4131		fctx->vresult = vevent->result;
4132		if (fctx->vresult != DNS_R_BROKENCHAIN) {
4133			result = ISC_R_NOTFOUND;
4134			if (vevent->rdataset != NULL)
4135				result = dns_db_findnode(fctx->cache,
4136							 vevent->name,
4137							 ISC_TRUE, &node);
4138			if (result == ISC_R_SUCCESS)
4139				(void)dns_db_deleterdataset(fctx->cache, node,
4140							     NULL,
4141							    vevent->type, 0);
4142			if (result == ISC_R_SUCCESS &&
4143			     vevent->sigrdataset != NULL)
4144				(void)dns_db_deleterdataset(fctx->cache, node,
4145							    NULL,
4146							    dns_rdatatype_rrsig,
4147							    vevent->type);
4148			if (result == ISC_R_SUCCESS)
4149				dns_db_detachnode(fctx->cache, &node);
4150		}
4151		if (fctx->vresult == DNS_R_BROKENCHAIN && !negative) {
4152			/*
4153			 * Cache the data as pending for later validation.
4154			 */
4155			result = ISC_R_NOTFOUND;
4156			if (vevent->rdataset != NULL)
4157				result = dns_db_findnode(fctx->cache,
4158							 vevent->name,
4159							 ISC_TRUE, &node);
4160			if (result == ISC_R_SUCCESS) {
4161				(void)dns_db_addrdataset(fctx->cache, node,
4162							 NULL, now,
4163							 vevent->rdataset, 0,
4164							 NULL);
4165			}
4166			if (result == ISC_R_SUCCESS &&
4167			    vevent->sigrdataset != NULL)
4168				(void)dns_db_addrdataset(fctx->cache, node,
4169							 NULL, now,
4170							 vevent->sigrdataset,
4171							 0, NULL);
4172			if (result == ISC_R_SUCCESS)
4173				dns_db_detachnode(fctx->cache, &node);
4174		}
4175		result = fctx->vresult;
4176		add_bad(fctx, addrinfo, result, badns_validation);
4177		isc_event_free(&event);
4178		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4179		INSIST(fctx->validator == NULL);
4180		fctx->validator = ISC_LIST_HEAD(fctx->validators);
4181		if (fctx->validator != NULL)
4182			dns_validator_send(fctx->validator);
4183		else if (sentresponse)
4184			fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4185		else if (result == DNS_R_BROKENCHAIN) {
4186			isc_result_t tresult;
4187			isc_time_t expire;
4188			isc_interval_t i;
4189
4190			isc_interval_set(&i, DNS_BADCACHE_TTL(fctx), 0);
4191			tresult = isc_time_nowplusinterval(&expire, &i);
4192			if (negative &&
4193			    (fctx->type == dns_rdatatype_dnskey ||
4194			     fctx->type == dns_rdatatype_dlv ||
4195			     fctx->type == dns_rdatatype_ds) &&
4196			     tresult == ISC_R_SUCCESS)
4197				dns_resolver_addbadcache(res, &fctx->name,
4198							 fctx->type, &expire);
4199			fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4200		} else
4201			fctx_try(fctx, ISC_TRUE, ISC_TRUE); /* Locks bucket. */
4202		return;
4203	}
4204
4205
4206	if (negative) {
4207		dns_rdatatype_t covers;
4208		FCTXTRACE("nonexistence validation OK");
4209
4210		inc_stats(res, dns_resstatscounter_valnegsuccess);
4211
4212		if (fctx->rmessage->rcode == dns_rcode_nxdomain)
4213			covers = dns_rdatatype_any;
4214		else
4215			covers = fctx->type;
4216
4217		result = dns_db_findnode(fctx->cache, vevent->name, ISC_TRUE,
4218					 &node);
4219		if (result != ISC_R_SUCCESS)
4220			goto noanswer_response;
4221
4222		/*
4223		 * If we are asking for a SOA record set the cache time
4224		 * to zero to facilitate locating the containing zone of
4225		 * a arbitrary zone.
4226		 */
4227		ttl = res->view->maxncachettl;
4228		if (fctx->type == dns_rdatatype_soa &&
4229		    covers == dns_rdatatype_any && res->zero_no_soa_ttl)
4230			ttl = 0;
4231
4232		result = ncache_adderesult(fctx->rmessage, fctx->cache, node,
4233					   covers, now, ttl, vevent->optout,
4234					   ardataset, &eresult);
4235		if (result != ISC_R_SUCCESS)
4236			goto noanswer_response;
4237		goto answer_response;
4238	} else
4239		inc_stats(res, dns_resstatscounter_valsuccess);
4240
4241	FCTXTRACE("validation OK");
4242
4243	if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL) {
4244
4245		result = dns_rdataset_addnoqname(vevent->rdataset,
4246				   vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF]);
4247		RUNTIME_CHECK(result == ISC_R_SUCCESS);
4248		INSIST(vevent->sigrdataset != NULL);
4249		vevent->sigrdataset->ttl = vevent->rdataset->ttl;
4250		if (vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER] != NULL) {
4251			result = dns_rdataset_addclosest(vevent->rdataset,
4252				 vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER]);
4253			RUNTIME_CHECK(result == ISC_R_SUCCESS);
4254		}
4255	}
4256
4257	/*
4258	 * The data was already cached as pending data.
4259	 * Re-cache it as secure and bind the cached
4260	 * rdatasets to the first event on the fetch
4261	 * event list.
4262	 */
4263	result = dns_db_findnode(fctx->cache, vevent->name, ISC_TRUE, &node);
4264	if (result != ISC_R_SUCCESS)
4265		goto noanswer_response;
4266
4267	result = dns_db_addrdataset(fctx->cache, node, NULL, now,
4268				    vevent->rdataset, 0, ardataset);
4269	if (result != ISC_R_SUCCESS &&
4270	    result != DNS_R_UNCHANGED)
4271		goto noanswer_response;
4272	if (ardataset != NULL && NEGATIVE(ardataset)) {
4273		if (NXDOMAIN(ardataset))
4274			eresult = DNS_R_NCACHENXDOMAIN;
4275		else
4276			eresult = DNS_R_NCACHENXRRSET;
4277	} else if (vevent->sigrdataset != NULL) {
4278		result = dns_db_addrdataset(fctx->cache, node, NULL, now,
4279					    vevent->sigrdataset, 0,
4280					    asigrdataset);
4281		if (result != ISC_R_SUCCESS &&
4282		    result != DNS_R_UNCHANGED)
4283			goto noanswer_response;
4284	}
4285
4286	if (sentresponse) {
4287		isc_boolean_t bucket_empty = ISC_FALSE;
4288		/*
4289		 * If we only deferred the destroy because we wanted to cache
4290		 * the data, destroy now.
4291		 */
4292		dns_db_detachnode(fctx->cache, &node);
4293		if (SHUTTINGDOWN(fctx))
4294			bucket_empty = maybe_destroy(fctx, ISC_TRUE);
4295		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4296		if (bucket_empty)
4297			empty_bucket(res);
4298		goto cleanup_event;
4299	}
4300
4301	if (!ISC_LIST_EMPTY(fctx->validators)) {
4302		INSIST(!negative);
4303		INSIST(fctx->type == dns_rdatatype_any ||
4304		       fctx->type == dns_rdatatype_rrsig ||
4305		       fctx->type == dns_rdatatype_sig);
4306		/*
4307		 * Don't send a response yet - we have
4308		 * more rdatasets that still need to
4309		 * be validated.
4310		 */
4311		dns_db_detachnode(fctx->cache, &node);
4312		UNLOCK(&res->buckets[fctx->bucketnum].lock);
4313		dns_validator_send(ISC_LIST_HEAD(fctx->validators));
4314		goto cleanup_event;
4315	}
4316
4317 answer_response:
4318	/*
4319	 * Cache any NS/NSEC records that happened to be validated.
4320	 */
4321	result = dns_message_firstname(fctx->rmessage, DNS_SECTION_AUTHORITY);
4322	while (result == ISC_R_SUCCESS) {
4323		name = NULL;
4324		dns_message_currentname(fctx->rmessage, DNS_SECTION_AUTHORITY,
4325					&name);
4326		for (rdataset = ISC_LIST_HEAD(name->list);
4327		     rdataset != NULL;
4328		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
4329			if ((rdataset->type != dns_rdatatype_ns &&
4330			     rdataset->type != dns_rdatatype_nsec) ||
4331			    rdataset->trust != dns_trust_secure)
4332				continue;
4333			for (sigrdataset = ISC_LIST_HEAD(name->list);
4334			     sigrdataset != NULL;
4335			     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
4336				if (sigrdataset->type != dns_rdatatype_rrsig ||
4337				    sigrdataset->covers != rdataset->type)
4338					continue;
4339				break;
4340			}
4341			if (sigrdataset == NULL ||
4342			    sigrdataset->trust != dns_trust_secure)
4343				continue;
4344			result = dns_db_findnode(fctx->cache, name, ISC_TRUE,
4345						 &nsnode);
4346			if (result != ISC_R_SUCCESS)
4347				continue;
4348
4349			result = dns_db_addrdataset(fctx->cache, nsnode, NULL,
4350						    now, rdataset, 0, NULL);
4351			if (result == ISC_R_SUCCESS)
4352				result = dns_db_addrdataset(fctx->cache, nsnode,
4353							    NULL, now,
4354							    sigrdataset, 0,
4355							    NULL);
4356			dns_db_detachnode(fctx->cache, &nsnode);
4357			if (result != ISC_R_SUCCESS)
4358				continue;
4359		}
4360		result = dns_message_nextname(fctx->rmessage,
4361					      DNS_SECTION_AUTHORITY);
4362	}
4363
4364	result = ISC_R_SUCCESS;
4365
4366	/*
4367	 * Respond with an answer, positive or negative,
4368	 * as opposed to an error.  'node' must be non-NULL.
4369	 */
4370
4371	fctx->attributes |= FCTX_ATTR_HAVEANSWER;
4372
4373	if (hevent != NULL) {
4374		hevent->result = eresult;
4375		RUNTIME_CHECK(dns_name_copy(vevent->name,
4376			      dns_fixedname_name(&hevent->foundname), NULL)
4377			      == ISC_R_SUCCESS);
4378		dns_db_attach(fctx->cache, &hevent->db);
4379		dns_db_transfernode(fctx->cache, &node, &hevent->node);
4380		clone_results(fctx);
4381	}
4382
4383 noanswer_response:
4384	if (node != NULL)
4385		dns_db_detachnode(fctx->cache, &node);
4386
4387	UNLOCK(&res->buckets[fctx->bucketnum].lock);
4388	fctx_done(fctx, result, __LINE__); /* Locks bucket. */
4389
4390 cleanup_event:
4391	INSIST(node == NULL);
4392	isc_event_free(&event);
4393}
4394
/*
 * Cache the cacheable rdatasets attached to 'name' (a name taken from the
 * response message) into fctx->cache, and start DNSSEC validation of
 * answer rdatasets when validation is needed.
 *
 * 'addrinfo' is only passed through to valcreate(); 'now' is the time
 * handed to dns_db_addrdataset().
 *
 * When 'name' carries DNS_NAMEATTR_ANSWER and no validation is needed,
 * FCTX_ATTR_HAVEANSWER is set on success and, if fctx->events is not
 * empty, the first fetch event is filled in (foundname, db, node, result)
 * before clone_results() is called.
 *
 * Returns the result of the last operation performed: ISC_R_SUCCESS,
 * DNS_R_BADNAME when check-names failure mode rejects an answer rdataset,
 * or an error from dns_view_issecuredomain(), dns_name_copy(),
 * dns_db_findnode(), dns_db_addrdataset() or valcreate().
 */
static inline isc_result_t
cache_name(fetchctx_t *fctx, dns_name_t *name, dns_adbaddrinfo_t *addrinfo,
	   isc_stdtime_t now)
{
	dns_rdataset_t *rdataset, *sigrdataset;
	dns_rdataset_t *addedrdataset, *ardataset, *asigrdataset;
	dns_rdataset_t *valrdataset = NULL, *valsigrdataset = NULL;
	dns_dbnode_t *node, **anodep;
	dns_db_t **adbp;
	dns_name_t *aname;
	dns_resolver_t *res;
	isc_boolean_t need_validation, secure_domain, have_answer;
	isc_result_t result, eresult;
	dns_fetchevent_t *event;
	unsigned int options;
	isc_task_t *task;
	isc_boolean_t fail;
	unsigned int valoptions = 0;

	/*
	 * The appropriate bucket lock must be held.
	 */

	res = fctx->res;
	need_validation = ISC_FALSE;
	POST(need_validation);
	secure_domain = ISC_FALSE;
	have_answer = ISC_FALSE;
	eresult = ISC_R_SUCCESS;
	task = res->buckets[fctx->bucketnum].task;

	/*
	 * Is DNSSEC validation required for this name?
	 */
	if (res->view->enablevalidation) {
		result = dns_view_issecuredomain(res->view, name,
						 &secure_domain);
		if (result != ISC_R_SUCCESS)
			return (result);

		/*
		 * Not reported as secure, but view->dlv is set: treat the
		 * name as secure and pass DNS_VALIDATOR_DLV to the validator.
		 */
		if (!secure_domain && res->view->dlv != NULL) {
			valoptions = DNS_VALIDATOR_DLV;
			secure_domain = ISC_TRUE;
		}
	}

	/*
	 * DNS_FETCHOPT_NOVALIDATE suppresses validation, but 'secure_domain'
	 * is left untouched and still selects the "pending" caching path
	 * in the loop below.
	 */
	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0)
		need_validation = ISC_FALSE;
	else
		need_validation = secure_domain;

	/*
	 * The a* variables point into the first fetch event and are only
	 * set when this name is the answer and can be returned without
	 * validation.
	 */
	adbp = NULL;
	aname = NULL;
	anodep = NULL;
	ardataset = NULL;
	asigrdataset = NULL;
	event = NULL;
	if ((name->attributes & DNS_NAMEATTR_ANSWER) != 0 &&
	    !need_validation) {
		have_answer = ISC_TRUE;
		event = ISC_LIST_HEAD(fctx->events);
		if (event != NULL) {
			adbp = &event->db;
			aname = dns_fixedname_name(&event->foundname);
			result = dns_name_copy(name, aname, NULL);
			if (result != ISC_R_SUCCESS)
				return (result);
			anodep = &event->node;
			/*
			 * If this is an ANY, SIG or RRSIG query, we're not
			 * going to return any rdatasets, unless we encountered
			 * a CNAME or DNAME as "the answer".  In this case,
			 * we're going to return DNS_R_CNAME or DNS_R_DNAME
			 * and we must set up the rdatasets.
			 */
			if ((fctx->type != dns_rdatatype_any &&
			     fctx->type != dns_rdatatype_rrsig &&
			     fctx->type != dns_rdatatype_sig) ||
			    (name->attributes & DNS_NAMEATTR_CHAINING) != 0) {
				ardataset = event->rdataset;
				asigrdataset = event->sigrdataset;
			}
		}
	}

	/*
	 * Find or create the cache node.
	 */
	node = NULL;
	result = dns_db_findnode(fctx->cache, name, ISC_TRUE, &node);
	if (result != ISC_R_SUCCESS)
		return (result);

	/*
	 * Cache or validate each cacheable rdataset.
	 */
	fail = ISC_TF((fctx->res->options & DNS_RESOLVER_CHECKNAMESFAIL) != 0);
	for (rdataset = ISC_LIST_HEAD(name->list);
	     rdataset != NULL;
	     rdataset = ISC_LIST_NEXT(rdataset, link)) {
		if (!CACHE(rdataset))
			continue;
		if (CHECKNAMES(rdataset)) {
			char namebuf[DNS_NAME_FORMATSIZE];
			char typebuf[DNS_RDATATYPE_FORMATSIZE];
			/*
			 * NOTE(review): classbuf holds a class name but is
			 * sized with the rdatatype constant; confirm that
			 * this is large enough for dns_rdataclass_format().
			 */
			char classbuf[DNS_RDATATYPE_FORMATSIZE];

			dns_name_format(name, namebuf, sizeof(namebuf));
			dns_rdatatype_format(rdataset->type, typebuf,
					     sizeof(typebuf));
			dns_rdataclass_format(rdataset->rdclass, classbuf,
					      sizeof(classbuf));
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
				      "check-names %s %s/%s/%s",
				      fail ? "failure" : "warning",
				      namebuf, typebuf, classbuf);
			/*
			 * In failure mode an offending answer rdataset aborts
			 * the whole name; any other offending rdataset is
			 * just not cached.
			 */
			if (fail) {
				if (ANSWER(rdataset)) {
					dns_db_detachnode(fctx->cache, &node);
					return (DNS_R_BADNAME);
				}
				continue;
			}
		}

		/*
		 * Enforce the configured maximum cache TTL.
		 */
		if (rdataset->ttl > res->view->maxcachettl)
			rdataset->ttl = res->view->maxcachettl;

		/*
		 * If this RRset is in a secure domain, is in bailiwick,
		 * and is not glue, attempt DNSSEC validation.	(We do not
		 * attempt to validate glue or out-of-bailiwick data--even
		 * though there might be some performance benefit to doing
		 * so--because it makes it simpler and safer to ensure that
		 * records from a secure domain are only cached if validated
		 * within the context of a query to the domain that owns
		 * them.)
		 */
		if (secure_domain && rdataset->trust != dns_trust_glue &&
		    !EXTERNAL(rdataset)) {
			dns_trust_t trust;

			/*
			 * RRSIGs are validated as part of validating the
			 * type they cover.
			 */
			if (rdataset->type == dns_rdatatype_rrsig)
				continue;
			/*
			 * Find the SIG for this rdataset, if we have it.
			 */
			for (sigrdataset = ISC_LIST_HEAD(name->list);
			     sigrdataset != NULL;
			     sigrdataset = ISC_LIST_NEXT(sigrdataset, link)) {
				if (sigrdataset->type == dns_rdatatype_rrsig &&
				    sigrdataset->covers == rdataset->type)
					break;
			}
			if (sigrdataset == NULL) {
				if (!ANSWER(rdataset) && need_validation) {
					/*
					 * Ignore non-answer rdatasets that
					 * are missing signatures.
					 */
					continue;
				}
			}

			/*
			 * Normalize the rdataset and sigrdataset TTLs.
			 */
			if (sigrdataset != NULL) {
				rdataset->ttl = ISC_MIN(rdataset->ttl,
							sigrdataset->ttl);
				sigrdataset->ttl = rdataset->ttl;
			}

			/*
			 * Cache this rdataset/sigrdataset pair as
			 * pending data.  Track whether it was additional
			 * or not.
			 */
			if (rdataset->trust == dns_trust_additional)
				trust = dns_trust_pending_additional;
			else
				trust = dns_trust_pending_answer;

			rdataset->trust = trust;
			if (sigrdataset != NULL)
				sigrdataset->trust = trust;
			/*
			 * Answer rdatasets that need validation are not
			 * added to the cache here; they are handed to
			 * valcreate() further down.
			 */
			if (!need_validation || !ANSWER(rdataset)) {
				addedrdataset = ardataset;
				result = dns_db_addrdataset(fctx->cache, node,
							    NULL, now, rdataset,
							    0, addedrdataset);
				if (result == DNS_R_UNCHANGED) {
					result = ISC_R_SUCCESS;
					if (!need_validation &&
					    ardataset != NULL &&
					    NEGATIVE(ardataset)) {
						/*
						 * The answer in the cache is
						 * better than the answer we
						 * found, and is a negative
						 * cache entry, so we must set
						 * eresult appropriately.
						 */
						if (NXDOMAIN(ardataset))
							eresult =
							   DNS_R_NCACHENXDOMAIN;
						else
							eresult =
							   DNS_R_NCACHENXRRSET;
						/*
						 * We have a negative response
						 * from the cache so don't
						 * attempt to add the RRSIG
						 * rrset.
						 */
						continue;
					}
				}
				if (result != ISC_R_SUCCESS)
					break;
				if (sigrdataset != NULL) {
					addedrdataset = asigrdataset;
					result = dns_db_addrdataset(fctx->cache,
								node, NULL, now,
								sigrdataset, 0,
								addedrdataset);
					if (result == DNS_R_UNCHANGED)
						result = ISC_R_SUCCESS;
					if (result != ISC_R_SUCCESS)
						break;
				} else if (!ANSWER(rdataset))
					continue;
			}

			if (ANSWER(rdataset) && need_validation) {
				if (fctx->type != dns_rdatatype_any &&
				    fctx->type != dns_rdatatype_rrsig &&
				    fctx->type != dns_rdatatype_sig) {
					/*
					 * This is The Answer.  We will
					 * validate it, but first we cache
					 * the rest of the response - it may
					 * contain useful keys.
					 */
					INSIST(valrdataset == NULL &&
					       valsigrdataset == NULL);
					valrdataset = rdataset;
					valsigrdataset = sigrdataset;
				} else {
					/*
					 * This is one of (potentially)
					 * multiple answers to an ANY
					 * or SIG query.  To keep things
					 * simple, we just start the
					 * validator right away rather
					 * than caching first and
					 * having to remember which
					 * rdatasets needed validation.
					 *
					 * NOTE(review): 'result' is not
					 * checked before the loop continues,
					 * so a later iteration may overwrite
					 * a failure - confirm intended.
					 */
					result = valcreate(fctx, addrinfo,
							   name, rdataset->type,
							   rdataset,
							   sigrdataset,
							   valoptions, task);
					/*
					 * Defer any further validations.
					 * This prevents multiple validators
					 * from manipulating fctx->rmessage
					 * simultaneously.
					 */
					valoptions |= DNS_VALIDATOR_DEFER;
				}
			} else if (CHAINING(rdataset)) {
				if (rdataset->type == dns_rdatatype_cname)
					eresult = DNS_R_CNAME;
				else {
					INSIST(rdataset->type ==
					       dns_rdatatype_dname);
					eresult = DNS_R_DNAME;
				}
			}
		} else if (!EXTERNAL(rdataset)) {
			/*
			 * It's OK to cache this rdataset now.
			 */
			if (ANSWER(rdataset))
				addedrdataset = ardataset;
			else if (ANSWERSIG(rdataset))
				addedrdataset = asigrdataset;
			else
				addedrdataset = NULL;
			if (CHAINING(rdataset)) {
				if (rdataset->type == dns_rdatatype_cname)
					eresult = DNS_R_CNAME;
				else {
					INSIST(rdataset->type ==
					       dns_rdatatype_dname);
					eresult = DNS_R_DNAME;
				}
			}
			if (rdataset->trust == dns_trust_glue &&
			    (rdataset->type == dns_rdatatype_ns ||
			     (rdataset->type == dns_rdatatype_rrsig &&
			      rdataset->covers == dns_rdatatype_ns))) {
				/*
				 * If the trust level is 'dns_trust_glue'
				 * then we are adding data from a referral
				 * we got while executing the search algorithm.
				 * New referral data always takes precedence
				 * over the existing cache contents.
				 */
				options = DNS_DBADD_FORCE;
			} else
				options = 0;
			/*
			 * Now we can add the rdataset.
			 */
			result = dns_db_addrdataset(fctx->cache,
						    node, NULL, now,
						    rdataset,
						    options,
						    addedrdataset);
			if (result == DNS_R_UNCHANGED) {
				if (ANSWER(rdataset) &&
				    ardataset != NULL &&
				    NEGATIVE(ardataset)) {
					/*
					 * The answer in the cache is better
					 * than the answer we found, and is
					 * a negative cache entry, so we
					 * must set eresult appropriately.
					 */
					if (NXDOMAIN(ardataset))
						eresult = DNS_R_NCACHENXDOMAIN;
					else
						eresult = DNS_R_NCACHENXRRSET;
				}
				result = ISC_R_SUCCESS;
			} else if (result != ISC_R_SUCCESS)
				break;
		}
	}

	/*
	 * Start validating "The Answer" remembered in the loop above.
	 *
	 * NOTE(review): this assignment replaces any error that made the
	 * loop break early - confirm that masking it is intended.
	 */
	if (valrdataset != NULL)
		result = valcreate(fctx, addrinfo, name, fctx->type,
				   valrdataset, valsigrdataset, valoptions,
				   task);

	if (result == ISC_R_SUCCESS && have_answer) {
		fctx->attributes |= FCTX_ATTR_HAVEANSWER;
		if (event != NULL) {
			/*
			 * Negative results must be indicated in event->result.
			 */
			if (dns_rdataset_isassociated(event->rdataset) &&
			    NEGATIVE(event->rdataset)) {
				INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
				       eresult == DNS_R_NCACHENXRRSET);
			}
			event->result = eresult;
			dns_db_attach(fctx->cache, adbp);
			/* Hands 'node' over to the event (via anodep). */
			dns_db_transfernode(fctx->cache, &node, anodep);
			clone_results(fctx);
		}
	}

	if (node != NULL)
		dns_db_detachnode(fctx->cache, &node);

	return (result);
}
4774
4775static inline isc_result_t
4776cache_message(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_stdtime_t now)
4777{
4778	isc_result_t result;
4779	dns_section_t section;
4780	dns_name_t *name;
4781
4782	FCTXTRACE("cache_message");
4783
4784	fctx->attributes &= ~FCTX_ATTR_WANTCACHE;
4785
4786	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4787
4788	for (section = DNS_SECTION_ANSWER;
4789	     section <= DNS_SECTION_ADDITIONAL;
4790	     section++) {
4791		result = dns_message_firstname(fctx->rmessage, section);
4792		while (result == ISC_R_SUCCESS) {
4793			name = NULL;
4794			dns_message_currentname(fctx->rmessage, section,
4795						&name);
4796			if ((name->attributes & DNS_NAMEATTR_CACHE) != 0) {
4797				result = cache_name(fctx, name, addrinfo, now);
4798				if (result != ISC_R_SUCCESS)
4799					break;
4800			}
4801			result = dns_message_nextname(fctx->rmessage, section);
4802		}
4803		if (result != ISC_R_NOMORE)
4804			break;
4805	}
4806	if (result == ISC_R_NOMORE)
4807		result = ISC_R_SUCCESS;
4808
4809	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4810
4811	return (result);
4812}
4813
4814/*
4815 * Do what dns_ncache_addoptout() does, and then compute an appropriate eresult.
4816 */
4817static isc_result_t
4818ncache_adderesult(dns_message_t *message, dns_db_t *cache, dns_dbnode_t *node,
4819		  dns_rdatatype_t covers, isc_stdtime_t now, dns_ttl_t maxttl,
4820		  isc_boolean_t optout, dns_rdataset_t *ardataset,
4821		  isc_result_t *eresultp)
4822{
4823	isc_result_t result;
4824	dns_rdataset_t rdataset;
4825
4826	if (ardataset == NULL) {
4827		dns_rdataset_init(&rdataset);
4828		ardataset = &rdataset;
4829	}
4830	result = dns_ncache_addoptout(message, cache, node, covers, now,
4831				     maxttl, optout, ardataset);
4832	if (result == DNS_R_UNCHANGED || result == ISC_R_SUCCESS) {
4833		/*
4834		 * If the cache now contains a negative entry and we
4835		 * care about whether it is DNS_R_NCACHENXDOMAIN or
4836		 * DNS_R_NCACHENXRRSET then extract it.
4837		 */
4838		if (NEGATIVE(ardataset)) {
4839			/*
4840			 * The cache data is a negative cache entry.
4841			 */
4842			if (NXDOMAIN(ardataset))
4843				*eresultp = DNS_R_NCACHENXDOMAIN;
4844			else
4845				*eresultp = DNS_R_NCACHENXRRSET;
4846		} else {
4847			/*
4848			 * Either we don't care about the nature of the
4849			 * cache rdataset (because no fetch is interested
4850			 * in the outcome), or the cache rdataset is not
4851			 * a negative cache entry.  Whichever case it is,
4852			 * we can return success.
4853			 *
4854			 * XXXRTH  There's a CNAME/DNAME problem here.
4855			 */
4856			*eresultp = ISC_R_SUCCESS;
4857		}
4858		result = ISC_R_SUCCESS;
4859	}
4860	if (ardataset == &rdataset && dns_rdataset_isassociated(ardataset))
4861		dns_rdataset_disassociate(ardataset);
4862
4863	return (result);
4864}
4865
4866static inline isc_result_t
4867ncache_message(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
4868	       dns_rdatatype_t covers, isc_stdtime_t now)
4869{
4870	isc_result_t result, eresult;
4871	dns_name_t *name;
4872	dns_resolver_t *res;
4873	dns_db_t **adbp;
4874	dns_dbnode_t *node, **anodep;
4875	dns_rdataset_t *ardataset;
4876	isc_boolean_t need_validation, secure_domain;
4877	dns_name_t *aname;
4878	dns_fetchevent_t *event;
4879	isc_uint32_t ttl;
4880	unsigned int valoptions = 0;
4881
4882	FCTXTRACE("ncache_message");
4883
4884	fctx->attributes &= ~FCTX_ATTR_WANTNCACHE;
4885
4886	res = fctx->res;
4887	need_validation = ISC_FALSE;
4888	POST(need_validation);
4889	secure_domain = ISC_FALSE;
4890	eresult = ISC_R_SUCCESS;
4891	name = &fctx->name;
4892	node = NULL;
4893
4894	/*
4895	 * XXXMPA remove when we follow cnames and adjust the setting
4896	 * of FCTX_ATTR_WANTNCACHE in noanswer_response().
4897	 */
4898	INSIST(fctx->rmessage->counts[DNS_SECTION_ANSWER] == 0);
4899
4900	/*
4901	 * Is DNSSEC validation required for this name?
4902	 */
4903	if (fctx->res->view->enablevalidation) {
4904		result = dns_view_issecuredomain(res->view, name,
4905						 &secure_domain);
4906		if (result != ISC_R_SUCCESS)
4907			return (result);
4908
4909		if (!secure_domain && res->view->dlv != NULL) {
4910			valoptions = DNS_VALIDATOR_DLV;
4911			secure_domain = ISC_TRUE;
4912		}
4913	}
4914
4915	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0)
4916		need_validation = ISC_FALSE;
4917	else
4918		need_validation = secure_domain;
4919
4920	if (secure_domain) {
4921		/*
4922		 * Mark all rdatasets as pending.
4923		 */
4924		dns_rdataset_t *trdataset;
4925		dns_name_t *tname;
4926
4927		result = dns_message_firstname(fctx->rmessage,
4928					       DNS_SECTION_AUTHORITY);
4929		while (result == ISC_R_SUCCESS) {
4930			tname = NULL;
4931			dns_message_currentname(fctx->rmessage,
4932						DNS_SECTION_AUTHORITY,
4933						&tname);
4934			for (trdataset = ISC_LIST_HEAD(tname->list);
4935			     trdataset != NULL;
4936			     trdataset = ISC_LIST_NEXT(trdataset, link))
4937				trdataset->trust = dns_trust_pending_answer;
4938			result = dns_message_nextname(fctx->rmessage,
4939						      DNS_SECTION_AUTHORITY);
4940		}
4941		if (result != ISC_R_NOMORE)
4942			return (result);
4943
4944	}
4945
4946	if (need_validation) {
4947		/*
4948		 * Do negative response validation.
4949		 */
4950		result = valcreate(fctx, addrinfo, name, fctx->type,
4951				   NULL, NULL, valoptions,
4952				   res->buckets[fctx->bucketnum].task);
4953		/*
4954		 * If validation is necessary, return now.  Otherwise continue
4955		 * to process the message, letting the validation complete
4956		 * in its own good time.
4957		 */
4958		return (result);
4959	}
4960
4961	LOCK(&res->buckets[fctx->bucketnum].lock);
4962
4963	adbp = NULL;
4964	aname = NULL;
4965	anodep = NULL;
4966	ardataset = NULL;
4967	if (!HAVE_ANSWER(fctx)) {
4968		event = ISC_LIST_HEAD(fctx->events);
4969		if (event != NULL) {
4970			adbp = &event->db;
4971			aname = dns_fixedname_name(&event->foundname);
4972			result = dns_name_copy(name, aname, NULL);
4973			if (result != ISC_R_SUCCESS)
4974				goto unlock;
4975			anodep = &event->node;
4976			ardataset = event->rdataset;
4977		}
4978	} else
4979		event = NULL;
4980
4981	result = dns_db_findnode(fctx->cache, name, ISC_TRUE, &node);
4982	if (result != ISC_R_SUCCESS)
4983		goto unlock;
4984
4985	/*
4986	 * If we are asking for a SOA record set the cache time
4987	 * to zero to facilitate locating the containing zone of
4988	 * a arbitrary zone.
4989	 */
4990	ttl = fctx->res->view->maxncachettl;
4991	if (fctx->type == dns_rdatatype_soa &&
4992	    covers == dns_rdatatype_any &&
4993	    fctx->res->zero_no_soa_ttl)
4994		ttl = 0;
4995
4996	result = ncache_adderesult(fctx->rmessage, fctx->cache, node,
4997				   covers, now, ttl, ISC_FALSE,
4998				   ardataset, &eresult);
4999	if (result != ISC_R_SUCCESS)
5000		goto unlock;
5001
5002	if (!HAVE_ANSWER(fctx)) {
5003		fctx->attributes |= FCTX_ATTR_HAVEANSWER;
5004		if (event != NULL) {
5005			event->result = eresult;
5006			dns_db_attach(fctx->cache, adbp);
5007			dns_db_transfernode(fctx->cache, &node, anodep);
5008			clone_results(fctx);
5009		}
5010	}
5011
5012 unlock:
5013	UNLOCK(&res->buckets[fctx->bucketnum].lock);
5014
5015	if (node != NULL)
5016		dns_db_detachnode(fctx->cache, &node);
5017
5018	return (result);
5019}
5020
5021static inline void
5022mark_related(dns_name_t *name, dns_rdataset_t *rdataset,
5023	     isc_boolean_t external, isc_boolean_t gluing)
5024{
5025	name->attributes |= DNS_NAMEATTR_CACHE;
5026	if (gluing) {
5027		rdataset->trust = dns_trust_glue;
5028		/*
5029		 * Glue with 0 TTL causes problems.  We force the TTL to
5030		 * 1 second to prevent this.
5031		 */
5032		if (rdataset->ttl == 0)
5033			rdataset->ttl = 1;
5034	} else
5035		rdataset->trust = dns_trust_additional;
5036	/*
5037	 * Avoid infinite loops by only marking new rdatasets.
5038	 */
5039	if (!CACHE(rdataset)) {
5040		name->attributes |= DNS_NAMEATTR_CHASE;
5041		rdataset->attributes |= DNS_RDATASETATTR_CHASE;
5042	}
5043	rdataset->attributes |= DNS_RDATASETATTR_CACHE;
5044	if (external)
5045		rdataset->attributes |= DNS_RDATASETATTR_EXTERNAL;
5046}
5047
5048static isc_result_t
5049check_section(void *arg, dns_name_t *addname, dns_rdatatype_t type,
5050	      dns_section_t section)
5051{
5052	fetchctx_t *fctx = arg;
5053	isc_result_t result;
5054	dns_name_t *name;
5055	dns_rdataset_t *rdataset;
5056	isc_boolean_t external;
5057	dns_rdatatype_t rtype;
5058	isc_boolean_t gluing;
5059
5060	REQUIRE(VALID_FCTX(fctx));
5061
5062#if CHECK_FOR_GLUE_IN_ANSWER
5063	if (section == DNS_SECTION_ANSWER && type != dns_rdatatype_a)
5064		return (ISC_R_SUCCESS);
5065#endif
5066
5067	if (GLUING(fctx))
5068		gluing = ISC_TRUE;
5069	else
5070		gluing = ISC_FALSE;
5071	name = NULL;
5072	rdataset = NULL;
5073	result = dns_message_findname(fctx->rmessage, section, addname,
5074				      dns_rdatatype_any, 0, &name, NULL);
5075	if (result == ISC_R_SUCCESS) {
5076		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
5077		if (type == dns_rdatatype_a) {
5078			for (rdataset = ISC_LIST_HEAD(name->list);
5079			     rdataset != NULL;
5080			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5081				if (rdataset->type == dns_rdatatype_rrsig)
5082					rtype = rdataset->covers;
5083				else
5084					rtype = rdataset->type;
5085				if (rtype == dns_rdatatype_a ||
5086				    rtype == dns_rdatatype_aaaa)
5087					mark_related(name, rdataset, external,
5088						     gluing);
5089			}
5090		} else {
5091			result = dns_message_findtype(name, type, 0,
5092						      &rdataset);
5093			if (result == ISC_R_SUCCESS) {
5094				mark_related(name, rdataset, external, gluing);
5095				/*
5096				 * Do we have its SIG too?
5097				 */
5098				rdataset = NULL;
5099				result = dns_message_findtype(name,
5100						      dns_rdatatype_rrsig,
5101						      type, &rdataset);
5102				if (result == ISC_R_SUCCESS)
5103					mark_related(name, rdataset, external,
5104						     gluing);
5105			}
5106		}
5107	}
5108
5109	return (ISC_R_SUCCESS);
5110}
5111
/*
 * Callback wrapper around check_section() that always searches the
 * additional section of the response for 'addname'/'type'.  It is passed
 * to dns_rdataset_additionaldata() by chase_additional(), with the fetch
 * context as 'arg'.
 */
static isc_result_t
check_related(void *arg, dns_name_t *addname, dns_rdatatype_t type) {
	return (check_section(arg, addname, type, DNS_SECTION_ADDITIONAL));
}
5116
/*
 * CHECK_FOR_GLUE_IN_ANSWER defaults to 0 unless it was already defined;
 * check_answer() is only compiled when it is non-zero.
 */
#ifndef CHECK_FOR_GLUE_IN_ANSWER
#define CHECK_FOR_GLUE_IN_ANSWER 0
#endif
#if CHECK_FOR_GLUE_IN_ANSWER
/*
 * Same as check_related(), but searches the answer section of the
 * response instead of the additional section.
 */
static isc_result_t
check_answer(void *arg, dns_name_t *addname, dns_rdatatype_t type) {
	return (check_section(arg, addname, type, DNS_SECTION_ANSWER));
}
#endif
5126
5127static void
5128chase_additional(fetchctx_t *fctx) {
5129	isc_boolean_t rescan;
5130	dns_section_t section = DNS_SECTION_ADDITIONAL;
5131	isc_result_t result;
5132
5133 again:
5134	rescan = ISC_FALSE;
5135
5136	for (result = dns_message_firstname(fctx->rmessage, section);
5137	     result == ISC_R_SUCCESS;
5138	     result = dns_message_nextname(fctx->rmessage, section)) {
5139		dns_name_t *name = NULL;
5140		dns_rdataset_t *rdataset;
5141		dns_message_currentname(fctx->rmessage, DNS_SECTION_ADDITIONAL,
5142					&name);
5143		if ((name->attributes & DNS_NAMEATTR_CHASE) == 0)
5144			continue;
5145		name->attributes &= ~DNS_NAMEATTR_CHASE;
5146		for (rdataset = ISC_LIST_HEAD(name->list);
5147		     rdataset != NULL;
5148		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5149			if (CHASE(rdataset)) {
5150				rdataset->attributes &= ~DNS_RDATASETATTR_CHASE;
5151				(void)dns_rdataset_additionaldata(rdataset,
5152								  check_related,
5153								  fctx);
5154				rescan = ISC_TRUE;
5155			}
5156		}
5157	}
5158	if (rescan)
5159		goto again;
5160}
5161
5162static inline isc_result_t
5163cname_target(dns_rdataset_t *rdataset, dns_name_t *tname) {
5164	isc_result_t result;
5165	dns_rdata_t rdata = DNS_RDATA_INIT;
5166	dns_rdata_cname_t cname;
5167
5168	result = dns_rdataset_first(rdataset);
5169	if (result != ISC_R_SUCCESS)
5170		return (result);
5171	dns_rdataset_current(rdataset, &rdata);
5172	result = dns_rdata_tostruct(&rdata, &cname, NULL);
5173	if (result != ISC_R_SUCCESS)
5174		return (result);
5175	dns_name_init(tname, NULL);
5176	dns_name_clone(&cname.cname, tname);
5177	dns_rdata_freestruct(&cname);
5178
5179	return (ISC_R_SUCCESS);
5180}
5181
5182static inline isc_result_t
5183dname_target(fetchctx_t *fctx, dns_rdataset_t *rdataset, dns_name_t *qname,
5184	     dns_name_t *oname, dns_fixedname_t *fixeddname)
5185{
5186	isc_result_t result;
5187	dns_rdata_t rdata = DNS_RDATA_INIT;
5188	unsigned int nlabels;
5189	int order;
5190	dns_namereln_t namereln;
5191	dns_rdata_dname_t dname;
5192	dns_fixedname_t prefix;
5193
5194	/*
5195	 * Get the target name of the DNAME.
5196	 */
5197	result = dns_rdataset_first(rdataset);
5198	if (result != ISC_R_SUCCESS)
5199		return (result);
5200	dns_rdataset_current(rdataset, &rdata);
5201	result = dns_rdata_tostruct(&rdata, &dname, NULL);
5202	if (result != ISC_R_SUCCESS)
5203		return (result);
5204
5205	/*
5206	 * Get the prefix of qname.
5207	 */
5208	namereln = dns_name_fullcompare(qname, oname, &order, &nlabels);
5209	if (namereln != dns_namereln_subdomain) {
5210		char qbuf[DNS_NAME_FORMATSIZE];
5211		char obuf[DNS_NAME_FORMATSIZE];
5212
5213		dns_rdata_freestruct(&dname);
5214		dns_name_format(qname, qbuf, sizeof(qbuf));
5215		dns_name_format(oname, obuf, sizeof(obuf));
5216		log_formerr(fctx, "unrelated DNAME in answer: "
5217				   "%s is not in %s", qbuf, obuf);
5218		return (DNS_R_FORMERR);
5219	}
5220	dns_fixedname_init(&prefix);
5221	dns_name_split(qname, nlabels, dns_fixedname_name(&prefix), NULL);
5222	dns_fixedname_init(fixeddname);
5223	result = dns_name_concatenate(dns_fixedname_name(&prefix),
5224				      &dname.dname,
5225				      dns_fixedname_name(fixeddname), NULL);
5226	dns_rdata_freestruct(&dname);
5227	return (result);
5228}
5229
5230static isc_boolean_t
5231is_answeraddress_allowed(dns_view_t *view, dns_name_t *name,
5232			 dns_rdataset_t *rdataset)
5233{
5234	isc_result_t result;
5235	dns_rdata_t rdata = DNS_RDATA_INIT;
5236	struct in_addr ina;
5237	struct in6_addr in6a;
5238	isc_netaddr_t netaddr;
5239	char addrbuf[ISC_NETADDR_FORMATSIZE];
5240	char namebuf[DNS_NAME_FORMATSIZE];
5241	char classbuf[64];
5242	char typebuf[64];
5243	int match;
5244
5245	/* By default, we allow any addresses. */
5246	if (view->denyansweracl == NULL)
5247		return (ISC_TRUE);
5248
5249	/*
5250	 * If the owner name matches one in the exclusion list, either exactly
5251	 * or partially, allow it.
5252	 */
5253	if (view->answeracl_exclude != NULL) {
5254		dns_rbtnode_t *node = NULL;
5255
5256		result = dns_rbt_findnode(view->answeracl_exclude, name, NULL,
5257					  &node, NULL, 0, NULL, NULL);
5258
5259		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
5260			return (ISC_TRUE);
5261	}
5262
5263	/*
5264	 * Otherwise, search the filter list for a match for each address
5265	 * record.  If a match is found, the address should be filtered,
5266	 * so should the entire answer.
5267	 */
5268	for (result = dns_rdataset_first(rdataset);
5269	     result == ISC_R_SUCCESS;
5270	     result = dns_rdataset_next(rdataset)) {
5271		dns_rdata_reset(&rdata);
5272		dns_rdataset_current(rdataset, &rdata);
5273		if (rdataset->type == dns_rdatatype_a) {
5274			INSIST(rdata.length == sizeof(ina.s_addr));
5275			memcpy(&ina.s_addr, rdata.data, sizeof(ina.s_addr));
5276			isc_netaddr_fromin(&netaddr, &ina);
5277		} else {
5278			INSIST(rdata.length == sizeof(in6a.s6_addr));
5279			memcpy(in6a.s6_addr, rdata.data, sizeof(in6a.s6_addr));
5280			isc_netaddr_fromin6(&netaddr, &in6a);
5281		}
5282
5283		result = dns_acl_match(&netaddr, NULL, view->denyansweracl,
5284				       &view->aclenv, &match, NULL);
5285
5286		if (result == ISC_R_SUCCESS && match > 0) {
5287			isc_netaddr_format(&netaddr, addrbuf, sizeof(addrbuf));
5288			dns_name_format(name, namebuf, sizeof(namebuf));
5289			dns_rdatatype_format(rdataset->type, typebuf,
5290					     sizeof(typebuf));
5291			dns_rdataclass_format(rdataset->rdclass, classbuf,
5292					      sizeof(classbuf));
5293			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5294				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5295				      "answer address %s denied for %s/%s/%s",
5296				      addrbuf, namebuf, typebuf, classbuf);
5297			return (ISC_FALSE);
5298		}
5299	}
5300
5301	return (ISC_TRUE);
5302}
5303
5304static isc_boolean_t
5305is_answertarget_allowed(dns_view_t *view, dns_name_t *name,
5306			dns_rdatatype_t type, dns_name_t *tname,
5307			dns_name_t *domain)
5308{
5309	isc_result_t result;
5310	dns_rbtnode_t *node = NULL;
5311	char qnamebuf[DNS_NAME_FORMATSIZE];
5312	char tnamebuf[DNS_NAME_FORMATSIZE];
5313	char classbuf[64];
5314	char typebuf[64];
5315
5316	/* By default, we allow any target name. */
5317	if (view->denyanswernames == NULL)
5318		return (ISC_TRUE);
5319
5320	/*
5321	 * If the owner name matches one in the exclusion list, either exactly
5322	 * or partially, allow it.
5323	 */
5324	if (view->answernames_exclude != NULL) {
5325		result = dns_rbt_findnode(view->answernames_exclude, name, NULL,
5326					  &node, NULL, 0, NULL, NULL);
5327		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
5328			return (ISC_TRUE);
5329	}
5330
5331	/*
5332	 * If the target name is a subdomain of the search domain, allow it.
5333	 */
5334	if (dns_name_issubdomain(tname, domain))
5335		return (ISC_TRUE);
5336
5337	/*
5338	 * Otherwise, apply filters.
5339	 */
5340	result = dns_rbt_findnode(view->denyanswernames, tname, NULL, &node,
5341				  NULL, 0, NULL, NULL);
5342	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
5343		dns_name_format(name, qnamebuf, sizeof(qnamebuf));
5344		dns_name_format(tname, tnamebuf, sizeof(tnamebuf));
5345		dns_rdatatype_format(type, typebuf, sizeof(typebuf));
5346		dns_rdataclass_format(view->rdclass, classbuf,
5347				      sizeof(classbuf));
5348		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5349			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5350			      "%s target %s denied for %s/%s",
5351			      typebuf, tnamebuf, qnamebuf, classbuf);
5352		return (ISC_FALSE);
5353	}
5354
5355	return (ISC_TRUE);
5356}
5357
5358static void
5359trim_ns_ttl(fetchctx_t *fctx, dns_name_t *name, dns_rdataset_t *rdataset) {
5360	char ns_namebuf[DNS_NAME_FORMATSIZE];
5361	char namebuf[DNS_NAME_FORMATSIZE];
5362	char tbuf[DNS_RDATATYPE_FORMATSIZE];
5363
5364	if (fctx->ns_ttl_ok && rdataset->ttl > fctx->ns_ttl) {
5365		dns_name_format(name, ns_namebuf, sizeof(ns_namebuf));
5366		dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
5367		dns_rdatatype_format(fctx->type, tbuf, sizeof(tbuf));
5368
5369		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5370			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
5371			      "fctx %p: trimming ttl of %s/NS for %s/%s: "
5372			      "%u -> %u", fctx, ns_namebuf, namebuf, tbuf,
5373			      rdataset->ttl, fctx->ns_ttl);
5374		rdataset->ttl = fctx->ns_ttl;
5375	}
5376}
5377
5378/*
5379 * Handle a no-answer response (NXDOMAIN, NXRRSET, or referral).
5380 * If look_in_options has LOOK_FOR_NS_IN_ANSWER then we look in the answer
5381 * section for the NS RRset if the query type is NS; if it has
5382 * LOOK_FOR_GLUE_IN_ANSWER we look for glue incorrectly returned in the answer
5383 * section for A and AAAA queries.
5384 */
5385#define LOOK_FOR_NS_IN_ANSWER 0x1
5386#define LOOK_FOR_GLUE_IN_ANSWER 0x2
5387
5388static isc_result_t
5389noanswer_response(fetchctx_t *fctx, dns_name_t *oqname,
5390		  unsigned int look_in_options)
5391{
5392	isc_result_t result;
5393	dns_message_t *message;
5394	dns_name_t *name, *qname, *ns_name, *soa_name, *ds_name;
5395	dns_rdataset_t *rdataset, *ns_rdataset;
5396	isc_boolean_t aa, negative_response;
5397	dns_rdatatype_t type;
5398	dns_section_t section;
5399
5400	FCTXTRACE("noanswer_response");
5401
5402	if ((look_in_options & LOOK_FOR_NS_IN_ANSWER) != 0) {
5403		INSIST(fctx->type == dns_rdatatype_ns);
5404		section = DNS_SECTION_ANSWER;
5405	} else
5406		section = DNS_SECTION_AUTHORITY;
5407
5408	message = fctx->rmessage;
5409
5410	/*
5411	 * Setup qname.
5412	 */
5413	if (oqname == NULL) {
5414		/*
5415		 * We have a normal, non-chained negative response or
5416		 * referral.
5417		 */
5418		if ((message->flags & DNS_MESSAGEFLAG_AA) != 0)
5419			aa = ISC_TRUE;
5420		else
5421			aa = ISC_FALSE;
5422		qname = &fctx->name;
5423	} else {
5424		/*
5425		 * We're being invoked by answer_response() after it has
5426		 * followed a CNAME/DNAME chain.
5427		 */
5428		qname = oqname;
5429		aa = ISC_FALSE;
5430		/*
5431		 * If the current qname is not a subdomain of the query
5432		 * domain, there's no point in looking at the authority
5433		 * section without doing DNSSEC validation.
5434		 *
5435		 * Until we do that validation, we'll just return success
5436		 * in this case.
5437		 */
5438		if (!dns_name_issubdomain(qname, &fctx->domain))
5439			return (ISC_R_SUCCESS);
5440	}
5441
5442	/*
5443	 * We have to figure out if this is a negative response, or a
5444	 * referral.
5445	 */
5446
5447	/*
5448	 * Sometimes we can tell if its a negative response by looking at
5449	 * the message header.
5450	 */
5451	negative_response = ISC_FALSE;
5452	if (message->rcode == dns_rcode_nxdomain ||
5453	    (message->counts[DNS_SECTION_ANSWER] == 0 &&
5454	     message->counts[DNS_SECTION_AUTHORITY] == 0))
5455		negative_response = ISC_TRUE;
5456
5457	/*
5458	 * Process the authority section.
5459	 */
5460	ns_name = NULL;
5461	ns_rdataset = NULL;
5462	soa_name = NULL;
5463	ds_name = NULL;
5464	result = dns_message_firstname(message, section);
5465	while (result == ISC_R_SUCCESS) {
5466		name = NULL;
5467		dns_message_currentname(message, section, &name);
5468		if (dns_name_issubdomain(name, &fctx->domain)) {
5469			/*
5470			 * Look for NS/SOA RRsets first.
5471			 */
5472			for (rdataset = ISC_LIST_HEAD(name->list);
5473			     rdataset != NULL;
5474			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5475				type = rdataset->type;
5476				if (type == dns_rdatatype_rrsig)
5477					type = rdataset->covers;
5478				if (((type == dns_rdatatype_ns ||
5479				      type == dns_rdatatype_soa) &&
5480				     !dns_name_issubdomain(qname, name))) {
5481					char qbuf[DNS_NAME_FORMATSIZE];
5482					char nbuf[DNS_NAME_FORMATSIZE];
5483					char tbuf[DNS_RDATATYPE_FORMATSIZE];
5484					dns_rdatatype_format(fctx->type, tbuf,
5485							     sizeof(tbuf));
5486					dns_name_format(name, nbuf,
5487							     sizeof(nbuf));
5488					dns_name_format(qname, qbuf,
5489							     sizeof(qbuf));
5490					log_formerr(fctx,
5491						    "unrelated %s %s in "
5492						    "%s authority section",
5493						    tbuf, qbuf, nbuf);
5494					return (DNS_R_FORMERR);
5495				}
5496				if (type == dns_rdatatype_ns) {
5497					/*
5498					 * NS or RRSIG NS.
5499					 *
5500					 * Only one set of NS RRs is allowed.
5501					 */
5502					if (rdataset->type ==
5503					    dns_rdatatype_ns) {
5504						if (ns_name != NULL &&
5505						    name != ns_name) {
5506							log_formerr(fctx,
5507								"multiple NS "
5508								"RRsets in "
5509								"authority "
5510								"section");
5511							return (DNS_R_FORMERR);
5512						}
5513						ns_name = name;
5514						ns_rdataset = rdataset;
5515					}
5516					name->attributes |=
5517						DNS_NAMEATTR_CACHE;
5518					rdataset->attributes |=
5519						DNS_RDATASETATTR_CACHE;
5520					rdataset->trust = dns_trust_glue;
5521				}
5522				if (type == dns_rdatatype_soa) {
5523					/*
5524					 * SOA, or RRSIG SOA.
5525					 *
5526					 * Only one SOA is allowed.
5527					 */
5528					if (rdataset->type ==
5529					    dns_rdatatype_soa) {
5530						if (soa_name != NULL &&
5531						    name != soa_name) {
5532							log_formerr(fctx,
5533								"multiple SOA "
5534								"RRs in "
5535								"authority "
5536								"section");
5537							return (DNS_R_FORMERR);
5538						}
5539						soa_name = name;
5540					}
5541					name->attributes |=
5542						DNS_NAMEATTR_NCACHE;
5543					rdataset->attributes |=
5544						DNS_RDATASETATTR_NCACHE;
5545					if (aa)
5546						rdataset->trust =
5547						    dns_trust_authauthority;
5548					else if (ISFORWARDER(fctx->addrinfo))
5549						rdataset->trust =
5550							dns_trust_answer;
5551					else
5552						rdataset->trust =
5553							dns_trust_additional;
5554				}
5555			}
5556		}
5557		result = dns_message_nextname(message, section);
5558		if (result == ISC_R_NOMORE)
5559			break;
5560		else if (result != ISC_R_SUCCESS)
5561			return (result);
5562	}
5563
5564	log_ns_ttl(fctx, "noanswer_response");
5565
5566	if (ns_rdataset != NULL && dns_name_equal(&fctx->domain, ns_name) &&
5567	    !dns_name_equal(ns_name, dns_rootname))
5568		trim_ns_ttl(fctx, ns_name, ns_rdataset);
5569
5570	/*
5571	 * A negative response has a SOA record (Type 2)
5572	 * and a optional NS RRset (Type 1) or it has neither
5573	 * a SOA or a NS RRset (Type 3, handled above) or
5574	 * rcode is NXDOMAIN (handled above) in which case
5575	 * the NS RRset is allowed (Type 4).
5576	 */
5577	if (soa_name != NULL)
5578		negative_response = ISC_TRUE;
5579
5580	result = dns_message_firstname(message, section);
5581	while (result == ISC_R_SUCCESS) {
5582		name = NULL;
5583		dns_message_currentname(message, section, &name);
5584		if (dns_name_issubdomain(name, &fctx->domain)) {
5585			for (rdataset = ISC_LIST_HEAD(name->list);
5586			     rdataset != NULL;
5587			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5588				type = rdataset->type;
5589				if (type == dns_rdatatype_rrsig)
5590					type = rdataset->covers;
5591				if (type == dns_rdatatype_nsec ||
5592				    type == dns_rdatatype_nsec3) {
5593					/*
5594					 * NSEC or RRSIG NSEC.
5595					 */
5596					if (negative_response) {
5597						name->attributes |=
5598							DNS_NAMEATTR_NCACHE;
5599						rdataset->attributes |=
5600							DNS_RDATASETATTR_NCACHE;
5601					} else if (type == dns_rdatatype_nsec) {
5602						name->attributes |=
5603							DNS_NAMEATTR_CACHE;
5604						rdataset->attributes |=
5605							DNS_RDATASETATTR_CACHE;
5606					}
5607					if (aa)
5608						rdataset->trust =
5609						    dns_trust_authauthority;
5610					else if (ISFORWARDER(fctx->addrinfo))
5611						rdataset->trust =
5612							dns_trust_answer;
5613					else
5614						rdataset->trust =
5615							dns_trust_additional;
5616					/*
5617					 * No additional data needs to be
5618					 * marked.
5619					 */
5620				} else if (type == dns_rdatatype_ds) {
5621					/*
5622					 * DS or SIG DS.
5623					 *
5624					 * These should only be here if
5625					 * this is a referral, and there
5626					 * should only be one DS RRset.
5627					 */
5628					if (ns_name == NULL) {
5629						log_formerr(fctx,
5630							    "DS with no "
5631							    "referral");
5632						return (DNS_R_FORMERR);
5633					}
5634					if (rdataset->type ==
5635					    dns_rdatatype_ds) {
5636						if (ds_name != NULL &&
5637						    name != ds_name) {
5638							log_formerr(fctx,
5639								"DS doesn't "
5640								"match "
5641								"referral "
5642								"(NS)");
5643							return (DNS_R_FORMERR);
5644						}
5645						ds_name = name;
5646					}
5647					name->attributes |=
5648						DNS_NAMEATTR_CACHE;
5649					rdataset->attributes |=
5650						DNS_RDATASETATTR_CACHE;
5651					if (aa)
5652						rdataset->trust =
5653						    dns_trust_authauthority;
5654					else if (ISFORWARDER(fctx->addrinfo))
5655						rdataset->trust =
5656							dns_trust_answer;
5657					else
5658						rdataset->trust =
5659							dns_trust_additional;
5660				}
5661			}
5662		}
5663		result = dns_message_nextname(message, section);
5664		if (result == ISC_R_NOMORE)
5665			break;
5666		else if (result != ISC_R_SUCCESS)
5667			return (result);
5668	}
5669
5670	/*
5671	 * Trigger lookups for DNS nameservers.
5672	 */
5673	if (negative_response && message->rcode == dns_rcode_noerror &&
5674	    fctx->type == dns_rdatatype_ds && soa_name != NULL &&
5675	    dns_name_equal(soa_name, qname) &&
5676	    !dns_name_equal(qname, dns_rootname))
5677		return (DNS_R_CHASEDSSERVERS);
5678
5679	/*
5680	 * Did we find anything?
5681	 */
5682	if (!negative_response && ns_name == NULL) {
5683		/*
5684		 * Nope.
5685		 */
5686		if (oqname != NULL) {
5687			/*
5688			 * We've already got a partial CNAME/DNAME chain,
5689			 * and haven't found else anything useful here, but
5690			 * no error has occurred since we have an answer.
5691			 */
5692			return (ISC_R_SUCCESS);
5693		} else {
5694			/*
5695			 * The responder is insane.
5696			 */
5697			log_formerr(fctx, "invalid response");
5698			return (DNS_R_FORMERR);
5699		}
5700	}
5701
5702	/*
5703	 * If we found both NS and SOA, they should be the same name.
5704	 */
5705	if (ns_name != NULL && soa_name != NULL && ns_name != soa_name) {
5706		log_formerr(fctx, "NS/SOA mismatch");
5707		return (DNS_R_FORMERR);
5708	}
5709
5710	/*
5711	 * Do we have a referral?  (We only want to follow a referral if
5712	 * we're not following a chain.)
5713	 */
5714	if (!negative_response && ns_name != NULL && oqname == NULL) {
5715		/*
5716		 * We already know ns_name is a subdomain of fctx->domain.
5717		 * If ns_name is equal to fctx->domain, we're not making
5718		 * progress.  We return DNS_R_FORMERR so that we'll keep
5719		 * trying other servers.
5720		 */
5721		if (dns_name_equal(ns_name, &fctx->domain)) {
5722			log_formerr(fctx, "non-improving referral");
5723			return (DNS_R_FORMERR);
5724		}
5725
5726		/*
5727		 * If the referral name is not a parent of the query
5728		 * name, consider the responder insane.
5729		 */
5730		if (! dns_name_issubdomain(&fctx->name, ns_name)) {
5731			/* Logged twice */
5732			log_formerr(fctx, "referral to non-parent");
5733			FCTXTRACE("referral to non-parent");
5734			return (DNS_R_FORMERR);
5735		}
5736
5737		/*
5738		 * Mark any additional data related to this rdataset.
5739		 * It's important that we do this before we change the
5740		 * query domain.
5741		 */
5742		INSIST(ns_rdataset != NULL);
5743		fctx->attributes |= FCTX_ATTR_GLUING;
5744		(void)dns_rdataset_additionaldata(ns_rdataset, check_related,
5745						  fctx);
5746#if CHECK_FOR_GLUE_IN_ANSWER
5747		/*
5748		 * Look in the answer section for "glue" that is incorrectly
5749		 * returned as a answer.  This is needed if the server also
5750		 * minimizes the response size by not adding records to the
5751		 * additional section that are in the answer section or if
5752		 * the record gets dropped due to message size constraints.
5753		 */
5754		if ((look_in_options & LOOK_FOR_GLUE_IN_ANSWER) != 0 &&
5755		    (fctx->type == dns_rdatatype_aaaa ||
5756		     fctx->type == dns_rdatatype_a))
5757			(void)dns_rdataset_additionaldata(ns_rdataset,
5758							  check_answer, fctx);
5759#endif
5760		fctx->attributes &= ~FCTX_ATTR_GLUING;
5761		/*
5762		 * NS rdatasets with 0 TTL cause problems.
5763		 * dns_view_findzonecut() will not find them when we
5764		 * try to follow the referral, and we'll SERVFAIL
5765		 * because the best nameservers are now above QDOMAIN.
5766		 * We force the TTL to 1 second to prevent this.
5767		 */
5768		if (ns_rdataset->ttl == 0)
5769			ns_rdataset->ttl = 1;
5770		/*
5771		 * Set the current query domain to the referral name.
5772		 *
5773		 * XXXRTH  We should check if we're in forward-only mode, and
5774		 *		if so we should bail out.
5775		 */
5776		INSIST(dns_name_countlabels(&fctx->domain) > 0);
5777		dns_name_free(&fctx->domain, fctx->mctx);
5778		if (dns_rdataset_isassociated(&fctx->nameservers))
5779			dns_rdataset_disassociate(&fctx->nameservers);
5780		dns_name_init(&fctx->domain, NULL);
5781		result = dns_name_dup(ns_name, fctx->mctx, &fctx->domain);
5782		if (result != ISC_R_SUCCESS)
5783			return (result);
5784		fctx->attributes |= FCTX_ATTR_WANTCACHE;
5785		fctx->ns_ttl_ok = ISC_FALSE;
5786		log_ns_ttl(fctx, "DELEGATION");
5787		return (DNS_R_DELEGATION);
5788	}
5789
5790	/*
5791	 * Since we're not doing a referral, we don't want to cache any
5792	 * NS RRs we may have found.
5793	 */
5794	if (ns_name != NULL)
5795		ns_name->attributes &= ~DNS_NAMEATTR_CACHE;
5796
5797	if (negative_response && oqname == NULL)
5798		fctx->attributes |= FCTX_ATTR_WANTNCACHE;
5799
5800	return (ISC_R_SUCCESS);
5801}
5802
5803static isc_result_t
5804answer_response(fetchctx_t *fctx) {
5805	isc_result_t result;
5806	dns_message_t *message;
5807	dns_name_t *name, *qname, tname, *ns_name;
5808	dns_rdataset_t *rdataset, *ns_rdataset;
5809	isc_boolean_t done, external, chaining, aa, found, want_chaining;
5810	isc_boolean_t have_answer, found_cname, found_type, wanted_chaining;
5811	unsigned int aflag;
5812	dns_rdatatype_t type;
5813	dns_fixedname_t dname, fqname;
5814	dns_view_t *view;
5815
5816	FCTXTRACE("answer_response");
5817
5818	message = fctx->rmessage;
5819
5820	/*
5821	 * Examine the answer section, marking those rdatasets which are
5822	 * part of the answer and should be cached.
5823	 */
5824
5825	done = ISC_FALSE;
5826	found_cname = ISC_FALSE;
5827	found_type = ISC_FALSE;
5828	chaining = ISC_FALSE;
5829	have_answer = ISC_FALSE;
5830	want_chaining = ISC_FALSE;
5831	POST(want_chaining);
5832	if ((message->flags & DNS_MESSAGEFLAG_AA) != 0)
5833		aa = ISC_TRUE;
5834	else
5835		aa = ISC_FALSE;
5836	qname = &fctx->name;
5837	type = fctx->type;
5838	view = fctx->res->view;
5839	result = dns_message_firstname(message, DNS_SECTION_ANSWER);
5840	while (!done && result == ISC_R_SUCCESS) {
5841		name = NULL;
5842		dns_message_currentname(message, DNS_SECTION_ANSWER, &name);
5843		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
5844		if (dns_name_equal(name, qname)) {
5845			wanted_chaining = ISC_FALSE;
5846			for (rdataset = ISC_LIST_HEAD(name->list);
5847			     rdataset != NULL;
5848			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
5849				found = ISC_FALSE;
5850				want_chaining = ISC_FALSE;
5851				aflag = 0;
5852				if (rdataset->type == dns_rdatatype_nsec3) {
5853					/*
5854					 * NSEC3 records are not allowed to
5855					 * appear in the answer section.
5856					 */
5857					log_formerr(fctx, "NSEC3 in answer");
5858					return (DNS_R_FORMERR);
5859				}
5860
5861				/*
5862				 * Apply filters, if given, on answers to reject
5863				 * a malicious attempt of rebinding.
5864				 */
5865				if ((rdataset->type == dns_rdatatype_a ||
5866				     rdataset->type == dns_rdatatype_aaaa) &&
5867				    !is_answeraddress_allowed(view, name,
5868							      rdataset)) {
5869					return (DNS_R_SERVFAIL);
5870				}
5871
5872				if (rdataset->type == type && !found_cname) {
5873					/*
5874					 * We've found an ordinary answer.
5875					 */
5876					found = ISC_TRUE;
5877					found_type = ISC_TRUE;
5878					done = ISC_TRUE;
5879					aflag = DNS_RDATASETATTR_ANSWER;
5880				} else if (type == dns_rdatatype_any) {
5881					/*
5882					 * We've found an answer matching
5883					 * an ANY query.  There may be
5884					 * more.
5885					 */
5886					found = ISC_TRUE;
5887					aflag = DNS_RDATASETATTR_ANSWER;
5888				} else if (rdataset->type == dns_rdatatype_rrsig
5889					   && rdataset->covers == type
5890					   && !found_cname) {
5891					/*
5892					 * We've found a signature that
5893					 * covers the type we're looking for.
5894					 */
5895					found = ISC_TRUE;
5896					found_type = ISC_TRUE;
5897					aflag = DNS_RDATASETATTR_ANSWERSIG;
5898				} else if (rdataset->type ==
5899					   dns_rdatatype_cname
5900					   && !found_type) {
5901					/*
5902					 * We're looking for something else,
5903					 * but we found a CNAME.
5904					 *
5905					 * Getting a CNAME response for some
5906					 * query types is an error.
5907					 */
5908					if (type == dns_rdatatype_rrsig ||
5909					    type == dns_rdatatype_dnskey ||
5910					    type == dns_rdatatype_nsec ||
5911					    type == dns_rdatatype_nsec3) {
5912						char buf[DNS_RDATATYPE_FORMATSIZE];
5913						dns_rdatatype_format(fctx->type,
5914							      buf, sizeof(buf));
5915						log_formerr(fctx,
5916							    "CNAME response "
5917							    "for %s RR", buf);
5918						return (DNS_R_FORMERR);
5919					}
5920					found = ISC_TRUE;
5921					found_cname = ISC_TRUE;
5922					want_chaining = ISC_TRUE;
5923					aflag = DNS_RDATASETATTR_ANSWER;
5924					result = cname_target(rdataset,
5925							      &tname);
5926					if (result != ISC_R_SUCCESS)
5927						return (result);
5928					/* Apply filters on the target name. */
5929					if (!is_answertarget_allowed(view,
5930							name,
5931							rdataset->type,
5932							&tname,
5933							&fctx->domain)) {
5934						return (DNS_R_SERVFAIL);
5935					}
5936				} else if (rdataset->type == dns_rdatatype_rrsig
5937					   && rdataset->covers ==
5938					   dns_rdatatype_cname
5939					   && !found_type) {
5940					/*
5941					 * We're looking for something else,
5942					 * but we found a SIG CNAME.
5943					 */
5944					found = ISC_TRUE;
5945					found_cname = ISC_TRUE;
5946					aflag = DNS_RDATASETATTR_ANSWERSIG;
5947				}
5948
5949				if (found) {
5950					/*
5951					 * We've found an answer to our
5952					 * question.
5953					 */
5954					name->attributes |=
5955						DNS_NAMEATTR_CACHE;
5956					rdataset->attributes |=
5957						DNS_RDATASETATTR_CACHE;
5958					rdataset->trust = dns_trust_answer;
5959					if (!chaining) {
5960						/*
5961						 * This data is "the" answer
5962						 * to our question only if
5963						 * we're not chaining (i.e.
5964						 * if we haven't followed
5965						 * a CNAME or DNAME).
5966						 */
5967						INSIST(!external);
5968						if (aflag ==
5969						    DNS_RDATASETATTR_ANSWER)
5970							have_answer = ISC_TRUE;
5971						name->attributes |=
5972							DNS_NAMEATTR_ANSWER;
5973						rdataset->attributes |= aflag;
5974						if (aa)
5975							rdataset->trust =
5976							  dns_trust_authanswer;
5977					} else if (external) {
5978						/*
5979						 * This data is outside of
5980						 * our query domain, and
5981						 * may not be cached.
5982						 */
5983						rdataset->attributes |=
5984						    DNS_RDATASETATTR_EXTERNAL;
5985					}
5986
5987					/*
5988					 * Mark any additional data related
5989					 * to this rdataset.
5990					 */
5991					(void)dns_rdataset_additionaldata(
5992							rdataset,
5993							check_related,
5994							fctx);
5995
5996					/*
5997					 * CNAME chaining.
5998					 */
5999					if (want_chaining) {
6000						wanted_chaining = ISC_TRUE;
6001						name->attributes |=
6002							DNS_NAMEATTR_CHAINING;
6003						rdataset->attributes |=
6004						    DNS_RDATASETATTR_CHAINING;
6005						qname = &tname;
6006					}
6007				}
6008				/*
6009				 * We could add an "else" clause here and
6010				 * log that we're ignoring this rdataset.
6011				 */
6012			}
6013			/*
6014			 * If wanted_chaining is true, we've done
6015			 * some chaining as the result of processing
6016			 * this node, and thus we need to set
6017			 * chaining to true.
6018			 *
6019			 * We don't set chaining inside of the
6020			 * rdataset loop because doing that would
6021			 * cause us to ignore the signatures of
6022			 * CNAMEs.
6023			 */
6024			if (wanted_chaining)
6025				chaining = ISC_TRUE;
6026		} else {
6027			/*
6028			 * Look for a DNAME (or its SIG).  Anything else is
6029			 * ignored.
6030			 */
6031			wanted_chaining = ISC_FALSE;
6032			for (rdataset = ISC_LIST_HEAD(name->list);
6033			     rdataset != NULL;
6034			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6035				isc_boolean_t found_dname = ISC_FALSE;
6036				dns_name_t *dname_name;
6037
6038				found = ISC_FALSE;
6039				aflag = 0;
6040				if (rdataset->type == dns_rdatatype_dname) {
6041					/*
6042					 * We're looking for something else,
6043					 * but we found a DNAME.
6044					 *
6045					 * If we're not chaining, then the
6046					 * DNAME should not be external.
6047					 */
6048					if (!chaining && external) {
6049						log_formerr(fctx,
6050							    "external DNAME");
6051						return (DNS_R_FORMERR);
6052					}
6053					found = ISC_TRUE;
6054					want_chaining = ISC_TRUE;
6055					POST(want_chaining);
6056					aflag = DNS_RDATASETATTR_ANSWER;
6057					result = dname_target(fctx, rdataset,
6058							      qname, name,
6059							      &dname);
6060					if (result == ISC_R_NOSPACE) {
6061						/*
6062						 * We can't construct the
6063						 * DNAME target.  Do not
6064						 * try to continue.
6065						 */
6066						want_chaining = ISC_FALSE;
6067						POST(want_chaining);
6068					} else if (result != ISC_R_SUCCESS)
6069						return (result);
6070					else
6071						found_dname = ISC_TRUE;
6072
6073					dname_name = dns_fixedname_name(&dname);
6074					if (!is_answertarget_allowed(view,
6075							qname,
6076							rdataset->type,
6077							dname_name,
6078							&fctx->domain)) {
6079						return (DNS_R_SERVFAIL);
6080					}
6081				} else if (rdataset->type == dns_rdatatype_rrsig
6082					   && rdataset->covers ==
6083					   dns_rdatatype_dname) {
6084					/*
6085					 * We've found a signature that
6086					 * covers the DNAME.
6087					 */
6088					found = ISC_TRUE;
6089					aflag = DNS_RDATASETATTR_ANSWERSIG;
6090				}
6091
6092				if (found) {
6093					/*
6094					 * We've found an answer to our
6095					 * question.
6096					 */
6097					name->attributes |=
6098						DNS_NAMEATTR_CACHE;
6099					rdataset->attributes |=
6100						DNS_RDATASETATTR_CACHE;
6101					rdataset->trust = dns_trust_answer;
6102					if (!chaining) {
6103						/*
6104						 * This data is "the" answer
6105						 * to our question only if
6106						 * we're not chaining.
6107						 */
6108						INSIST(!external);
6109						if (aflag ==
6110						    DNS_RDATASETATTR_ANSWER)
6111							have_answer = ISC_TRUE;
6112						name->attributes |=
6113							DNS_NAMEATTR_ANSWER;
6114						rdataset->attributes |= aflag;
6115						if (aa)
6116							rdataset->trust =
6117							  dns_trust_authanswer;
6118					} else if (external) {
6119						rdataset->attributes |=
6120						    DNS_RDATASETATTR_EXTERNAL;
6121					}
6122
6123					/*
6124					 * DNAME chaining.
6125					 */
6126					if (found_dname) {
6127						/*
6128						 * Copy the dname into the
6129						 * qname fixed name.
6130						 *
6131						 * Although we check for
6132						 * failure of the copy
6133						 * operation, in practice it
6134						 * should never fail since
6135						 * we already know that the
6136						 * result fits in a fixedname.
6137						 */
6138						dns_fixedname_init(&fqname);
6139						result = dns_name_copy(
6140						  dns_fixedname_name(&dname),
6141						  dns_fixedname_name(&fqname),
6142						  NULL);
6143						if (result != ISC_R_SUCCESS)
6144							return (result);
6145						wanted_chaining = ISC_TRUE;
6146						name->attributes |=
6147							DNS_NAMEATTR_CHAINING;
6148						rdataset->attributes |=
6149						    DNS_RDATASETATTR_CHAINING;
6150						qname = dns_fixedname_name(
6151								   &fqname);
6152					}
6153				}
6154			}
6155			if (wanted_chaining)
6156				chaining = ISC_TRUE;
6157		}
6158		result = dns_message_nextname(message, DNS_SECTION_ANSWER);
6159	}
6160	if (result == ISC_R_NOMORE)
6161		result = ISC_R_SUCCESS;
6162	if (result != ISC_R_SUCCESS)
6163		return (result);
6164
6165	/*
6166	 * We should have found an answer.
6167	 */
6168	if (!have_answer) {
6169		log_formerr(fctx, "reply has no answer");
6170		return (DNS_R_FORMERR);
6171	}
6172
6173	/*
6174	 * This response is now potentially cacheable.
6175	 */
6176	fctx->attributes |= FCTX_ATTR_WANTCACHE;
6177
6178	/*
6179	 * Did chaining end before we got the final answer?
6180	 */
6181	if (chaining) {
6182		/*
6183		 * Yes.  This may be a negative reply, so hand off
6184		 * authority section processing to the noanswer code.
6185		 * If it isn't a noanswer response, no harm will be
6186		 * done.
6187		 */
6188		return (noanswer_response(fctx, qname, 0));
6189	}
6190
6191	/*
6192	 * We didn't end with an incomplete chain, so the rcode should be
6193	 * "no error".
6194	 */
6195	if (message->rcode != dns_rcode_noerror) {
6196		log_formerr(fctx, "CNAME/DNAME chain complete, but RCODE "
6197				  "indicates error");
6198		return (DNS_R_FORMERR);
6199	}
6200
6201	/*
6202	 * Examine the authority section (if there is one).
6203	 *
6204	 * We expect there to be only one owner name for all the rdatasets
6205	 * in this section, and we expect that it is not external.
6206	 */
6207	done = ISC_FALSE;
6208	ns_name = NULL;
6209	ns_rdataset = NULL;
6210	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
6211	while (!done && result == ISC_R_SUCCESS) {
6212		name = NULL;
6213		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
6214		external = ISC_TF(!dns_name_issubdomain(name, &fctx->domain));
6215		if (!external) {
6216			/*
6217			 * We expect to find NS or SIG NS rdatasets, and
6218			 * nothing else.
6219			 */
6220			for (rdataset = ISC_LIST_HEAD(name->list);
6221			     rdataset != NULL;
6222			     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6223				if (rdataset->type == dns_rdatatype_ns ||
6224				    (rdataset->type == dns_rdatatype_rrsig &&
6225				     rdataset->covers == dns_rdatatype_ns)) {
6226					name->attributes |=
6227						DNS_NAMEATTR_CACHE;
6228					rdataset->attributes |=
6229						DNS_RDATASETATTR_CACHE;
6230					if (aa && !chaining)
6231						rdataset->trust =
6232						    dns_trust_authauthority;
6233					else
6234						rdataset->trust =
6235						    dns_trust_additional;
6236
6237					if (rdataset->type == dns_rdatatype_ns) {
6238						ns_name = name;
6239						ns_rdataset = rdataset;
6240					}
6241					/*
6242					 * Mark any additional data related
6243					 * to this rdataset.
6244					 */
6245					(void)dns_rdataset_additionaldata(
6246							rdataset,
6247							check_related,
6248							fctx);
6249					done = ISC_TRUE;
6250				}
6251			}
6252		}
6253		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
6254	}
6255	if (result == ISC_R_NOMORE)
6256		result = ISC_R_SUCCESS;
6257
6258	log_ns_ttl(fctx, "answer_response");
6259
6260	if (ns_rdataset != NULL && dns_name_equal(&fctx->domain, ns_name) &&
6261	    !dns_name_equal(ns_name, dns_rootname))
6262		trim_ns_ttl(fctx, ns_name, ns_rdataset);
6263
6264	return (result);
6265}
6266
6267static isc_boolean_t
6268fctx_decreference(fetchctx_t *fctx) {
6269	isc_boolean_t bucket_empty = ISC_FALSE;
6270
6271	INSIST(fctx->references > 0);
6272	fctx->references--;
6273	if (fctx->references == 0) {
6274		/*
6275		 * No one cares about the result of this fetch anymore.
6276		 */
6277		if (fctx->pending == 0 && fctx->nqueries == 0 &&
6278		    ISC_LIST_EMPTY(fctx->validators) && SHUTTINGDOWN(fctx)) {
6279			/*
6280			 * This fctx is already shutdown; we were just
6281			 * waiting for the last reference to go away.
6282			 */
6283			bucket_empty = fctx_unlink(fctx);
6284			fctx_destroy(fctx);
6285		} else {
6286			/*
6287			 * Initiate shutdown.
6288			 */
6289			fctx_shutdown(fctx);
6290		}
6291	}
6292	return (bucket_empty);
6293}
6294
/*
 * Completion handler (DNS_EVENT_FETCHDONE) for the NS fetch held in
 * fctx->nsfetch; 'event->ev_arg' is the fetch context that started it.
 *
 * Three outcomes are handled:
 *
 *   - ISC_R_CANCELED: the NS fetch is destroyed and the fctx finishes
 *     with ISC_R_CANCELED.
 *
 *   - ISC_R_SUCCESS: the returned rdataset becomes fctx->nameservers,
 *     fctx->domain is replaced by a copy of fctx->nsname, and the
 *     original lookup is retried with fctx_try().
 *
 *   - anything else: the leading label is stripped from fctx->nsname and
 *     a new NS fetch is started for the shorter name, reusing the domain
 *     and nameservers of the fetch that just failed (when it had any).
 *     If fctx->nsname already equals that fetch's domain the fctx
 *     finishes with DNS_R_SERVFAIL instead.
 *
 * On every path the event is freed and one reference to the fctx is
 * dropped under the bucket lock.  When a follow-up fetch was started an
 * extra reference is taken first, so the net count is unchanged in that
 * case.
 */
static void
resume_dslookup(isc_task_t *task, isc_event_t *event) {
	dns_fetchevent_t *fevent;
	dns_resolver_t *res;
	fetchctx_t *fctx;
	isc_result_t result;
	isc_boolean_t bucket_empty;
	isc_boolean_t locked = ISC_FALSE;
	unsigned int bucketnum;
	dns_rdataset_t nameservers;
	dns_fixedname_t fixed;
	dns_name_t *domain;

	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
	fevent = (dns_fetchevent_t *)event;
	fctx = event->ev_arg;
	REQUIRE(VALID_FCTX(fctx));
	res = fctx->res;

	UNUSED(task);
	FCTXTRACE("resume_dslookup");

	/*
	 * The database node and database handed back with the event are
	 * not used here; release them immediately.
	 */
	if (fevent->node != NULL)
		dns_db_detachnode(fevent->db, &fevent->node);
	if (fevent->db != NULL)
		dns_db_detach(&fevent->db);

	dns_rdataset_init(&nameservers);

	/*
	 * Remember the bucket number now; it is needed for locking at
	 * 'cleanup' after fctx_done() may already have run.
	 */
	bucketnum = fctx->bucketnum;
	if (fevent->result == ISC_R_CANCELED) {
		dns_resolver_destroyfetch(&fctx->nsfetch);
		fctx_done(fctx, ISC_R_CANCELED, __LINE__);
	} else if (fevent->result == ISC_R_SUCCESS) {

		FCTXTRACE("resuming DS lookup");

		dns_resolver_destroyfetch(&fctx->nsfetch);
		/*
		 * Adopt the NS rrset we just fetched as the fctx's
		 * nameservers, and make nsname the new query domain.
		 */
		if (dns_rdataset_isassociated(&fctx->nameservers))
			dns_rdataset_disassociate(&fctx->nameservers);
		dns_rdataset_clone(fevent->rdataset, &fctx->nameservers);
		fctx->ns_ttl = fctx->nameservers.ttl;
		fctx->ns_ttl_ok = ISC_TRUE;
		log_ns_ttl(fctx, "resume_dslookup");
		dns_name_free(&fctx->domain, fctx->mctx);
		dns_name_init(&fctx->domain, NULL);
		result = dns_name_dup(&fctx->nsname, fctx->mctx, &fctx->domain);
		if (result != ISC_R_SUCCESS) {
			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
			goto cleanup;
		}
		/*
		 * Try again.
		 */
		fctx_try(fctx, ISC_TRUE, ISC_FALSE);
	} else {
		unsigned int n;
		dns_rdataset_t *nsrdataset = NULL;

		/*
		 * Retrieve state from fctx->nsfetch before we destroy it.
		 */
		dns_fixedname_init(&fixed);
		domain = dns_fixedname_name(&fixed);
		dns_name_copy(&fctx->nsfetch->private->domain, domain, NULL);
		/*
		 * nsname has already been shortened down to the failed
		 * fetch's domain; give up rather than go any further.
		 */
		if (dns_name_equal(&fctx->nsname, domain)) {
			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
			dns_resolver_destroyfetch(&fctx->nsfetch);
			goto cleanup;
		}
		/*
		 * Pass domain and nameservers on to the next fetch only as
		 * a pair: without nameservers the domain is dropped too.
		 */
		if (dns_rdataset_isassociated(
		    &fctx->nsfetch->private->nameservers)) {
			dns_rdataset_clone(
			    &fctx->nsfetch->private->nameservers,
			    &nameservers);
			nsrdataset = &nameservers;
		} else
			domain = NULL;
		dns_resolver_destroyfetch(&fctx->nsfetch);
		/*
		 * Strip the leftmost label from nsname (in place).
		 */
		n = dns_name_countlabels(&fctx->nsname);
		dns_name_getlabelsequence(&fctx->nsname, 1, n - 1,
					  &fctx->nsname);

		/*
		 * fevent->rdataset is disassociated here, before
		 * &fctx->nsrrset is handed to the new fetch below.
		 * NOTE(review): presumably fevent->rdataset is that same
		 * fctx->nsrrset -- confirm.
		 */
		if (dns_rdataset_isassociated(fevent->rdataset))
			dns_rdataset_disassociate(fevent->rdataset);
		FCTXTRACE("continuing to look for parent's NS records");
		result = dns_resolver_createfetch(fctx->res, &fctx->nsname,
						  dns_rdatatype_ns, domain,
						  nsrdataset, NULL, 0, task,
						  resume_dslookup, fctx,
						  &fctx->nsrrset, NULL,
						  &fctx->nsfetch);
		if (result != ISC_R_SUCCESS)
			fctx_done(fctx, result, __LINE__);
		else {
			/*
			 * The new fetch holds the fctx as its event
			 * argument; take a reference for it.  The lock
			 * stays held until the decrement at 'cleanup'.
			 */
			LOCK(&res->buckets[bucketnum].lock);
			locked = ISC_TRUE;
			fctx->references++;
		}
	}

 cleanup:
	if (dns_rdataset_isassociated(&nameservers))
		dns_rdataset_disassociate(&nameservers);
	if (dns_rdataset_isassociated(fevent->rdataset))
		dns_rdataset_disassociate(fevent->rdataset);
	INSIST(fevent->sigrdataset == NULL);
	isc_event_free(&event);
	/*
	 * Drop one reference to the fctx under the bucket lock.
	 */
	if (!locked)
		LOCK(&res->buckets[bucketnum].lock);
	bucket_empty = fctx_decreference(fctx);
	UNLOCK(&res->buckets[bucketnum].lock);
	if (bucket_empty)
		empty_bucket(res);
}
6410
6411static inline void
6412checknamessection(dns_message_t *message, dns_section_t section) {
6413	isc_result_t result;
6414	dns_name_t *name;
6415	dns_rdata_t rdata = DNS_RDATA_INIT;
6416	dns_rdataset_t *rdataset;
6417
6418	for (result = dns_message_firstname(message, section);
6419	     result == ISC_R_SUCCESS;
6420	     result = dns_message_nextname(message, section))
6421	{
6422		name = NULL;
6423		dns_message_currentname(message, section, &name);
6424		for (rdataset = ISC_LIST_HEAD(name->list);
6425		     rdataset != NULL;
6426		     rdataset = ISC_LIST_NEXT(rdataset, link)) {
6427			for (result = dns_rdataset_first(rdataset);
6428			     result == ISC_R_SUCCESS;
6429			     result = dns_rdataset_next(rdataset)) {
6430				dns_rdataset_current(rdataset, &rdata);
6431				if (!dns_rdata_checkowner(name, rdata.rdclass,
6432							  rdata.type,
6433							  ISC_FALSE) ||
6434				    !dns_rdata_checknames(&rdata, name, NULL))
6435				{
6436					rdataset->attributes |=
6437						DNS_RDATASETATTR_CHECKNAMES;
6438				}
6439				dns_rdata_reset(&rdata);
6440			}
6441		}
6442	}
6443}
6444
6445static void
6446checknames(dns_message_t *message) {
6447
6448	checknamessection(message, DNS_SECTION_ANSWER);
6449	checknamessection(message, DNS_SECTION_AUTHORITY);
6450	checknamessection(message, DNS_SECTION_ADDITIONAL);
6451}
6452
6453/*
6454 * Log server NSID at log level 'level'
6455 */
6456static isc_result_t
6457log_nsid(dns_rdataset_t *opt, resquery_t *query, int level, isc_mem_t *mctx)
6458{
6459	static const char hex[17] = "0123456789abcdef";
6460	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
6461	isc_uint16_t optcode, nsid_len, buflen, i;
6462	isc_result_t result;
6463	isc_buffer_t nsidbuf;
6464	dns_rdata_t rdata;
6465	unsigned char *p, *buf, *nsid;
6466
6467	/* Extract rdata from OPT rdataset */
6468	result = dns_rdataset_first(opt);
6469	if (result != ISC_R_SUCCESS)
6470		return (ISC_R_FAILURE);
6471
6472	dns_rdata_init(&rdata);
6473	dns_rdataset_current(opt, &rdata);
6474	if (rdata.length < 4)
6475		return (ISC_R_FAILURE);
6476
6477	/* Check for NSID */
6478	isc_buffer_init(&nsidbuf, rdata.data, rdata.length);
6479	isc_buffer_add(&nsidbuf, rdata.length);
6480	optcode = isc_buffer_getuint16(&nsidbuf);
6481	nsid_len = isc_buffer_getuint16(&nsidbuf);
6482	if (optcode != DNS_OPT_NSID || nsid_len == 0)
6483		return (ISC_R_FAILURE);
6484
6485	/* Allocate buffer for storing hex version of the NSID */
6486	buflen = nsid_len * 2 + 1;
6487	buf = isc_mem_get(mctx, buflen);
6488	if (buf == NULL)
6489		return (ISC_R_NOSPACE);
6490
6491	/* Convert to hex */
6492	p = buf;
6493	nsid = rdata.data + 4;
6494	for (i = 0; i < nsid_len; i++) {
6495		*p++ = hex[(nsid[0] >> 4) & 0xf];
6496		*p++ = hex[nsid[0] & 0xf];
6497		nsid++;
6498	}
6499	*p = '\0';
6500
6501	isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
6502			    sizeof(addrbuf));
6503	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6504		      DNS_LOGMODULE_RESOLVER, level,
6505		      "received NSID '%s' from %s", buf, addrbuf);
6506
6507	/* Clean up */
6508	isc_mem_put(mctx, buf, buflen);
6509	return (ISC_R_SUCCESS);
6510}
6511
6512static void
6513log_packet(dns_message_t *message, int level, isc_mem_t *mctx) {
6514	isc_buffer_t buffer;
6515	char *buf = NULL;
6516	int len = 1024;
6517	isc_result_t result;
6518
6519	if (! isc_log_wouldlog(dns_lctx, level))
6520		return;
6521
6522	/*
6523	 * Note that these are multiline debug messages.  We want a newline
6524	 * to appear in the log after each message.
6525	 */
6526
6527	do {
6528		buf = isc_mem_get(mctx, len);
6529		if (buf == NULL)
6530			break;
6531		isc_buffer_init(&buffer, buf, len);
6532		result = dns_message_totext(message, &dns_master_style_debug,
6533					    0, &buffer);
6534		if (result == ISC_R_NOSPACE) {
6535			isc_mem_put(mctx, buf, len);
6536			len += 1024;
6537		} else if (result == ISC_R_SUCCESS)
6538			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6539				      DNS_LOGMODULE_RESOLVER, level,
6540				      "received packet:\n%.*s",
6541				      (int)isc_buffer_usedlength(&buffer),
6542				      buf);
6543	} while (result == ISC_R_NOSPACE);
6544
6545	if (buf != NULL)
6546		isc_mem_put(mctx, buf, len);
6547}
6548
6549static isc_boolean_t
6550iscname(fetchctx_t *fctx) {
6551	isc_result_t result;
6552
6553	result = dns_message_findname(fctx->rmessage, DNS_SECTION_ANSWER,
6554				      &fctx->name, dns_rdatatype_cname, 0,
6555				      NULL, NULL);
6556	return (result == ISC_R_SUCCESS ? ISC_TRUE : ISC_FALSE);
6557}
6558
6559static isc_boolean_t
6560betterreferral(fetchctx_t *fctx) {
6561	isc_result_t result;
6562	dns_name_t *name;
6563	dns_rdataset_t *rdataset;
6564	dns_message_t *message = fctx->rmessage;
6565
6566	for (result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
6567	     result == ISC_R_SUCCESS;
6568	     result = dns_message_nextname(message, DNS_SECTION_AUTHORITY)) {
6569		name = NULL;
6570		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
6571		if (!isstrictsubdomain(name, &fctx->domain))
6572			continue;
6573		for (rdataset = ISC_LIST_HEAD(name->list);
6574		     rdataset != NULL;
6575		     rdataset = ISC_LIST_NEXT(rdataset, link))
6576			if (rdataset->type == dns_rdatatype_ns)
6577				return (ISC_TRUE);
6578	}
6579	return (ISC_FALSE);
6580}
6581
/*
 * Dispatch event handler (DNS_EVENT_DISPATCH) for an outstanding query;
 * 'event->ev_arg' is the resquery_t the event belongs to.
 *
 * The handler parses and sanity-checks the response, classifies it
 * (answer, referral / negative response, or broken server), follows
 * additional data and caches what is cacheable.  Its decision is
 * recorded in a handful of local flags:
 *
 *	keep_trying	move on with fctx_try(); when 'broken_server' is
 *			set (or 'result' is DNS_R_FORMERR) the address is
 *			first added to this fetch's bad-server list.
 *	get_nameservers	a referral was accepted; the zone cut is looked
 *			up again before retrying.
 *	resend		re-issue the query to the same address with the
 *			(possibly modified) 'options'.
 *	finish,
 *	no_response	passed to fctx_cancelquery() when the query is
 *			cancelled.
 *
 * All paths funnel through the 'done' label, which cancels the query and
 * then acts on those flags and on 'result'.
 */
static void
resquery_response(isc_task_t *task, isc_event_t *event) {
	isc_result_t result = ISC_R_SUCCESS;
	resquery_t *query = event->ev_arg;
	dns_dispatchevent_t *devent = (dns_dispatchevent_t *)event;
	isc_boolean_t keep_trying, get_nameservers, resend;
	isc_boolean_t truncated;
	dns_message_t *message;
	dns_rdataset_t *opt;
	fetchctx_t *fctx;
	dns_name_t *fname;
	dns_fixedname_t foundname;
	isc_stdtime_t now;
	isc_time_t tnow, *finish;
	dns_adbaddrinfo_t *addrinfo;
	unsigned int options;
	unsigned int findoptions;
	isc_result_t broken_server;
	badnstype_t broken_type = badns_response;
	isc_boolean_t no_response;

	REQUIRE(VALID_QUERY(query));
	fctx = query->fctx;
	/*
	 * Work on a copy of the query's options; it may be modified
	 * before a resend.
	 */
	options = query->options;
	REQUIRE(VALID_FCTX(fctx));
	REQUIRE(event->ev_type == DNS_EVENT_DISPATCH);

	QTRACE("response");

	if (isc_sockaddr_pf(&query->addrinfo->sockaddr) == PF_INET)
		inc_stats(fctx->res, dns_resstatscounter_responsev4);
	else
		inc_stats(fctx->res, dns_resstatscounter_responsev6);

	(void)isc_timer_touch(fctx->timer);

	keep_trying = ISC_FALSE;
	broken_server = ISC_R_SUCCESS;
	get_nameservers = ISC_FALSE;
	resend = ISC_FALSE;
	truncated = ISC_FALSE;
	finish = NULL;
	no_response = ISC_FALSE;

	/*
	 * The resolver is exiting; finish with ISC_R_SHUTTINGDOWN.
	 */
	if (fctx->res->exiting) {
		result = ISC_R_SHUTTINGDOWN;
		goto done;
	}

	fctx->timeouts = 0;
	fctx->timeout = ISC_FALSE;
	fctx->addrinfo = query->addrinfo;

	/*
	 * XXXRTH  We should really get the current time just once.  We
	 *		need a routine to convert from an isc_time_t to an
	 *		isc_stdtime_t.
	 */
	TIME_NOW(&tnow);
	finish = &tnow;
	isc_stdtime_get(&now);

	/*
	 * Did the dispatcher have a problem?
	 */
	if (devent->result != ISC_R_SUCCESS) {
		if (devent->result == ISC_R_EOF &&
		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
			/*
			 * The problem might be that they
			 * don't understand EDNS0.  Turn it
			 * off and try again.
			 */
			options |= DNS_FETCHOPT_NOEDNS0;
			resend = ISC_TRUE;
			/*
			 * Remember that they don't like EDNS0.
			 */
			dns_adb_changeflags(fctx->adb,
					    query->addrinfo,
					    DNS_FETCHOPT_NOEDNS0,
					    DNS_FETCHOPT_NOEDNS0);
		} else {
			/*
			 * There's no hope for this query.
			 */
			keep_trying = ISC_TRUE;

			/*
			 * If this is a network error on an exclusive query
			 * socket, mark the server as bad so that we won't try
			 * it for this fetch again.  Also adjust finish and
			 * no_response so that we penalize this address in SRTT
			 * adjustment later.
			 */
			if (query->exclusivesocket &&
			    (devent->result == ISC_R_HOSTUNREACH ||
			     devent->result == ISC_R_NETUNREACH ||
			     devent->result == ISC_R_CONNREFUSED ||
			     devent->result == ISC_R_CANCELED)) {
				    broken_server = devent->result;
				    broken_type = badns_unreachable;
				    finish = NULL;
				    no_response = ISC_TRUE;
			}
		}
		goto done;
	}

	message = fctx->rmessage;

	/*
	 * Give the message the TSIG state of the query before parsing.
	 */
	if (query->tsig != NULL) {
		result = dns_message_setquerytsig(message, query->tsig);
		if (result != ISC_R_SUCCESS)
			goto done;
	}

	if (query->tsigkey) {
		result = dns_message_settsigkey(message, query->tsigkey);
		if (result != ISC_R_SUCCESS)
			goto done;
	}

	result = dns_message_parse(message, &devent->buffer, 0);
	if (result != ISC_R_SUCCESS) {
		switch (result) {
		case ISC_R_UNEXPECTEDEND:
			if (!message->question_ok ||
			    (message->flags & DNS_MESSAGEFLAG_TC) == 0 ||
			    (options & DNS_FETCHOPT_TCP) != 0) {
				/*
				 * Either the message ended prematurely,
				 * and/or wasn't marked as being truncated,
				 * and/or this is a response to a query we
				 * sent over TCP.  In all of these cases,
				 * something is wrong with the remote
				 * server and we don't want to retry using
				 * TCP.
				 */
				if ((query->options & DNS_FETCHOPT_NOEDNS0)
				    == 0) {
					/*
					 * The problem might be that they
					 * don't understand EDNS0.  Turn it
					 * off and try again.
					 */
					options |= DNS_FETCHOPT_NOEDNS0;
					resend = ISC_TRUE;
					/*
					 * Remember that they don't like EDNS0.
					 */
					dns_adb_changeflags(
							fctx->adb,
							query->addrinfo,
							DNS_FETCHOPT_NOEDNS0,
							DNS_FETCHOPT_NOEDNS0);
					inc_stats(fctx->res,
						 dns_resstatscounter_edns0fail);
				} else {
					broken_server = result;
					keep_trying = ISC_TRUE;
				}
				goto done;
			}
			/*
			 * We defer retrying via TCP for a bit so we can
			 * check out this message further.
			 */
			truncated = ISC_TRUE;
			break;
		case DNS_R_FORMERR:
			if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
				/*
				 * The problem might be that they
				 * don't understand EDNS0.  Turn it
				 * off and try again.
				 */
				options |= DNS_FETCHOPT_NOEDNS0;
				resend = ISC_TRUE;
				/*
				 * Remember that they don't like EDNS0.
				 */
				dns_adb_changeflags(fctx->adb,
						    query->addrinfo,
						    DNS_FETCHOPT_NOEDNS0,
						    DNS_FETCHOPT_NOEDNS0);
				inc_stats(fctx->res,
						 dns_resstatscounter_edns0fail);
			} else {
				broken_server = DNS_R_UNEXPECTEDRCODE;
				keep_trying = ISC_TRUE;
			}
			goto done;
		default:
			/*
			 * Something bad has happened.
			 */
			goto done;
		}
	}


	/*
	 * Log the incoming packet.
	 */
	log_packet(message, ISC_LOG_DEBUG(10), fctx->res->mctx);

	/*
	 * Did we request NSID?  If so, and if the response contains
	 * NSID data, log it at INFO level.
	 */
	opt = dns_message_getopt(message);
	if (opt != NULL && (query->options & DNS_FETCHOPT_WANTNSID) != 0)
		log_nsid(opt, query, ISC_LOG_INFO, fctx->res->mctx);

	/*
	 * If the message is signed, check the signature.  If not, this
	 * returns success anyway.
	 */
	result = dns_message_checksig(message, fctx->res->view);
	if (result != ISC_R_SUCCESS)
		goto done;

	/*
	 * The dispatcher should ensure we only get responses with QR set.
	 */
	INSIST((message->flags & DNS_MESSAGEFLAG_QR) != 0);
	/*
	 * INSIST() that the message comes from the place we sent it to,
	 * since the dispatch code should ensure this.
	 *
	 * INSIST() that the message id is correct (this should also be
	 * ensured by the dispatch code).
	 */

	/*
	 * We have an affirmative response to the query and we have
	 * previously got a response from this server which indicated
	 * EDNS may not be supported so we can now cache the lack of
	 * EDNS support.
	 *
	 * Note: 'addrbuf' is formatted below but nothing in this block
	 * goes on to use it.
	 */
	if (opt == NULL &&
	    (message->rcode == dns_rcode_noerror ||
	     message->rcode == dns_rcode_nxdomain ||
	     message->rcode == dns_rcode_refused ||
	     message->rcode == dns_rcode_yxdomain) &&
	     bad_edns(fctx, &query->addrinfo->sockaddr)) {
		char addrbuf[ISC_SOCKADDR_FORMATSIZE];
		isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
				    sizeof(addrbuf));
		dns_adb_changeflags(fctx->adb, query->addrinfo,
				    DNS_FETCHOPT_NOEDNS0,
				    DNS_FETCHOPT_NOEDNS0);
	}

	/*
	 * Deal with truncated responses by retrying using TCP.
	 */
	if ((message->flags & DNS_MESSAGEFLAG_TC) != 0)
		truncated = ISC_TRUE;

	if (truncated) {
		inc_stats(fctx->res, dns_resstatscounter_truncated);
		if ((options & DNS_FETCHOPT_TCP) != 0) {
			/*
			 * Truncation over TCP: treat the server as broken.
			 */
			broken_server = DNS_R_TRUNCATEDTCP;
			keep_trying = ISC_TRUE;
		} else {
			options |= DNS_FETCHOPT_TCP;
			resend = ISC_TRUE;
		}
		goto done;
	}

	/*
	 * Is it a query response?
	 */
	if (message->opcode != dns_opcode_query) {
		/* XXXRTH Log */
		broken_server = DNS_R_UNEXPECTEDOPCODE;
		keep_trying = ISC_TRUE;
		goto done;
	}

	/*
	 * Update statistics about erroneous responses.
	 */
	if (message->rcode != dns_rcode_noerror) {
		switch (message->rcode) {
		case dns_rcode_nxdomain:
			inc_stats(fctx->res, dns_resstatscounter_nxdomain);
			break;
		case dns_rcode_servfail:
			inc_stats(fctx->res, dns_resstatscounter_servfail);
			break;
		case dns_rcode_formerr:
			inc_stats(fctx->res, dns_resstatscounter_formerr);
			break;
		default:
			inc_stats(fctx->res, dns_resstatscounter_othererror);
			break;
		}
	}

	/*
	 * Is the remote server broken, or does it dislike us?
	 */
	if (message->rcode != dns_rcode_noerror &&
	    message->rcode != dns_rcode_nxdomain) {
		if (((message->rcode == dns_rcode_formerr ||
		      message->rcode == dns_rcode_notimp) ||
		     (message->rcode == dns_rcode_servfail &&
		      dns_message_getopt(message) == NULL)) &&
		    (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
			/*
			 * It's very likely they don't like EDNS0.
			 * If the response code is SERVFAIL, also check if the
			 * response contains an OPT RR and don't cache the
			 * failure since it can be returned for various other
			 * reasons.
			 *
			 * XXXRTH  We should check if the question
			 *		we're asking requires EDNS0, and
			 *		if so, we should bail out.
			 */
			options |= DNS_FETCHOPT_NOEDNS0;
			resend = ISC_TRUE;
			/*
			 * Remember that they may not like EDNS0.
			 */
			add_bad_edns(fctx, &query->addrinfo->sockaddr);
			inc_stats(fctx->res, dns_resstatscounter_edns0fail);
		} else if (message->rcode == dns_rcode_formerr) {
			if (ISFORWARDER(query->addrinfo)) {
				/*
				 * This forwarder doesn't understand us,
				 * but other forwarders might.  Keep trying.
				 */
				broken_server = DNS_R_REMOTEFORMERR;
				keep_trying = ISC_TRUE;
			} else {
				/*
				 * The server doesn't understand us.  Since
				 * all servers for a zone need similar
				 * capabilities, we assume that we will get
				 * FORMERR from all servers, and thus we
				 * cannot make any more progress with this
				 * fetch.
				 */
				log_formerr(fctx, "server sent FORMERR");
				result = DNS_R_FORMERR;
			}
		} else if (message->rcode == dns_rcode_yxdomain) {
			/*
			 * DNAME mapping failed because the new name
			 * was too long.  There's no chance of success
			 * for this fetch.
			 */
			result = DNS_R_YXDOMAIN;
		} else if (message->rcode == dns_rcode_badvers) {
			unsigned int flags, mask;
			unsigned int version;

			/*
			 * The EDNS version is read from bits 16-23 of the
			 * OPT record's TTL field.  Version 0 is recorded
			 * in the ADB for this address and the query is
			 * resent; any other version marks the server as
			 * broken.
			 */
			resend = ISC_TRUE;
			INSIST(opt != NULL);
			version = (opt->ttl >> 16) & 0xff;
			flags = (version << DNS_FETCHOPT_EDNSVERSIONSHIFT) |
				DNS_FETCHOPT_EDNSVERSIONSET;
			mask = DNS_FETCHOPT_EDNSVERSIONMASK |
			       DNS_FETCHOPT_EDNSVERSIONSET;
			switch (version) {
			case 0:
				dns_adb_changeflags(fctx->adb, query->addrinfo,
						    flags, mask);
				break;
			default:
				broken_server = DNS_R_BADVERS;
				keep_trying = ISC_TRUE;
				break;
			}
		} else {
			/*
			 * XXXRTH log.
			 */
			broken_server = DNS_R_UNEXPECTEDRCODE;
			INSIST(broken_server != ISC_R_SUCCESS);
			keep_trying = ISC_TRUE;
		}
		goto done;
	}

	/*
	 * Is the question the same as the one we asked?
	 */
	result = same_question(fctx);
	if (result != ISC_R_SUCCESS) {
		/* XXXRTH Log */
		if (result == DNS_R_FORMERR)
			keep_trying = ISC_TRUE;
		goto done;
	}

	/*
	 * Is the server lame?
	 */
	if (fctx->res->lame_ttl != 0 && !ISFORWARDER(query->addrinfo) &&
	    is_lame(fctx)) {
		inc_stats(fctx->res, dns_resstatscounter_lame);
		log_lame(fctx, query->addrinfo);
		result = dns_adb_marklame(fctx->adb, query->addrinfo,
					  &fctx->name, fctx->type,
					  now + fctx->res->lame_ttl);
		if (result != ISC_R_SUCCESS)
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
				      DNS_LOGMODULE_RESOLVER, ISC_LOG_ERROR,
				      "could not mark server as lame: %s",
				      isc_result_totext(result));
		broken_server = DNS_R_LAME;
		keep_trying = ISC_TRUE;
		goto done;
	}

	/*
	 * Enforce delegations only zones like NET and COM.
	 */
	if (!ISFORWARDER(query->addrinfo) &&
	    dns_view_isdelegationonly(fctx->res->view, &fctx->domain) &&
	    !dns_name_equal(&fctx->domain, &fctx->name) &&
	    fix_mustbedelegationornxdomain(message, fctx)) {
		char namebuf[DNS_NAME_FORMATSIZE];
		char domainbuf[DNS_NAME_FORMATSIZE];
		char addrbuf[ISC_SOCKADDR_FORMATSIZE];
		char classbuf[64];
		char typebuf[64];

		dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
		dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
		dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
		dns_rdataclass_format(fctx->res->rdclass, classbuf,
				      sizeof(classbuf));
		isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
				    sizeof(addrbuf));

		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DELEGATION_ONLY,
			     DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
			     "enforced delegation-only for '%s' (%s/%s/%s) "
			     "from %s",
			     domainbuf, namebuf, typebuf, classbuf, addrbuf);
	}

	if ((fctx->res->options & DNS_RESOLVER_CHECKNAMES) != 0)
		checknames(message);

	/*
	 * Clear cache bits.
	 */
	fctx->attributes &= ~(FCTX_ATTR_WANTNCACHE | FCTX_ATTR_WANTCACHE);

	/*
	 * Did we get any answers?
	 */
	if (message->counts[DNS_SECTION_ANSWER] > 0 &&
	    (message->rcode == dns_rcode_noerror ||
	     message->rcode == dns_rcode_nxdomain)) {
		/*
		 * [normal case]
		 * We've got answers.  If it has an authoritative answer or an
		 * answer from a forwarder, we're done.
		 */
		if ((message->flags & DNS_MESSAGEFLAG_AA) != 0 ||
		    ISFORWARDER(query->addrinfo))
			result = answer_response(fctx);
		else if (iscname(fctx) &&
			 fctx->type != dns_rdatatype_any &&
			 fctx->type != dns_rdatatype_cname) {
			/*
			 * A BIND8 server could return a non-authoritative
			 * answer when a CNAME is followed.  We should treat
			 * it as a valid answer.
			 */
			result = answer_response(fctx);
		} else if (fctx->type != dns_rdatatype_ns &&
			   !betterreferral(fctx)) {
			/*
			 * Lame response !!!.
			 */
			result = answer_response(fctx);
		} else {
			if (fctx->type == dns_rdatatype_ns) {
				/*
				 * A BIND 8 server could incorrectly return a
				 * non-authoritative answer to an NS query
				 * instead of a referral. Since this answer
				 * lacks the SIGs necessary to do DNSSEC
				 * validation, we must invoke the following
				 * special kludge to treat it as a referral.
				 */
				result = noanswer_response(fctx, NULL,
						   LOOK_FOR_NS_IN_ANSWER);
			} else {
				/*
				 * Some other servers may still somehow include
				 * an answer when it should return a referral
				 * with an empty answer.  Check to see if we can
				 * treat this as a referral by ignoring the
				 * answer.  Furthermore, there may be an
				 * implementation that moves A/AAAA glue records
				 * to the answer section for that type of
				 * delegation when the query is for that glue
				 * record.  LOOK_FOR_GLUE_IN_ANSWER will handle
				 * such a corner case.
				 */
				result = noanswer_response(fctx, NULL,
						   LOOK_FOR_GLUE_IN_ANSWER);
			}
			if (result != DNS_R_DELEGATION) {
				/*
				 * At this point, AA is not set, the response
				 * is not a referral, and the server is not a
				 * forwarder.  It is technically lame and it's
				 * easier to treat it as such than to figure out
				 * some more elaborate course of action.
				 */
				broken_server = DNS_R_LAME;
				keep_trying = ISC_TRUE;
				goto done;
			}
			/*
			 * Continue in the referral handling of the
			 * no-answer branch below.
			 */
			goto force_referral;
		}
		if (result != ISC_R_SUCCESS) {
			if (result == DNS_R_FORMERR)
				keep_trying = ISC_TRUE;
			goto done;
		}
	} else if (message->counts[DNS_SECTION_AUTHORITY] > 0 ||
		   message->rcode == dns_rcode_noerror ||
		   message->rcode == dns_rcode_nxdomain) {
		/*
		 * NXDOMAIN, NXRDATASET, or referral.
		 */
		result = noanswer_response(fctx, NULL, 0);
		if (result == DNS_R_CHASEDSSERVERS) {
			/*
			 * Nothing to do here; this result is acted on
			 * after the 'done' label.
			 */
		} else if (result == DNS_R_DELEGATION) {
		force_referral:
			/*
			 * We don't have the answer, but we know a better
			 * place to look.
			 */
			get_nameservers = ISC_TRUE;
			keep_trying = ISC_TRUE;
			/*
			 * We have a new set of name servers, and it
			 * has not experienced any restarts yet.
			 */
			fctx->restarts = 0;

			/*
			 * Update local statistics counters collected for each
			 * new zone.
			 */
			fctx->referrals++;
			fctx->querysent = 0;
			fctx->lamecount = 0;
			fctx->neterr = 0;
			fctx->badresp = 0;
			fctx->adberr = 0;

			result = ISC_R_SUCCESS;
		} else if (result != ISC_R_SUCCESS) {
			/*
			 * Something has gone wrong.
			 */
			if (result == DNS_R_FORMERR)
				keep_trying = ISC_TRUE;
			goto done;
		}
	} else {
		/*
		 * The server is insane.
		 */
		/* XXXRTH Log */
		broken_server = DNS_R_UNEXPECTEDRCODE;
		keep_trying = ISC_TRUE;
		goto done;
	}

	/*
	 * Follow additional section data chains.
	 */
	chase_additional(fctx);

	/*
	 * Cache the cacheable parts of the message.  This may also cause
	 * work to be queued to the DNSSEC validator.
	 */
	if (WANTCACHE(fctx)) {
		result = cache_message(fctx, query->addrinfo, now);
		if (result != ISC_R_SUCCESS)
			goto done;
	}

	/*
	 * Ncache the negatively cacheable parts of the message.  This may
	 * also cause work to be queued to the DNSSEC validator.
	 */
	if (WANTNCACHE(fctx)) {
		dns_rdatatype_t covers;
		if (message->rcode == dns_rcode_nxdomain)
			covers = dns_rdatatype_any;
		else
			covers = fctx->type;

		/*
		 * Cache any negative cache entries in the message.
		 */
		result = ncache_message(fctx, query->addrinfo, covers, now);
	}

 done:
	/*
	 * Remember the query's addrinfo, in case we need to mark the
	 * server as broken.
	 */
	addrinfo = query->addrinfo;

	/*
	 * Cancel the query.
	 *
	 * XXXRTH  Don't cancel the query if waiting for validation?
	 */
	fctx_cancelquery(&query, &devent, finish, no_response);

	/*
	 * Act on the decision taken above.  The branches are tested in
	 * this order: keep_trying, resend, waiting for the validator,
	 * chasing DS servers, and finally completing the fetch.
	 */
	if (keep_trying) {
		if (result == DNS_R_FORMERR)
			broken_server = DNS_R_FORMERR;
		if (broken_server != ISC_R_SUCCESS) {
			/*
			 * Add this server to the list of bad servers for
			 * this fctx.
			 */
			add_bad(fctx, addrinfo, broken_server, broken_type);
		}

		if (get_nameservers) {
			dns_name_t *name;
			dns_fixedname_init(&foundname);
			fname = dns_fixedname_name(&foundname);
			if (result != ISC_R_SUCCESS) {
				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
				return;
			}
			findoptions = 0;
			if (dns_rdatatype_atparent(fctx->type))
				findoptions |= DNS_DBFIND_NOEXACT;
			/*
			 * Look the zone cut up starting from the query
			 * name, or from the current domain for an
			 * unshared fetch.
			 */
			if ((options & DNS_FETCHOPT_UNSHARED) == 0)
				name = &fctx->name;
			else
				name = &fctx->domain;
			result = dns_view_findzonecut(fctx->res->view,
						      name, fname,
						      now, findoptions,
						      ISC_TRUE,
						      &fctx->nameservers,
						      NULL);
			if (result != ISC_R_SUCCESS) {
				FCTXTRACE("couldn't find a zonecut");
				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
				return;
			}
			if (!dns_name_issubdomain(fname, &fctx->domain)) {
				/*
				 * The best nameservers are now above our
				 * QDOMAIN.
				 */
				FCTXTRACE("nameservers now above QDOMAIN");
				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
				return;
			}
			/*
			 * Adopt the zone cut found as the new domain and
			 * discard the state tied to the old server set.
			 */
			dns_name_free(&fctx->domain, fctx->mctx);
			dns_name_init(&fctx->domain, NULL);
			result = dns_name_dup(fname, fctx->mctx, &fctx->domain);
			if (result != ISC_R_SUCCESS) {
				fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
				return;
			}
			fctx->ns_ttl = fctx->nameservers.ttl;
			fctx->ns_ttl_ok = ISC_TRUE;
			fctx_cancelqueries(fctx, ISC_TRUE);
			fctx_cleanupfinds(fctx);
			fctx_cleanupaltfinds(fctx);
			fctx_cleanupforwaddrs(fctx);
			fctx_cleanupaltaddrs(fctx);
		}
		/*
		 * Try again.
		 */
		fctx_try(fctx, !get_nameservers, ISC_FALSE);
	} else if (resend) {
		/*
		 * Resend (probably with changed options).
		 */
		FCTXTRACE("resend");
		inc_stats(fctx->res, dns_resstatscounter_retry);
		result = fctx_query(fctx, addrinfo, options);
		if (result != ISC_R_SUCCESS)
			fctx_done(fctx, result, __LINE__);
	} else if (result == ISC_R_SUCCESS && !HAVE_ANSWER(fctx)) {
		/*
		 * All has gone well so far, but we are waiting for the
		 * DNSSEC validator to validate the answer.
		 */
		FCTXTRACE("wait for validator");
		fctx_cancelqueries(fctx, ISC_TRUE);
		/*
		 * We must not retransmit while the validator is working;
		 * it has references to the current rmessage.
		 */
		result = fctx_stopidletimer(fctx);
		if (result != ISC_R_SUCCESS)
			fctx_done(fctx, result, __LINE__);
	} else if (result == DNS_R_CHASEDSSERVERS) {
		unsigned int n;
		add_bad(fctx, addrinfo, result, broken_type);
		fctx_cancelqueries(fctx, ISC_TRUE);
		fctx_cleanupfinds(fctx);
		fctx_cleanupforwaddrs(fctx);

		/*
		 * nsname is the query name with its leftmost label
		 * removed; fetch the NS rrset for it, continuing in
		 * resume_dslookup() when that fetch completes.
		 */
		n = dns_name_countlabels(&fctx->name);
		dns_name_getlabelsequence(&fctx->name, 1, n - 1, &fctx->nsname);

		FCTXTRACE("suspending DS lookup to find parent's NS records");

		result = dns_resolver_createfetch(fctx->res, &fctx->nsname,
						  dns_rdatatype_ns,
						  NULL, NULL, NULL, 0, task,
						  resume_dslookup, fctx,
						  &fctx->nsrrset, NULL,
						  &fctx->nsfetch);
		if (result != ISC_R_SUCCESS)
			fctx_done(fctx, result, __LINE__);
		else {
			/*
			 * Take a reference on the fctx for the fetch just
			 * started; resume_dslookup() drops one reference
			 * when it runs.
			 */
			LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
			fctx->references++;
			UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
			result = fctx_stopidletimer(fctx);
			if (result != ISC_R_SUCCESS)
				fctx_done(fctx, result, __LINE__);
		}
	} else {
		/*
		 * We're done.
		 */
		fctx_done(fctx, result, __LINE__);
	}
}
7336
7337
7338/***
7339 *** Resolver Methods
7340 ***/
7341static void
7342destroy_badcache(dns_resolver_t *res) {
7343	dns_badcache_t *bad, *next;
7344	unsigned int i;
7345
7346	if (res->badcache != NULL) {
7347		for (i = 0; i < res->badhash; i++)
7348			for (bad = res->badcache[i]; bad != NULL;
7349			     bad = next) {
7350				next = bad->next;
7351				isc_mem_put(res->mctx, bad, sizeof(*bad) +
7352					    bad->name.length);
7353				res->badcount--;
7354			}
7355		isc_mem_put(res->mctx, res->badcache,
7356			    sizeof(*res->badcache) * res->badhash);
7357		res->badcache = NULL;
7358		res->badhash = 0;
7359		INSIST(res->badcount == 0);
7360	}
7361}
7362
7363static void
7364destroy(dns_resolver_t *res) {
7365	unsigned int i;
7366	alternate_t *a;
7367
7368	REQUIRE(res->references == 0);
7369	REQUIRE(!res->priming);
7370	REQUIRE(res->primefetch == NULL);
7371
7372	RTRACE("destroy");
7373
7374	INSIST(res->nfctx == 0);
7375
7376	DESTROYLOCK(&res->primelock);
7377	DESTROYLOCK(&res->nlock);
7378	DESTROYLOCK(&res->lock);
7379	for (i = 0; i < res->nbuckets; i++) {
7380		INSIST(ISC_LIST_EMPTY(res->buckets[i].fctxs));
7381		isc_task_shutdown(res->buckets[i].task);
7382		isc_task_detach(&res->buckets[i].task);
7383		DESTROYLOCK(&res->buckets[i].lock);
7384		isc_mem_detach(&res->buckets[i].mctx);
7385	}
7386	isc_mem_put(res->mctx, res->buckets,
7387		    res->nbuckets * sizeof(fctxbucket_t));
7388	if (res->dispatchv4 != NULL)
7389		dns_dispatch_detach(&res->dispatchv4);
7390	if (res->dispatchv6 != NULL)
7391		dns_dispatch_detach(&res->dispatchv6);
7392	while ((a = ISC_LIST_HEAD(res->alternates)) != NULL) {
7393		ISC_LIST_UNLINK(res->alternates, a, link);
7394		if (!a->isaddress)
7395			dns_name_free(&a->_u._n.name, res->mctx);
7396		isc_mem_put(res->mctx, a, sizeof(*a));
7397	}
7398	dns_resolver_reset_algorithms(res);
7399	destroy_badcache(res);
7400	dns_resolver_resetmustbesecure(res);
7401#if USE_ALGLOCK
7402	isc_rwlock_destroy(&res->alglock);
7403#endif
7404#if USE_MBSLOCK
7405	isc_rwlock_destroy(&res->mbslock);
7406#endif
7407	isc_timer_detach(&res->spillattimer);
7408	res->magic = 0;
7409	isc_mem_put(res->mctx, res, sizeof(*res));
7410}
7411
7412static void
7413send_shutdown_events(dns_resolver_t *res) {
7414	isc_event_t *event, *next_event;
7415	isc_task_t *etask;
7416
7417	/*
7418	 * Caller must be holding the resolver lock.
7419	 */
7420
7421	for (event = ISC_LIST_HEAD(res->whenshutdown);
7422	     event != NULL;
7423	     event = next_event) {
7424		next_event = ISC_LIST_NEXT(event, ev_link);
7425		ISC_LIST_UNLINK(res->whenshutdown, event, ev_link);
7426		etask = event->ev_sender;
7427		event->ev_sender = res;
7428		isc_task_sendanddetach(&etask, &event);
7429	}
7430}
7431
7432static void
7433empty_bucket(dns_resolver_t *res) {
7434	RTRACE("empty_bucket");
7435
7436	LOCK(&res->lock);
7437
7438	INSIST(res->activebuckets > 0);
7439	res->activebuckets--;
7440	if (res->activebuckets == 0)
7441		send_shutdown_events(res);
7442
7443	UNLOCK(&res->lock);
7444}
7445
7446static void
7447spillattimer_countdown(isc_task_t *task, isc_event_t *event) {
7448	dns_resolver_t *res = event->ev_arg;
7449	isc_result_t result;
7450	unsigned int count;
7451	isc_boolean_t logit = ISC_FALSE;
7452
7453	REQUIRE(VALID_RESOLVER(res));
7454
7455	UNUSED(task);
7456
7457	LOCK(&res->lock);
7458	INSIST(!res->exiting);
7459	if (res->spillat > res->spillatmin) {
7460		res->spillat--;
7461		logit = ISC_TRUE;
7462	}
7463	if (res->spillat <= res->spillatmin) {
7464		result = isc_timer_reset(res->spillattimer,
7465					 isc_timertype_inactive, NULL,
7466					 NULL, ISC_TRUE);
7467		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7468	}
7469	count = res->spillat;
7470	UNLOCK(&res->lock);
7471	if (logit)
7472		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7473			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
7474			      "clients-per-query decreased to %u", count);
7475
7476	isc_event_free(&event);
7477}
7478
7479isc_result_t
7480dns_resolver_create(dns_view_t *view,
7481		    isc_taskmgr_t *taskmgr, unsigned int ntasks,
7482		    isc_socketmgr_t *socketmgr,
7483		    isc_timermgr_t *timermgr,
7484		    unsigned int options,
7485		    dns_dispatchmgr_t *dispatchmgr,
7486		    dns_dispatch_t *dispatchv4,
7487		    dns_dispatch_t *dispatchv6,
7488		    dns_resolver_t **resp)
7489{
7490	dns_resolver_t *res;
7491	isc_result_t result = ISC_R_SUCCESS;
7492	unsigned int i, buckets_created = 0;
7493	isc_task_t *task = NULL;
7494	char name[16];
7495	unsigned dispattr;
7496
7497	/*
7498	 * Create a resolver.
7499	 */
7500
7501	REQUIRE(DNS_VIEW_VALID(view));
7502	REQUIRE(ntasks > 0);
7503	REQUIRE(resp != NULL && *resp == NULL);
7504	REQUIRE(dispatchmgr != NULL);
7505	REQUIRE(dispatchv4 != NULL || dispatchv6 != NULL);
7506
7507	res = isc_mem_get(view->mctx, sizeof(*res));
7508	if (res == NULL)
7509		return (ISC_R_NOMEMORY);
7510	RTRACE("create");
7511	res->mctx = view->mctx;
7512	res->rdclass = view->rdclass;
7513	res->socketmgr = socketmgr;
7514	res->timermgr = timermgr;
7515	res->taskmgr = taskmgr;
7516	res->dispatchmgr = dispatchmgr;
7517	res->view = view;
7518	res->options = options;
7519	res->lame_ttl = 0;
7520	ISC_LIST_INIT(res->alternates);
7521	res->udpsize = RECV_BUFFER_SIZE;
7522	res->algorithms = NULL;
7523	res->badcache = NULL;
7524	res->badcount = 0;
7525	res->badhash = 0;
7526	res->badsweep = 0;
7527	res->mustbesecure = NULL;
7528	res->spillatmin = res->spillat = 10;
7529	res->spillatmax = 100;
7530	res->spillattimer = NULL;
7531	res->zero_no_soa_ttl = ISC_FALSE;
7532	res->query_timeout = DEFAULT_QUERY_TIMEOUT;
7533	res->ndisps = 0;
7534	res->nextdisp = 0; /* meaningless at this point, but init it */
7535	res->nbuckets = ntasks;
7536	res->activebuckets = ntasks;
7537	res->buckets = isc_mem_get(view->mctx,
7538				   ntasks * sizeof(fctxbucket_t));
7539	if (res->buckets == NULL) {
7540		result = ISC_R_NOMEMORY;
7541		goto cleanup_res;
7542	}
7543	for (i = 0; i < ntasks; i++) {
7544		result = isc_mutex_init(&res->buckets[i].lock);
7545		if (result != ISC_R_SUCCESS)
7546			goto cleanup_buckets;
7547		res->buckets[i].task = NULL;
7548		result = isc_task_create(taskmgr, 0, &res->buckets[i].task);
7549		if (result != ISC_R_SUCCESS) {
7550			DESTROYLOCK(&res->buckets[i].lock);
7551			goto cleanup_buckets;
7552		}
7553		res->buckets[i].mctx = NULL;
7554		snprintf(name, sizeof(name), "res%u", i);
7555#ifdef ISC_PLATFORM_USETHREADS
7556		/*
7557		 * Use a separate memory context for each bucket to reduce
7558		 * contention among multiple threads.  Do this only when
7559		 * enabling threads because it will be require more memory.
7560		 */
7561		result = isc_mem_create(0, 0, &res->buckets[i].mctx);
7562		if (result != ISC_R_SUCCESS) {
7563			isc_task_detach(&res->buckets[i].task);
7564			DESTROYLOCK(&res->buckets[i].lock);
7565			goto cleanup_buckets;
7566		}
7567		isc_mem_setname(res->buckets[i].mctx, name, NULL);
7568#else
7569		isc_mem_attach(view->mctx, &res->buckets[i].mctx);
7570#endif
7571		isc_task_setname(res->buckets[i].task, name, res);
7572		ISC_LIST_INIT(res->buckets[i].fctxs);
7573		res->buckets[i].exiting = ISC_FALSE;
7574		buckets_created++;
7575	}
7576
7577	res->dispatchv4 = NULL;
7578	if (dispatchv4 != NULL) {
7579		dns_dispatch_attach(dispatchv4, &res->dispatchv4);
7580		dispattr = dns_dispatch_getattributes(dispatchv4);
7581		res->exclusivev4 =
7582			ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0);
7583	}
7584
7585	res->dispatchv6 = NULL;
7586	if (dispatchv6 != NULL) {
7587		dns_dispatch_attach(dispatchv6, &res->dispatchv6);
7588		dispattr = dns_dispatch_getattributes(dispatchv6);
7589		res->exclusivev6 =
7590			ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0);
7591	}
7592
7593	res->references = 1;
7594	res->exiting = ISC_FALSE;
7595	res->frozen = ISC_FALSE;
7596	ISC_LIST_INIT(res->whenshutdown);
7597	res->priming = ISC_FALSE;
7598	res->primefetch = NULL;
7599	res->nfctx = 0;
7600
7601	result = isc_mutex_init(&res->lock);
7602	if (result != ISC_R_SUCCESS)
7603		goto cleanup_dispatches;
7604
7605	result = isc_mutex_init(&res->nlock);
7606	if (result != ISC_R_SUCCESS)
7607		goto cleanup_lock;
7608
7609	result = isc_mutex_init(&res->primelock);
7610	if (result != ISC_R_SUCCESS)
7611		goto cleanup_nlock;
7612
7613	task = NULL;
7614	result = isc_task_create(taskmgr, 0, &task);
7615	if (result != ISC_R_SUCCESS)
7616		goto cleanup_primelock;
7617
7618	result = isc_timer_create(timermgr, isc_timertype_inactive, NULL, NULL,
7619				  task, spillattimer_countdown, res,
7620				  &res->spillattimer);
7621	isc_task_detach(&task);
7622	if (result != ISC_R_SUCCESS)
7623		goto cleanup_primelock;
7624
7625#if USE_ALGLOCK
7626	result = isc_rwlock_init(&res->alglock, 0, 0);
7627	if (result != ISC_R_SUCCESS)
7628		goto cleanup_spillattimer;
7629#endif
7630#if USE_MBSLOCK
7631	result = isc_rwlock_init(&res->mbslock, 0, 0);
7632	if (result != ISC_R_SUCCESS)
7633		goto cleanup_alglock;
7634#endif
7635
7636	res->magic = RES_MAGIC;
7637
7638	*resp = res;
7639
7640	return (ISC_R_SUCCESS);
7641
7642#if USE_MBSLOCK
7643 cleanup_alglock:
7644#if USE_ALGLOCK
7645	isc_rwlock_destroy(&res->alglock);
7646#endif
7647#endif
7648#if USE_ALGLOCK || USE_MBSLOCK
7649 cleanup_spillattimer:
7650	isc_timer_detach(&res->spillattimer);
7651#endif
7652
7653 cleanup_primelock:
7654	DESTROYLOCK(&res->primelock);
7655
7656 cleanup_nlock:
7657	DESTROYLOCK(&res->nlock);
7658
7659 cleanup_lock:
7660	DESTROYLOCK(&res->lock);
7661
7662 cleanup_dispatches:
7663	if (res->dispatchv6 != NULL)
7664		dns_dispatch_detach(&res->dispatchv6);
7665	if (res->dispatchv4 != NULL)
7666		dns_dispatch_detach(&res->dispatchv4);
7667
7668 cleanup_buckets:
7669	for (i = 0; i < buckets_created; i++) {
7670		isc_mem_detach(&res->buckets[i].mctx);
7671		DESTROYLOCK(&res->buckets[i].lock);
7672		isc_task_shutdown(res->buckets[i].task);
7673		isc_task_detach(&res->buckets[i].task);
7674	}
7675	isc_mem_put(view->mctx, res->buckets,
7676		    res->nbuckets * sizeof(fctxbucket_t));
7677
7678 cleanup_res:
7679	isc_mem_put(view->mctx, res, sizeof(*res));
7680
7681	return (result);
7682}
7683
7684#ifdef BIND9
7685static void
7686prime_done(isc_task_t *task, isc_event_t *event) {
7687	dns_resolver_t *res;
7688	dns_fetchevent_t *fevent;
7689	dns_fetch_t *fetch;
7690	dns_db_t *db = NULL;
7691
7692	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
7693	fevent = (dns_fetchevent_t *)event;
7694	res = event->ev_arg;
7695	REQUIRE(VALID_RESOLVER(res));
7696
7697	UNUSED(task);
7698
7699	LOCK(&res->lock);
7700
7701	INSIST(res->priming);
7702	res->priming = ISC_FALSE;
7703	LOCK(&res->primelock);
7704	fetch = res->primefetch;
7705	res->primefetch = NULL;
7706	UNLOCK(&res->primelock);
7707
7708	UNLOCK(&res->lock);
7709
7710	if (fevent->result == ISC_R_SUCCESS &&
7711	    res->view->cache != NULL && res->view->hints != NULL) {
7712		dns_cache_attachdb(res->view->cache, &db);
7713		dns_root_checkhints(res->view, res->view->hints, db);
7714		dns_db_detach(&db);
7715	}
7716
7717	if (fevent->node != NULL)
7718		dns_db_detachnode(fevent->db, &fevent->node);
7719	if (fevent->db != NULL)
7720		dns_db_detach(&fevent->db);
7721	if (dns_rdataset_isassociated(fevent->rdataset))
7722		dns_rdataset_disassociate(fevent->rdataset);
7723	INSIST(fevent->sigrdataset == NULL);
7724
7725	isc_mem_put(res->mctx, fevent->rdataset, sizeof(*fevent->rdataset));
7726
7727	isc_event_free(&event);
7728	dns_resolver_destroyfetch(&fetch);
7729}
7730
7731void
7732dns_resolver_prime(dns_resolver_t *res) {
7733	isc_boolean_t want_priming = ISC_FALSE;
7734	dns_rdataset_t *rdataset;
7735	isc_result_t result;
7736
7737	REQUIRE(VALID_RESOLVER(res));
7738	REQUIRE(res->frozen);
7739
7740	RTRACE("dns_resolver_prime");
7741
7742	LOCK(&res->lock);
7743
7744	if (!res->exiting && !res->priming) {
7745		INSIST(res->primefetch == NULL);
7746		res->priming = ISC_TRUE;
7747		want_priming = ISC_TRUE;
7748	}
7749
7750	UNLOCK(&res->lock);
7751
7752	if (want_priming) {
7753		/*
7754		 * To avoid any possible recursive locking problems, we
7755		 * start the priming fetch like any other fetch, and holding
7756		 * no resolver locks.  No one else will try to start it
7757		 * because we're the ones who set res->priming to true.
7758		 * Any other callers of dns_resolver_prime() while we're
7759		 * running will see that res->priming is already true and
7760		 * do nothing.
7761		 */
7762		RTRACE("priming");
7763		rdataset = isc_mem_get(res->mctx, sizeof(*rdataset));
7764		if (rdataset == NULL) {
7765			LOCK(&res->lock);
7766			INSIST(res->priming);
7767			INSIST(res->primefetch == NULL);
7768			res->priming = ISC_FALSE;
7769			UNLOCK(&res->lock);
7770			return;
7771		}
7772		dns_rdataset_init(rdataset);
7773		LOCK(&res->primelock);
7774		result = dns_resolver_createfetch(res, dns_rootname,
7775						  dns_rdatatype_ns,
7776						  NULL, NULL, NULL, 0,
7777						  res->buckets[0].task,
7778						  prime_done,
7779						  res, rdataset, NULL,
7780						  &res->primefetch);
7781		UNLOCK(&res->primelock);
7782		if (result != ISC_R_SUCCESS) {
7783			LOCK(&res->lock);
7784			INSIST(res->priming);
7785			res->priming = ISC_FALSE;
7786			UNLOCK(&res->lock);
7787		}
7788	}
7789}
7790#endif /* BIND9 */
7791
/*
 * Mark the resolver as frozen.
 *
 * Within this file, dns_resolver_addalternate() requires a resolver that
 * is not yet frozen, while dns_resolver_prime() and
 * dns_resolver_createfetch2() require a frozen one.
 */
void
dns_resolver_freeze(dns_resolver_t *res) {
	/*
	 * Freeze resolver.
	 */

	REQUIRE(VALID_RESOLVER(res));

	/* NOTE(review): set without taking res->lock. */
	res->frozen = ISC_TRUE;
}
7802
/*
 * Add a reference to 'source' and store the resolver in '*targetp'.
 *
 * Requires a valid resolver that is not exiting, and a non-NULL
 * 'targetp' that currently points to NULL.
 */
void
dns_resolver_attach(dns_resolver_t *source, dns_resolver_t **targetp) {
	REQUIRE(VALID_RESOLVER(source));
	REQUIRE(targetp != NULL && *targetp == NULL);

	RRTRACE(source, "attach");
	LOCK(&source->lock);
	/* Checked under the lock: attaching to an exiting resolver is a bug. */
	REQUIRE(!source->exiting);

	INSIST(source->references > 0);
	source->references++;
	/* Catch the reference counter wrapping around to zero. */
	INSIST(source->references != 0);
	UNLOCK(&source->lock);

	*targetp = source;
}
7819
7820void
7821dns_resolver_whenshutdown(dns_resolver_t *res, isc_task_t *task,
7822			  isc_event_t **eventp)
7823{
7824	isc_task_t *clone;
7825	isc_event_t *event;
7826
7827	REQUIRE(VALID_RESOLVER(res));
7828	REQUIRE(eventp != NULL);
7829
7830	event = *eventp;
7831	*eventp = NULL;
7832
7833	LOCK(&res->lock);
7834
7835	if (res->exiting && res->activebuckets == 0) {
7836		/*
7837		 * We're already shutdown.  Send the event.
7838		 */
7839		event->ev_sender = res;
7840		isc_task_send(task, &event);
7841	} else {
7842		clone = NULL;
7843		isc_task_attach(task, &clone);
7844		event->ev_sender = clone;
7845		ISC_LIST_APPEND(res->whenshutdown, event, ev_link);
7846	}
7847
7848	UNLOCK(&res->lock);
7849}
7850
7851void
7852dns_resolver_shutdown(dns_resolver_t *res) {
7853	unsigned int i;
7854	fetchctx_t *fctx;
7855	isc_socket_t *sock;
7856	isc_result_t result;
7857
7858	REQUIRE(VALID_RESOLVER(res));
7859
7860	RTRACE("shutdown");
7861
7862	LOCK(&res->lock);
7863
7864	if (!res->exiting) {
7865		RTRACE("exiting");
7866		res->exiting = ISC_TRUE;
7867
7868		for (i = 0; i < res->nbuckets; i++) {
7869			LOCK(&res->buckets[i].lock);
7870			for (fctx = ISC_LIST_HEAD(res->buckets[i].fctxs);
7871			     fctx != NULL;
7872			     fctx = ISC_LIST_NEXT(fctx, link))
7873				fctx_shutdown(fctx);
7874			if (res->dispatchv4 != NULL && !res->exclusivev4) {
7875				sock = dns_dispatch_getsocket(res->dispatchv4);
7876				isc_socket_cancel(sock, res->buckets[i].task,
7877						  ISC_SOCKCANCEL_ALL);
7878			}
7879			if (res->dispatchv6 != NULL && !res->exclusivev6) {
7880				sock = dns_dispatch_getsocket(res->dispatchv6);
7881				isc_socket_cancel(sock, res->buckets[i].task,
7882						  ISC_SOCKCANCEL_ALL);
7883			}
7884			res->buckets[i].exiting = ISC_TRUE;
7885			if (ISC_LIST_EMPTY(res->buckets[i].fctxs)) {
7886				INSIST(res->activebuckets > 0);
7887				res->activebuckets--;
7888			}
7889			UNLOCK(&res->buckets[i].lock);
7890		}
7891		if (res->activebuckets == 0)
7892			send_shutdown_events(res);
7893		result = isc_timer_reset(res->spillattimer,
7894					 isc_timertype_inactive, NULL,
7895					 NULL, ISC_TRUE);
7896		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7897	}
7898
7899	UNLOCK(&res->lock);
7900}
7901
7902void
7903dns_resolver_detach(dns_resolver_t **resp) {
7904	dns_resolver_t *res;
7905	isc_boolean_t need_destroy = ISC_FALSE;
7906
7907	REQUIRE(resp != NULL);
7908	res = *resp;
7909	REQUIRE(VALID_RESOLVER(res));
7910
7911	RTRACE("detach");
7912
7913	LOCK(&res->lock);
7914
7915	INSIST(res->references > 0);
7916	res->references--;
7917	if (res->references == 0) {
7918		INSIST(res->exiting && res->activebuckets == 0);
7919		need_destroy = ISC_TRUE;
7920	}
7921
7922	UNLOCK(&res->lock);
7923
7924	if (need_destroy)
7925		destroy(res);
7926
7927	*resp = NULL;
7928}
7929
7930static inline isc_boolean_t
7931fctx_match(fetchctx_t *fctx, dns_name_t *name, dns_rdatatype_t type,
7932	   unsigned int options)
7933{
7934	/*
7935	 * Don't match fetch contexts that are shutting down.
7936	 */
7937	if (fctx->cloned || fctx->state == fetchstate_done ||
7938	    ISC_LIST_EMPTY(fctx->events))
7939		return (ISC_FALSE);
7940
7941	if (fctx->type != type || fctx->options != options)
7942		return (ISC_FALSE);
7943	return (dns_name_equal(&fctx->name, name));
7944}
7945
7946static inline void
7947log_fetch(dns_name_t *name, dns_rdatatype_t type) {
7948	char namebuf[DNS_NAME_FORMATSIZE];
7949	char typebuf[DNS_RDATATYPE_FORMATSIZE];
7950	int level = ISC_LOG_DEBUG(1);
7951
7952	if (! isc_log_wouldlog(dns_lctx, level))
7953		return;
7954
7955	dns_name_format(name, namebuf, sizeof(namebuf));
7956	dns_rdatatype_format(type, typebuf, sizeof(typebuf));
7957
7958	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7959		      DNS_LOGMODULE_RESOLVER, level,
7960		      "createfetch: %s %s", namebuf, typebuf);
7961}
7962
/*
 * Convenience wrapper around dns_resolver_createfetch2(): all arguments
 * are passed through unchanged, with no client address (NULL) and a
 * message id of 0.  Returns whatever dns_resolver_createfetch2() returns.
 */
isc_result_t
dns_resolver_createfetch(dns_resolver_t *res, dns_name_t *name,
			 dns_rdatatype_t type,
			 dns_name_t *domain, dns_rdataset_t *nameservers,
			 dns_forwarders_t *forwarders,
			 unsigned int options, isc_task_t *task,
			 isc_taskaction_t action, void *arg,
			 dns_rdataset_t *rdataset,
			 dns_rdataset_t *sigrdataset,
			 dns_fetch_t **fetchp)
{
	return (dns_resolver_createfetch2(res, name, type, domain,
					  nameservers, forwarders, NULL, 0,
					  options, task, action, arg,
					  rdataset, sigrdataset, fetchp));
}
7979
/*
 * Create a fetch for 'name' / 'type', either by joining an existing
 * matching fetch context in the name's bucket or by creating and
 * launching a new one.
 *
 * Requires a valid, frozen resolver; 'nameservers' must be a valid NS
 * rdataset when 'domain' is given and NULL otherwise; 'forwarders' must
 * be NULL; 'rdataset' (and 'sigrdataset', if supplied) must not be
 * associated; 'fetchp' must point to a NULL pointer.
 *
 * Results visible in this function:
 *	ISC_R_SUCCESS		'*fetchp' holds the new fetch handle.
 *	ISC_R_NOMEMORY		the fetch handle could not be allocated.
 *	ISC_R_SHUTTINGDOWN	the bucket is exiting.
 *	DNS_R_DUPLICATE		a joined event already exists for the same
 *				'client' address and 'id'.
 *	DNS_R_DROP		the matching fetch context has spilled.
 * Failures from fctx_create() and fctx_join() are returned as-is.
 */
isc_result_t
dns_resolver_createfetch2(dns_resolver_t *res, dns_name_t *name,
			  dns_rdatatype_t type,
			  dns_name_t *domain, dns_rdataset_t *nameservers,
			  dns_forwarders_t *forwarders,
			  isc_sockaddr_t *client, dns_messageid_t id,
			  unsigned int options, isc_task_t *task,
			  isc_taskaction_t action, void *arg,
			  dns_rdataset_t *rdataset,
			  dns_rdataset_t *sigrdataset,
			  dns_fetch_t **fetchp)
{
	dns_fetch_t *fetch;
	fetchctx_t *fctx = NULL;
	isc_result_t result = ISC_R_SUCCESS;
	unsigned int bucketnum;
	isc_boolean_t new_fctx = ISC_FALSE;
	isc_event_t *event;
	unsigned int count = 0;
	unsigned int spillat;
	unsigned int spillatmin;
	isc_boolean_t destroy = ISC_FALSE;

	UNUSED(forwarders);

	REQUIRE(VALID_RESOLVER(res));
	REQUIRE(res->frozen);
	/* XXXRTH  Check for meta type */
	if (domain != NULL) {
		REQUIRE(DNS_RDATASET_VALID(nameservers));
		REQUIRE(nameservers->type == dns_rdatatype_ns);
	} else
		REQUIRE(nameservers == NULL);
	REQUIRE(forwarders == NULL);
	REQUIRE(!dns_rdataset_isassociated(rdataset));
	REQUIRE(sigrdataset == NULL ||
		!dns_rdataset_isassociated(sigrdataset));
	REQUIRE(fetchp != NULL && *fetchp == NULL);

	log_fetch(name, type);

	/*
	 * XXXRTH  use a mempool?
	 */
	fetch = isc_mem_get(res->mctx, sizeof(*fetch));
	if (fetch == NULL)
		return (ISC_R_NOMEMORY);

	/*
	 * The bucket is selected by hashing the name.
	 */
	bucketnum = dns_name_fullhash(name, ISC_FALSE) % res->nbuckets;

	/*
	 * Snapshot the spill thresholds under the resolver lock, and
	 * release it before taking the bucket lock.
	 */
	LOCK(&res->lock);
	spillat = res->spillat;
	spillatmin = res->spillatmin;
	UNLOCK(&res->lock);
	LOCK(&res->buckets[bucketnum].lock);

	if (res->buckets[bucketnum].exiting) {
		result = ISC_R_SHUTTINGDOWN;
		goto unlock;
	}

	/*
	 * Unless the caller asked for an unshared fetch, look for an
	 * existing fetch context to join.  'fctx' stays NULL if none
	 * matches (or if no search was done).
	 */
	if ((options & DNS_FETCHOPT_UNSHARED) == 0) {
		for (fctx = ISC_LIST_HEAD(res->buckets[bucketnum].fctxs);
		     fctx != NULL;
		     fctx = ISC_LIST_NEXT(fctx, link)) {
			if (fctx_match(fctx, name, type, options))
				break;
		}
	}

	/*
	 * Is this a duplicate?
	 *
	 * Events are only counted when a client address was supplied, so
	 * 'count' remains 0 for requests without one.
	 */
	if (fctx != NULL && client != NULL) {
		dns_fetchevent_t *fevent;
		for (fevent = ISC_LIST_HEAD(fctx->events);
		     fevent != NULL;
		     fevent = ISC_LIST_NEXT(fevent, ev_link)) {
			if (fevent->client != NULL && fevent->id == id &&
			    isc_sockaddr_equal(fevent->client, client)) {
				result = DNS_R_DUPLICATE;
				goto unlock;
			}
			count++;
		}
	}
	/*
	 * Spill handling: once the fetch context has been marked as
	 * spilled, further requests that reach this check are dropped.
	 * A 'spillatmin' of 0 disables the check.
	 */
	if (count >= spillatmin && spillatmin != 0) {
		INSIST(fctx != NULL);
		if (count >= spillat)
			fctx->spilled = ISC_TRUE;
		if (fctx->spilled) {
			result = DNS_R_DROP;
			goto unlock;
		}
	}

	if (fctx == NULL) {
		result = fctx_create(res, name, type, domain, nameservers,
				     options, bucketnum, &fctx);
		if (result != ISC_R_SUCCESS)
			goto unlock;
		new_fctx = ISC_TRUE;
	}

	result = fctx_join(fctx, task, client, id, action, arg,
			   rdataset, sigrdataset, fetch);
	if (new_fctx) {
		if (result == ISC_R_SUCCESS) {
			/*
			 * Launch this fctx.
			 */
			event = &fctx->control_event;
			ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
				       DNS_EVENT_FETCHCONTROL,
				       fctx_start, fctx, NULL,
				       NULL, NULL);
			isc_task_send(res->buckets[bucketnum].task, &event);
		} else {
			/*
			 * We don't care about the result of fctx_unlink()
			 * since we know we're not exiting.
			 */
			(void)fctx_unlink(fctx);
			destroy = ISC_TRUE;
		}
	}

 unlock:
	UNLOCK(&res->buckets[bucketnum].lock);

	/*
	 * A freshly created fctx that could not be joined is destroyed
	 * only after the bucket lock has been released.
	 */
	if (destroy)
		fctx_destroy(fctx);

	if (result == ISC_R_SUCCESS) {
		FTRACE("created");
		*fetchp = fetch;
	} else
		isc_mem_put(res->mctx, fetch, sizeof(*fetch));

	return (result);
}
8121
8122void
8123dns_resolver_cancelfetch(dns_fetch_t *fetch) {
8124	fetchctx_t *fctx;
8125	dns_resolver_t *res;
8126	dns_fetchevent_t *event, *next_event;
8127	isc_task_t *etask;
8128
8129	REQUIRE(DNS_FETCH_VALID(fetch));
8130	fctx = fetch->private;
8131	REQUIRE(VALID_FCTX(fctx));
8132	res = fctx->res;
8133
8134	FTRACE("cancelfetch");
8135
8136	LOCK(&res->buckets[fctx->bucketnum].lock);
8137
8138	/*
8139	 * Find the completion event for this fetch (as opposed
8140	 * to those for other fetches that have joined the same
8141	 * fctx) and send it with result = ISC_R_CANCELED.
8142	 */
8143	event = NULL;
8144	if (fctx->state != fetchstate_done) {
8145		for (event = ISC_LIST_HEAD(fctx->events);
8146		     event != NULL;
8147		     event = next_event) {
8148			next_event = ISC_LIST_NEXT(event, ev_link);
8149			if (event->fetch == fetch) {
8150				ISC_LIST_UNLINK(fctx->events, event, ev_link);
8151				break;
8152			}
8153		}
8154	}
8155	if (event != NULL) {
8156		etask = event->ev_sender;
8157		event->ev_sender = fctx;
8158		event->result = ISC_R_CANCELED;
8159		isc_task_sendanddetach(&etask, ISC_EVENT_PTR(&event));
8160	}
8161	/*
8162	 * The fctx continues running even if no fetches remain;
8163	 * the answer is still cached.
8164	 */
8165
8166	UNLOCK(&res->buckets[fctx->bucketnum].lock);
8167}
8168
8169void
8170dns_resolver_destroyfetch(dns_fetch_t **fetchp) {
8171	dns_fetch_t *fetch;
8172	dns_resolver_t *res;
8173	dns_fetchevent_t *event, *next_event;
8174	fetchctx_t *fctx;
8175	unsigned int bucketnum;
8176	isc_boolean_t bucket_empty;
8177
8178	REQUIRE(fetchp != NULL);
8179	fetch = *fetchp;
8180	REQUIRE(DNS_FETCH_VALID(fetch));
8181	fctx = fetch->private;
8182	REQUIRE(VALID_FCTX(fctx));
8183	res = fctx->res;
8184
8185	FTRACE("destroyfetch");
8186
8187	bucketnum = fctx->bucketnum;
8188	LOCK(&res->buckets[bucketnum].lock);
8189
8190	/*
8191	 * Sanity check: the caller should have gotten its event before
8192	 * trying to destroy the fetch.
8193	 */
8194	event = NULL;
8195	if (fctx->state != fetchstate_done) {
8196		for (event = ISC_LIST_HEAD(fctx->events);
8197		     event != NULL;
8198		     event = next_event) {
8199			next_event = ISC_LIST_NEXT(event, ev_link);
8200			RUNTIME_CHECK(event->fetch != fetch);
8201		}
8202	}
8203
8204	bucket_empty = fctx_decreference(fctx);
8205
8206	UNLOCK(&res->buckets[bucketnum].lock);
8207
8208	isc_mem_put(res->mctx, fetch, sizeof(*fetch));
8209	*fetchp = NULL;
8210
8211	if (bucket_empty)
8212		empty_bucket(res);
8213}
8214
8215void
8216dns_resolver_logfetch(dns_fetch_t *fetch, isc_log_t *lctx,
8217		      isc_logcategory_t *category, isc_logmodule_t *module,
8218		      int level, isc_boolean_t duplicateok)
8219{
8220	fetchctx_t *fctx;
8221	dns_resolver_t *res;
8222	char domainbuf[DNS_NAME_FORMATSIZE];
8223
8224	REQUIRE(DNS_FETCH_VALID(fetch));
8225	fctx = fetch->private;
8226	REQUIRE(VALID_FCTX(fctx));
8227	res = fctx->res;
8228
8229	LOCK(&res->buckets[fctx->bucketnum].lock);
8230
8231	INSIST(fctx->exitline >= 0);
8232	if (!fctx->logged || duplicateok) {
8233		dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
8234		isc_log_write(lctx, category, module, level,
8235			      "fetch completed at %s:%d for %s in "
8236			      "%" ISC_PRINT_QUADFORMAT "u."
8237			      "%06" ISC_PRINT_QUADFORMAT "u: %s/%s "
8238			      "[domain:%s,referral:%u,restart:%u,qrysent:%u,"
8239			      "timeout:%u,lame:%u,neterr:%u,badresp:%u,"
8240			      "adberr:%u,findfail:%u,valfail:%u]",
8241			      __FILE__, fctx->exitline, fctx->info,
8242			      fctx->duration / US_PER_SEC,
8243			      fctx->duration % US_PER_SEC,
8244			      isc_result_totext(fctx->result),
8245			      isc_result_totext(fctx->vresult), domainbuf,
8246			      fctx->referrals, fctx->restarts,
8247			      fctx->querysent, fctx->timeouts, fctx->lamecount,
8248			      fctx->neterr, fctx->badresp, fctx->adberr,
8249			      fctx->findfail, fctx->valfail);
8250		fctx->logged = ISC_TRUE;
8251	}
8252
8253	UNLOCK(&res->buckets[fctx->bucketnum].lock);
8254}
8255
/*
 * Return the resolver's 'dispatchmgr' field.  Requires a valid resolver.
 */
dns_dispatchmgr_t *
dns_resolver_dispatchmgr(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->dispatchmgr);
}
8261
/*
 * Return the resolver's 'dispatchv4' field, which is NULL when the
 * resolver was created without an IPv4 dispatch.  Requires a valid
 * resolver.
 */
dns_dispatch_t *
dns_resolver_dispatchv4(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->dispatchv4);
}
8267
/*
 * Return the resolver's 'dispatchv6' field, which is NULL when the
 * resolver was created without an IPv6 dispatch.  Requires a valid
 * resolver.
 */
dns_dispatch_t *
dns_resolver_dispatchv6(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->dispatchv6);
}
8273
/*
 * Return the resolver's 'socketmgr' field.  Requires a valid resolver.
 */
isc_socketmgr_t *
dns_resolver_socketmgr(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->socketmgr);
}
8279
/*
 * Return the resolver's 'taskmgr' field.  Requires a valid resolver.
 */
isc_taskmgr_t *
dns_resolver_taskmgr(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->taskmgr);
}
8285
/*
 * Return the resolver's 'lame_ttl' value (0 until changed with
 * dns_resolver_setlamettl()).  Requires a valid resolver.
 */
isc_uint32_t
dns_resolver_getlamettl(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->lame_ttl);
}
8291
/*
 * Set the resolver's 'lame_ttl' value.  Requires a valid resolver.
 * NOTE(review): the field is written without taking any lock.
 */
void
dns_resolver_setlamettl(dns_resolver_t *resolver, isc_uint32_t lame_ttl) {
	REQUIRE(VALID_RESOLVER(resolver));
	resolver->lame_ttl = lame_ttl;
}
8297
8298unsigned int
8299dns_resolver_nrunning(dns_resolver_t *resolver) {
8300	unsigned int n;
8301	LOCK(&resolver->nlock);
8302	n = resolver->nfctx;
8303	UNLOCK(&resolver->nlock);
8304	return (n);
8305}
8306
8307isc_result_t
8308dns_resolver_addalternate(dns_resolver_t *resolver, isc_sockaddr_t *alt,
8309			  dns_name_t *name, in_port_t port) {
8310	alternate_t *a;
8311	isc_result_t result;
8312
8313	REQUIRE(VALID_RESOLVER(resolver));
8314	REQUIRE(!resolver->frozen);
8315	REQUIRE((alt == NULL) ^ (name == NULL));
8316
8317	a = isc_mem_get(resolver->mctx, sizeof(*a));
8318	if (a == NULL)
8319		return (ISC_R_NOMEMORY);
8320	if (alt != NULL) {
8321		a->isaddress = ISC_TRUE;
8322		a->_u.addr = *alt;
8323	} else {
8324		a->isaddress = ISC_FALSE;
8325		a->_u._n.port = port;
8326		dns_name_init(&a->_u._n.name, NULL);
8327		result = dns_name_dup(name, resolver->mctx, &a->_u._n.name);
8328		if (result != ISC_R_SUCCESS) {
8329			isc_mem_put(resolver->mctx, a, sizeof(*a));
8330			return (result);
8331		}
8332	}
8333	ISC_LINK_INIT(a, link);
8334	ISC_LIST_APPEND(resolver->alternates, a, link);
8335
8336	return (ISC_R_SUCCESS);
8337}
8338
/*
 * Set the resolver's 'udpsize' value.  Requires a valid resolver.
 * NOTE(review): the field is written without taking any lock.
 */
void
dns_resolver_setudpsize(dns_resolver_t *resolver, isc_uint16_t udpsize) {
	REQUIRE(VALID_RESOLVER(resolver));
	resolver->udpsize = udpsize;
}
8344
/*
 * Return the resolver's 'udpsize' value (RECV_BUFFER_SIZE until changed
 * with dns_resolver_setudpsize()).  Requires a valid resolver.
 */
isc_uint16_t
dns_resolver_getudpsize(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->udpsize);
}
8350
8351void
8352dns_resolver_flushbadcache(dns_resolver_t *resolver, dns_name_t *name) {
8353	unsigned int i;
8354	dns_badcache_t *bad, *prev, *next;
8355
8356	REQUIRE(VALID_RESOLVER(resolver));
8357
8358	LOCK(&resolver->lock);
8359	if (resolver->badcache == NULL)
8360		goto unlock;
8361
8362	if (name != NULL) {
8363		isc_time_t now;
8364		isc_result_t result;
8365		result = isc_time_now(&now);
8366		if (result != ISC_R_SUCCESS)
8367			isc_time_settoepoch(&now);
8368		i = dns_name_hash(name, ISC_FALSE) % resolver->badhash;
8369		prev = NULL;
8370		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8371			int n;
8372			next = bad->next;
8373			n = isc_time_compare(&bad->expire, &now);
8374			if (n < 0 || dns_name_equal(name, &bad->name)) {
8375				if (prev == NULL)
8376					resolver->badcache[i] = bad->next;
8377				else
8378					prev->next = bad->next;
8379				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8380					    bad->name.length);
8381				resolver->badcount--;
8382			} else
8383				prev = bad;
8384		}
8385	} else
8386		destroy_badcache(resolver);
8387
8388 unlock:
8389	UNLOCK(&resolver->lock);
8390
8391}
8392
8393static void
8394resizehash(dns_resolver_t *resolver, isc_time_t *now, isc_boolean_t grow) {
8395	unsigned int newsize;
8396	dns_badcache_t **new, *bad, *next;
8397	unsigned int i;
8398
8399	if (grow)
8400		newsize = resolver->badhash * 2 + 1;
8401	else
8402		newsize = (resolver->badhash - 1) / 2;
8403
8404	new = isc_mem_get(resolver->mctx,
8405			  sizeof(*resolver->badcache) * newsize);
8406	if (new == NULL)
8407		return;
8408	memset(new, 0, sizeof(*resolver->badcache) * newsize);
8409	for (i = 0; i < resolver->badhash; i++) {
8410		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8411			next = bad->next;
8412			if (isc_time_compare(&bad->expire, now) < 0) {
8413				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8414					    bad->name.length);
8415				resolver->badcount--;
8416			} else {
8417				bad->next = new[bad->hashval % newsize];
8418				new[bad->hashval % newsize] = bad;
8419			}
8420		}
8421	}
8422	isc_mem_put(resolver->mctx, resolver->badcache,
8423		    sizeof(*resolver->badcache) * resolver->badhash);
8424	resolver->badhash = newsize;
8425	resolver->badcache = new;
8426}
8427
8428void
8429dns_resolver_addbadcache(dns_resolver_t *resolver, dns_name_t *name,
8430			 dns_rdatatype_t type, isc_time_t *expire)
8431{
8432	isc_time_t now;
8433	isc_result_t result = ISC_R_SUCCESS;
8434	unsigned int i, hashval;
8435	dns_badcache_t *bad, *prev, *next;
8436
8437	REQUIRE(VALID_RESOLVER(resolver));
8438
8439	LOCK(&resolver->lock);
8440	if (resolver->badcache == NULL) {
8441		resolver->badcache = isc_mem_get(resolver->mctx,
8442						 sizeof(*resolver->badcache) *
8443						 DNS_BADCACHE_SIZE);
8444		if (resolver->badcache == NULL)
8445			goto cleanup;
8446		resolver->badhash = DNS_BADCACHE_SIZE;
8447		memset(resolver->badcache, 0, sizeof(*resolver->badcache) *
8448		       resolver->badhash);
8449	}
8450
8451	result = isc_time_now(&now);
8452	if (result != ISC_R_SUCCESS)
8453		isc_time_settoepoch(&now);
8454	hashval = dns_name_hash(name, ISC_FALSE);
8455	i = hashval % resolver->badhash;
8456	prev = NULL;
8457	for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8458		next = bad->next;
8459		if (bad->type == type && dns_name_equal(name, &bad->name))
8460			break;
8461		if (isc_time_compare(&bad->expire, &now) < 0) {
8462			if (prev == NULL)
8463				resolver->badcache[i] = bad->next;
8464			else
8465				prev->next = bad->next;
8466			isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8467				    bad->name.length);
8468			resolver->badcount--;
8469		} else
8470			prev = bad;
8471	}
8472	if (bad == NULL) {
8473		isc_buffer_t buffer;
8474		bad = isc_mem_get(resolver->mctx, sizeof(*bad) + name->length);
8475		if (bad == NULL)
8476			goto cleanup;
8477		bad->type = type;
8478		bad->hashval = hashval;
8479		bad->expire = *expire;
8480		isc_buffer_init(&buffer, bad + 1, name->length);
8481		dns_name_init(&bad->name, NULL);
8482		dns_name_copy(name, &bad->name, &buffer);
8483		bad->next = resolver->badcache[i];
8484		resolver->badcache[i] = bad;
8485		resolver->badcount++;
8486		if (resolver->badcount > resolver->badhash * 8)
8487			resizehash(resolver, &now, ISC_TRUE);
8488		if (resolver->badcount < resolver->badhash * 2 &&
8489		    resolver->badhash > DNS_BADCACHE_SIZE)
8490			resizehash(resolver, &now, ISC_FALSE);
8491	} else
8492		bad->expire = *expire;
8493 cleanup:
8494	UNLOCK(&resolver->lock);
8495}
8496
8497isc_boolean_t
8498dns_resolver_getbadcache(dns_resolver_t *resolver, dns_name_t *name,
8499			 dns_rdatatype_t type, isc_time_t *now)
8500{
8501	dns_badcache_t *bad, *prev, *next;
8502	isc_boolean_t answer = ISC_FALSE;
8503	unsigned int i;
8504
8505	REQUIRE(VALID_RESOLVER(resolver));
8506
8507	LOCK(&resolver->lock);
8508	if (resolver->badcache == NULL)
8509		goto unlock;
8510
8511	i = dns_name_hash(name, ISC_FALSE) % resolver->badhash;
8512	prev = NULL;
8513	for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8514		next = bad->next;
8515		/*
8516		 * Search the hash list. Clean out expired records as we go.
8517		 */
8518		if (isc_time_compare(&bad->expire, now) < 0) {
8519			if (prev != NULL)
8520				prev->next = bad->next;
8521			else
8522				resolver->badcache[i] = bad->next;
8523			isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8524				    bad->name.length);
8525			resolver->badcount--;
8526			continue;
8527		}
8528		if (bad->type == type && dns_name_equal(name, &bad->name)) {
8529			answer = ISC_TRUE;
8530			break;
8531		}
8532		prev = bad;
8533	}
8534
8535	/*
8536	 * Slow sweep to clean out stale records.
8537	 */
8538	i = resolver->badsweep++ % resolver->badhash;
8539	bad = resolver->badcache[i];
8540	if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
8541		resolver->badcache[i] = bad->next;
8542		isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8543			    bad->name.length);
8544		resolver->badcount--;
8545	}
8546
8547 unlock:
8548	UNLOCK(&resolver->lock);
8549	return (answer);
8550}
8551
8552void
8553dns_resolver_printbadcache(dns_resolver_t *resolver, FILE *fp) {
8554	char namebuf[DNS_NAME_FORMATSIZE];
8555	char typebuf[DNS_RDATATYPE_FORMATSIZE];
8556	dns_badcache_t *bad, *next, *prev;
8557	isc_time_t now;
8558	unsigned int i;
8559	isc_uint64_t t;
8560
8561	LOCK(&resolver->lock);
8562	fprintf(fp, ";\n; Bad cache\n;\n");
8563
8564	if (resolver->badcache == NULL)
8565		goto unlock;
8566
8567	TIME_NOW(&now);
8568	for (i = 0; i < resolver->badhash; i++) {
8569		prev = NULL;
8570		for (bad = resolver->badcache[i]; bad != NULL; bad = next) {
8571			next = bad->next;
8572			if (isc_time_compare(&bad->expire, &now) < 0) {
8573				if (prev != NULL)
8574					prev->next = bad->next;
8575				else
8576					resolver->badcache[i] = bad->next;
8577				isc_mem_put(resolver->mctx, bad, sizeof(*bad) +
8578					    bad->name.length);
8579				resolver->badcount--;
8580				continue;
8581			}
8582			prev = bad;
8583			dns_name_format(&bad->name, namebuf, sizeof(namebuf));
8584			dns_rdatatype_format(bad->type, typebuf,
8585					     sizeof(typebuf));
8586			t = isc_time_microdiff(&bad->expire, &now);
8587			t /= 1000;
8588			fprintf(fp, "; %s/%s [ttl "
8589				"%" ISC_PLATFORM_QUADFORMAT "u]\n",
8590				namebuf, typebuf, t);
8591		}
8592	}
8593
8594 unlock:
8595	UNLOCK(&resolver->lock);
8596}
8597
8598static void
8599free_algorithm(void *node, void *arg) {
8600	unsigned char *algorithms = node;
8601	isc_mem_t *mctx = arg;
8602
8603	isc_mem_put(mctx, algorithms, *algorithms);
8604}
8605
8606void
8607dns_resolver_reset_algorithms(dns_resolver_t *resolver) {
8608
8609	REQUIRE(VALID_RESOLVER(resolver));
8610
8611#if USE_ALGLOCK
8612	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
8613#endif
8614	if (resolver->algorithms != NULL)
8615		dns_rbt_destroy(&resolver->algorithms);
8616#if USE_ALGLOCK
8617	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
8618#endif
8619}
8620
8621isc_result_t
8622dns_resolver_disable_algorithm(dns_resolver_t *resolver, dns_name_t *name,
8623			       unsigned int alg)
8624{
8625	unsigned int len, mask;
8626	unsigned char *new;
8627	unsigned char *algorithms;
8628	isc_result_t result;
8629	dns_rbtnode_t *node = NULL;
8630
8631	REQUIRE(VALID_RESOLVER(resolver));
8632	if (alg > 255)
8633		return (ISC_R_RANGE);
8634
8635#if USE_ALGLOCK
8636	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
8637#endif
8638	if (resolver->algorithms == NULL) {
8639		result = dns_rbt_create(resolver->mctx, free_algorithm,
8640					resolver->mctx, &resolver->algorithms);
8641		if (result != ISC_R_SUCCESS)
8642			goto cleanup;
8643	}
8644
8645	len = alg/8 + 2;
8646	mask = 1 << (alg%8);
8647
8648	result = dns_rbt_addnode(resolver->algorithms, name, &node);
8649
8650	if (result == ISC_R_SUCCESS || result == ISC_R_EXISTS) {
8651		algorithms = node->data;
8652		if (algorithms == NULL || len > *algorithms) {
8653			new = isc_mem_get(resolver->mctx, len);
8654			if (new == NULL) {
8655				result = ISC_R_NOMEMORY;
8656				goto cleanup;
8657			}
8658			memset(new, 0, len);
8659			if (algorithms != NULL)
8660				memcpy(new, algorithms, *algorithms);
8661			new[len-1] |= mask;
8662			*new = len;
8663			node->data = new;
8664			if (algorithms != NULL)
8665				isc_mem_put(resolver->mctx, algorithms,
8666					    *algorithms);
8667		} else
8668			algorithms[len-1] |= mask;
8669	}
8670	result = ISC_R_SUCCESS;
8671 cleanup:
8672#if USE_ALGLOCK
8673	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
8674#endif
8675	return (result);
8676}
8677
8678isc_boolean_t
8679dns_resolver_algorithm_supported(dns_resolver_t *resolver, dns_name_t *name,
8680				 unsigned int alg)
8681{
8682	unsigned int len, mask;
8683	unsigned char *algorithms;
8684	void *data = NULL;
8685	isc_result_t result;
8686	isc_boolean_t found = ISC_FALSE;
8687
8688	REQUIRE(VALID_RESOLVER(resolver));
8689
8690#if USE_ALGLOCK
8691	RWLOCK(&resolver->alglock, isc_rwlocktype_read);
8692#endif
8693	if (resolver->algorithms == NULL)
8694		goto unlock;
8695	result = dns_rbt_findname(resolver->algorithms, name, 0, NULL, &data);
8696	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8697		len = alg/8 + 2;
8698		mask = 1 << (alg%8);
8699		algorithms = data;
8700		if (len <= *algorithms && (algorithms[len-1] & mask) != 0)
8701			found = ISC_TRUE;
8702	}
8703 unlock:
8704#if USE_ALGLOCK
8705	RWUNLOCK(&resolver->alglock, isc_rwlocktype_read);
8706#endif
8707	if (found)
8708		return (ISC_FALSE);
8709	return (dst_algorithm_supported(alg));
8710}
8711
8712isc_boolean_t
8713dns_resolver_digest_supported(dns_resolver_t *resolver, unsigned int digest) {
8714
8715	UNUSED(resolver);
8716	return (dns_ds_digest_supported(digest));
8717}
8718
8719void
8720dns_resolver_resetmustbesecure(dns_resolver_t *resolver) {
8721
8722	REQUIRE(VALID_RESOLVER(resolver));
8723
8724#if USE_MBSLOCK
8725	RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
8726#endif
8727	if (resolver->mustbesecure != NULL)
8728		dns_rbt_destroy(&resolver->mustbesecure);
8729#if USE_MBSLOCK
8730	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
8731#endif
8732}
8733
8734static isc_boolean_t yes = ISC_TRUE, no = ISC_FALSE;
8735
8736isc_result_t
8737dns_resolver_setmustbesecure(dns_resolver_t *resolver, dns_name_t *name,
8738			     isc_boolean_t value)
8739{
8740	isc_result_t result;
8741
8742	REQUIRE(VALID_RESOLVER(resolver));
8743
8744#if USE_MBSLOCK
8745	RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
8746#endif
8747	if (resolver->mustbesecure == NULL) {
8748		result = dns_rbt_create(resolver->mctx, NULL, NULL,
8749					&resolver->mustbesecure);
8750		if (result != ISC_R_SUCCESS)
8751			goto cleanup;
8752	}
8753	result = dns_rbt_addname(resolver->mustbesecure, name,
8754				 value ? &yes : &no);
8755 cleanup:
8756#if USE_MBSLOCK
8757	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
8758#endif
8759	return (result);
8760}
8761
8762isc_boolean_t
8763dns_resolver_getmustbesecure(dns_resolver_t *resolver, dns_name_t *name) {
8764	void *data = NULL;
8765	isc_boolean_t value = ISC_FALSE;
8766	isc_result_t result;
8767
8768	REQUIRE(VALID_RESOLVER(resolver));
8769
8770#if USE_MBSLOCK
8771	RWLOCK(&resolver->mbslock, isc_rwlocktype_read);
8772#endif
8773	if (resolver->mustbesecure == NULL)
8774		goto unlock;
8775	result = dns_rbt_findname(resolver->mustbesecure, name, 0, NULL, &data);
8776	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH)
8777		value = *(isc_boolean_t*)data;
8778 unlock:
8779#if USE_MBSLOCK
8780	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_read);
8781#endif
8782	return (value);
8783}
8784
8785void
8786dns_resolver_getclientsperquery(dns_resolver_t *resolver, isc_uint32_t *cur,
8787				isc_uint32_t *min, isc_uint32_t *max)
8788{
8789	REQUIRE(VALID_RESOLVER(resolver));
8790
8791	LOCK(&resolver->lock);
8792	if (cur != NULL)
8793		*cur = resolver->spillat;
8794	if (min != NULL)
8795		*min = resolver->spillatmin;
8796	if (max != NULL)
8797		*max = resolver->spillatmax;
8798	UNLOCK(&resolver->lock);
8799}
8800
8801void
8802dns_resolver_setclientsperquery(dns_resolver_t *resolver, isc_uint32_t min,
8803				isc_uint32_t max)
8804{
8805	REQUIRE(VALID_RESOLVER(resolver));
8806
8807	LOCK(&resolver->lock);
8808	resolver->spillatmin = resolver->spillat = min;
8809	resolver->spillatmax = max;
8810	UNLOCK(&resolver->lock);
8811}
8812
8813isc_boolean_t
8814dns_resolver_getzeronosoattl(dns_resolver_t *resolver) {
8815	REQUIRE(VALID_RESOLVER(resolver));
8816
8817	return (resolver->zero_no_soa_ttl);
8818}
8819
8820void
8821dns_resolver_setzeronosoattl(dns_resolver_t *resolver, isc_boolean_t state) {
8822	REQUIRE(VALID_RESOLVER(resolver));
8823
8824	resolver->zero_no_soa_ttl = state;
8825}
8826
8827unsigned int
8828dns_resolver_getoptions(dns_resolver_t *resolver) {
8829	REQUIRE(VALID_RESOLVER(resolver));
8830
8831	return (resolver->options);
8832}
8833
8834unsigned int
8835dns_resolver_gettimeout(dns_resolver_t *resolver) {
8836	REQUIRE(VALID_RESOLVER(resolver));
8837
8838	return (resolver->query_timeout);
8839}
8840
8841void
8842dns_resolver_settimeout(dns_resolver_t *resolver, unsigned int seconds) {
8843	REQUIRE(VALID_RESOLVER(resolver));
8844
8845	if (seconds == 0)
8846		seconds = DEFAULT_QUERY_TIMEOUT;
8847	if (seconds > MAXIMUM_QUERY_TIMEOUT)
8848		seconds = MAXIMUM_QUERY_TIMEOUT;
8849	if (seconds < MINIMUM_QUERY_TIMEOUT)
8850		seconds =  MINIMUM_QUERY_TIMEOUT;
8851
8852	resolver->query_timeout = seconds;
8853}
8854