1/*	$NetBSD: resolver.c,v 1.1 2024/02/18 20:57:33 christos Exp $	*/
2
3/*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16/*! \file */
17
18#include <ctype.h>
19#include <inttypes.h>
20#include <stdbool.h>
21
22#include <isc/atomic.h>
23#include <isc/counter.h>
24#include <isc/log.h>
25#include <isc/platform.h>
26#include <isc/print.h>
27#include <isc/random.h>
28#include <isc/refcount.h>
29#include <isc/siphash.h>
30#include <isc/socket.h>
31#include <isc/stats.h>
32#include <isc/string.h>
33#include <isc/task.h>
34#include <isc/timer.h>
35#include <isc/util.h>
36
37#include <dns/acl.h>
38#include <dns/adb.h>
39#include <dns/badcache.h>
40#include <dns/cache.h>
41#include <dns/db.h>
42#include <dns/dispatch.h>
43#include <dns/dnstap.h>
44#include <dns/ds.h>
45#include <dns/edns.h>
46#include <dns/events.h>
47#include <dns/forward.h>
48#include <dns/keytable.h>
49#include <dns/log.h>
50#include <dns/message.h>
51#include <dns/ncache.h>
52#include <dns/nsec.h>
53#include <dns/nsec3.h>
54#include <dns/opcode.h>
55#include <dns/peer.h>
56#include <dns/rbt.h>
57#include <dns/rcode.h>
58#include <dns/rdata.h>
59#include <dns/rdataclass.h>
60#include <dns/rdatalist.h>
61#include <dns/rdataset.h>
62#include <dns/rdatastruct.h>
63#include <dns/rdatatype.h>
64#include <dns/resolver.h>
65#include <dns/result.h>
66#include <dns/rootns.h>
67#include <dns/stats.h>
68#include <dns/tsig.h>
69#include <dns/validator.h>
70#include <dns/zone.h>
71
/*
 * Query tracing helpers.
 *
 * When WANT_QUERYTRACE is defined, each macro logs its message through
 * isc_log_write() at ISC_LOG_DEBUG(3) under the resolver category and
 * module.  Otherwise each macro only marks its arguments as UNUSED.
 *
 * In the tracing build, RTRACE, FCTXTRACE*, FTRACE and QTRACE expand to
 * expressions that reference variables named 'res', 'fctx', 'fetch' and
 * 'query' respectively, so those names must be in scope at the call site.
 * RRTRACE takes the resolver pointer explicitly.
 */
72#ifdef WANT_QUERYTRACE
73#define RTRACE(m)                                                             \
74	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                     \
75		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3), "res %p: %s", \
76		      res, (m))
77#define RRTRACE(r, m)                                                         \
78	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                     \
79		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3), "res %p: %s", \
80		      (r), (m))
81#define FCTXTRACE(m)                                            \
82	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,       \
83		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3), \
84		      "fctx %p(%s): %s", fctx, fctx->info, (m))
85#define FCTXTRACE2(m1, m2)                                      \
86	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,       \
87		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3), \
88		      "fctx %p(%s): %s %s", fctx, fctx->info, (m1), (m2))
89#define FCTXTRACE3(m, res)                                              \
90	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,               \
91		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),         \
92		      "fctx %p(%s): [result: %s] %s", fctx, fctx->info, \
93		      isc_result_totext(res), (m))
94#define FCTXTRACE4(m1, m2, res)                                            \
95	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                  \
96		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),            \
97		      "fctx %p(%s): [result: %s] %s %s", fctx, fctx->info, \
98		      isc_result_totext(res), (m1), (m2))
99#define FCTXTRACE5(m1, m2, v)                                               \
100	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                   \
101		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),             \
102		      "fctx %p(%s): %s %s%u", fctx, fctx->info, (m1), (m2), \
103		      (v))
104#define FTRACE(m)                                                          \
105	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                  \
106		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),            \
107		      "fetch %p (fctx %p(%s)): %s", fetch, fetch->private, \
108		      fetch->private->info, (m))
109#define QTRACE(m)                                                          \
110	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                  \
111		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),            \
112		      "resquery %p (fctx %p(%s)): %s", query, query->fctx, \
113		      query->fctx->info, (m))
114#else /* ifdef WANT_QUERYTRACE */
115#define RTRACE(m)          \
116	do {               \
117		UNUSED(m); \
118	} while (0)
119#define RRTRACE(r, m)      \
120	do {               \
121		UNUSED(r); \
122		UNUSED(m); \
123	} while (0)
124#define FCTXTRACE(m)       \
125	do {               \
126		UNUSED(m); \
127	} while (0)
128#define FCTXTRACE2(m1, m2)  \
129	do {                \
130		UNUSED(m1); \
131		UNUSED(m2); \
132	} while (0)
133#define FCTXTRACE3(m1, res)  \
134	do {                 \
135		UNUSED(m1);  \
136		UNUSED(res); \
137	} while (0)
138#define FCTXTRACE4(m1, m2, res) \
139	do {                    \
140		UNUSED(m1);     \
141		UNUSED(m2);     \
142		UNUSED(res);    \
143	} while (0)
144#define FCTXTRACE5(m1, m2, v) \
145	do {                  \
146		UNUSED(m1);   \
147		UNUSED(m2);   \
148		UNUSED(v);    \
149	} while (0)
150#define FTRACE(m)          \
151	do {               \
152		UNUSED(m); \
153	} while (0)
154#define QTRACE(m)          \
155	do {               \
156		UNUSED(m); \
157	} while (0)
158#endif /* WANT_QUERYTRACE */
159
/* Microseconds per second and per millisecond. */
160#define US_PER_SEC  1000000U
161#define US_PER_MSEC 1000U
162/*
163 * The maximum time, in milliseconds, we will wait for a single query.
 * MAX_SINGLE_QUERY_TIMEOUT_US is the same limit in microseconds.
164 */
165#define MAX_SINGLE_QUERY_TIMEOUT    9000U
166#define MAX_SINGLE_QUERY_TIMEOUT_US (MAX_SINGLE_QUERY_TIMEOUT * US_PER_MSEC)
167
168/*
169 * We need to allow an individual query time to complete / time out, so
 * the minimum is one second (1000 ms) more than the single-query limit.
170 */
171#define MINIMUM_QUERY_TIMEOUT (MAX_SINGLE_QUERY_TIMEOUT + 1000U)
172
173/* The default time in milliseconds for the whole query to live. */
174#ifndef DEFAULT_QUERY_TIMEOUT
175#define DEFAULT_QUERY_TIMEOUT MINIMUM_QUERY_TIMEOUT
176#endif /* ifndef DEFAULT_QUERY_TIMEOUT */
177
178/*
 * The maximum time for the whole query to live; presumably milliseconds,
 * the same unit as DEFAULT_QUERY_TIMEOUT -- confirm against the users of
 * this constant.
 */
179#ifndef MAXIMUM_QUERY_TIMEOUT
180#define MAXIMUM_QUERY_TIMEOUT 30000
181#endif /* ifndef MAXIMUM_QUERY_TIMEOUT */
182
183/* The default maximum number of recursions to follow before giving up. */
184#ifndef DEFAULT_RECURSION_DEPTH
185#define DEFAULT_RECURSION_DEPTH 7
186#endif /* ifndef DEFAULT_RECURSION_DEPTH */
187
188/* The default maximum number of iterative queries to allow before giving up. */
189#ifndef DEFAULT_MAX_QUERIES
190#define DEFAULT_MAX_QUERIES 100
191#endif /* ifndef DEFAULT_MAX_QUERIES */
192
193/*
194 * After NS_FAIL_LIMIT attempts to fetch a name server address,
195 * if the number of addresses in the NS RRset exceeds NS_RR_LIMIT,
196 * stop trying to fetch, in order to avoid wasting resources.
197 */
198#define NS_FAIL_LIMIT 4
199#define NS_RR_LIMIT   5
200/*
201 * IP address lookups are performed for at most NS_PROCESSING_LIMIT NS RRs in
202 * any NS RRset encountered, to avoid excessive resource use while processing
203 * large delegations.
204 */
205#define NS_PROCESSING_LIMIT 20
206
207/* Number of hash buckets for zone counters */
208#ifndef RES_DOMAIN_BUCKETS
209#define RES_DOMAIN_BUCKETS 523
210#endif /* ifndef RES_DOMAIN_BUCKETS */
211#define RES_NOBUCKET 0xffffffff
212
213/*%
214 * Maximum EDNS0 input packet size.
215 */
216#define RECV_BUFFER_SIZE 4096 /* XXXRTH  Constant. */
217
218/*%
219 * This defines the maximum number of timeouts we will permit before we
220 * disable EDNS0 on the query.
221 */
222#define MAX_EDNS0_TIMEOUTS 3
223
224#define DNS_RESOLVER_BADCACHESIZE 1021
/*
 * Evaluates to the lame_ttl of the resolver that 'fctx' belongs to, or to
 * 30 when lame_ttl is not greater than 30.  The 'fctx' argument may be
 * evaluated twice, so it must not have side effects.
 */
225#define DNS_RESOLVER_BADCACHETTL(fctx) \
226	(((fctx)->res->lame_ttl > 30) ? (fctx)->res->lame_ttl : 30)
227
228typedef struct fetchctx fetchctx_t;
229
/*
 * State for one resolver query ('resquery_t').  'fctx' points back at the
 * fetch context; 'link' is the linkage used by the fetch context's
 * 'queries' list (declared as ISC_LIST(resquery_t) in struct fetchctx).
 */
230typedef struct query {
231	/* Locked by task event serialization. */
232	unsigned int magic; /* compared against QUERY_MAGIC by VALID_QUERY() */
233	fetchctx_t *fctx;
234	dns_message_t *rmessage;
235	isc_mem_t *mctx;
236	dns_dispatchmgr_t *dispatchmgr;
237	dns_dispatch_t *dispatch;
238	bool exclusivesocket;
239	dns_adbaddrinfo_t *addrinfo;
240	isc_socket_t *tcpsocket;
241	isc_time_t start;
242	dns_messageid_t id;
243	dns_dispentry_t *dispentry;
244	ISC_LINK(struct query) link;
245	isc_buffer_t buffer;
246	isc_buffer_t *tsig;
247	dns_tsigkey_t *tsigkey;
248	isc_socketevent_t sendevent;
249	isc_dscp_t dscp;
250	int ednsversion;
251	unsigned int options;
252	isc_sockeventattr_t attributes; /* tested by RESQUERY_CANCELED() */
253	unsigned int sends;		/* > 0 <=> RESQUERY_SENDING() */
254	unsigned int connects;		/* > 0 <=> RESQUERY_CONNECTING() */
255	unsigned int udpsize;
256	unsigned char data[512];
257} resquery_t;
258
/*
 * A socket address paired with a counter.  struct fetchctx keeps lists of
 * these in its 'edns' and 'edns512' members, linked through 'link'.
 */
259struct tried {
260	isc_sockaddr_t addr;
261	unsigned int count;
262	ISC_LINK(struct tried) link;
263};
264
/* Magic number stored in resquery_t.magic and its validity check. */
265#define QUERY_MAGIC	   ISC_MAGIC('Q', '!', '!', '!')
266#define VALID_QUERY(query) ISC_MAGIC_VALID(query, QUERY_MAGIC)
267
/* Bit in resquery_t.attributes tested by RESQUERY_CANCELED(). */
268#define RESQUERY_ATTR_CANCELED 0x02
269
/*
 * Query state predicates.  CONNECTING and SENDING are derived from the
 * 'connects' and 'sends' counters being non-zero; CANCELED is derived from
 * the RESQUERY_ATTR_CANCELED attribute bit.
 */
270#define RESQUERY_CONNECTING(q) ((q)->connects > 0)
271#define RESQUERY_CANCELED(q)   (((q)->attributes & RESQUERY_ATTR_CANCELED) != 0)
272#define RESQUERY_SENDING(q)    ((q)->sends > 0)
273
/* State of a fetch context; stored in the 'state' member of struct fetchctx. */
274typedef enum {
275	fetchstate_init = 0, /*%< Start event has not run yet. */
276	fetchstate_active,
277	fetchstate_done /*%< FETCHDONE events posted. */
278} fetchstate;
279
/*
 * Category of name server problem.  Values of this type are passed to
 * add_bad() and recorded in respctx_t.broken_type.
 */
280typedef enum {
281	badns_unreachable = 0,
282	badns_response,
283	badns_validation,
284	badns_forwarder,
285} badnstype_t;
286
/*
 * Fetch context ('fetchctx_t').
 *
 * Members are grouped by the locking rule given in the comment that heads
 * each group.  Within this file, resquery_t.fctx, dns_valarg_t.fctx and
 * dns_fetch.private point at a fetch context, and fctxbucket_t.fctxs is a
 * list of them (linked through 'link').
 */
287struct fetchctx {
288	/*% Not locked. */
289	unsigned int magic; /* compared against FCTX_MAGIC by VALID_FCTX() */
290	dns_resolver_t *res;
291	dns_name_t name;
292	dns_rdatatype_t type;
293	unsigned int options;
294	unsigned int bucketnum;
295	unsigned int dbucketnum;
296	char *info; /* printed as "%s" by the FCTXTRACE*() macros */
297	isc_mem_t *mctx;
298	isc_stdtime_t now;
299
300	/* Atomic */
301	isc_refcount_t references;
302
303	/*% Locked by appropriate bucket lock. */
304	fetchstate state;
305	bool want_shutdown;
306	bool cloned;
307	bool spilled;
308	isc_event_t control_event;
309	ISC_LINK(struct fetchctx) link;
310	ISC_LIST(dns_fetchevent_t) events;
311
312	/*% Locked by task event serialization. */
313	dns_name_t domain;
314	dns_rdataset_t nameservers;
315	atomic_uint_fast32_t attributes; /* FCTX_ATTR_* bits */
316	isc_timer_t *timer;
317	isc_timer_t *timer_try_stale;
318	isc_time_t expires;
319	isc_time_t expires_try_stale;
320	isc_interval_t interval;
321	dns_message_t *qmessage;
322	ISC_LIST(resquery_t) queries;
323	dns_adbfindlist_t finds;
324	dns_adbfind_t *find;
325	/*
326	 * altfinds are names and/or addresses of dual stack servers that
327	 * should be used when iterative resolution to a server is not
328	 * possible because the address family of that server is not usable.
329	 */
330	dns_adbfindlist_t altfinds;
331	dns_adbfind_t *altfind;
332	dns_adbaddrinfolist_t forwaddrs;
333	dns_adbaddrinfolist_t altaddrs;
334	dns_forwarderlist_t forwarders;
335	dns_fwdpolicy_t fwdpolicy;
336	isc_sockaddrlist_t bad;
337	ISC_LIST(struct tried) edns;
338	ISC_LIST(struct tried) edns512;
339	isc_sockaddrlist_t bad_edns;
340	dns_validator_t *validator; /* set by valcreate() for a non-deferred
				     * validator */
341	ISC_LIST(dns_validator_t) validators; /* every validator made by
					       * valcreate() is appended */
342	dns_db_t *cache;
343	dns_adb_t *adb;
344	bool ns_ttl_ok;
345	uint32_t ns_ttl;
346	isc_counter_t *qc;
347	bool minimized;
348	unsigned int qmin_labels;
349	isc_result_t qmin_warning;
350	bool ip6arpaskip;
351	bool forwarding;
352	dns_name_t qminname;
353	dns_rdatatype_t qmintype;
354	dns_fetch_t *qminfetch;
355	dns_rdataset_t qminrrset;
356	dns_name_t qmindcname;
357	dns_fixedname_t fwdfname;
358	dns_name_t *fwdname;
359
360	/*%
361	 * The number of events we're waiting for.
362	 */
363	unsigned int pending; /* Bucket lock. */
364
365	/*%
366	 * The number of times we've "restarted" the current
367	 * nameserver set.  This acts as a failsafe to prevent
368	 * us from pounding constantly on a particular set of
369	 * servers that, for whatever reason, are not giving
370	 * us useful responses, but are responding in such a
371	 * way that they are not marked "bad".
372	 */
373	unsigned int restarts;
374
375	/*%
376	 * The number of timeouts that have occurred since we
377	 * last successfully received a response packet.  This
378	 * is used for EDNS0 black hole detection.
379	 */
380	unsigned int timeouts;
381
382	/*%
383	 * Look aside state for DS lookups.
384	 */
385	dns_name_t nsname;
386	dns_fetch_t *nsfetch;
387	dns_rdataset_t nsrrset;
388
389	/*%
390	 * Number of queries that reference this context.
391	 */
392	unsigned int nqueries; /* Bucket lock. */
393
394	/*%
395	 * The reason to print when logging a successful
396	 * response to a query.
397	 */
398	const char *reason;
399
400	/*%
401	 * Random numbers to use for mixing up server addresses.
402	 */
403	uint32_t rand_buf;
404	uint32_t rand_bits;
405
406	/*%
407	 * Fetch-local statistics for detailed logging.
408	 */
409	isc_result_t result;  /*%< fetch result  */
410	isc_result_t vresult; /*%< validation result  */
411	int exitline;
412	isc_time_t start;
413	uint64_t duration;
414	bool logged;
415	unsigned int querysent;
416	unsigned int referrals;
417	unsigned int lamecount;
418	unsigned int quotacount;
419	unsigned int neterr;
420	unsigned int badresp;
421	unsigned int adberr;
422	unsigned int findfail;
423	unsigned int valfail;
424	bool timeout;
425	dns_adbaddrinfo_t *addrinfo;
426	dns_messageid_t id;
427	unsigned int depth;
428	char clientstr[ISC_SOCKADDR_FORMATSIZE];
429};
430
/* Magic number stored in fetchctx.magic and its validity check. */
431#define FCTX_MAGIC	 ISC_MAGIC('F', '!', '!', '!')
432#define VALID_FCTX(fctx) ISC_MAGIC_VALID(fctx, FCTX_MAGIC)
433
/* Bit flags kept in the atomic 'attributes' member of struct fetchctx. */
434#define FCTX_ATTR_HAVEANSWER   0x0001
435#define FCTX_ATTR_GLUING       0x0002
436#define FCTX_ATTR_ADDRWAIT     0x0004
437#define FCTX_ATTR_SHUTTINGDOWN 0x0008 /* Bucket lock */
438#define FCTX_ATTR_WANTCACHE    0x0010
439#define FCTX_ATTR_WANTNCACHE   0x0020
440#define FCTX_ATTR_NEEDEDNS0    0x0040
441#define FCTX_ATTR_TRIEDFIND    0x0080
442#define FCTX_ATTR_TRIEDALT     0x0100
443
/*
 * Predicates: each one reads 'attributes' with atomic_load_acquire() and
 * tests a single FCTX_ATTR_* bit.
 */
444#define HAVE_ANSWER(f) \
445	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_HAVEANSWER) != 0)
446#define GLUING(f) \
447	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_GLUING) != 0)
448#define ADDRWAIT(f) \
449	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_ADDRWAIT) != 0)
450#define SHUTTINGDOWN(f) \
451	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_SHUTTINGDOWN) != 0)
452#define WANTCACHE(f) \
453	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_WANTCACHE) != 0)
454#define WANTNCACHE(f) \
455	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_WANTNCACHE) != 0)
456#define NEEDEDNS0(f) \
457	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_NEEDEDNS0) != 0)
458#define TRIEDFIND(f) \
459	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_TRIEDFIND) != 0)
460#define TRIEDALT(f) \
461	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_TRIEDALT) != 0)
462
/*
 * Set or clear the attribute bits in 'a' with a single atomic
 * fetch-or / fetch-and on 'attributes'.
 */
463#define FCTX_ATTR_SET(f, a) atomic_fetch_or_release(&(f)->attributes, (a))
464#define FCTX_ATTR_CLR(f, a) atomic_fetch_and_release(&(f)->attributes, ~(a))
465
/*
 * Argument bundle for the validator callback.  valcreate() allocates one,
 * fills in 'fctx' and 'addrinfo', attaches 'message', and hands it to
 * dns_validator_create() together with validated().
 */
466typedef struct {
467	dns_adbaddrinfo_t *addrinfo;
468	fetchctx_t *fctx;
469	dns_message_t *message;
470} dns_valarg_t;
471
/*
 * A fetch handle.  'private' points at the fetch context; the FTRACE()
 * macro logs both the handle and 'private'.
 */
472struct dns_fetch {
473	unsigned int magic; /* compared against DNS_FETCH_MAGIC */
474	isc_mem_t *mctx;
475	fetchctx_t *private;
476};
477
/* Magic number stored in dns_fetch.magic and its validity check. */
478#define DNS_FETCH_MAGIC	       ISC_MAGIC('F', 't', 'c', 'h')
479#define DNS_FETCH_VALID(fetch) ISC_MAGIC_VALID(fetch, DNS_FETCH_MAGIC)
480
/*
 * One fetch-context bucket; struct dns_resolver holds an array of these in
 * 'buckets'.  'fctxs' lists the fetch contexts in the bucket.
 * NOTE(review): the "bucket lock" mentioned in struct fetchctx comments is
 * presumably 'lock' here -- confirm.
 */
481typedef struct fctxbucket {
482	isc_task_t *task;
483	isc_mutex_t lock;
484	ISC_LIST(fetchctx_t) fctxs;
485	atomic_bool exiting;
486	isc_mem_t *mctx;
487} fctxbucket_t;
488
/*
 * Counter record keyed by a domain name.  Records are kept on the 'list'
 * member of a zonebucket_t, linked through 'link'.
 * NOTE(review): 'domain' presumably points into 'fdname' -- confirm where
 * the record is initialised.
 */
489typedef struct fctxcount fctxcount_t;
490struct fctxcount {
491	dns_fixedname_t fdname;
492	dns_name_t *domain;
493	uint32_t count;
494	uint32_t allowed;
495	uint32_t dropped;
496	isc_stdtime_t logged;
497	ISC_LINK(fctxcount_t) link;
498};
499
/*
 * One zone-counter bucket: a mutex, a memory context and a list of
 * fctxcount_t records.  struct dns_resolver holds an array of these in
 * 'dbuckets'.
 */
500typedef struct zonebucket {
501	isc_mutex_t lock;
502	isc_mem_t *mctx;
503	ISC_LIST(fctxcount_t) list;
504} zonebucket_t;
505
/*
 * Element of the 'alternates' list in struct dns_resolver.  The union
 * holds either a socket address (_u.addr) or a name plus port (_u._n).
 * NOTE(review): 'isaddress' presumably selects which union member is
 * valid -- confirm against the code that fills the list.
 */
506typedef struct alternate {
507	bool isaddress;
508	union {
509		isc_sockaddr_t addr;
510		struct {
511			dns_name_t name;
512			in_port_t port;
513		} _n;
514	} _u;
515	ISC_LINK(struct alternate) link;
516} alternate_t;
517
/*
 * The resolver object ('dns_resolver_t').  Members are grouped by the
 * locking rule given in the comment that heads each group.
 */
518struct dns_resolver {
519	/* Unlocked. */
520	unsigned int magic; /* compared against RES_MAGIC by VALID_RESOLVER() */
521	isc_mem_t *mctx;
522	isc_mutex_t lock;
523	isc_mutex_t primelock;
524	dns_rdataclass_t rdclass;
525	isc_socketmgr_t *socketmgr;
526	isc_timermgr_t *timermgr;
527	isc_taskmgr_t *taskmgr;
528	dns_view_t *view; /* view->resstats is used by inc_stats()/dec_stats() */
529	bool frozen;
530	unsigned int options;
531	dns_dispatchmgr_t *dispatchmgr;
532	dns_dispatchset_t *dispatches4;
533	bool exclusivev4;
534	dns_dispatchset_t *dispatches6;
535	isc_dscp_t querydscp4;
536	isc_dscp_t querydscp6;
537	bool exclusivev6;
538	unsigned int nbuckets;
539	fctxbucket_t *buckets;
540	zonebucket_t *dbuckets;
541	uint32_t lame_ttl; /* read by DNS_RESOLVER_BADCACHETTL() */
542	ISC_LIST(alternate_t) alternates;
543	uint16_t udpsize;
544#if USE_ALGLOCK
545	isc_rwlock_t alglock;
546#endif /* if USE_ALGLOCK */
547	dns_rbt_t *algorithms;
548	dns_rbt_t *digests;
549#if USE_MBSLOCK
550	isc_rwlock_t mbslock;
551#endif /* if USE_MBSLOCK */
552	dns_rbt_t *mustbesecure;
553	unsigned int spillatmax;
554	unsigned int spillatmin;
555	isc_timer_t *spillattimer;
556	bool zero_no_soa_ttl;
557	unsigned int query_timeout;
558	unsigned int maxdepth;
559	unsigned int maxqueries;
560	isc_result_t quotaresp[2];
561
562	/* Additions for serve-stale feature. */
563	unsigned int retryinterval; /* in milliseconds */
564	unsigned int nonbackofftries;
565
566	/* Atomic */
567	isc_refcount_t references;
568	atomic_uint_fast32_t zspill; /* fetches-per-zone */
569	atomic_bool exiting;
570	atomic_bool priming;
571
572	/* Locked by lock. */
573	isc_eventlist_t whenshutdown;
574	unsigned int activebuckets;
575	unsigned int spillat; /* clients-per-query */
576
577	dns_badcache_t *badcache; /* Bad cache. */
578
579	/* Locked by primelock. */
580	dns_fetch_t *primefetch;
581
582	/* Atomic. */
583	atomic_uint_fast32_t nfctx;
584};
585
/* Magic number stored in dns_resolver.magic and its validity check. */
586#define RES_MAGIC	    ISC_MAGIC('R', 'e', 's', '!')
587#define VALID_RESOLVER(res) ISC_MAGIC_VALID(res, RES_MAGIC)
588
589/*%
590 * Private addrinfo flags.  These must not conflict with DNS_FETCHOPT_NOEDNS0
591 * (0x008) which we also use as an addrinfo flag.
592 */
593#define FCTX_ADDRINFO_MARK	0x00001
594#define FCTX_ADDRINFO_FORWARDER 0x01000
595#define FCTX_ADDRINFO_EDNSOK	0x04000
596#define FCTX_ADDRINFO_NOCOOKIE	0x08000
597#define FCTX_ADDRINFO_BADCOOKIE 0x10000
598#define FCTX_ADDRINFO_DUALSTACK 0x20000
599
/*
 * Predicates on the 'flags' member of 'a'.  Note that UNMARKED() is true
 * when FCTX_ADDRINFO_MARK is clear; the others are true when their bit is
 * set.
 */
600#define UNMARKED(a)    (((a)->flags & FCTX_ADDRINFO_MARK) == 0)
601#define ISFORWARDER(a) (((a)->flags & FCTX_ADDRINFO_FORWARDER) != 0)
602#define NOCOOKIE(a)    (((a)->flags & FCTX_ADDRINFO_NOCOOKIE) != 0)
603#define EDNSOK(a)      (((a)->flags & FCTX_ADDRINFO_EDNSOK) != 0)
604#define BADCOOKIE(a)   (((a)->flags & FCTX_ADDRINFO_BADCOOKIE) != 0)
605#define ISDUALSTACK(a) (((a)->flags & FCTX_ADDRINFO_DUALSTACK) != 0)
606
/* Predicates on the 'attributes' member of 'r'. */
607#define NXDOMAIN(r) (((r)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
608#define NEGATIVE(r) (((r)->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
609
/*
 * Classify a result code 'r'.  The argument is evaluated more than once,
 * so it must not have side effects.
 */
610#define NXDOMAIN_RESULT(r) \
611	((r) == DNS_R_NXDOMAIN || (r) == DNS_R_NCACHENXDOMAIN)
612#define NXRRSET_RESULT(r)                                      \
613	((r) == DNS_R_NCACHENXRRSET || (r) == DNS_R_NXRRSET || \
614	 (r) == DNS_R_HINTNXRRSET)
615
#ifdef ENABLE_AFL
/*
 * Fuzzing support, compiled only when ENABLE_AFL is defined.
 * dns_fuzzing_resolver starts out false and is switched on (never off)
 * by dns_resolver_setfuzzing().
 */
bool dns_fuzzing_resolver = false;

/*
 * Turn on the global fuzzing flag.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype; an empty "()" would leave the argument list unchecked.
 */
void
dns_resolver_setfuzzing(void) {
	dns_fuzzing_resolver = true;
}
#endif /* ifdef ENABLE_AFL */
623
/*
 * Statically initialised name "IP6.ARPA." (absolute).  The data is a
 * sequence of length-prefixed labels: "\003IP6" starts at offset 0 and
 * "\004ARPA" at offset 4; offset 9 is the string literal's terminating
 * NUL byte.  The offsets array lists those three positions.
 */
624static unsigned char ip6_arpa_data[] = "\003IP6\004ARPA";
625static unsigned char ip6_arpa_offsets[] = { 0, 4, 9 };
626static const dns_name_t ip6_arpa = DNS_NAME_INITABSOLUTE(ip6_arpa_data,
627							 ip6_arpa_offsets);
628
/*
 * Statically initialised non-absolute name consisting of the single
 * one-character label "_".
 */
629static unsigned char underscore_data[] = "\001_";
630static unsigned char underscore_offsets[] = { 0 };
631static const dns_name_t underscore_name =
632	DNS_NAME_INITNONABSOLUTE(underscore_data, underscore_offsets);
633
634static void
635destroy(dns_resolver_t *res);
636static void
637empty_bucket(dns_resolver_t *res);
638static isc_result_t
639resquery_send(resquery_t *query);
640static void
641resquery_response(isc_task_t *task, isc_event_t *event);
642static void
643resquery_connected(isc_task_t *task, isc_event_t *event);
644static void
645fctx_try(fetchctx_t *fctx, bool retrying, bool badcache);
646static isc_result_t
647fctx_minimize_qname(fetchctx_t *fctx);
648static void
649fctx_destroy(fetchctx_t *fctx);
650static bool
651fctx_unlink(fetchctx_t *fctx);
652static isc_result_t
653ncache_adderesult(dns_message_t *message, dns_db_t *cache, dns_dbnode_t *node,
654		  dns_rdatatype_t covers, isc_stdtime_t now, dns_ttl_t minttl,
655		  dns_ttl_t maxttl, bool optout, bool secure,
656		  dns_rdataset_t *ardataset, isc_result_t *eresultp);
657static void
658validated(isc_task_t *task, isc_event_t *event);
659static void
660add_bad(fetchctx_t *fctx, dns_message_t *rmessage, dns_adbaddrinfo_t *addrinfo,
661	isc_result_t reason, badnstype_t badtype);
662static isc_result_t
663findnoqname(fetchctx_t *fctx, dns_message_t *message, dns_name_t *name,
664	    dns_rdatatype_t type, dns_name_t **noqname);
665static void
666fctx_increference(fetchctx_t *fctx);
667static bool
668fctx_decreference(fetchctx_t *fctx);
669static void
670resume_qmin(isc_task_t *task, isc_event_t *event);
671
672/*%
673 * The structure and functions defined below implement the resolver
674 * query (resquery) response handling logic.
675 *
676 * When a resolver query is sent and a response is received, the
677 * resquery_response() event handler is run, which calls the rctx_*()
678 * functions.  The respctx_t structure maintains state from function
679 * to function.
680 *
681 * The call flow is described below:
682 *
683 * 1. resquery_response():
684 *    - Initialize a respctx_t structure (rctx_respinit()).
685 *    - Check for dispatcher failure (rctx_dispfail()).
686 *    - Parse the response (rctx_parse()).
687 *    - Log the response (rctx_logpacket()).
688 *    - Check the parsed response for an OPT record and handle
689 *      EDNS (rctx_opt(), rctx_edns()).
690 *    - Check for a bad or lame server (rctx_badserver(), rctx_lameserver()).
691 *    - Handle delegation-only zones (rctx_delonly_zone()).
692 *    - If RCODE and ANCOUNT suggest this is a positive answer,
693 *      call rctx_answer(): go to step 2.
694 *    - If RCODE and NSCOUNT suggest this is a negative answer or a
695 *      referral, call rctx_answer_none(): go to step 4.
696 *    - Check the additional section for data that should be cached
697 *      (rctx_additional()).
698 *    - Clean up and finish by calling rctx_done(): go to step 5.
699 *
700 * 2. rctx_answer():
701 *    - If the answer appears to be positive, call rctx_answer_positive():
702 *      go to step 3.
703 *    - If the response is a malformed delegation (with glue or NS records
704 *      in the answer section), call rctx_answer_none(): go to step 4.
705 *
706 * 3. rctx_answer_positive():
707 *    - Initialize the portions of respctx_t needed for processing an answer
708 *      (rctx_answer_init()).
709 *    - Scan the answer section to find records that are responsive to the
710 *      query (rctx_answer_scan()).
711 *    - For whichever type of response was found, call a separate routine
712 *      to handle it: matching QNAME/QTYPE (rctx_answer_match()),
713 *      CNAME (rctx_answer_cname()), covering DNAME (rctx_answer_dname()),
714 *      or any records returned in response to a query of type ANY
715 *      (rctx_answer_any()).
716 *    - Scan the authority section for NS or other records that may be
717 *      included with a positive answer (rctx_authority_positive()).
718 *
719 * 4. rctx_answer_none():
720 *    - Determine whether this is an NXDOMAIN, NXRRSET, or referral.
721 *    - If referral, set up the resolver to follow the delegation
722 *      (rctx_referral()).
723 *    - If NXDOMAIN/NXRRSET, scan the authority section for NS and SOA
724 *      records included with a negative response (rctx_authority_negative()),
725 *      then for DNSSEC proof of nonexistence (rctx_authority_dnssec()).
726 *
727 * 5. rctx_done():
728 *    - Set up chasing of DS records if needed (rctx_chaseds()).
729 *    - If the response wasn't intended for us, wait for another response
730 *      from the dispatcher (rctx_next()).
731 *    - If there is a problem with the responding server, set up another
732 *      query to a different server (rctx_nextserver()).
733 *    - If there is a problem that might be temporary or dependent on
734 *      EDNS options, set up another query to the same server with changed
735 *      options (rctx_resend()).
736 *    - Shut down the fetch context.
737 */
738
/*
 * Working state for handling one response.  A respctx_t is passed to every
 * rctx_*() helper declared below; rctx_respinit() takes the task, dispatch
 * event, query and fetch context that populate the first four members.
 */
739typedef struct respctx {
740	isc_task_t *task;
741	dns_dispatchevent_t *devent;
742	resquery_t *query;
743	fetchctx_t *fctx;
744	isc_result_t result;
745	unsigned int retryopts; /* updated options to pass to
746				 * fctx_query() when resending */
747
748	dns_rdatatype_t type; /* type being sought (set to
749			       * ANY if qtype was SIG or RRSIG) */
750	bool aa;	      /* authoritative answer? */
751	dns_trust_t trust;    /* answer trust level */
752	bool chaining;	      /* CNAME/DNAME processing? */
753	bool next_server;     /* give up, try the next server
754			       * */
755
756	badnstype_t broken_type; /* type of name server problem
757				  * */
758	isc_result_t broken_server;
759
760	bool get_nameservers; /* get a new NS rrset at
761			       * zone cut? */
762	bool resend;	      /* resend this query? */
763	bool nextitem;	      /* invalid response; keep
764			       * listening for the correct one */
765	bool truncated;	      /* response was truncated */
766	bool no_response;     /* no response was received */
767	bool glue_in_answer;  /* glue may be in the answer
768			       * section */
769	bool ns_in_answer;    /* NS may be in the answer
770			       * section */
771	bool negative;	      /* is this a negative response? */
772
773	isc_stdtime_t now; /* time info */
774	isc_time_t tnow;
775	isc_time_t *finish;
776
777	unsigned int dname_labels;
778	unsigned int domain_labels; /* range of permissible number
779				     * of
780				     * labels in a DNAME */
781
782	dns_name_t *aname;	   /* answer name */
783	dns_rdataset_t *ardataset; /* answer rdataset */
784
785	dns_name_t *cname;	   /* CNAME name */
786	dns_rdataset_t *crdataset; /* CNAME rdataset */
787
788	dns_name_t *dname;	   /* DNAME name */
789	dns_rdataset_t *drdataset; /* DNAME rdataset */
790
791	dns_name_t *ns_name;	     /* NS name */
792	dns_rdataset_t *ns_rdataset; /* NS rdataset */
793
794	dns_name_t *soa_name; /* SOA name in a negative answer */
795	dns_name_t *ds_name;  /* DS name in a negative answer */
796
797	dns_name_t *found_name;	    /* invalid name in negative
798				     * response */
799	dns_rdatatype_t found_type; /* invalid type in negative
800				     * response */
801
802	dns_rdataset_t *opt; /* OPT rdataset */
803} respctx_t;
804
805static void
806rctx_respinit(isc_task_t *task, dns_dispatchevent_t *devent, resquery_t *query,
807	      fetchctx_t *fctx, respctx_t *rctx);
808
809static void
810rctx_answer_init(respctx_t *rctx);
811
812static void
813rctx_answer_scan(respctx_t *rctx);
814
815static void
816rctx_authority_positive(respctx_t *rctx);
817
818static isc_result_t
819rctx_answer_any(respctx_t *rctx);
820
821static isc_result_t
822rctx_answer_match(respctx_t *rctx);
823
824static isc_result_t
825rctx_answer_cname(respctx_t *rctx);
826
827static isc_result_t
828rctx_answer_dname(respctx_t *rctx);
829
830static isc_result_t
831rctx_answer_positive(respctx_t *rctx);
832
833static isc_result_t
834rctx_authority_negative(respctx_t *rctx);
835
836static isc_result_t
837rctx_authority_dnssec(respctx_t *rctx);
838
839static void
840rctx_additional(respctx_t *rctx);
841
842static isc_result_t
843rctx_referral(respctx_t *rctx);
844
845static isc_result_t
846rctx_answer_none(respctx_t *rctx);
847
848static void
849rctx_nextserver(respctx_t *rctx, dns_message_t *message,
850		dns_adbaddrinfo_t *addrinfo, isc_result_t result);
851
852static void
853rctx_resend(respctx_t *rctx, dns_adbaddrinfo_t *addrinfo);
854
855static void
856rctx_next(respctx_t *rctx);
857
858static void
859rctx_chaseds(respctx_t *rctx, dns_message_t *message,
860	     dns_adbaddrinfo_t *addrinfo, isc_result_t result);
861
862static void
863rctx_done(respctx_t *rctx, isc_result_t result);
864
865static void
866rctx_logpacket(respctx_t *rctx);
867
868static void
869rctx_opt(respctx_t *rctx);
870
871static void
872rctx_edns(respctx_t *rctx);
873
874static isc_result_t
875rctx_parse(respctx_t *rctx);
876
877static isc_result_t
878rctx_badserver(respctx_t *rctx, isc_result_t result);
879
880static isc_result_t
881rctx_answer(respctx_t *rctx);
882
883static isc_result_t
884rctx_lameserver(respctx_t *rctx);
885
886static isc_result_t
887rctx_dispfail(respctx_t *rctx);
888
889static void
890rctx_delonly_zone(respctx_t *rctx);
891
892static void
893rctx_ncache(respctx_t *rctx);
894
895/*%
896 * Increment resolver-related statistics counters.
897 */
898static void
899inc_stats(dns_resolver_t *res, isc_statscounter_t counter) {
900	if (res->view->resstats != NULL) {
901		isc_stats_increment(res->view->resstats, counter);
902	}
903}
904
905static void
906dec_stats(dns_resolver_t *res, isc_statscounter_t counter) {
907	if (res->view->resstats != NULL) {
908		isc_stats_decrement(res->view->resstats, counter);
909	}
910}
911
912static isc_result_t
913valcreate(fetchctx_t *fctx, dns_message_t *message, dns_adbaddrinfo_t *addrinfo,
914	  dns_name_t *name, dns_rdatatype_t type, dns_rdataset_t *rdataset,
915	  dns_rdataset_t *sigrdataset, unsigned int valoptions,
916	  isc_task_t *task) {
917	dns_validator_t *validator = NULL;
918	dns_valarg_t *valarg = NULL;
919	isc_result_t result;
920
921	if (SHUTTINGDOWN(fctx)) {
922		return (ISC_R_SHUTTINGDOWN);
923	}
924
925	valarg = isc_mem_get(fctx->mctx, sizeof(*valarg));
926	*valarg = (dns_valarg_t){ .fctx = fctx, .addrinfo = addrinfo };
927
928	dns_message_attach(message, &valarg->message);
929
930	if (!ISC_LIST_EMPTY(fctx->validators)) {
931		valoptions |= DNS_VALIDATOR_DEFER;
932	} else {
933		valoptions &= ~DNS_VALIDATOR_DEFER;
934	}
935
936	result = dns_validator_create(fctx->res->view, name, type, rdataset,
937				      sigrdataset, message, valoptions, task,
938				      validated, valarg, &validator);
939	if (result == ISC_R_SUCCESS) {
940		inc_stats(fctx->res, dns_resstatscounter_val);
941		if ((valoptions & DNS_VALIDATOR_DEFER) == 0) {
942			INSIST(fctx->validator == NULL);
943			fctx->validator = validator;
944		}
945		ISC_LIST_APPEND(fctx->validators, validator, link);
946	} else {
947		dns_message_detach(&valarg->message);
948		isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
949	}
950	return (result);
951}
952
953static bool
954rrsig_fromchildzone(fetchctx_t *fctx, dns_rdataset_t *rdataset) {
955	dns_namereln_t namereln;
956	dns_rdata_rrsig_t rrsig;
957	dns_rdata_t rdata = DNS_RDATA_INIT;
958	int order;
959	isc_result_t result;
960	unsigned int labels;
961
962	for (result = dns_rdataset_first(rdataset); result == ISC_R_SUCCESS;
963	     result = dns_rdataset_next(rdataset))
964	{
965		dns_rdataset_current(rdataset, &rdata);
966		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
967		RUNTIME_CHECK(result == ISC_R_SUCCESS);
968		namereln = dns_name_fullcompare(&rrsig.signer, &fctx->domain,
969						&order, &labels);
970		if (namereln == dns_namereln_subdomain) {
971			return (true);
972		}
973		dns_rdata_reset(&rdata);
974	}
975	return (false);
976}
977
/*
 * Delegation-only fixup.
 *
 * Inspect 'message', received for the fetch described by 'fctx', and decide
 * whether it is acceptable under the delegation-only assumption discussed
 * in the comment below.  If it is not, the message is rewritten in place to
 * look like an NXDOMAIN response.
 *
 * Returns:
 *	false	the message was left untouched (it already is NXDOMAIN, or
 *		one of the acceptance rules below matched);
 *	true	the message was munged: its rcode is now NXDOMAIN and the
 *		ANSWER and ADDITIONAL section counts are zero.  The AUTHORITY
 *		count is zeroed as well unless an SOA rdataset owned by
 *		fctx->domain was seen in that section.
 *
 * Note that munging only resets the section counters and the rcode; the
 * names/rdatasets attached to the message are not detached here.
 */
static bool
fix_mustbedelegationornxdomain(dns_message_t *message, fetchctx_t *fctx) {
	dns_name_t *name;
	dns_name_t *domain = &fctx->domain;
	dns_rdataset_t *rdataset;
	dns_rdatatype_t type;
	isc_result_t result;
	bool keep_auth = false;

	/* An NXDOMAIN response needs no fixing. */
	if (message->rcode == dns_rcode_nxdomain) {
		return (false);
	}

	/*
	 * A DS RRset can appear anywhere in a zone, even for a delegation-only
	 * zone.  So a response to an explicit query for this type should be
	 * excluded from delegation-only fixup.
	 *
	 * SOA, NS, and DNSKEY can only exist at a zone apex, so a positive
	 * response to a query for these types can never violate the
	 * delegation-only assumption: if the query name is below a
	 * zone cut, the response should normally be a referral, which should
	 * be accepted; if the query name is below a zone cut but the server
	 * happens to have authority for the zone of the query name, the
	 * response is a (non-referral) answer.  But this does not violate
	 * delegation-only because the query name must be in a different zone
	 * due to the "apex-only" nature of these types.  Note that if the
	 * remote server happens to have authority for a child zone of a
	 * delegation-only zone, we may still incorrectly "fix" the response
	 * with NXDOMAIN for queries for other types.  Unfortunately it's
	 * generally impossible to differentiate this case from violation of
	 * the delegation-only assumption.  Once the resolver learns the
	 * correct zone cut, possibly via a separate query for an "apex-only"
	 * type, queries for other types will be resolved correctly.
	 *
	 * A query for type ANY will be accepted if it hits an exceptional
	 * type above in the answer section as it should be from a child
	 * zone.
	 *
	 * Also accept answers with RRSIG records from the child zone.
	 * Direct queries for RRSIG records should not be answered from
	 * the parent zone.
	 */

	if (message->counts[DNS_SECTION_ANSWER] != 0 &&
	    (fctx->type == dns_rdatatype_ns || fctx->type == dns_rdatatype_ds ||
	     fctx->type == dns_rdatatype_soa ||
	     fctx->type == dns_rdatatype_any ||
	     fctx->type == dns_rdatatype_rrsig ||
	     fctx->type == dns_rdatatype_dnskey))
	{
		result = dns_message_firstname(message, DNS_SECTION_ANSWER);
		while (result == ISC_R_SUCCESS) {
			name = NULL;
			dns_message_currentname(message, DNS_SECTION_ANSWER,
						&name);
			for (rdataset = ISC_LIST_HEAD(name->list);
			     rdataset != NULL;
			     rdataset = ISC_LIST_NEXT(rdataset, link))
			{
				/*
				 * Only rdatasets owned by the query name
				 * itself are considered.
				 */
				if (!dns_name_equal(name, &fctx->name)) {
					continue;
				}
				type = rdataset->type;
				/*
				 * RRsig from child?
				 */
				if (type == dns_rdatatype_rrsig &&
				    rrsig_fromchildzone(fctx, rdataset))
				{
					return (false);
				}
				/*
				 * Direct query for apex records or DS.
				 */
				if (fctx->type == type &&
				    (type == dns_rdatatype_ds ||
				     type == dns_rdatatype_ns ||
				     type == dns_rdatatype_soa ||
				     type == dns_rdatatype_dnskey))
				{
					return (false);
				}
				/*
				 * Indirect query for apex records or DS.
				 */
				if (fctx->type == dns_rdatatype_any &&
				    (type == dns_rdatatype_ns ||
				     type == dns_rdatatype_ds ||
				     type == dns_rdatatype_soa ||
				     type == dns_rdatatype_dnskey))
				{
					return (false);
				}
			}
			result = dns_message_nextname(message,
						      DNS_SECTION_ANSWER);
		}
	}

	/*
	 * A NODATA response to a DS query?
	 */
	if (fctx->type == dns_rdatatype_ds &&
	    message->counts[DNS_SECTION_ANSWER] == 0)
	{
		return (false);
	}

	/* Look for referral or indication of answer from child zone? */
	if (message->counts[DNS_SECTION_AUTHORITY] == 0) {
		/* Nothing in AUTHORITY can vouch for the response. */
		goto munge;
	}

	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
	while (result == ISC_R_SUCCESS) {
		name = NULL;
		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
		     rdataset = ISC_LIST_NEXT(rdataset, link))
		{
			type = rdataset->type;
			/*
			 * Remember that an SOA owned by 'domain' was seen so
			 * that the AUTHORITY count survives munging.
			 */
			if (type == dns_rdatatype_soa &&
			    dns_name_equal(name, domain))
			{
				keep_auth = true;
			}

			/* Only NS, SOA and RRSIG are of interest below. */
			if (type != dns_rdatatype_ns &&
			    type != dns_rdatatype_soa &&
			    type != dns_rdatatype_rrsig)
			{
				continue;
			}

			if (type == dns_rdatatype_rrsig) {
				if (rrsig_fromchildzone(fctx, rdataset)) {
					return (false);
				} else {
					continue;
				}
			}

			/* NS or SOA records. */
			if (dns_name_equal(name, domain)) {
				/*
				 * If a query for ANY causes a negative
				 * response, we can be sure that this is
				 * an empty node.  For other type of queries
				 * we cannot differentiate an empty node
				 * from a node that just doesn't have that
				 * type of record.  We only accept the former
				 * case.
				 */
				if (message->counts[DNS_SECTION_ANSWER] == 0 &&
				    fctx->type == dns_rdatatype_any)
				{
					return (false);
				}
			} else if (dns_name_issubdomain(name, domain)) {
				/* Referral or answer from child zone. */
				return (false);
			}
		}
		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
	}

munge:
	/*
	 * No acceptance rule matched: turn the response into NXDOMAIN by
	 * overriding the rcode and hiding the section contents.
	 */
	message->rcode = dns_rcode_nxdomain;
	message->counts[DNS_SECTION_ANSWER] = 0;
	if (!keep_auth) {
		message->counts[DNS_SECTION_AUTHORITY] = 0;
	}
	message->counts[DNS_SECTION_ADDITIONAL] = 0;
	return (true);
}
1154
/*
 * (Re)arm fctx->timer as a one-shot timer that expires at fctx->expires,
 * with no idle interval, asking isc_timer_reset() to purge (last argument
 * 'true').
 *
 * Returns whatever isc_timer_reset() returns.
 */
static isc_result_t
fctx_starttimer(fetchctx_t *fctx) {
	/*
	 * Start the lifetime timer for fctx.
	 *
	 * This is also used for stopping the idle timer; in that
	 * case we must purge events already posted to ensure that
	 * no further idle events are delivered.
	 */
	return (isc_timer_reset(fctx->timer, isc_timertype_once, &fctx->expires,
				NULL, true));
}
1167
/*
 * (Re)arm fctx->timer_try_stale as a one-shot timer that expires at
 * fctx->expires_try_stale, with no idle interval and with purging
 * requested.
 *
 * Unlike fctx_stoptimer_trystale(), this function does not check
 * fctx->timer_try_stale for NULL before using it.
 *
 * Returns whatever isc_timer_reset() returns.
 */
static isc_result_t
fctx_starttimer_trystale(fetchctx_t *fctx) {
	/*
	 * Start the stale-answer-client-timeout timer for fctx.
	 */

	return (isc_timer_reset(fctx->timer_try_stale, isc_timertype_once,
				&fctx->expires_try_stale, NULL, true));
}
1177
1178static void
1179fctx_stoptimer(fetchctx_t *fctx) {
1180	isc_result_t result;
1181
1182	/*
1183	 * We don't return a result if resetting the timer to inactive fails
1184	 * since there's nothing to be done about it.  Resetting to inactive
1185	 * should never fail anyway, since the code as currently written
1186	 * cannot fail in that case.
1187	 */
1188	result = isc_timer_reset(fctx->timer, isc_timertype_inactive, NULL,
1189				 NULL, true);
1190	if (result != ISC_R_SUCCESS) {
1191		UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_timer_reset(): %s",
1192				 isc_result_totext(result));
1193	}
1194}
1195
1196static void
1197fctx_stoptimer_trystale(fetchctx_t *fctx) {
1198	isc_result_t result;
1199
1200	if (fctx->timer_try_stale != NULL) {
1201		result = isc_timer_reset(fctx->timer_try_stale,
1202					 isc_timertype_inactive, NULL, NULL,
1203					 true);
1204		if (result != ISC_R_SUCCESS) {
1205			UNEXPECTED_ERROR(__FILE__, __LINE__,
1206					 "isc_timer_reset(): %s",
1207					 isc_result_totext(result));
1208		}
1209	}
1210}
1211
/*
 * (Re)arm fctx->timer as a one-shot timer with both an absolute expiry
 * (fctx->expires) and an idle 'interval'.  Purging is NOT requested here
 * (last argument 'false'), in contrast to fctx_starttimer().
 *
 * Returns whatever isc_timer_reset() returns.
 */
static isc_result_t
fctx_startidletimer(fetchctx_t *fctx, isc_interval_t *interval) {
	/*
	 * Start the idle timer for fctx.  The lifetime timer continues
	 * to be in effect.
	 */
	return (isc_timer_reset(fctx->timer, isc_timertype_once, &fctx->expires,
				interval, false));
}
1221
1222/*
1223 * Stopping the idle timer is equivalent to calling fctx_starttimer(), but
1224 * we use fctx_stopidletimer for readability in the code below.
1225 */
1226#define fctx_stopidletimer fctx_starttimer
1227
1228static void
1229resquery_destroy(resquery_t **queryp) {
1230	dns_resolver_t *res;
1231	bool empty;
1232	resquery_t *query;
1233	fetchctx_t *fctx;
1234	unsigned int bucket;
1235
1236	REQUIRE(queryp != NULL);
1237	query = *queryp;
1238	*queryp = NULL;
1239	REQUIRE(!ISC_LINK_LINKED(query, link));
1240
1241	INSIST(query->tcpsocket == NULL);
1242
1243	fctx = query->fctx;
1244	res = fctx->res;
1245	bucket = fctx->bucketnum;
1246
1247	LOCK(&res->buckets[bucket].lock);
1248	fctx->nqueries--;
1249	empty = fctx_decreference(query->fctx);
1250	UNLOCK(&res->buckets[bucket].lock);
1251
1252	if (query->rmessage != NULL) {
1253		dns_message_detach(&query->rmessage);
1254	}
1255
1256	query->magic = 0;
1257	isc_mem_put(query->mctx, query, sizeof(*query));
1258
1259	if (empty) {
1260		empty_bucket(res);
1261	}
1262}
1263
1264/*%
1265 * Update EDNS statistics for a server after not getting a response to a UDP
1266 * query sent to it.
1267 */
1268static void
1269update_edns_stats(resquery_t *query) {
1270	fetchctx_t *fctx = query->fctx;
1271
1272	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1273		return;
1274	}
1275
1276	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1277		dns_adb_ednsto(fctx->adb, query->addrinfo, query->udpsize);
1278	} else {
1279		dns_adb_timeout(fctx->adb, query->addrinfo);
1280	}
1281}
1282
/*
 * Cancel a single outstanding query and release the resources it holds.
 *
 * Arguments:
 *	queryp		points at the query to cancel; it must not already be
 *			marked canceled.  Note that '*queryp' is only read,
 *			never written, by this function: if the query ends up
 *			being destroyed below, the caller's pointer is left
 *			dangling and must not be used again.
 *	deventp		passed through to dns_dispatch_removeresponse() when
 *			the query still has a dispatch entry.
 *	finish		if non-NULL, the time the response arrived; a real RTT
 *			is computed from query->start, counted in the RTT
 *			statistics and fed to the ADB with the default
 *			adjustment factor.
 *	no_response	when 'finish' is NULL and this is true, the server is
 *			treated as not having answered: EDNS statistics are
 *			updated, a timed-out forwarder is marked bad under
 *			"forward first", and the server's SRTT is replaced by
 *			a randomly increased value (capped at
 *			MAX_SINGLE_QUERY_TIMEOUT_US).
 *	age_untried	when true (or when 'finish' is non-NULL) the SRTT of
 *			every address still UNMARKED in the fetch context's
 *			address lists is aged.
 *
 * After the bookkeeping, pending connect/send socket operations are
 * canceled, the dispatch response is removed, the query is unlinked from
 * fctx->queries and its TSIG buffer, TSIG key and dispatch are released.
 * The query object itself is destroyed here only if no connect or send is
 * still in flight; otherwise the corresponding socket event handler is
 * expected to finish the cleanup.
 */
static void
fctx_cancelquery(resquery_t **queryp, dns_dispatchevent_t **deventp,
		 isc_time_t *finish, bool no_response, bool age_untried) {
	fetchctx_t *fctx;
	resquery_t *query;
	unsigned int rtt, rttms;
	unsigned int factor;
	dns_adbfind_t *find;
	dns_adbaddrinfo_t *addrinfo;
	isc_socket_t *sock;
	isc_stdtime_t now;

	query = *queryp;
	fctx = query->fctx;

	FCTXTRACE("cancelquery");

	REQUIRE(!RESQUERY_CANCELED(query));

	query->attributes |= RESQUERY_ATTR_CANCELED;

	/*
	 * Should we update the RTT?
	 */
	if (finish != NULL || no_response) {
		if (finish != NULL) {
			/*
			 * We have both the start and finish times for this
			 * packet, so we can compute a real RTT.
			 */
			rtt = (unsigned int)isc_time_microdiff(finish,
							       &query->start);
			factor = DNS_ADB_RTTADJDEFAULT;

			/* Bucket the RTT (in milliseconds) for statistics. */
			rttms = rtt / 1000;
			if (rttms < DNS_RESOLVER_QRYRTTCLASS0) {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt0);
			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS1) {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt1);
			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS2) {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt2);
			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS3) {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt3);
			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS4) {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt4);
			} else {
				inc_stats(fctx->res,
					  dns_resstatscounter_queryrtt5);
			}
		} else {
			uint32_t value;
			uint32_t mask;

			update_edns_stats(query);

			/*
			 * If "forward first;" is used and a forwarder timed
			 * out, do not attempt to query it again in this fetch
			 * context.
			 */
			if (fctx->fwdpolicy == dns_fwdpolicy_first &&
			    ISFORWARDER(query->addrinfo))
			{
				add_bad(fctx, query->rmessage, query->addrinfo,
					ISC_R_TIMEDOUT, badns_forwarder);
			}

			/*
			 * We don't have an RTT for this query.  Maybe the
			 * packet was lost, or maybe this server is very
			 * slow.  We don't know.  Increase the RTT.
			 */
			INSIST(no_response);
			value = isc_random32();
			/*
			 * The larger the current SRTT, the narrower the mask
			 * and hence the smaller the random increment.
			 */
			if (query->addrinfo->srtt > 800000) {
				mask = 0x3fff;
			} else if (query->addrinfo->srtt > 400000) {
				mask = 0x7fff;
			} else if (query->addrinfo->srtt > 200000) {
				mask = 0xffff;
			} else if (query->addrinfo->srtt > 100000) {
				mask = 0x1ffff;
			} else if (query->addrinfo->srtt > 50000) {
				mask = 0x3ffff;
			} else if (query->addrinfo->srtt > 25000) {
				mask = 0x7ffff;
			} else {
				mask = 0xfffff;
			}

			/*
			 * Don't adjust timeout on EDNS queries unless we have
			 * seen a EDNS response.
			 */
			if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0 &&
			    !EDNSOK(query->addrinfo))
			{
				mask >>= 2;
			}

			rtt = query->addrinfo->srtt + (value & mask);
			if (rtt > MAX_SINGLE_QUERY_TIMEOUT_US) {
				rtt = MAX_SINGLE_QUERY_TIMEOUT_US;
			}

			/*
			 * Replace the current RTT with our value.
			 */
			factor = DNS_ADB_RTTADJREPLACE;
		}

		dns_adb_adjustsrtt(fctx->adb, query->addrinfo, rtt, factor);
	}
	if ((query->options & DNS_FETCHOPT_TCP) == 0) {
		/* Inform the ADB that we're ending a UDP fetch */
		dns_adb_endudpfetch(fctx->adb, query->addrinfo);
	}

	/*
	 * Age RTTs of servers not tried.
	 */
	isc_stdtime_get(&now);
	if (finish != NULL || age_untried) {
		for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
		     addrinfo != NULL;
		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
		{
			if (UNMARKED(addrinfo)) {
				dns_adb_agesrtt(fctx->adb, addrinfo, now);
			}
		}
	}

	if ((finish != NULL || age_untried) && TRIEDFIND(fctx)) {
		for (find = ISC_LIST_HEAD(fctx->finds); find != NULL;
		     find = ISC_LIST_NEXT(find, publink))
		{
			for (addrinfo = ISC_LIST_HEAD(find->list);
			     addrinfo != NULL;
			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
			{
				if (UNMARKED(addrinfo)) {
					dns_adb_agesrtt(fctx->adb, addrinfo,
							now);
				}
			}
		}
	}

	if ((finish != NULL || age_untried) && TRIEDALT(fctx)) {
		for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs); addrinfo != NULL;
		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
		{
			if (UNMARKED(addrinfo)) {
				dns_adb_agesrtt(fctx->adb, addrinfo, now);
			}
		}
		for (find = ISC_LIST_HEAD(fctx->altfinds); find != NULL;
		     find = ISC_LIST_NEXT(find, publink))
		{
			for (addrinfo = ISC_LIST_HEAD(find->list);
			     addrinfo != NULL;
			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
			{
				if (UNMARKED(addrinfo)) {
					dns_adb_agesrtt(fctx->adb, addrinfo,
							now);
				}
			}
		}
	}

	/*
	 * Check for any outstanding socket events.  If they exist, cancel
	 * them and let the event handlers finish the cleanup.  The resolver
	 * only needs to worry about managing the connect and send events;
	 * the dispatcher manages the recv events.
	 */
	if (RESQUERY_CONNECTING(query)) {
		/*
		 * Cancel the connect.
		 */
		if (query->tcpsocket != NULL) {
			isc_socket_cancel(query->tcpsocket, NULL,
					  ISC_SOCKCANCEL_CONNECT);
		} else if (query->dispentry != NULL) {
			INSIST(query->exclusivesocket);
			sock = dns_dispatch_getentrysocket(query->dispentry);
			if (sock != NULL) {
				isc_socket_cancel(sock, NULL,
						  ISC_SOCKCANCEL_CONNECT);
			}
		}
	}
	if (RESQUERY_SENDING(query)) {
		/*
		 * Cancel the pending send.
		 */
		if (query->exclusivesocket && query->dispentry != NULL) {
			sock = dns_dispatch_getentrysocket(query->dispentry);
		} else {
			sock = dns_dispatch_getsocket(query->dispatch);
		}
		if (sock != NULL) {
			isc_socket_cancel(sock, NULL, ISC_SOCKCANCEL_SEND);
		}
	}

	if (query->dispentry != NULL) {
		dns_dispatch_removeresponse(&query->dispentry, deventp);
	}

	ISC_LIST_UNLINK(fctx->queries, query, link);

	if (query->tsig != NULL) {
		isc_buffer_free(&query->tsig);
	}

	if (query->tsigkey != NULL) {
		dns_tsigkey_detach(&query->tsigkey);
	}

	if (query->dispatch != NULL) {
		dns_dispatch_detach(&query->dispatch);
	}

	if (!(RESQUERY_CONNECTING(query) || RESQUERY_SENDING(query))) {
		/*
		 * It's safe to destroy the query now.
		 */
		resquery_destroy(&query);
	}
}
1521
1522static void
1523fctx_cancelqueries(fetchctx_t *fctx, bool no_response, bool age_untried) {
1524	resquery_t *query, *next_query;
1525
1526	FCTXTRACE("cancelqueries");
1527
1528	for (query = ISC_LIST_HEAD(fctx->queries); query != NULL;
1529	     query = next_query)
1530	{
1531		next_query = ISC_LIST_NEXT(query, link);
1532		fctx_cancelquery(&query, NULL, NULL, no_response, age_untried);
1533	}
1534}
1535
1536static void
1537fctx_cleanupfinds(fetchctx_t *fctx) {
1538	dns_adbfind_t *find, *next_find;
1539
1540	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1541
1542	for (find = ISC_LIST_HEAD(fctx->finds); find != NULL; find = next_find)
1543	{
1544		next_find = ISC_LIST_NEXT(find, publink);
1545		ISC_LIST_UNLINK(fctx->finds, find, publink);
1546		dns_adb_destroyfind(&find);
1547	}
1548	fctx->find = NULL;
1549}
1550
1551static void
1552fctx_cleanupaltfinds(fetchctx_t *fctx) {
1553	dns_adbfind_t *find, *next_find;
1554
1555	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1556
1557	for (find = ISC_LIST_HEAD(fctx->altfinds); find != NULL;
1558	     find = next_find)
1559	{
1560		next_find = ISC_LIST_NEXT(find, publink);
1561		ISC_LIST_UNLINK(fctx->altfinds, find, publink);
1562		dns_adb_destroyfind(&find);
1563	}
1564	fctx->altfind = NULL;
1565}
1566
1567static void
1568fctx_cleanupforwaddrs(fetchctx_t *fctx) {
1569	dns_adbaddrinfo_t *addr, *next_addr;
1570
1571	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1572
1573	for (addr = ISC_LIST_HEAD(fctx->forwaddrs); addr != NULL;
1574	     addr = next_addr)
1575	{
1576		next_addr = ISC_LIST_NEXT(addr, publink);
1577		ISC_LIST_UNLINK(fctx->forwaddrs, addr, publink);
1578		dns_adb_freeaddrinfo(fctx->adb, &addr);
1579	}
1580}
1581
1582static void
1583fctx_cleanupaltaddrs(fetchctx_t *fctx) {
1584	dns_adbaddrinfo_t *addr, *next_addr;
1585
1586	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1587
1588	for (addr = ISC_LIST_HEAD(fctx->altaddrs); addr != NULL;
1589	     addr = next_addr)
1590	{
1591		next_addr = ISC_LIST_NEXT(addr, publink);
1592		ISC_LIST_UNLINK(fctx->altaddrs, addr, publink);
1593		dns_adb_freeaddrinfo(fctx->adb, &addr);
1594	}
1595}
1596
/*
 * Bring all query activity of 'fctx' to a halt: cancel every outstanding
 * query (passing 'no_response' and 'age_untried' through to
 * fctx_cancelqueries()), then deactivate both the main timer and the
 * try-stale timer.
 */
static void
fctx_stopqueries(fetchctx_t *fctx, bool no_response, bool age_untried) {
	FCTXTRACE("stopqueries");
	fctx_cancelqueries(fctx, no_response, age_untried);
	fctx_stoptimer(fctx);
	fctx_stoptimer_trystale(fctx);
}
1604
/*
 * Release all address-lookup state held by 'fctx': the regular and
 * alternate ADB finds and the forwarder and alternate address lists.
 *
 * Each helper called here REQUIREs fctx->queries to be empty, so the
 * queries must have been canceled before this function is used.
 */
static void
fctx_cleanupall(fetchctx_t *fctx) {
	fctx_cleanupfinds(fctx);
	fctx_cleanupaltfinds(fctx);
	fctx_cleanupforwaddrs(fctx);
	fctx_cleanupaltaddrs(fctx);
}
1612
1613static void
1614fcount_logspill(fetchctx_t *fctx, fctxcount_t *counter, bool final) {
1615	char dbuf[DNS_NAME_FORMATSIZE];
1616	isc_stdtime_t now;
1617
1618	if (!isc_log_wouldlog(dns_lctx, ISC_LOG_INFO)) {
1619		return;
1620	}
1621
1622	/* Do not log a message if there were no dropped fetches. */
1623	if (counter->dropped == 0) {
1624		return;
1625	}
1626
1627	/* Do not log the cumulative message if the previous log is recent. */
1628	isc_stdtime_get(&now);
1629	if (!final && counter->logged > now - 60) {
1630		return;
1631	}
1632
1633	dns_name_format(&fctx->domain, dbuf, sizeof(dbuf));
1634
1635	if (!final) {
1636		isc_log_write(dns_lctx, DNS_LOGCATEGORY_SPILL,
1637			      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1638			      "too many simultaneous fetches for %s "
1639			      "(allowed %d spilled %d)",
1640			      dbuf, counter->allowed, counter->dropped);
1641	} else {
1642		isc_log_write(dns_lctx, DNS_LOGCATEGORY_SPILL,
1643			      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1644			      "fetch counters for %s now being discarded "
1645			      "(allowed %d spilled %d; cumulative since "
1646			      "initial trigger event)",
1647			      dbuf, counter->allowed, counter->dropped);
1648	}
1649
1650	counter->logged = now;
1651}
1652
1653static isc_result_t
1654fcount_incr(fetchctx_t *fctx, bool force) {
1655	isc_result_t result = ISC_R_SUCCESS;
1656	zonebucket_t *dbucket;
1657	fctxcount_t *counter;
1658	unsigned int bucketnum;
1659
1660	REQUIRE(fctx != NULL);
1661	REQUIRE(fctx->res != NULL);
1662
1663	INSIST(fctx->dbucketnum == RES_NOBUCKET);
1664	bucketnum = dns_name_fullhash(&fctx->domain, false) %
1665		    RES_DOMAIN_BUCKETS;
1666
1667	dbucket = &fctx->res->dbuckets[bucketnum];
1668
1669	LOCK(&dbucket->lock);
1670	for (counter = ISC_LIST_HEAD(dbucket->list); counter != NULL;
1671	     counter = ISC_LIST_NEXT(counter, link))
1672	{
1673		if (dns_name_equal(counter->domain, &fctx->domain)) {
1674			break;
1675		}
1676	}
1677
1678	if (counter == NULL) {
1679		counter = isc_mem_get(dbucket->mctx, sizeof(fctxcount_t));
1680		{
1681			ISC_LINK_INIT(counter, link);
1682			counter->count = 1;
1683			counter->logged = 0;
1684			counter->allowed = 1;
1685			counter->dropped = 0;
1686			counter->domain =
1687				dns_fixedname_initname(&counter->fdname);
1688			dns_name_copynf(&fctx->domain, counter->domain);
1689			ISC_LIST_APPEND(dbucket->list, counter, link);
1690		}
1691	} else {
1692		uint_fast32_t spill = atomic_load_acquire(&fctx->res->zspill);
1693		if (!force && spill != 0 && counter->count >= spill) {
1694			counter->dropped++;
1695			fcount_logspill(fctx, counter, false);
1696			result = ISC_R_QUOTA;
1697		} else {
1698			counter->count++;
1699			counter->allowed++;
1700		}
1701	}
1702	UNLOCK(&dbucket->lock);
1703
1704	if (result == ISC_R_SUCCESS) {
1705		fctx->dbucketnum = bucketnum;
1706	}
1707
1708	return (result);
1709}
1710
1711static void
1712fcount_decr(fetchctx_t *fctx) {
1713	zonebucket_t *dbucket;
1714	fctxcount_t *counter;
1715
1716	REQUIRE(fctx != NULL);
1717
1718	if (fctx->dbucketnum == RES_NOBUCKET) {
1719		return;
1720	}
1721
1722	dbucket = &fctx->res->dbuckets[fctx->dbucketnum];
1723
1724	LOCK(&dbucket->lock);
1725	for (counter = ISC_LIST_HEAD(dbucket->list); counter != NULL;
1726	     counter = ISC_LIST_NEXT(counter, link))
1727	{
1728		if (dns_name_equal(counter->domain, &fctx->domain)) {
1729			break;
1730		}
1731	}
1732
1733	if (counter != NULL) {
1734		INSIST(counter->count != 0);
1735		counter->count--;
1736		fctx->dbucketnum = RES_NOBUCKET;
1737
1738		if (counter->count == 0) {
1739			fcount_logspill(fctx, counter, true);
1740			ISC_LIST_UNLINK(dbucket->list, counter, link);
1741			isc_mem_put(dbucket->mctx, counter, sizeof(*counter));
1742		}
1743	}
1744
1745	UNLOCK(&dbucket->lock);
1746}
1747
/*
 * Deliver the outcome of a finished fetch to everyone waiting on it.
 *
 * Arguments:
 *	fctx	the fetch context; must already be in state fetchstate_done.
 *	result	the overall result of the fetch.  It is stored in
 *		fctx->result and copied into each delivered event unless the
 *		fetch context already has an answer (HAVE_ANSWER).
 *	line	source line recorded in fctx->exitline for later logging.
 *
 * Every event on fctx->events is unlinked.  DNS_EVENT_TRYSTALE events are
 * simply freed (after detaching the task stored in ev_sender); all other
 * events are completed and sent to the task that was stored in ev_sender.
 *
 * Afterwards, if the fetch produced an answer, had spilled clients, and
 * the number of delivered events equals the resolver's current 'spillat'
 * value, 'spillat' is raised by 5 (bounded by 'spillatmax' when that is
 * non-zero), the spillat timer is restarted with a 20 minute period, and
 * the change is logged.
 */
static void
fctx_sendevents(fetchctx_t *fctx, isc_result_t result, int line) {
	dns_fetchevent_t *event, *next_event;
	isc_task_t *task;
	unsigned int count = 0;
	isc_interval_t i;
	bool logit = false;
	isc_time_t now;
	unsigned int old_spillat;
	unsigned int new_spillat = 0; /* initialized to silence
				       * compiler warnings */

	/*
	 * Caller must be holding the appropriate bucket lock.
	 */
	REQUIRE(fctx->state == fetchstate_done);

	FCTXTRACE("sendevents");

	/*
	 * Keep some record of fetch result for logging later (if required).
	 */
	fctx->result = result;
	fctx->exitline = line;
	TIME_NOW(&now);
	fctx->duration = isc_time_microdiff(&now, &fctx->start);

	for (event = ISC_LIST_HEAD(fctx->events); event != NULL;
	     event = next_event)
	{
		next_event = ISC_LIST_NEXT(event, ev_link);
		ISC_LIST_UNLINK(fctx->events, event, ev_link);
		if (event->ev_type == DNS_EVENT_TRYSTALE) {
			/*
			 * Not applicable to TRY STALE events, this function is
			 * called when the fetch has either completed or timed
			 * out due to resolver-query-timeout being reached.
			 */
			isc_task_detach((isc_task_t **)&event->ev_sender);
			isc_event_free((isc_event_t **)&event);
			continue;
		}
		/*
		 * Until now ev_sender held the destination task; from here
		 * on it identifies the fetch context as the sender.
		 */
		task = event->ev_sender;
		event->ev_sender = fctx;
		event->vresult = fctx->vresult;
		if (!HAVE_ANSWER(fctx)) {
			event->result = result;
		}

		INSIST(event->result != ISC_R_SUCCESS ||
		       dns_rdataset_isassociated(event->rdataset) ||
		       fctx->type == dns_rdatatype_any ||
		       fctx->type == dns_rdatatype_rrsig ||
		       fctx->type == dns_rdatatype_sig);

		/*
		 * Negative results must be indicated in event->result.
		 */
		if (dns_rdataset_isassociated(event->rdataset) &&
		    NEGATIVE(event->rdataset))
		{
			INSIST(event->result == DNS_R_NCACHENXDOMAIN ||
			       event->result == DNS_R_NCACHENXRRSET);
		}

		isc_task_sendanddetach(&task, ISC_EVENT_PTR(&event));
		count++;
	}

	/*
	 * Consider raising the resolver-wide 'spillat' threshold.  This is
	 * only attempted while the number of events just delivered is
	 * below 'spillatmax' (or when no maximum is configured).
	 */
	if (HAVE_ANSWER(fctx) && fctx->spilled &&
	    (count < fctx->res->spillatmax || fctx->res->spillatmax == 0))
	{
		LOCK(&fctx->res->lock);
		if (count == fctx->res->spillat &&
		    !atomic_load_acquire(&fctx->res->exiting))
		{
			old_spillat = fctx->res->spillat;
			fctx->res->spillat += 5;
			if (fctx->res->spillat > fctx->res->spillatmax &&
			    fctx->res->spillatmax != 0)
			{
				fctx->res->spillat = fctx->res->spillatmax;
			}
			new_spillat = fctx->res->spillat;
			if (new_spillat != old_spillat) {
				logit = true;
			}
			/* 'result' is reused here for the timer reset. */
			isc_interval_set(&i, 20 * 60, 0);
			result = isc_timer_reset(fctx->res->spillattimer,
						 isc_timertype_ticker, NULL, &i,
						 true);
			RUNTIME_CHECK(result == ISC_R_SUCCESS);
		}
		UNLOCK(&fctx->res->lock);
		/* Log outside of the resolver lock. */
		if (logit) {
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
				      "clients-per-query increased to %u",
				      new_spillat);
		}
	}
}
1850
1851static void
1852log_edns(fetchctx_t *fctx) {
1853	char domainbuf[DNS_NAME_FORMATSIZE];
1854
1855	if (fctx->reason == NULL) {
1856		return;
1857	}
1858
1859	/*
1860	 * We do not know if fctx->domain is the actual domain the record
1861	 * lives in or a parent domain so we have a '?' after it.
1862	 */
1863	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
1864	isc_log_write(dns_lctx, DNS_LOGCATEGORY_EDNS_DISABLED,
1865		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1866		      "success resolving '%s' (in '%s'?) after %s", fctx->info,
1867		      domainbuf, fctx->reason);
1868}
1869
1870static void
1871fctx_done(fetchctx_t *fctx, isc_result_t result, int line) {
1872	dns_resolver_t *res;
1873	bool no_response = false;
1874	bool age_untried = false;
1875
1876	REQUIRE(line >= 0);
1877
1878	FCTXTRACE("done");
1879
1880	res = fctx->res;
1881
1882	if (result == ISC_R_SUCCESS) {
1883		/*%
1884		 * Log any deferred EDNS timeout messages.
1885		 */
1886		log_edns(fctx);
1887		no_response = true;
1888		if (fctx->qmin_warning != ISC_R_SUCCESS) {
1889			isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
1890				      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1891				      "success resolving '%s' "
1892				      "after disabling qname minimization due "
1893				      "to '%s'",
1894				      fctx->info,
1895				      isc_result_totext(fctx->qmin_warning));
1896		}
1897	} else if (result == ISC_R_TIMEDOUT) {
1898		age_untried = true;
1899	}
1900
1901	fctx->qmin_warning = ISC_R_SUCCESS;
1902	fctx->reason = NULL;
1903
1904	fctx_stopqueries(fctx, no_response, age_untried);
1905
1906	LOCK(&res->buckets[fctx->bucketnum].lock);
1907
1908	fctx->state = fetchstate_done;
1909	FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
1910	fctx_sendevents(fctx, result, line);
1911
1912	UNLOCK(&res->buckets[fctx->bucketnum].lock);
1913}
1914
/*
 * Common completion handling for the socket connect and send events of a
 * query.  The callers in this file decrement query->connects or
 * query->sends before invoking it.
 *
 * If the query has already been canceled, it is destroyed here once no
 * send or connect remains outstanding (detaching a leftover TCP socket
 * first).  Otherwise the event's result decides what happens:
 *
 *	ISC_R_SUCCESS		nothing to do;
 *	unreachable/refused	the server is marked bad, the query is
 *				canceled as "no response", and the fetch is
 *				retried with another server;
 *	anything else		the query is canceled without an SRTT
 *				penalty and without retrying here.
 *
 * Only ISC_SOCKEVENT_CONNECT events are freed by this function.
 */
static void
process_sendevent(resquery_t *query, isc_event_t *event) {
	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
	bool destroy_query = false;
	bool retry = false;
	isc_result_t result;
	fetchctx_t *fctx;

	/*
	 * Save the fetch context now: 'query' may no longer be valid after
	 * fctx_cancelquery() has run.
	 */
	fctx = query->fctx;

	if (RESQUERY_CANCELED(query)) {
		if (query->sends == 0 && query->connects == 0) {
			/*
			 * This query was canceled while the
			 * isc_socket_sendto/connect() was in progress.
			 */
			if (query->tcpsocket != NULL) {
				isc_socket_detach(&query->tcpsocket);
			}
			destroy_query = true;
		}
	} else {
		switch (sevent->result) {
		case ISC_R_SUCCESS:
			break;

		case ISC_R_HOSTUNREACH:
		case ISC_R_NETUNREACH:
		case ISC_R_NOPERM:
		case ISC_R_ADDRNOTAVAIL:
		case ISC_R_CONNREFUSED:
			FCTXTRACE3("query canceled in sendevent(): "
				   "no route to host; no response",
				   sevent->result);

			/*
			 * No route to remote.
			 */
			add_bad(fctx, query->rmessage, query->addrinfo,
				sevent->result, badns_unreachable);
			fctx_cancelquery(&query, NULL, NULL, true, false);
			retry = true;
			break;

		default:
			FCTXTRACE3("query canceled in sendevent() due to "
				   "unexpected event result; responding",
				   sevent->result);

			fctx_cancelquery(&query, NULL, NULL, false, false);
			break;
		}
	}

	if (event->ev_type == ISC_SOCKEVENT_CONNECT) {
		isc_event_free(&event);
	}

	if (retry) {
		/*
		 * Behave as if the idle timer has expired.  For TCP
		 * this may not actually reflect the latest timer.
		 */
		FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
		result = fctx_stopidletimer(fctx);
		if (result != ISC_R_SUCCESS) {
			fctx_done(fctx, result, __LINE__);
		} else {
			fctx_try(fctx, true, false);
		}
	}

	/*
	 * 'destroy_query' is only ever set on the already-canceled path
	 * above, where fctx_cancelquery() has not been called on 'query'
	 * in this function.
	 */
	if (destroy_query) {
		resquery_destroy(&query);
	}
}
1991
1992static void
1993resquery_udpconnected(isc_task_t *task, isc_event_t *event) {
1994	resquery_t *query = event->ev_arg;
1995
1996	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
1997
1998	QTRACE("udpconnected");
1999
2000	UNUSED(task);
2001
2002	INSIST(RESQUERY_CONNECTING(query));
2003
2004	query->connects--;
2005
2006	process_sendevent(query, event);
2007}
2008
2009static void
2010resquery_senddone(isc_task_t *task, isc_event_t *event) {
2011	resquery_t *query = event->ev_arg;
2012
2013	REQUIRE(event->ev_type == ISC_SOCKEVENT_SENDDONE);
2014
2015	QTRACE("senddone");
2016
2017	/*
2018	 * XXXRTH
2019	 *
2020	 * Currently we don't wait for the senddone event before retrying
2021	 * a query.  This means that if we get really behind, we may end
2022	 * up doing extra work!
2023	 */
2024
2025	UNUSED(task);
2026
2027	INSIST(RESQUERY_SENDING(query));
2028
2029	query->sends--;
2030
2031	process_sendevent(query, event);
2032}
2033
2034static isc_result_t
2035fctx_addopt(dns_message_t *message, unsigned int version, uint16_t udpsize,
2036	    dns_ednsopt_t *ednsopts, size_t count) {
2037	dns_rdataset_t *rdataset = NULL;
2038	isc_result_t result;
2039
2040	result = dns_message_buildopt(message, &rdataset, version, udpsize,
2041				      DNS_MESSAGEEXTFLAG_DO, ednsopts, count);
2042	if (result != ISC_R_SUCCESS) {
2043		return (result);
2044	}
2045	return (dns_message_setopt(message, rdataset));
2046}
2047
2048static void
2049fctx_setretryinterval(fetchctx_t *fctx, unsigned int rtt) {
2050	unsigned int seconds;
2051	unsigned int us;
2052
2053	us = fctx->res->retryinterval * 1000;
2054	/*
2055	 * Exponential backoff after the first few tries.
2056	 */
2057	if (fctx->restarts > fctx->res->nonbackofftries) {
2058		int shift = fctx->restarts - fctx->res->nonbackofftries;
2059		if (shift > 6) {
2060			shift = 6;
2061		}
2062		us <<= shift;
2063	}
2064
2065	/*
2066	 * Add a fudge factor to the expected rtt based on the current
2067	 * estimate.
2068	 */
2069	if (rtt < 50000) {
2070		rtt += 50000;
2071	} else if (rtt < 100000) {
2072		rtt += 100000;
2073	} else {
2074		rtt += 200000;
2075	}
2076
2077	/*
2078	 * Always wait for at least the expected rtt.
2079	 */
2080	if (us < rtt) {
2081		us = rtt;
2082	}
2083
2084	/*
2085	 * But don't ever wait for more than 10 seconds.
2086	 */
2087	if (us > MAX_SINGLE_QUERY_TIMEOUT_US) {
2088		us = MAX_SINGLE_QUERY_TIMEOUT_US;
2089	}
2090
2091	seconds = us / US_PER_SEC;
2092	us -= seconds * US_PER_SEC;
2093	isc_interval_set(&fctx->interval, seconds, us * 1000);
2094}
2095
2096static isc_result_t
2097fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
2098	   unsigned int options) {
2099	dns_resolver_t *res;
2100	isc_task_t *task;
2101	isc_result_t result;
2102	resquery_t *query;
2103	isc_sockaddr_t addr;
2104	bool have_addr = false;
2105	unsigned int srtt;
2106	isc_dscp_t dscp = -1;
2107	unsigned int bucketnum;
2108
2109	FCTXTRACE("query");
2110
2111	res = fctx->res;
2112	task = res->buckets[fctx->bucketnum].task;
2113
2114	srtt = addrinfo->srtt;
2115
2116	/*
2117	 * Allow an additional second for the kernel to resend the SYN (or
2118	 * SYN without ECN in the case of stupid firewalls blocking ECN
2119	 * negotiation) over the current RTT estimate.
2120	 */
2121	if ((options & DNS_FETCHOPT_TCP) != 0) {
2122		srtt += 1000000;
2123	}
2124
2125	/*
2126	 * A forwarder needs to make multiple queries. Give it at least
2127	 * a second to do these in.
2128	 */
2129	if (ISFORWARDER(addrinfo) && srtt < 1000000) {
2130		srtt = 1000000;
2131	}
2132
2133	fctx_setretryinterval(fctx, srtt);
2134	result = fctx_startidletimer(fctx, &fctx->interval);
2135	if (result != ISC_R_SUCCESS) {
2136		return (result);
2137	}
2138
2139	INSIST(ISC_LIST_EMPTY(fctx->validators));
2140
2141	query = isc_mem_get(fctx->mctx, sizeof(*query));
2142	query->rmessage = NULL;
2143	dns_message_create(fctx->mctx, DNS_MESSAGE_INTENTPARSE,
2144			   &query->rmessage);
2145	query->mctx = fctx->mctx;
2146	query->options = options;
2147	query->attributes = 0;
2148	query->sends = 0;
2149	query->connects = 0;
2150	query->dscp = addrinfo->dscp;
2151	query->udpsize = 0;
2152	/*
2153	 * Note that the caller MUST guarantee that 'addrinfo' will remain
2154	 * valid until this query is canceled.
2155	 */
2156	query->addrinfo = addrinfo;
2157	TIME_NOW(&query->start);
2158
2159	/*
2160	 * If this is a TCP query, then we need to make a socket and
2161	 * a dispatch for it here.  Otherwise we use the resolver's
2162	 * shared dispatch.
2163	 */
2164	query->dispatchmgr = res->dispatchmgr;
2165	query->dispatch = NULL;
2166	query->exclusivesocket = false;
2167	query->tcpsocket = NULL;
2168	if (res->view->peers != NULL) {
2169		dns_peer_t *peer = NULL;
2170		isc_netaddr_t dstip;
2171		bool usetcp = false;
2172		isc_netaddr_fromsockaddr(&dstip, &addrinfo->sockaddr);
2173		result = dns_peerlist_peerbyaddr(res->view->peers, &dstip,
2174						 &peer);
2175		if (result == ISC_R_SUCCESS) {
2176			result = dns_peer_getquerysource(peer, &addr);
2177			if (result == ISC_R_SUCCESS) {
2178				have_addr = true;
2179			}
2180			result = dns_peer_getquerydscp(peer, &dscp);
2181			if (result == ISC_R_SUCCESS) {
2182				query->dscp = dscp;
2183			}
2184			result = dns_peer_getforcetcp(peer, &usetcp);
2185			if (result == ISC_R_SUCCESS && usetcp) {
2186				query->options |= DNS_FETCHOPT_TCP;
2187			}
2188		}
2189	}
2190
2191	dscp = -1;
2192	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
2193		int pf;
2194
2195		pf = isc_sockaddr_pf(&addrinfo->sockaddr);
2196		if (!have_addr) {
2197			switch (pf) {
2198			case PF_INET:
2199				result = dns_dispatch_getlocaladdress(
2200					res->dispatches4->dispatches[0], &addr);
2201				dscp = dns_resolver_getquerydscp4(fctx->res);
2202				break;
2203			case PF_INET6:
2204				result = dns_dispatch_getlocaladdress(
2205					res->dispatches6->dispatches[0], &addr);
2206				dscp = dns_resolver_getquerydscp6(fctx->res);
2207				break;
2208			default:
2209				result = ISC_R_NOTIMPLEMENTED;
2210				break;
2211			}
2212			if (result != ISC_R_SUCCESS) {
2213				goto cleanup_query;
2214			}
2215		}
2216		isc_sockaddr_setport(&addr, 0);
2217		if (query->dscp == -1) {
2218			query->dscp = dscp;
2219		}
2220
2221		result = isc_socket_create(res->socketmgr, pf,
2222					   isc_sockettype_tcp,
2223					   &query->tcpsocket);
2224		if (result != ISC_R_SUCCESS) {
2225			goto cleanup_query;
2226		}
2227
2228#ifndef BROKEN_TCP_BIND_BEFORE_CONNECT
2229		result = isc_socket_bind(query->tcpsocket, &addr, 0);
2230		if (result != ISC_R_SUCCESS) {
2231			goto cleanup_socket;
2232		}
2233#endif /* ifndef BROKEN_TCP_BIND_BEFORE_CONNECT */
2234
2235		/*
2236		 * A dispatch will be created once the connect succeeds.
2237		 */
2238	} else {
2239		if (have_addr) {
2240			unsigned int attrs, attrmask;
2241			attrs = DNS_DISPATCHATTR_UDP;
2242			switch (isc_sockaddr_pf(&addr)) {
2243			case AF_INET:
2244				attrs |= DNS_DISPATCHATTR_IPV4;
2245				dscp = dns_resolver_getquerydscp4(fctx->res);
2246				break;
2247			case AF_INET6:
2248				attrs |= DNS_DISPATCHATTR_IPV6;
2249				dscp = dns_resolver_getquerydscp6(fctx->res);
2250				break;
2251			default:
2252				result = ISC_R_NOTIMPLEMENTED;
2253				goto cleanup_query;
2254			}
2255			attrmask = DNS_DISPATCHATTR_UDP;
2256			attrmask |= DNS_DISPATCHATTR_TCP;
2257			attrmask |= DNS_DISPATCHATTR_IPV4;
2258			attrmask |= DNS_DISPATCHATTR_IPV6;
2259			result = dns_dispatch_getudp(
2260				res->dispatchmgr, res->socketmgr, res->taskmgr,
2261				&addr, 4096, 20000, 32768, 16411, 16433, attrs,
2262				attrmask, &query->dispatch);
2263			if (result != ISC_R_SUCCESS) {
2264				goto cleanup_query;
2265			}
2266		} else {
2267			switch (isc_sockaddr_pf(&addrinfo->sockaddr)) {
2268			case PF_INET:
2269				dns_dispatch_attach(
2270					dns_resolver_dispatchv4(res),
2271					&query->dispatch);
2272				query->exclusivesocket = res->exclusivev4;
2273				dscp = dns_resolver_getquerydscp4(fctx->res);
2274				break;
2275			case PF_INET6:
2276				dns_dispatch_attach(
2277					dns_resolver_dispatchv6(res),
2278					&query->dispatch);
2279				query->exclusivesocket = res->exclusivev6;
2280				dscp = dns_resolver_getquerydscp6(fctx->res);
2281				break;
2282			default:
2283				result = ISC_R_NOTIMPLEMENTED;
2284				goto cleanup_query;
2285			}
2286		}
2287
2288		if (query->dscp == -1) {
2289			query->dscp = dscp;
2290		}
2291		/*
2292		 * We should always have a valid dispatcher here.  If we
2293		 * don't support a protocol family, then its dispatcher
2294		 * will be NULL, but we shouldn't be finding addresses for
2295		 * protocol types we don't support, so the dispatcher
2296		 * we found should never be NULL.
2297		 */
2298		INSIST(query->dispatch != NULL);
2299	}
2300
2301	query->dispentry = NULL;
2302	query->fctx = fctx; /* reference added by caller */
2303	query->tsig = NULL;
2304	query->tsigkey = NULL;
2305	ISC_LINK_INIT(query, link);
2306	query->magic = QUERY_MAGIC;
2307
2308	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
2309		/*
2310		 * Connect to the remote server.
2311		 *
2312		 * XXXRTH  Should we attach to the socket?
2313		 */
2314		if (query->dscp != -1) {
2315			isc_socket_dscp(query->tcpsocket, query->dscp);
2316		}
2317		result = isc_socket_connect(query->tcpsocket,
2318					    &addrinfo->sockaddr, task,
2319					    resquery_connected, query);
2320		if (result != ISC_R_SUCCESS) {
2321			goto cleanup_socket;
2322		}
2323		query->connects++;
2324		QTRACE("connecting via TCP");
2325	} else {
2326		if (dns_adbentry_overquota(addrinfo->entry)) {
2327			goto cleanup_dispatch;
2328		}
2329
2330		/* Inform the ADB that we're starting a UDP fetch */
2331		dns_adb_beginudpfetch(fctx->adb, addrinfo);
2332
2333		result = resquery_send(query);
2334		if (result != ISC_R_SUCCESS) {
2335			goto cleanup_udpfetch;
2336		}
2337	}
2338
2339	fctx->querysent++;
2340
2341	ISC_LIST_APPEND(fctx->queries, query, link);
2342	bucketnum = fctx->bucketnum;
2343	LOCK(&res->buckets[bucketnum].lock);
2344	fctx->nqueries++;
2345	UNLOCK(&res->buckets[bucketnum].lock);
2346	if (isc_sockaddr_pf(&addrinfo->sockaddr) == PF_INET) {
2347		inc_stats(res, dns_resstatscounter_queryv4);
2348	} else {
2349		inc_stats(res, dns_resstatscounter_queryv6);
2350	}
2351	if (res->view->resquerystats != NULL) {
2352		dns_rdatatypestats_increment(res->view->resquerystats,
2353					     fctx->type);
2354	}
2355
2356	return (ISC_R_SUCCESS);
2357
2358cleanup_socket:
2359	isc_socket_detach(&query->tcpsocket);
2360
2361cleanup_udpfetch:
2362	if (!RESQUERY_CANCELED(query)) {
2363		if ((query->options & DNS_FETCHOPT_TCP) == 0) {
2364			/* Inform the ADB that we're ending a UDP fetch */
2365			dns_adb_endudpfetch(fctx->adb, addrinfo);
2366		}
2367	}
2368
2369cleanup_dispatch:
2370	if (query->dispatch != NULL) {
2371		dns_dispatch_detach(&query->dispatch);
2372	}
2373
2374cleanup_query:
2375	if (query->connects == 0) {
2376		query->magic = 0;
2377		dns_message_detach(&query->rmessage);
2378		isc_mem_put(fctx->mctx, query, sizeof(*query));
2379	}
2380
2381	RUNTIME_CHECK(fctx_stopidletimer(fctx) == ISC_R_SUCCESS);
2382
2383	return (result);
2384}
2385
2386static bool
2387bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
2388	isc_sockaddr_t *sa;
2389
2390	for (sa = ISC_LIST_HEAD(fctx->bad_edns); sa != NULL;
2391	     sa = ISC_LIST_NEXT(sa, link))
2392	{
2393		if (isc_sockaddr_equal(sa, address)) {
2394			return (true);
2395		}
2396	}
2397
2398	return (false);
2399}
2400
2401static void
2402add_bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
2403	isc_sockaddr_t *sa;
2404
2405#ifdef ENABLE_AFL
2406	if (dns_fuzzing_resolver) {
2407		return;
2408	}
2409#endif /* ifdef ENABLE_AFL */
2410	if (bad_edns(fctx, address)) {
2411		return;
2412	}
2413
2414	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
2415
2416	*sa = *address;
2417	ISC_LIST_INITANDAPPEND(fctx->bad_edns, sa, link);
2418}
2419
2420static struct tried *
2421triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
2422	struct tried *tried;
2423
2424	for (tried = ISC_LIST_HEAD(fctx->edns); tried != NULL;
2425	     tried = ISC_LIST_NEXT(tried, link))
2426	{
2427		if (isc_sockaddr_equal(&tried->addr, address)) {
2428			return (tried);
2429		}
2430	}
2431
2432	return (NULL);
2433}
2434
2435static void
2436add_triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
2437	struct tried *tried;
2438
2439	tried = triededns(fctx, address);
2440	if (tried != NULL) {
2441		tried->count++;
2442		return;
2443	}
2444
2445	tried = isc_mem_get(fctx->mctx, sizeof(*tried));
2446
2447	tried->addr = *address;
2448	tried->count = 1;
2449	ISC_LIST_INITANDAPPEND(fctx->edns, tried, link);
2450}
2451
2452static struct tried *
2453triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
2454	struct tried *tried;
2455
2456	for (tried = ISC_LIST_HEAD(fctx->edns512); tried != NULL;
2457	     tried = ISC_LIST_NEXT(tried, link))
2458	{
2459		if (isc_sockaddr_equal(&tried->addr, address)) {
2460			return (tried);
2461		}
2462	}
2463
2464	return (NULL);
2465}
2466
2467static void
2468add_triededns512(fetchctx_t *fctx, isc_sockaddr_t *address) {
2469	struct tried *tried;
2470
2471	tried = triededns512(fctx, address);
2472	if (tried != NULL) {
2473		tried->count++;
2474		return;
2475	}
2476
2477	tried = isc_mem_get(fctx->mctx, sizeof(*tried));
2478
2479	tried->addr = *address;
2480	tried->count = 1;
2481	ISC_LIST_INITANDAPPEND(fctx->edns512, tried, link);
2482}
2483
2484static size_t
2485addr2buf(void *buf, const size_t bufsize, const isc_sockaddr_t *sockaddr) {
2486	isc_netaddr_t netaddr;
2487	isc_netaddr_fromsockaddr(&netaddr, sockaddr);
2488	switch (netaddr.family) {
2489	case AF_INET:
2490		INSIST(bufsize >= 4);
2491		memmove(buf, &netaddr.type.in, 4);
2492		return (4);
2493	case AF_INET6:
2494		INSIST(bufsize >= 16);
2495		memmove(buf, &netaddr.type.in6, 16);
2496		return (16);
2497	default:
2498		UNREACHABLE();
2499	}
2500	return (0);
2501}
2502
2503static isc_socket_t *
2504query2sock(const resquery_t *query) {
2505	if (query->exclusivesocket) {
2506		return (dns_dispatch_getentrysocket(query->dispentry));
2507	} else {
2508		return (dns_dispatch_getsocket(query->dispatch));
2509	}
2510}
2511
2512static size_t
2513add_serveraddr(uint8_t *buf, const size_t bufsize, const resquery_t *query) {
2514	return (addr2buf(buf, bufsize, &query->addrinfo->sockaddr));
2515}
2516
2517/*
2518 * Client cookie is 8 octets.
2519 * Server cookie is [8..32] octets.
2520 */
2521#define CLIENT_COOKIE_SIZE 8U
2522#define COOKIE_BUFFER_SIZE (8U + 32U)
2523
2524static void
2525compute_cc(const resquery_t *query, uint8_t *cookie, const size_t len) {
2526	INSIST(len >= CLIENT_COOKIE_SIZE);
2527	STATIC_ASSERT(sizeof(query->fctx->res->view->secret) >=
2528			      ISC_SIPHASH24_KEY_LENGTH,
2529		      "The view->secret size can't fit SipHash 2-4 key length");
2530
2531	uint8_t buf[16] ISC_NONSTRING = { 0 };
2532	size_t buflen = add_serveraddr(buf, sizeof(buf), query);
2533
2534	uint8_t digest[ISC_SIPHASH24_TAG_LENGTH] ISC_NONSTRING = { 0 };
2535	isc_siphash24(query->fctx->res->view->secret, buf, buflen, digest);
2536	memmove(cookie, digest, CLIENT_COOKIE_SIZE);
2537}
2538
2539static isc_result_t
2540issecuredomain(dns_view_t *view, const dns_name_t *name, dns_rdatatype_t type,
2541	       isc_stdtime_t now, bool checknta, bool *ntap, bool *issecure) {
2542	dns_name_t suffix;
2543	unsigned int labels;
2544
2545	/*
2546	 * For DS variants we need to check fom the parent domain,
2547	 * since there may be a negative trust anchor for the name,
2548	 * while the enclosing domain where the DS record lives is
2549	 * under a secure entry point.
2550	 */
2551	labels = dns_name_countlabels(name);
2552	if (dns_rdatatype_atparent(type) && labels > 1) {
2553		dns_name_init(&suffix, NULL);
2554		dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
2555		name = &suffix;
2556	}
2557
2558	return (dns_view_issecuredomain(view, name, now, checknta, ntap,
2559					issecure));
2560}
2561
2562static isc_result_t
2563resquery_send(resquery_t *query) {
2564	fetchctx_t *fctx;
2565	isc_result_t result;
2566	dns_name_t *qname = NULL;
2567	dns_rdataset_t *qrdataset = NULL;
2568	isc_region_t r;
2569	dns_resolver_t *res;
2570	isc_task_t *task;
2571	isc_socket_t *sock;
2572	isc_buffer_t tcpbuffer;
2573	isc_sockaddr_t *address;
2574	isc_buffer_t *buffer;
2575	isc_netaddr_t ipaddr;
2576	dns_tsigkey_t *tsigkey = NULL;
2577	dns_peer_t *peer = NULL;
2578	bool useedns;
2579	dns_compress_t cctx;
2580	bool cleanup_cctx = false;
2581	bool secure_domain;
2582	bool tcp = ((query->options & DNS_FETCHOPT_TCP) != 0);
2583	dns_ednsopt_t ednsopts[DNS_EDNSOPTIONS];
2584	unsigned ednsopt = 0;
2585	uint16_t hint = 0, udpsize = 0; /* No EDNS */
2586#ifdef HAVE_DNSTAP
2587	isc_sockaddr_t localaddr, *la = NULL;
2588	unsigned char zone[DNS_NAME_MAXWIRE];
2589	dns_dtmsgtype_t dtmsgtype;
2590	isc_region_t zr;
2591	isc_buffer_t zb;
2592#endif /* HAVE_DNSTAP */
2593
2594	fctx = query->fctx;
2595	QTRACE("send");
2596
2597	res = fctx->res;
2598	task = res->buckets[fctx->bucketnum].task;
2599	address = NULL;
2600
2601	if (tcp) {
2602		/*
2603		 * Reserve space for the TCP message length.
2604		 */
2605		isc_buffer_init(&tcpbuffer, query->data, sizeof(query->data));
2606		isc_buffer_init(&query->buffer, query->data + 2,
2607				sizeof(query->data) - 2);
2608		buffer = &tcpbuffer;
2609	} else {
2610		isc_buffer_init(&query->buffer, query->data,
2611				sizeof(query->data));
2612		buffer = &query->buffer;
2613	}
2614
2615	result = dns_message_gettempname(fctx->qmessage, &qname);
2616	if (result != ISC_R_SUCCESS) {
2617		goto cleanup_temps;
2618	}
2619	result = dns_message_gettemprdataset(fctx->qmessage, &qrdataset);
2620	if (result != ISC_R_SUCCESS) {
2621		goto cleanup_temps;
2622	}
2623
2624	/*
2625	 * Get a query id from the dispatch.
2626	 */
2627	result = dns_dispatch_addresponse(query->dispatch, 0,
2628					  &query->addrinfo->sockaddr, task,
2629					  resquery_response, query, &query->id,
2630					  &query->dispentry, res->socketmgr);
2631	if (result != ISC_R_SUCCESS) {
2632		goto cleanup_temps;
2633	}
2634
2635	fctx->qmessage->opcode = dns_opcode_query;
2636
2637	/*
2638	 * Set up question.
2639	 */
2640	dns_name_clone(&fctx->name, qname);
2641	dns_rdataset_makequestion(qrdataset, res->rdclass, fctx->type);
2642	ISC_LIST_APPEND(qname->list, qrdataset, link);
2643	dns_message_addname(fctx->qmessage, qname, DNS_SECTION_QUESTION);
2644	qname = NULL;
2645	qrdataset = NULL;
2646
2647	/*
2648	 * Set RD if the client has requested that we do a recursive query,
2649	 * or if we're sending to a forwarder.
2650	 */
2651	if ((query->options & DNS_FETCHOPT_RECURSIVE) != 0 ||
2652	    ISFORWARDER(query->addrinfo))
2653	{
2654		fctx->qmessage->flags |= DNS_MESSAGEFLAG_RD;
2655	}
2656
2657	/*
2658	 * Set CD if the client says not to validate, or if the
2659	 * question is under a secure entry point and this is a
2660	 * recursive/forward query -- unless the client said not to.
2661	 */
2662	if ((query->options & DNS_FETCHOPT_NOCDFLAG) != 0) {
2663		/* Do nothing */
2664	} else if ((query->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
2665		fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
2666	} else if (res->view->enablevalidation &&
2667		   ((fctx->qmessage->flags & DNS_MESSAGEFLAG_RD) != 0))
2668	{
2669		bool checknta = ((query->options & DNS_FETCHOPT_NONTA) == 0);
2670		bool ntacovered = false;
2671		result = issecuredomain(res->view, &fctx->name, fctx->type,
2672					isc_time_seconds(&query->start),
2673					checknta, &ntacovered, &secure_domain);
2674		if (result != ISC_R_SUCCESS) {
2675			secure_domain = false;
2676		}
2677		if (secure_domain ||
2678		    (ISFORWARDER(query->addrinfo) && ntacovered))
2679		{
2680			fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
2681		}
2682	}
2683
2684	/*
2685	 * We don't have to set opcode because it defaults to query.
2686	 */
2687	fctx->qmessage->id = query->id;
2688
2689	/*
2690	 * Convert the question to wire format.
2691	 */
2692	result = dns_compress_init(&cctx, -1, fctx->res->mctx);
2693	if (result != ISC_R_SUCCESS) {
2694		goto cleanup_message;
2695	}
2696	cleanup_cctx = true;
2697
2698	result = dns_message_renderbegin(fctx->qmessage, &cctx, &query->buffer);
2699	if (result != ISC_R_SUCCESS) {
2700		goto cleanup_message;
2701	}
2702
2703	result = dns_message_rendersection(fctx->qmessage, DNS_SECTION_QUESTION,
2704					   0);
2705	if (result != ISC_R_SUCCESS) {
2706		goto cleanup_message;
2707	}
2708
2709	peer = NULL;
2710	isc_netaddr_fromsockaddr(&ipaddr, &query->addrinfo->sockaddr);
2711	(void)dns_peerlist_peerbyaddr(fctx->res->view->peers, &ipaddr, &peer);
2712
2713	/*
2714	 * The ADB does not know about servers with "edns no".  Check this,
2715	 * and then inform the ADB for future use.
2716	 */
2717	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0 &&
2718	    peer != NULL &&
2719	    dns_peer_getsupportedns(peer, &useedns) == ISC_R_SUCCESS &&
2720	    !useedns)
2721	{
2722		query->options |= DNS_FETCHOPT_NOEDNS0;
2723		dns_adb_changeflags(fctx->adb, query->addrinfo,
2724				    DNS_FETCHOPT_NOEDNS0, DNS_FETCHOPT_NOEDNS0);
2725	}
2726
2727	/* Sync NOEDNS0 flag in addrinfo->flags and options now. */
2728	if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) != 0) {
2729		query->options |= DNS_FETCHOPT_NOEDNS0;
2730	}
2731
2732	if (fctx->timeout && (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
2733		isc_sockaddr_t *sockaddr = &query->addrinfo->sockaddr;
2734		struct tried *tried;
2735
2736		if ((tried = triededns(fctx, sockaddr)) != NULL) {
2737			if (tried->count == 1U) {
2738				hint = dns_adb_getudpsize(fctx->adb,
2739							  query->addrinfo);
2740			} else if (tried->count >= 2U) {
2741				query->options |= DNS_FETCHOPT_EDNS512;
2742				fctx->reason = "reducing the advertised EDNS "
2743					       "UDP packet size to 512 octets";
2744			}
2745		}
2746	}
2747	fctx->timeout = false;
2748
2749	/*
2750	 * Use EDNS0, unless the caller doesn't want it, or we know that the
2751	 * remote server doesn't like it.
2752	 */
2753	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
2754		if ((query->addrinfo->flags & DNS_FETCHOPT_NOEDNS0) == 0) {
2755			unsigned int version = DNS_EDNS_VERSION;
2756			unsigned int flags = query->addrinfo->flags;
2757			bool reqnsid = res->view->requestnsid;
2758			bool sendcookie = res->view->sendcookie;
2759			bool tcpkeepalive = false;
2760			unsigned char cookie[COOKIE_BUFFER_SIZE];
2761			uint16_t padding = 0;
2762
2763			if ((flags & FCTX_ADDRINFO_EDNSOK) != 0 &&
2764			    (query->options & DNS_FETCHOPT_EDNS512) == 0)
2765			{
2766				udpsize = dns_adb_probesize(fctx->adb,
2767							    query->addrinfo,
2768							    fctx->timeouts);
2769				if (udpsize > res->udpsize) {
2770					udpsize = res->udpsize;
2771				}
2772			}
2773
2774			if (peer != NULL) {
2775				(void)dns_peer_getudpsize(peer, &udpsize);
2776			}
2777
2778			if (udpsize == 0U && res->udpsize == 512U) {
2779				udpsize = 512;
2780			}
2781
2782			/*
2783			 * Was the size forced to 512 in the configuration?
2784			 */
2785			if (udpsize == 512U) {
2786				query->options |= DNS_FETCHOPT_EDNS512;
2787			}
2788
2789			/*
2790			 * We have talked to this server before.
2791			 */
2792			if (hint != 0U) {
2793				udpsize = hint;
2794			}
2795
2796			/*
2797			 * We know nothing about the peer's capabilities
2798			 * so start with minimal EDNS UDP size.
2799			 */
2800			if (udpsize == 0U) {
2801				udpsize = 512;
2802			}
2803
2804			if ((flags & DNS_FETCHOPT_EDNSVERSIONSET) != 0) {
2805				version = flags & DNS_FETCHOPT_EDNSVERSIONMASK;
2806				version >>= DNS_FETCHOPT_EDNSVERSIONSHIFT;
2807			}
2808
2809			/* Request NSID/COOKIE/VERSION for current peer? */
2810			if (peer != NULL) {
2811				uint8_t ednsversion;
2812				(void)dns_peer_getrequestnsid(peer, &reqnsid);
2813				(void)dns_peer_getsendcookie(peer, &sendcookie);
2814				result = dns_peer_getednsversion(peer,
2815								 &ednsversion);
2816				if (result == ISC_R_SUCCESS &&
2817				    ednsversion < version)
2818				{
2819					version = ednsversion;
2820				}
2821			}
2822			if (NOCOOKIE(query->addrinfo)) {
2823				sendcookie = false;
2824			}
2825			if (reqnsid) {
2826				INSIST(ednsopt < DNS_EDNSOPTIONS);
2827				ednsopts[ednsopt].code = DNS_OPT_NSID;
2828				ednsopts[ednsopt].length = 0;
2829				ednsopts[ednsopt].value = NULL;
2830				ednsopt++;
2831			}
2832			if (sendcookie) {
2833				INSIST(ednsopt < DNS_EDNSOPTIONS);
2834				ednsopts[ednsopt].code = DNS_OPT_COOKIE;
2835				ednsopts[ednsopt].length =
2836					(uint16_t)dns_adb_getcookie(
2837						fctx->adb, query->addrinfo,
2838						cookie, sizeof(cookie));
2839				if (ednsopts[ednsopt].length != 0) {
2840					ednsopts[ednsopt].value = cookie;
2841					inc_stats(
2842						fctx->res,
2843						dns_resstatscounter_cookieout);
2844				} else {
2845					compute_cc(query, cookie,
2846						   CLIENT_COOKIE_SIZE);
2847					ednsopts[ednsopt].value = cookie;
2848					ednsopts[ednsopt].length =
2849						CLIENT_COOKIE_SIZE;
2850					inc_stats(
2851						fctx->res,
2852						dns_resstatscounter_cookienew);
2853				}
2854				ednsopt++;
2855			}
2856
2857			/* Add TCP keepalive option if appropriate */
2858			if ((peer != NULL) && tcp) {
2859				(void)dns_peer_gettcpkeepalive(peer,
2860							       &tcpkeepalive);
2861			}
2862			if (tcpkeepalive) {
2863				INSIST(ednsopt < DNS_EDNSOPTIONS);
2864				ednsopts[ednsopt].code = DNS_OPT_TCP_KEEPALIVE;
2865				ednsopts[ednsopt].length = 0;
2866				ednsopts[ednsopt].value = NULL;
2867				ednsopt++;
2868			}
2869
2870			/* Add PAD for current peer? Require TCP for now */
2871			if ((peer != NULL) && tcp) {
2872				(void)dns_peer_getpadding(peer, &padding);
2873			}
2874			if (padding != 0) {
2875				INSIST(ednsopt < DNS_EDNSOPTIONS);
2876				ednsopts[ednsopt].code = DNS_OPT_PAD;
2877				ednsopts[ednsopt].length = 0;
2878				ednsopt++;
2879				dns_message_setpadding(fctx->qmessage, padding);
2880			}
2881
2882			query->ednsversion = version;
2883			result = fctx_addopt(fctx->qmessage, version, udpsize,
2884					     ednsopts, ednsopt);
2885			if (reqnsid && result == ISC_R_SUCCESS) {
2886				query->options |= DNS_FETCHOPT_WANTNSID;
2887			} else if (result != ISC_R_SUCCESS) {
2888				/*
2889				 * We couldn't add the OPT, but we'll press on.
2890				 * We're not using EDNS0, so set the NOEDNS0
2891				 * bit.
2892				 */
2893				query->options |= DNS_FETCHOPT_NOEDNS0;
2894				query->ednsversion = -1;
2895				udpsize = 0;
2896			}
2897		} else {
2898			/*
2899			 * We know this server doesn't like EDNS0, so we
2900			 * won't use it.  Set the NOEDNS0 bit since we're
2901			 * not using EDNS0.
2902			 */
2903			query->options |= DNS_FETCHOPT_NOEDNS0;
2904			query->ednsversion = -1;
2905		}
2906	} else {
2907		query->ednsversion = -1;
2908	}
2909
2910	/*
2911	 * Record the UDP EDNS size chosen.
2912	 */
2913	query->udpsize = udpsize;
2914
2915	/*
2916	 * If we need EDNS0 to do this query and aren't using it, we lose.
2917	 */
2918	if (NEEDEDNS0(fctx) && (query->options & DNS_FETCHOPT_NOEDNS0) != 0) {
2919		result = DNS_R_SERVFAIL;
2920		goto cleanup_message;
2921	}
2922
2923	if (udpsize > 512U) {
2924		add_triededns(fctx, &query->addrinfo->sockaddr);
2925	}
2926
2927	if (udpsize == 512U) {
2928		add_triededns512(fctx, &query->addrinfo->sockaddr);
2929	}
2930
2931	/*
2932	 * Clear CD if EDNS is not in use.
2933	 */
2934	if ((query->options & DNS_FETCHOPT_NOEDNS0) != 0) {
2935		fctx->qmessage->flags &= ~DNS_MESSAGEFLAG_CD;
2936	}
2937
2938	/*
2939	 * Add TSIG record tailored to the current recipient.
2940	 */
2941	result = dns_view_getpeertsig(fctx->res->view, &ipaddr, &tsigkey);
2942	if (result != ISC_R_SUCCESS && result != ISC_R_NOTFOUND) {
2943		goto cleanup_message;
2944	}
2945
2946	if (tsigkey != NULL) {
2947		result = dns_message_settsigkey(fctx->qmessage, tsigkey);
2948		dns_tsigkey_detach(&tsigkey);
2949		if (result != ISC_R_SUCCESS) {
2950			goto cleanup_message;
2951		}
2952	}
2953
2954	result = dns_message_rendersection(fctx->qmessage,
2955					   DNS_SECTION_ADDITIONAL, 0);
2956	if (result != ISC_R_SUCCESS) {
2957		goto cleanup_message;
2958	}
2959
2960	result = dns_message_renderend(fctx->qmessage);
2961	if (result != ISC_R_SUCCESS) {
2962		goto cleanup_message;
2963	}
2964
2965#ifdef HAVE_DNSTAP
2966	memset(&zr, 0, sizeof(zr));
2967	isc_buffer_init(&zb, zone, sizeof(zone));
2968	dns_compress_setmethods(&cctx, DNS_COMPRESS_NONE);
2969	result = dns_name_towire(&fctx->domain, &cctx, &zb);
2970	if (result == ISC_R_SUCCESS) {
2971		isc_buffer_usedregion(&zb, &zr);
2972	}
2973#endif /* HAVE_DNSTAP */
2974
2975	dns_compress_invalidate(&cctx);
2976	cleanup_cctx = false;
2977
2978	if (dns_message_gettsigkey(fctx->qmessage) != NULL) {
2979		dns_tsigkey_attach(dns_message_gettsigkey(fctx->qmessage),
2980				   &query->tsigkey);
2981		result = dns_message_getquerytsig(
2982			fctx->qmessage, fctx->res->mctx, &query->tsig);
2983		if (result != ISC_R_SUCCESS) {
2984			goto cleanup_message;
2985		}
2986	}
2987
2988	/*
2989	 * If using TCP, write the length of the message at the beginning
2990	 * of the buffer.
2991	 */
2992	if (tcp) {
2993		isc_buffer_usedregion(&query->buffer, &r);
2994		isc_buffer_putuint16(&tcpbuffer, (uint16_t)r.length);
2995		isc_buffer_add(&tcpbuffer, r.length);
2996	}
2997
2998	/*
2999	 * Log the outgoing packet.
3000	 */
3001	dns_message_logfmtpacket(
3002		fctx->qmessage, "sending packet to", &query->addrinfo->sockaddr,
3003		DNS_LOGCATEGORY_RESOLVER, DNS_LOGMODULE_PACKETS,
3004		&dns_master_style_comment, ISC_LOG_DEBUG(11), fctx->res->mctx);
3005
3006	/*
3007	 * We're now done with the query message.
3008	 */
3009	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
3010
3011	sock = query2sock(query);
3012
3013	/*
3014	 * Send the query!
3015	 */
3016	if (!tcp) {
3017		address = &query->addrinfo->sockaddr;
3018		if (query->exclusivesocket) {
3019			result = isc_socket_connect(sock, address, task,
3020						    resquery_udpconnected,
3021						    query);
3022			if (result != ISC_R_SUCCESS) {
3023				goto cleanup_message;
3024			}
3025			query->connects++;
3026		}
3027	}
3028	isc_buffer_usedregion(buffer, &r);
3029
3030	/*
3031	 * XXXRTH  Make sure we don't send to ourselves!  We should probably
3032	 *		prune out these addresses when we get them from the ADB.
3033	 */
3034	memset(&query->sendevent, 0, sizeof(query->sendevent));
3035	ISC_EVENT_INIT(&query->sendevent, sizeof(query->sendevent), 0, NULL,
3036		       ISC_SOCKEVENT_SENDDONE, resquery_senddone, query, NULL,
3037		       NULL, NULL);
3038
3039	if (query->dscp == -1) {
3040		query->sendevent.attributes &= ~ISC_SOCKEVENTATTR_DSCP;
3041		query->sendevent.dscp = 0;
3042	} else {
3043		query->sendevent.attributes |= ISC_SOCKEVENTATTR_DSCP;
3044		query->sendevent.dscp = query->dscp;
3045		if (tcp) {
3046			isc_socket_dscp(sock, query->dscp);
3047		}
3048	}
3049
3050	result = isc_socket_sendto2(sock, &r, task, address, NULL,
3051				    &query->sendevent, 0);
3052	INSIST(result == ISC_R_SUCCESS);
3053
3054	query->sends++;
3055
3056	QTRACE("sent");
3057
3058#ifdef HAVE_DNSTAP
3059	/*
3060	 * Log the outgoing query via dnstap.
3061	 */
3062	if ((fctx->qmessage->flags & DNS_MESSAGEFLAG_RD) != 0) {
3063		dtmsgtype = DNS_DTTYPE_FQ;
3064	} else {
3065		dtmsgtype = DNS_DTTYPE_RQ;
3066	}
3067
3068	result = isc_socket_getsockname(sock, &localaddr);
3069	if (result == ISC_R_SUCCESS) {
3070		la = &localaddr;
3071	}
3072
3073	dns_dt_send(fctx->res->view, dtmsgtype, la, &query->addrinfo->sockaddr,
3074		    tcp, &zr, &query->start, NULL, &query->buffer);
3075#endif /* HAVE_DNSTAP */
3076
3077	return (ISC_R_SUCCESS);
3078
3079cleanup_message:
3080	if (cleanup_cctx) {
3081		dns_compress_invalidate(&cctx);
3082	}
3083
3084	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
3085
3086	/*
3087	 * Stop the dispatcher from listening.
3088	 */
3089	dns_dispatch_removeresponse(&query->dispentry, NULL);
3090
3091cleanup_temps:
3092	if (qname != NULL) {
3093		dns_message_puttempname(fctx->qmessage, &qname);
3094	}
3095	if (qrdataset != NULL) {
3096		dns_message_puttemprdataset(fctx->qmessage, &qrdataset);
3097	}
3098
3099	return (result);
3100}
3101
3102static void
3103resquery_connected(isc_task_t *task, isc_event_t *event) {
3104	isc_socketevent_t *sevent = (isc_socketevent_t *)event;
3105	resquery_t *query = event->ev_arg;
3106	bool retry = false;
3107	isc_interval_t interval;
3108	isc_result_t result;
3109	unsigned int attrs;
3110	fetchctx_t *fctx;
3111
3112	REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT);
3113	REQUIRE(VALID_QUERY(query));
3114
3115	QTRACE("connected");
3116
3117	UNUSED(task);
3118
3119	/*
3120	 * XXXRTH
3121	 *
3122	 * Currently we don't wait for the connect event before retrying
3123	 * a query.  This means that if we get really behind, we may end
3124	 * up doing extra work!
3125	 */
3126
3127	query->connects--;
3128	fctx = query->fctx;
3129
3130	if (RESQUERY_CANCELED(query)) {
3131		/*
3132		 * This query was canceled while the connect() was in
3133		 * progress.
3134		 */
3135		isc_socket_detach(&query->tcpsocket);
3136		resquery_destroy(&query);
3137	} else {
3138		switch (sevent->result) {
3139		case ISC_R_SUCCESS:
3140
3141			/*
3142			 * Extend the idle timer for TCP.  Half of
3143			 * "resolver-query-timeout" will hopefully be long
3144			 * enough for a TCP connection to be established, a
3145			 * single DNS request to be sent, and the response
3146			 * received.
3147			 */
3148			isc_interval_set(&interval,
3149					 fctx->res->query_timeout / 1000 / 2,
3150					 0);
3151			result = fctx_startidletimer(query->fctx, &interval);
3152			if (result != ISC_R_SUCCESS) {
3153				FCTXTRACE("query canceled: idle timer failed; "
3154					  "responding");
3155
3156				fctx_cancelquery(&query, NULL, NULL, false,
3157						 false);
3158				fctx_done(fctx, result, __LINE__);
3159				break;
3160			}
3161			/*
3162			 * We are connected.  Create a dispatcher and
3163			 * send the query.
3164			 */
3165			attrs = 0;
3166			attrs |= DNS_DISPATCHATTR_TCP;
3167			attrs |= DNS_DISPATCHATTR_PRIVATE;
3168			attrs |= DNS_DISPATCHATTR_CONNECTED;
3169			if (isc_sockaddr_pf(&query->addrinfo->sockaddr) ==
3170			    AF_INET)
3171			{
3172				attrs |= DNS_DISPATCHATTR_IPV4;
3173			} else {
3174				attrs |= DNS_DISPATCHATTR_IPV6;
3175			}
3176			attrs |= DNS_DISPATCHATTR_MAKEQUERY;
3177
3178			result = dns_dispatch_createtcp(
3179				query->dispatchmgr, query->tcpsocket,
3180				query->fctx->res->taskmgr, NULL, NULL, 4096, 2,
3181				1, 1, 3, attrs, &query->dispatch);
3182
3183			/*
3184			 * Regardless of whether dns_dispatch_create()
3185			 * succeeded or not, we don't need our reference
3186			 * to the socket anymore.
3187			 */
3188			isc_socket_detach(&query->tcpsocket);
3189
3190			if (result == ISC_R_SUCCESS) {
3191				result = resquery_send(query);
3192			}
3193
3194			if (result != ISC_R_SUCCESS) {
3195				FCTXTRACE("query canceled: "
3196					  "resquery_send() failed; responding");
3197
3198				fctx_cancelquery(&query, NULL, NULL, false,
3199						 false);
3200				fctx_done(fctx, result, __LINE__);
3201			}
3202			break;
3203
3204		case ISC_R_NETUNREACH:
3205		case ISC_R_HOSTUNREACH:
3206		case ISC_R_CONNREFUSED:
3207		case ISC_R_NOPERM:
3208		case ISC_R_ADDRNOTAVAIL:
3209		case ISC_R_CONNECTIONRESET:
3210			FCTXTRACE3("query canceled in connected(): "
3211				   "no route to host; no response",
3212				   sevent->result);
3213
3214			/*
3215			 * No route to remote.
3216			 */
3217			isc_socket_detach(&query->tcpsocket);
3218			/*
3219			 * Do not query this server again in this fetch context
3220			 * if we already tried reducing the advertised EDNS UDP
3221			 * payload size to 512 bytes and the server is
3222			 * unavailable over TCP.  This prevents query loops
3223			 * lasting until the fetch context restart limit is
3224			 * reached when attempting to get answers whose size
3225			 * exceeds 512 bytes from broken servers.
3226			 */
3227			if ((query->options & DNS_FETCHOPT_EDNS512) != 0) {
3228				add_bad(fctx, query->rmessage, query->addrinfo,
3229					sevent->result, badns_unreachable);
3230			}
3231			fctx_cancelquery(&query, NULL, NULL, true, false);
3232			retry = true;
3233			break;
3234
3235		default:
3236			FCTXTRACE3("query canceled in connected() due to "
3237				   "unexpected event result; responding",
3238				   sevent->result);
3239
3240			isc_socket_detach(&query->tcpsocket);
3241			fctx_cancelquery(&query, NULL, NULL, false, false);
3242			break;
3243		}
3244	}
3245
3246	isc_event_free(&event);
3247
3248	if (retry) {
3249		/*
3250		 * Behave as if the idle timer has expired.  For TCP
3251		 * connections this may not actually reflect the latest timer.
3252		 */
3253		FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
3254		result = fctx_stopidletimer(fctx);
3255		if (result != ISC_R_SUCCESS) {
3256			fctx_done(fctx, result, __LINE__);
3257		} else {
3258			fctx_try(fctx, true, false);
3259		}
3260	}
3261}
3262
3263static void
3264fctx_finddone(isc_task_t *task, isc_event_t *event) {
3265	fetchctx_t *fctx;
3266	dns_adbfind_t *find;
3267	dns_resolver_t *res;
3268	bool want_try = false;
3269	bool want_done = false;
3270	bool bucket_empty = false;
3271	unsigned int bucketnum;
3272	bool dodestroy = false;
3273
3274	find = event->ev_sender;
3275	fctx = event->ev_arg;
3276	REQUIRE(VALID_FCTX(fctx));
3277	res = fctx->res;
3278
3279	UNUSED(task);
3280
3281	FCTXTRACE("finddone");
3282
3283	bucketnum = fctx->bucketnum;
3284	LOCK(&res->buckets[bucketnum].lock);
3285
3286	INSIST(fctx->pending > 0);
3287	fctx->pending--;
3288
3289	if (ADDRWAIT(fctx)) {
3290		/*
3291		 * The fetch is waiting for a name to be found.
3292		 */
3293		INSIST(!SHUTTINGDOWN(fctx));
3294		if (event->ev_type == DNS_EVENT_ADBMOREADDRESSES) {
3295			FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
3296			want_try = true;
3297		} else {
3298			fctx->findfail++;
3299			if (fctx->pending == 0) {
3300				/*
3301				 * We've got nothing else to wait for and don't
3302				 * know the answer.  There's nothing to do but
3303				 * fail the fctx.
3304				 */
3305				FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
3306				want_done = true;
3307			}
3308		}
3309	} else if (SHUTTINGDOWN(fctx) && fctx->pending == 0 &&
3310		   fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators))
3311	{
3312		if (isc_refcount_current(&fctx->references) == 0) {
3313			bucket_empty = fctx_unlink(fctx);
3314			dodestroy = true;
3315		}
3316	}
3317	UNLOCK(&res->buckets[bucketnum].lock);
3318
3319	isc_event_free(&event);
3320	dns_adb_destroyfind(&find);
3321
3322	if (want_try) {
3323		fctx_try(fctx, true, false);
3324	} else if (want_done) {
3325		FCTXTRACE("fetch failed in finddone(); return ISC_R_FAILURE");
3326		fctx_done(fctx, ISC_R_FAILURE, __LINE__);
3327	} else if (dodestroy) {
3328		fctx_destroy(fctx);
3329		if (bucket_empty) {
3330			empty_bucket(res);
3331		}
3332	}
3333}
3334
3335static bool
3336bad_server(fetchctx_t *fctx, isc_sockaddr_t *address) {
3337	isc_sockaddr_t *sa;
3338
3339	for (sa = ISC_LIST_HEAD(fctx->bad); sa != NULL;
3340	     sa = ISC_LIST_NEXT(sa, link))
3341	{
3342		if (isc_sockaddr_equal(sa, address)) {
3343			return (true);
3344		}
3345	}
3346
3347	return (false);
3348}
3349
3350static bool
3351mark_bad(fetchctx_t *fctx) {
3352	dns_adbfind_t *curr;
3353	dns_adbaddrinfo_t *addrinfo;
3354	bool all_bad = true;
3355
3356#ifdef ENABLE_AFL
3357	if (dns_fuzzing_resolver) {
3358		return (false);
3359	}
3360#endif /* ifdef ENABLE_AFL */
3361
3362	/*
3363	 * Mark all known bad servers, so we don't try to talk to them
3364	 * again.
3365	 */
3366
3367	/*
3368	 * Mark any bad nameservers.
3369	 */
3370	for (curr = ISC_LIST_HEAD(fctx->finds); curr != NULL;
3371	     curr = ISC_LIST_NEXT(curr, publink))
3372	{
3373		for (addrinfo = ISC_LIST_HEAD(curr->list); addrinfo != NULL;
3374		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
3375		{
3376			if (bad_server(fctx, &addrinfo->sockaddr)) {
3377				addrinfo->flags |= FCTX_ADDRINFO_MARK;
3378			} else {
3379				all_bad = false;
3380			}
3381		}
3382	}
3383
3384	/*
3385	 * Mark any bad forwarders.
3386	 */
3387	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs); addrinfo != NULL;
3388	     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
3389	{
3390		if (bad_server(fctx, &addrinfo->sockaddr)) {
3391			addrinfo->flags |= FCTX_ADDRINFO_MARK;
3392		} else {
3393			all_bad = false;
3394		}
3395	}
3396
3397	/*
3398	 * Mark any bad alternates.
3399	 */
3400	for (curr = ISC_LIST_HEAD(fctx->altfinds); curr != NULL;
3401	     curr = ISC_LIST_NEXT(curr, publink))
3402	{
3403		for (addrinfo = ISC_LIST_HEAD(curr->list); addrinfo != NULL;
3404		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
3405		{
3406			if (bad_server(fctx, &addrinfo->sockaddr)) {
3407				addrinfo->flags |= FCTX_ADDRINFO_MARK;
3408			} else {
3409				all_bad = false;
3410			}
3411		}
3412	}
3413
3414	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs); addrinfo != NULL;
3415	     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
3416	{
3417		if (bad_server(fctx, &addrinfo->sockaddr)) {
3418			addrinfo->flags |= FCTX_ADDRINFO_MARK;
3419		} else {
3420			all_bad = false;
3421		}
3422	}
3423
3424	return (all_bad);
3425}
3426
3427static void
3428add_bad(fetchctx_t *fctx, dns_message_t *rmessage, dns_adbaddrinfo_t *addrinfo,
3429	isc_result_t reason, badnstype_t badtype) {
3430	char namebuf[DNS_NAME_FORMATSIZE];
3431	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3432	char classbuf[64];
3433	char typebuf[64];
3434	char code[64];
3435	isc_buffer_t b;
3436	isc_sockaddr_t *sa;
3437	const char *spc = "";
3438	isc_sockaddr_t *address = &addrinfo->sockaddr;
3439
3440#ifdef ENABLE_AFL
3441	if (dns_fuzzing_resolver) {
3442		return;
3443	}
3444#endif /* ifdef ENABLE_AFL */
3445
3446	if (reason == DNS_R_LAME) {
3447		fctx->lamecount++;
3448	} else {
3449		switch (badtype) {
3450		case badns_unreachable:
3451			fctx->neterr++;
3452			break;
3453		case badns_response:
3454			fctx->badresp++;
3455			break;
3456		case badns_validation:
3457			break; /* counted as 'valfail' */
3458		case badns_forwarder:
3459			/*
3460			 * We were called to prevent the given forwarder from
3461			 * being used again for this fetch context.
3462			 */
3463			break;
3464		}
3465	}
3466
3467	if (bad_server(fctx, address)) {
3468		/*
3469		 * We already know this server is bad.
3470		 */
3471		return;
3472	}
3473
3474	FCTXTRACE("add_bad");
3475
3476	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
3477	*sa = *address;
3478	ISC_LIST_INITANDAPPEND(fctx->bad, sa, link);
3479
3480	if (reason == DNS_R_LAME) { /* already logged */
3481		return;
3482	}
3483
3484	if (reason == DNS_R_UNEXPECTEDRCODE &&
3485	    rmessage->rcode == dns_rcode_servfail && ISFORWARDER(addrinfo))
3486	{
3487		return;
3488	}
3489
3490	if (reason == DNS_R_UNEXPECTEDRCODE) {
3491		isc_buffer_init(&b, code, sizeof(code) - 1);
3492		dns_rcode_totext(rmessage->rcode, &b);
3493		code[isc_buffer_usedlength(&b)] = '\0';
3494		spc = " ";
3495	} else if (reason == DNS_R_UNEXPECTEDOPCODE) {
3496		isc_buffer_init(&b, code, sizeof(code) - 1);
3497		dns_opcode_totext((dns_opcode_t)rmessage->opcode, &b);
3498		code[isc_buffer_usedlength(&b)] = '\0';
3499		spc = " ";
3500	} else {
3501		code[0] = '\0';
3502	}
3503	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
3504	dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
3505	dns_rdataclass_format(fctx->res->rdclass, classbuf, sizeof(classbuf));
3506	isc_sockaddr_format(address, addrbuf, sizeof(addrbuf));
3507	isc_log_write(
3508		dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS, DNS_LOGMODULE_RESOLVER,
3509		ISC_LOG_INFO, "%s%s%s resolving '%s/%s/%s': %s", code, spc,
3510		dns_result_totext(reason), namebuf, typebuf, classbuf, addrbuf);
3511}
3512
3513/*
3514 * Sort addrinfo list by RTT.
3515 */
3516static void
3517sort_adbfind(dns_adbfind_t *find, unsigned int bias) {
3518	dns_adbaddrinfo_t *best, *curr;
3519	dns_adbaddrinfolist_t sorted;
3520	unsigned int best_srtt, curr_srtt;
3521
3522	/* Lame N^2 bubble sort. */
3523	ISC_LIST_INIT(sorted);
3524	while (!ISC_LIST_EMPTY(find->list)) {
3525		best = ISC_LIST_HEAD(find->list);
3526		best_srtt = best->srtt;
3527		if (isc_sockaddr_pf(&best->sockaddr) != AF_INET6) {
3528			best_srtt += bias;
3529		}
3530		curr = ISC_LIST_NEXT(best, publink);
3531		while (curr != NULL) {
3532			curr_srtt = curr->srtt;
3533			if (isc_sockaddr_pf(&curr->sockaddr) != AF_INET6) {
3534				curr_srtt += bias;
3535			}
3536			if (curr_srtt < best_srtt) {
3537				best = curr;
3538				best_srtt = curr_srtt;
3539			}
3540			curr = ISC_LIST_NEXT(curr, publink);
3541		}
3542		ISC_LIST_UNLINK(find->list, best, publink);
3543		ISC_LIST_APPEND(sorted, best, publink);
3544	}
3545	find->list = sorted;
3546}
3547
3548/*
3549 * Sort a list of finds by server RTT.
3550 */
3551static void
3552sort_finds(dns_adbfindlist_t *findlist, unsigned int bias) {
3553	dns_adbfind_t *best, *curr;
3554	dns_adbfindlist_t sorted;
3555	dns_adbaddrinfo_t *addrinfo, *bestaddrinfo;
3556	unsigned int best_srtt, curr_srtt;
3557
3558	/* Sort each find's addrinfo list by SRTT. */
3559	for (curr = ISC_LIST_HEAD(*findlist); curr != NULL;
3560	     curr = ISC_LIST_NEXT(curr, publink))
3561	{
3562		sort_adbfind(curr, bias);
3563	}
3564
3565	/* Lame N^2 bubble sort. */
3566	ISC_LIST_INIT(sorted);
3567	while (!ISC_LIST_EMPTY(*findlist)) {
3568		best = ISC_LIST_HEAD(*findlist);
3569		bestaddrinfo = ISC_LIST_HEAD(best->list);
3570		INSIST(bestaddrinfo != NULL);
3571		best_srtt = bestaddrinfo->srtt;
3572		if (isc_sockaddr_pf(&bestaddrinfo->sockaddr) != AF_INET6) {
3573			best_srtt += bias;
3574		}
3575		curr = ISC_LIST_NEXT(best, publink);
3576		while (curr != NULL) {
3577			addrinfo = ISC_LIST_HEAD(curr->list);
3578			INSIST(addrinfo != NULL);
3579			curr_srtt = addrinfo->srtt;
3580			if (isc_sockaddr_pf(&addrinfo->sockaddr) != AF_INET6) {
3581				curr_srtt += bias;
3582			}
3583			if (curr_srtt < best_srtt) {
3584				best = curr;
3585				best_srtt = curr_srtt;
3586			}
3587			curr = ISC_LIST_NEXT(curr, publink);
3588		}
3589		ISC_LIST_UNLINK(*findlist, best, publink);
3590		ISC_LIST_APPEND(sorted, best, publink);
3591	}
3592	*findlist = sorted;
3593}
3594
3595static void
3596findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port,
3597	 unsigned int options, unsigned int flags, isc_stdtime_t now,
3598	 bool *overquota, bool *need_alternate, unsigned int *no_addresses) {
3599	dns_adbaddrinfo_t *ai;
3600	dns_adbfind_t *find;
3601	dns_resolver_t *res;
3602	bool unshared;
3603	isc_result_t result;
3604
3605	FCTXTRACE("FINDNAME");
3606	res = fctx->res;
3607	unshared = ((fctx->options & DNS_FETCHOPT_UNSHARED) != 0);
3608	/*
3609	 * If this name is a subdomain of the query domain, tell
3610	 * the ADB to start looking using zone/hint data. This keeps us
3611	 * from getting stuck if the nameserver is beneath the zone cut
3612	 * and we don't know its address (e.g. because the A record has
3613	 * expired).
3614	 */
3615	if (dns_name_issubdomain(name, &fctx->domain)) {
3616		options |= DNS_ADBFIND_STARTATZONE;
3617	}
3618	options |= DNS_ADBFIND_GLUEOK;
3619	options |= DNS_ADBFIND_HINTOK;
3620
3621	/*
3622	 * See what we know about this address.
3623	 */
3624	find = NULL;
3625	result = dns_adb_createfind(
3626		fctx->adb, res->buckets[fctx->bucketnum].task, fctx_finddone,
3627		fctx, name, &fctx->name, fctx->type, options, now, NULL,
3628		res->view->dstport, fctx->depth + 1, fctx->qc, &find);
3629
3630	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3631		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
3632		      "fctx %p(%s): createfind for %s/%d - %s", fctx,
3633		      fctx->info, fctx->clientstr, fctx->id,
3634		      isc_result_totext(result));
3635
3636	if (result != ISC_R_SUCCESS) {
3637		if (result == DNS_R_ALIAS) {
3638			char namebuf[DNS_NAME_FORMATSIZE];
3639
3640			/*
3641			 * XXXRTH  Follow the CNAME/DNAME chain?
3642			 */
3643			dns_adb_destroyfind(&find);
3644			fctx->adberr++;
3645			dns_name_format(name, namebuf, sizeof(namebuf));
3646			isc_log_write(dns_lctx, DNS_LOGCATEGORY_CNAME,
3647				      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
3648				      "skipping nameserver '%s' because it "
3649				      "is a CNAME, while resolving '%s'",
3650				      namebuf, fctx->info);
3651		}
3652	} else if (!ISC_LIST_EMPTY(find->list)) {
3653		/*
3654		 * We have at least some of the addresses for the
3655		 * name.
3656		 */
3657		INSIST((find->options & DNS_ADBFIND_WANTEVENT) == 0);
3658		if (flags != 0 || port != 0) {
3659			for (ai = ISC_LIST_HEAD(find->list); ai != NULL;
3660			     ai = ISC_LIST_NEXT(ai, publink))
3661			{
3662				ai->flags |= flags;
3663				if (port != 0) {
3664					isc_sockaddr_setport(&ai->sockaddr,
3665							     port);
3666				}
3667			}
3668		}
3669		if ((flags & FCTX_ADDRINFO_DUALSTACK) != 0) {
3670			ISC_LIST_APPEND(fctx->altfinds, find, publink);
3671		} else {
3672			ISC_LIST_APPEND(fctx->finds, find, publink);
3673		}
3674	} else {
3675		/*
3676		 * We don't know any of the addresses for this
3677		 * name.
3678		 */
3679		if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) {
3680			/*
3681			 * We're looking for them and will get an
3682			 * event about it later.
3683			 */
3684			fctx->pending++;
3685			/*
3686			 * Bootstrap.
3687			 */
3688			if (need_alternate != NULL && !*need_alternate &&
3689			    unshared &&
3690			    ((res->dispatches4 == NULL &&
3691			      find->result_v6 != DNS_R_NXDOMAIN) ||
3692			     (res->dispatches6 == NULL &&
3693			      find->result_v4 != DNS_R_NXDOMAIN)))
3694			{
3695				*need_alternate = true;
3696			}
3697			if (no_addresses != NULL) {
3698				(*no_addresses)++;
3699			}
3700		} else {
3701			if ((find->options & DNS_ADBFIND_OVERQUOTA) != 0) {
3702				if (overquota != NULL) {
3703					*overquota = true;
3704				}
3705				fctx->quotacount++; /* quota exceeded */
3706			} else if ((find->options & DNS_ADBFIND_LAMEPRUNED) !=
3707				   0)
3708			{
3709				fctx->lamecount++; /* cached lame server */
3710			} else {
3711				fctx->adberr++; /* unreachable server, etc. */
3712			}
3713
3714			/*
3715			 * If we know there are no addresses for
3716			 * the family we are using then try to add
3717			 * an alternative server.
3718			 */
3719			if (need_alternate != NULL && !*need_alternate &&
3720			    ((res->dispatches4 == NULL &&
3721			      find->result_v6 == DNS_R_NXRRSET) ||
3722			     (res->dispatches6 == NULL &&
3723			      find->result_v4 == DNS_R_NXRRSET)))
3724			{
3725				*need_alternate = true;
3726			}
3727			dns_adb_destroyfind(&find);
3728		}
3729	}
3730}
3731
3732static bool
3733isstrictsubdomain(const dns_name_t *name1, const dns_name_t *name2) {
3734	int order;
3735	unsigned int nlabels;
3736	dns_namereln_t namereln;
3737
3738	namereln = dns_name_fullcompare(name1, name2, &order, &nlabels);
3739	return (namereln == dns_namereln_subdomain);
3740}
3741
3742static isc_result_t
3743fctx_getaddresses(fetchctx_t *fctx, bool badcache) {
3744	dns_rdata_t rdata = DNS_RDATA_INIT;
3745	isc_result_t result;
3746	dns_resolver_t *res;
3747	isc_stdtime_t now;
3748	unsigned int stdoptions = 0;
3749	dns_forwarder_t *fwd;
3750	dns_adbaddrinfo_t *ai;
3751	bool all_bad;
3752	dns_rdata_ns_t ns;
3753	bool need_alternate = false;
3754	bool all_spilled = true;
3755	unsigned int no_addresses = 0;
3756	unsigned int ns_processed = 0;
3757
3758	FCTXTRACE5("getaddresses", "fctx->depth=", fctx->depth);
3759
3760	/*
3761	 * Don't pound on remote servers.  (Failsafe!)
3762	 */
3763	fctx->restarts++;
3764	if (fctx->restarts > 100) {
3765		FCTXTRACE("too many restarts");
3766		return (DNS_R_SERVFAIL);
3767	}
3768
3769	res = fctx->res;
3770
3771	if (fctx->depth > res->maxdepth) {
3772		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3773			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
3774			      "too much NS indirection resolving '%s' "
3775			      "(depth=%u, maxdepth=%u)",
3776			      fctx->info, fctx->depth, res->maxdepth);
3777		return (DNS_R_SERVFAIL);
3778	}
3779
3780	/*
3781	 * Forwarders.
3782	 */
3783
3784	INSIST(ISC_LIST_EMPTY(fctx->forwaddrs));
3785	INSIST(ISC_LIST_EMPTY(fctx->altaddrs));
3786
3787	/*
3788	 * If we have DNS_FETCHOPT_NOFORWARD set and forwarding policy
3789	 * allows us to not forward - skip forwarders and go straight
3790	 * to NSes. This is currently used to make sure that priming query
3791	 * gets root servers' IP addresses in ADDITIONAL section.
3792	 */
3793	if ((fctx->options & DNS_FETCHOPT_NOFORWARD) != 0 &&
3794	    (fctx->fwdpolicy != dns_fwdpolicy_only))
3795	{
3796		goto normal_nses;
3797	}
3798
3799	/*
3800	 * If this fctx has forwarders, use them; otherwise use any
3801	 * selective forwarders specified in the view; otherwise use the
3802	 * resolver's forwarders (if any).
3803	 */
3804	fwd = ISC_LIST_HEAD(fctx->forwarders);
3805	if (fwd == NULL) {
3806		dns_forwarders_t *forwarders = NULL;
3807		dns_name_t *name = &fctx->name;
3808		dns_name_t suffix;
3809		unsigned int labels;
3810		dns_fixedname_t fixed;
3811		dns_name_t *domain;
3812
3813		/*
3814		 * DS records are found in the parent server.
3815		 * Strip label to get the correct forwarder (if any).
3816		 */
3817		if (dns_rdatatype_atparent(fctx->type) &&
3818		    dns_name_countlabels(name) > 1)
3819		{
3820			dns_name_init(&suffix, NULL);
3821			labels = dns_name_countlabels(name);
3822			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
3823			name = &suffix;
3824		}
3825
3826		domain = dns_fixedname_initname(&fixed);
3827		result = dns_fwdtable_find(res->view->fwdtable, name, domain,
3828					   &forwarders);
3829		if (result == ISC_R_SUCCESS) {
3830			fwd = ISC_LIST_HEAD(forwarders->fwdrs);
3831			fctx->fwdpolicy = forwarders->fwdpolicy;
3832			dns_name_copynf(domain, fctx->fwdname);
3833			if (fctx->fwdpolicy == dns_fwdpolicy_only &&
3834			    isstrictsubdomain(domain, &fctx->domain))
3835			{
3836				fcount_decr(fctx);
3837				dns_name_free(&fctx->domain, fctx->mctx);
3838				dns_name_init(&fctx->domain, NULL);
3839				dns_name_dup(domain, fctx->mctx, &fctx->domain);
3840				result = fcount_incr(fctx, true);
3841				if (result != ISC_R_SUCCESS) {
3842					return (result);
3843				}
3844			}
3845		}
3846	}
3847
3848	while (fwd != NULL) {
3849		if ((isc_sockaddr_pf(&fwd->addr) == AF_INET &&
3850		     res->dispatches4 == NULL) ||
3851		    (isc_sockaddr_pf(&fwd->addr) == AF_INET6 &&
3852		     res->dispatches6 == NULL))
3853		{
3854			fwd = ISC_LIST_NEXT(fwd, link);
3855			continue;
3856		}
3857		ai = NULL;
3858		result = dns_adb_findaddrinfo(fctx->adb, &fwd->addr, &ai, 0);
3859		if (result == ISC_R_SUCCESS) {
3860			dns_adbaddrinfo_t *cur;
3861			ai->flags |= FCTX_ADDRINFO_FORWARDER;
3862			ai->dscp = fwd->dscp;
3863			cur = ISC_LIST_HEAD(fctx->forwaddrs);
3864			while (cur != NULL && cur->srtt < ai->srtt) {
3865				cur = ISC_LIST_NEXT(cur, publink);
3866			}
3867			if (cur != NULL) {
3868				ISC_LIST_INSERTBEFORE(fctx->forwaddrs, cur, ai,
3869						      publink);
3870			} else {
3871				ISC_LIST_APPEND(fctx->forwaddrs, ai, publink);
3872			}
3873		}
3874		fwd = ISC_LIST_NEXT(fwd, link);
3875	}
3876
3877	/*
3878	 * If the forwarding policy is "only", we don't need the addresses
3879	 * of the nameservers.
3880	 */
3881	if (fctx->fwdpolicy == dns_fwdpolicy_only) {
3882		goto out;
3883	}
3884
3885	/*
3886	 * Normal nameservers.
3887	 */
3888normal_nses:
3889	stdoptions = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_EMPTYEVENT;
3890	if (fctx->restarts == 1) {
3891		/*
3892		 * To avoid sending out a flood of queries likely to
3893		 * result in NXRRSET, we suppress fetches for address
3894		 * families we don't have the first time through,
3895		 * provided that we have addresses in some family we
3896		 * can use.
3897		 *
3898		 * We don't want to set this option all the time, since
3899		 * if fctx->restarts > 1, we've clearly been having trouble
3900		 * with the addresses we had, so getting more could help.
3901		 */
3902		stdoptions |= DNS_ADBFIND_AVOIDFETCHES;
3903	}
3904	if (res->dispatches4 != NULL) {
3905		stdoptions |= DNS_ADBFIND_INET;
3906	}
3907	if (res->dispatches6 != NULL) {
3908		stdoptions |= DNS_ADBFIND_INET6;
3909	}
3910
3911	if ((stdoptions & DNS_ADBFIND_ADDRESSMASK) == 0) {
3912		return (DNS_R_SERVFAIL);
3913	}
3914
3915	isc_stdtime_get(&now);
3916
3917	INSIST(ISC_LIST_EMPTY(fctx->finds));
3918	INSIST(ISC_LIST_EMPTY(fctx->altfinds));
3919
3920	for (result = dns_rdataset_first(&fctx->nameservers);
3921	     result == ISC_R_SUCCESS;
3922	     result = dns_rdataset_next(&fctx->nameservers))
3923	{
3924		bool overquota = false;
3925
3926		dns_rdataset_current(&fctx->nameservers, &rdata);
3927		/*
3928		 * Extract the name from the NS record.
3929		 */
3930		result = dns_rdata_tostruct(&rdata, &ns, NULL);
3931		if (result != ISC_R_SUCCESS) {
3932			continue;
3933		}
3934
3935		if (no_addresses > NS_FAIL_LIMIT &&
3936		    dns_rdataset_count(&fctx->nameservers) > NS_RR_LIMIT)
3937		{
3938			stdoptions |= DNS_ADBFIND_NOFETCH;
3939		}
3940		findname(fctx, &ns.name, 0, stdoptions, 0, now, &overquota,
3941			 &need_alternate, &no_addresses);
3942
3943		if (!overquota) {
3944			all_spilled = false;
3945		}
3946
3947		dns_rdata_reset(&rdata);
3948		dns_rdata_freestruct(&ns);
3949
3950		if (++ns_processed >= NS_PROCESSING_LIMIT) {
3951			result = ISC_R_NOMORE;
3952			break;
3953		}
3954	}
3955	if (result != ISC_R_NOMORE) {
3956		return (result);
3957	}
3958
3959	/*
3960	 * Do we need to use 6 to 4?
3961	 */
3962	if (need_alternate) {
3963		int family;
3964		alternate_t *a;
3965		family = (res->dispatches6 != NULL) ? AF_INET6 : AF_INET;
3966		for (a = ISC_LIST_HEAD(res->alternates); a != NULL;
3967		     a = ISC_LIST_NEXT(a, link))
3968		{
3969			if (!a->isaddress) {
3970				findname(fctx, &a->_u._n.name, a->_u._n.port,
3971					 stdoptions, FCTX_ADDRINFO_DUALSTACK,
3972					 now, NULL, NULL, NULL);
3973				continue;
3974			}
3975			if (isc_sockaddr_pf(&a->_u.addr) != family) {
3976				continue;
3977			}
3978			ai = NULL;
3979			result = dns_adb_findaddrinfo(fctx->adb, &a->_u.addr,
3980						      &ai, 0);
3981			if (result == ISC_R_SUCCESS) {
3982				dns_adbaddrinfo_t *cur;
3983				ai->flags |= FCTX_ADDRINFO_FORWARDER;
3984				ai->flags |= FCTX_ADDRINFO_DUALSTACK;
3985				cur = ISC_LIST_HEAD(fctx->altaddrs);
3986				while (cur != NULL && cur->srtt < ai->srtt) {
3987					cur = ISC_LIST_NEXT(cur, publink);
3988				}
3989				if (cur != NULL) {
3990					ISC_LIST_INSERTBEFORE(fctx->altaddrs,
3991							      cur, ai, publink);
3992				} else {
3993					ISC_LIST_APPEND(fctx->altaddrs, ai,
3994							publink);
3995				}
3996			}
3997		}
3998	}
3999
4000out:
4001	/*
4002	 * Mark all known bad servers.
4003	 */
4004	all_bad = mark_bad(fctx);
4005
4006	/*
4007	 * How are we doing?
4008	 */
4009	if (all_bad) {
4010		/*
4011		 * We've got no addresses.
4012		 */
4013		if (fctx->pending > 0) {
4014			/*
4015			 * We're fetching the addresses, but don't have any
4016			 * yet.   Tell the caller to wait for an answer.
4017			 */
4018			result = DNS_R_WAIT;
4019		} else {
4020			isc_time_t expire;
4021			isc_interval_t i;
4022			/*
4023			 * We've lost completely.  We don't know any
4024			 * addresses, and the ADB has told us it can't get
4025			 * them.
4026			 */
4027			FCTXTRACE("no addresses");
4028			isc_interval_set(&i, DNS_RESOLVER_BADCACHETTL(fctx), 0);
4029			result = isc_time_nowplusinterval(&expire, &i);
4030			if (badcache &&
4031			    (fctx->type == dns_rdatatype_dnskey ||
4032			     fctx->type == dns_rdatatype_ds) &&
4033			    result == ISC_R_SUCCESS)
4034			{
4035				dns_resolver_addbadcache(res, &fctx->name,
4036							 fctx->type, &expire);
4037			}
4038
4039			result = ISC_R_FAILURE;
4040
4041			/*
4042			 * If all of the addresses found were over the
4043			 * fetches-per-server quota, return the configured
4044			 * response.
4045			 */
4046			if (all_spilled) {
4047				result = res->quotaresp[dns_quotatype_server];
4048				inc_stats(res, dns_resstatscounter_serverquota);
4049			}
4050		}
4051	} else {
4052		/*
4053		 * We've found some addresses.  We might still be looking
4054		 * for more addresses.
4055		 */
4056		sort_finds(&fctx->finds, res->view->v6bias);
4057		sort_finds(&fctx->altfinds, 0);
4058		result = ISC_R_SUCCESS;
4059	}
4060
4061	return (result);
4062}
4063
4064static void
4065possibly_mark(fetchctx_t *fctx, dns_adbaddrinfo_t *addr) {
4066	isc_netaddr_t na;
4067	char buf[ISC_NETADDR_FORMATSIZE];
4068	isc_sockaddr_t *sa;
4069	bool aborted = false;
4070	bool bogus;
4071	dns_acl_t *blackhole;
4072	isc_netaddr_t ipaddr;
4073	dns_peer_t *peer = NULL;
4074	dns_resolver_t *res;
4075	const char *msg = NULL;
4076
4077	sa = &addr->sockaddr;
4078
4079	res = fctx->res;
4080	isc_netaddr_fromsockaddr(&ipaddr, sa);
4081	blackhole = dns_dispatchmgr_getblackhole(res->dispatchmgr);
4082	(void)dns_peerlist_peerbyaddr(res->view->peers, &ipaddr, &peer);
4083
4084	if (blackhole != NULL) {
4085		int match;
4086
4087		if ((dns_acl_match(&ipaddr, NULL, blackhole, &res->view->aclenv,
4088				   &match, NULL) == ISC_R_SUCCESS) &&
4089		    match > 0)
4090		{
4091			aborted = true;
4092		}
4093	}
4094
4095	if (peer != NULL && dns_peer_getbogus(peer, &bogus) == ISC_R_SUCCESS &&
4096	    bogus)
4097	{
4098		aborted = true;
4099	}
4100
4101	if (aborted) {
4102		addr->flags |= FCTX_ADDRINFO_MARK;
4103		msg = "ignoring blackholed / bogus server: ";
4104	} else if (isc_sockaddr_isnetzero(sa)) {
4105		addr->flags |= FCTX_ADDRINFO_MARK;
4106		msg = "ignoring net zero address: ";
4107	} else if (isc_sockaddr_ismulticast(sa)) {
4108		addr->flags |= FCTX_ADDRINFO_MARK;
4109		msg = "ignoring multicast address: ";
4110	} else if (isc_sockaddr_isexperimental(sa)) {
4111		addr->flags |= FCTX_ADDRINFO_MARK;
4112		msg = "ignoring experimental address: ";
4113	} else if (sa->type.sa.sa_family != AF_INET6) {
4114		return;
4115	} else if (IN6_IS_ADDR_V4MAPPED(&sa->type.sin6.sin6_addr)) {
4116		addr->flags |= FCTX_ADDRINFO_MARK;
4117		msg = "ignoring IPv6 mapped IPV4 address: ";
4118	} else if (IN6_IS_ADDR_V4COMPAT(&sa->type.sin6.sin6_addr)) {
4119		addr->flags |= FCTX_ADDRINFO_MARK;
4120		msg = "ignoring IPv6 compatibility IPV4 address: ";
4121	} else {
4122		return;
4123	}
4124
4125	if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(3))) {
4126		isc_netaddr_fromsockaddr(&na, sa);
4127		isc_netaddr_format(&na, buf, sizeof(buf));
4128		FCTXTRACE2(msg, buf);
4129	}
4130}
4131
4132static dns_adbaddrinfo_t *
4133fctx_nextaddress(fetchctx_t *fctx) {
4134	dns_adbfind_t *find, *start;
4135	dns_adbaddrinfo_t *addrinfo;
4136	dns_adbaddrinfo_t *faddrinfo;
4137
4138	/*
4139	 * Return the next untried address, if any.
4140	 */
4141
4142	/*
4143	 * Find the first unmarked forwarder (if any).
4144	 */
4145	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs); addrinfo != NULL;
4146	     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
4147	{
4148		if (!UNMARKED(addrinfo)) {
4149			continue;
4150		}
4151		possibly_mark(fctx, addrinfo);
4152		if (UNMARKED(addrinfo)) {
4153			addrinfo->flags |= FCTX_ADDRINFO_MARK;
4154			fctx->find = NULL;
4155			fctx->forwarding = true;
4156
4157			/*
4158			 * QNAME minimization is disabled when
4159			 * forwarding, and has to remain disabled if
4160			 * we switch back to normal recursion; otherwise
4161			 * forwarding could leave us in an inconsistent
4162			 * state.
4163			 */
4164			fctx->minimized = false;
4165			return (addrinfo);
4166		}
4167	}
4168
4169	/*
4170	 * No forwarders.  Move to the next find.
4171	 */
4172	fctx->forwarding = false;
4173	FCTX_ATTR_SET(fctx, FCTX_ATTR_TRIEDFIND);
4174
4175	find = fctx->find;
4176	if (find == NULL) {
4177		find = ISC_LIST_HEAD(fctx->finds);
4178	} else {
4179		find = ISC_LIST_NEXT(find, publink);
4180		if (find == NULL) {
4181			find = ISC_LIST_HEAD(fctx->finds);
4182		}
4183	}
4184
4185	/*
4186	 * Find the first unmarked addrinfo.
4187	 */
4188	addrinfo = NULL;
4189	if (find != NULL) {
4190		start = find;
4191		do {
4192			for (addrinfo = ISC_LIST_HEAD(find->list);
4193			     addrinfo != NULL;
4194			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
4195			{
4196				if (!UNMARKED(addrinfo)) {
4197					continue;
4198				}
4199				possibly_mark(fctx, addrinfo);
4200				if (UNMARKED(addrinfo)) {
4201					addrinfo->flags |= FCTX_ADDRINFO_MARK;
4202					break;
4203				}
4204			}
4205			if (addrinfo != NULL) {
4206				break;
4207			}
4208			find = ISC_LIST_NEXT(find, publink);
4209			if (find == NULL) {
4210				find = ISC_LIST_HEAD(fctx->finds);
4211			}
4212		} while (find != start);
4213	}
4214
4215	fctx->find = find;
4216	if (addrinfo != NULL) {
4217		return (addrinfo);
4218	}
4219
4220	/*
4221	 * No nameservers left.  Try alternates.
4222	 */
4223
4224	FCTX_ATTR_SET(fctx, FCTX_ATTR_TRIEDALT);
4225
4226	find = fctx->altfind;
4227	if (find == NULL) {
4228		find = ISC_LIST_HEAD(fctx->altfinds);
4229	} else {
4230		find = ISC_LIST_NEXT(find, publink);
4231		if (find == NULL) {
4232			find = ISC_LIST_HEAD(fctx->altfinds);
4233		}
4234	}
4235
4236	/*
4237	 * Find the first unmarked addrinfo.
4238	 */
4239	addrinfo = NULL;
4240	if (find != NULL) {
4241		start = find;
4242		do {
4243			for (addrinfo = ISC_LIST_HEAD(find->list);
4244			     addrinfo != NULL;
4245			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
4246			{
4247				if (!UNMARKED(addrinfo)) {
4248					continue;
4249				}
4250				possibly_mark(fctx, addrinfo);
4251				if (UNMARKED(addrinfo)) {
4252					addrinfo->flags |= FCTX_ADDRINFO_MARK;
4253					break;
4254				}
4255			}
4256			if (addrinfo != NULL) {
4257				break;
4258			}
4259			find = ISC_LIST_NEXT(find, publink);
4260			if (find == NULL) {
4261				find = ISC_LIST_HEAD(fctx->altfinds);
4262			}
4263		} while (find != start);
4264	}
4265
4266	faddrinfo = addrinfo;
4267
4268	/*
4269	 * See if we have a better alternate server by address.
4270	 */
4271
4272	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs); addrinfo != NULL;
4273	     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
4274	{
4275		if (!UNMARKED(addrinfo)) {
4276			continue;
4277		}
4278		possibly_mark(fctx, addrinfo);
4279		if (UNMARKED(addrinfo) &&
4280		    (faddrinfo == NULL || addrinfo->srtt < faddrinfo->srtt))
4281		{
4282			if (faddrinfo != NULL) {
4283				faddrinfo->flags &= ~FCTX_ADDRINFO_MARK;
4284			}
4285			addrinfo->flags |= FCTX_ADDRINFO_MARK;
4286			break;
4287		}
4288	}
4289
4290	if (addrinfo == NULL) {
4291		addrinfo = faddrinfo;
4292		fctx->altfind = find;
4293	}
4294
4295	return (addrinfo);
4296}
4297
4298static void
4299fctx_try(fetchctx_t *fctx, bool retrying, bool badcache) {
4300	isc_result_t result;
4301	dns_adbaddrinfo_t *addrinfo = NULL;
4302	dns_resolver_t *res;
4303	isc_task_t *task;
4304	unsigned int bucketnum;
4305	bool bucket_empty;
4306
4307	FCTXTRACE5("try", "fctx->qc=", isc_counter_used(fctx->qc));
4308
4309	REQUIRE(!ADDRWAIT(fctx));
4310
4311	res = fctx->res;
4312	bucketnum = fctx->bucketnum;
4313
4314	/* We've already exceeded maximum query count */
4315	if (isc_counter_used(fctx->qc) > res->maxqueries) {
4316		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4317			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
4318			      "exceeded max queries resolving '%s' "
4319			      "(querycount=%u, maxqueries=%u)",
4320			      fctx->info, isc_counter_used(fctx->qc),
4321			      res->maxqueries);
4322		fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
4323		return;
4324	}
4325
4326	addrinfo = fctx_nextaddress(fctx);
4327
4328	/* Try to find an address that isn't over quota */
4329	while (addrinfo != NULL && dns_adbentry_overquota(addrinfo->entry)) {
4330		addrinfo = fctx_nextaddress(fctx);
4331	}
4332
4333	if (addrinfo == NULL) {
4334		/* We have no more addresses.  Start over. */
4335		fctx_cancelqueries(fctx, true, false);
4336		fctx_cleanupall(fctx);
4337		result = fctx_getaddresses(fctx, badcache);
4338		if (result == DNS_R_WAIT) {
4339			/*
4340			 * Sleep waiting for addresses.
4341			 */
4342			FCTXTRACE("addrwait");
4343			FCTX_ATTR_SET(fctx, FCTX_ATTR_ADDRWAIT);
4344			return;
4345		} else if (result != ISC_R_SUCCESS) {
4346			/*
4347			 * Something bad happened.
4348			 */
4349			fctx_done(fctx, result, __LINE__);
4350			return;
4351		}
4352
4353		addrinfo = fctx_nextaddress(fctx);
4354
4355		while (addrinfo != NULL &&
4356		       dns_adbentry_overquota(addrinfo->entry))
4357		{
4358			addrinfo = fctx_nextaddress(fctx);
4359		}
4360
4361		/*
4362		 * While we may have addresses from the ADB, they
4363		 * might be bad ones.  In this case, return SERVFAIL.
4364		 */
4365		if (addrinfo == NULL) {
4366			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
4367			return;
4368		}
4369	}
4370	/*
4371	 * We're minimizing and we're not yet at the final NS -
4372	 * we need to launch a query for NS for 'upper' domain
4373	 */
4374	if (fctx->minimized && !fctx->forwarding) {
4375		unsigned int options = fctx->options;
4376		/*
4377		 * Also clear DNS_FETCHOPT_TRYSTALE_ONTIMEOUT here, otherwise
4378		 * every query minimization step will activate the try-stale
4379		 * timer again.
4380		 */
4381		options &= ~(DNS_FETCHOPT_QMINIMIZE |
4382			     DNS_FETCHOPT_TRYSTALE_ONTIMEOUT);
4383
4384		/*
4385		 * Is another QNAME minimization fetch still running?
4386		 */
4387		if (fctx->qminfetch != NULL) {
4388			bool validfctx = (DNS_FETCH_VALID(fctx->qminfetch) &&
4389					  VALID_FCTX(fctx->qminfetch->private));
4390			char namebuf[DNS_NAME_FORMATSIZE];
4391			char typebuf[DNS_RDATATYPE_FORMATSIZE];
4392
4393			dns_name_format(&fctx->qminname, namebuf,
4394					sizeof(namebuf));
4395			dns_rdatatype_format(fctx->qmintype, typebuf,
4396					     sizeof(typebuf));
4397
4398			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4399				      DNS_LOGMODULE_RESOLVER, ISC_LOG_ERROR,
4400				      "fctx %p(%s): attempting QNAME "
4401				      "minimization fetch for %s/%s but "
4402				      "fetch %p(%s) still running",
4403				      fctx, fctx->info, namebuf, typebuf,
4404				      fctx->qminfetch,
4405				      validfctx ? fctx->qminfetch->private->info
4406						: "<invalid>");
4407			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
4408			return;
4409		}
4410
4411		/*
4412		 * In "_ A" mode we're asking for _.domain -
4413		 * resolver by default will follow delegations
4414		 * then, we don't want that.
4415		 */
4416		if ((options & DNS_FETCHOPT_QMIN_USE_A) != 0) {
4417			options |= DNS_FETCHOPT_NOFOLLOW;
4418		}
4419		fctx_increference(fctx);
4420		task = res->buckets[bucketnum].task;
4421		fctx_stoptimer(fctx);
4422		fctx_stoptimer_trystale(fctx);
4423		result = dns_resolver_createfetch(
4424			fctx->res, &fctx->qminname, fctx->qmintype,
4425			&fctx->domain, &fctx->nameservers, NULL, NULL, 0,
4426			options, 0, fctx->qc, task, resume_qmin, fctx,
4427			&fctx->qminrrset, NULL, &fctx->qminfetch);
4428		if (result != ISC_R_SUCCESS) {
4429			LOCK(&res->buckets[bucketnum].lock);
4430			RUNTIME_CHECK(!fctx_decreference(fctx));
4431			UNLOCK(&res->buckets[bucketnum].lock);
4432			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
4433		}
4434		return;
4435	}
4436
4437	result = isc_counter_increment(fctx->qc);
4438	if (result != ISC_R_SUCCESS) {
4439		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4440			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
4441			      "exceeded max queries resolving '%s'",
4442			      fctx->info);
4443		fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
4444		return;
4445	}
4446
4447	fctx_increference(fctx);
4448
4449	result = fctx_query(fctx, addrinfo, fctx->options);
4450	if (result != ISC_R_SUCCESS) {
4451		fctx_done(fctx, result, __LINE__);
4452		LOCK(&res->buckets[bucketnum].lock);
4453		bucket_empty = fctx_decreference(fctx);
4454		UNLOCK(&res->buckets[bucketnum].lock);
4455		if (bucket_empty) {
4456			empty_bucket(res);
4457		}
4458	} else if (retrying) {
4459		inc_stats(res, dns_resstatscounter_retry);
4460	}
4461}
4462
4463static void
4464resume_qmin(isc_task_t *task, isc_event_t *event) {
4465	dns_fetchevent_t *fevent;
4466	dns_resolver_t *res;
4467	fetchctx_t *fctx;
4468	isc_result_t result;
4469	bool bucket_empty;
4470	unsigned int bucketnum;
4471	unsigned int findoptions = 0;
4472	dns_name_t *fname, *dcname;
4473	dns_fixedname_t ffixed, dcfixed;
4474	fname = dns_fixedname_initname(&ffixed);
4475	dcname = dns_fixedname_initname(&dcfixed);
4476
4477	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
4478	fevent = (dns_fetchevent_t *)event;
4479	fctx = event->ev_arg;
4480	REQUIRE(VALID_FCTX(fctx));
4481	res = fctx->res;
4482
4483	UNUSED(task);
4484	FCTXTRACE("resume_qmin");
4485
4486	if (fevent->node != NULL) {
4487		dns_db_detachnode(fevent->db, &fevent->node);
4488	}
4489	if (fevent->db != NULL) {
4490		dns_db_detach(&fevent->db);
4491	}
4492
4493	bucketnum = fctx->bucketnum;
4494
4495	if (dns_rdataset_isassociated(fevent->rdataset)) {
4496		dns_rdataset_disassociate(fevent->rdataset);
4497	}
4498	result = fevent->result;
4499	fevent = NULL;
4500	isc_event_free(&event);
4501
4502	dns_resolver_destroyfetch(&fctx->qminfetch);
4503
4504	LOCK(&res->buckets[bucketnum].lock);
4505	if (SHUTTINGDOWN(fctx)) {
4506		UNLOCK(&res->buckets[bucketnum].lock);
4507		goto cleanup;
4508	}
4509	UNLOCK(&res->buckets[bucketnum].lock);
4510
4511	/*
4512	 * Note: fevent->rdataset must be disassociated and
4513	 * isc_event_free(&event) be called before resuming
4514	 * processing of the 'fctx' to prevent use-after-free.
4515	 * 'fevent' is set to NULL so as to not have a dangling
4516	 * pointer.
4517	 */
4518	if (result == ISC_R_CANCELED) {
4519		fctx_done(fctx, result, __LINE__);
4520		goto cleanup;
4521	}
4522
4523	/*
4524	 * If we're doing "_ A"-style minimization we can get
4525	 * NX answer to minimized query - we need to continue then.
4526	 *
4527	 * Otherwise - either disable minimization if we're
4528	 * in relaxed mode or fail if we're in strict mode.
4529	 */
4530
4531	if ((NXDOMAIN_RESULT(result) &&
4532	     (fctx->options & DNS_FETCHOPT_QMIN_USE_A) == 0) ||
4533	    result == DNS_R_FORMERR || result == DNS_R_REMOTEFORMERR ||
4534	    result == ISC_R_FAILURE)
4535	{
4536		if ((fctx->options & DNS_FETCHOPT_QMIN_STRICT) == 0) {
4537			fctx->qmin_labels = DNS_MAX_LABELS + 1;
4538			/*
4539			 * We store the result. If we succeed in the end
4540			 * we'll issue a warning that the server is broken.
4541			 */
4542			fctx->qmin_warning = result;
4543		} else {
4544			fctx_done(fctx, result, __LINE__);
4545			goto cleanup;
4546		}
4547	}
4548
4549	if (dns_rdataset_isassociated(&fctx->nameservers)) {
4550		dns_rdataset_disassociate(&fctx->nameservers);
4551	}
4552
4553	if (dns_rdatatype_atparent(fctx->type)) {
4554		findoptions |= DNS_DBFIND_NOEXACT;
4555	}
4556	result = dns_view_findzonecut(res->view, &fctx->name, fname, dcname,
4557				      fctx->now, findoptions, true, true,
4558				      &fctx->nameservers, NULL);
4559
4560	/*
4561	 * DNS_R_NXDOMAIN here means we have not loaded the root zone mirror
4562	 * yet - but DNS_R_NXDOMAIN is not a valid return value when doing
4563	 * recursion, we need to patch it.
4564	 */
4565	if (result == DNS_R_NXDOMAIN) {
4566		result = DNS_R_SERVFAIL;
4567	}
4568
4569	if (result != ISC_R_SUCCESS) {
4570		fctx_done(fctx, result, __LINE__);
4571		goto cleanup;
4572	}
4573	fcount_decr(fctx);
4574	dns_name_free(&fctx->domain, fctx->mctx);
4575	dns_name_init(&fctx->domain, NULL);
4576	dns_name_dup(fname, fctx->mctx, &fctx->domain);
4577
4578	result = fcount_incr(fctx, false);
4579	if (result != ISC_R_SUCCESS) {
4580		fctx_done(fctx, result, __LINE__);
4581		goto cleanup;
4582	}
4583
4584	dns_name_free(&fctx->qmindcname, fctx->mctx);
4585	dns_name_init(&fctx->qmindcname, NULL);
4586	dns_name_dup(dcname, fctx->mctx, &fctx->qmindcname);
4587	fctx->ns_ttl = fctx->nameservers.ttl;
4588	fctx->ns_ttl_ok = true;
4589
4590	result = fctx_minimize_qname(fctx);
4591	if (result != ISC_R_SUCCESS) {
4592		fctx_done(fctx, result, __LINE__);
4593		goto cleanup;
4594	}
4595
4596	if (!fctx->minimized) {
4597		/*
4598		 * We have finished minimizing, but fctx->finds was filled at
4599		 * the beginning of the run - now we need to clear it before
4600		 * sending the final query to use proper nameservers.
4601		 */
4602		fctx_cancelqueries(fctx, false, false);
4603		fctx_cleanupall(fctx);
4604	}
4605
4606	fctx_try(fctx, true, false);
4607
4608cleanup:
4609	INSIST(event == NULL);
4610	INSIST(fevent == NULL);
4611	LOCK(&res->buckets[bucketnum].lock);
4612	bucket_empty = fctx_decreference(fctx);
4613	UNLOCK(&res->buckets[bucketnum].lock);
4614	if (bucket_empty) {
4615		empty_bucket(res);
4616	}
4617}
4618
4619static bool
4620fctx_unlink(fetchctx_t *fctx) {
4621	dns_resolver_t *res;
4622	unsigned int bucketnum;
4623
4624	/*
4625	 * Caller must be holding the bucket lock.
4626	 */
4627
4628	REQUIRE(VALID_FCTX(fctx));
4629	REQUIRE(fctx->state == fetchstate_done ||
4630		fctx->state == fetchstate_init);
4631	REQUIRE(ISC_LIST_EMPTY(fctx->events));
4632	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
4633	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
4634	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
4635	REQUIRE(fctx->pending == 0);
4636	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
4637
4638	FCTXTRACE("unlink");
4639
4640	isc_refcount_destroy(&fctx->references);
4641
4642	res = fctx->res;
4643	bucketnum = fctx->bucketnum;
4644
4645	ISC_LIST_UNLINK(res->buckets[bucketnum].fctxs, fctx, link);
4646
4647	INSIST(atomic_fetch_sub_release(&res->nfctx, 1) > 0);
4648
4649	dec_stats(res, dns_resstatscounter_nfetch);
4650
4651	if (atomic_load_acquire(&res->buckets[bucketnum].exiting) &&
4652	    ISC_LIST_EMPTY(res->buckets[bucketnum].fctxs))
4653	{
4654		return (true);
4655	}
4656
4657	return (false);
4658}
4659
4660static void
4661fctx_destroy(fetchctx_t *fctx) {
4662	isc_sockaddr_t *sa, *next_sa;
4663	struct tried *tried;
4664
4665	REQUIRE(VALID_FCTX(fctx));
4666	REQUIRE(fctx->state == fetchstate_done ||
4667		fctx->state == fetchstate_init);
4668	REQUIRE(ISC_LIST_EMPTY(fctx->events));
4669	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
4670	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
4671	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
4672	REQUIRE(fctx->pending == 0);
4673	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
4674	REQUIRE(!ISC_LINK_LINKED(fctx, link));
4675
4676	FCTXTRACE("destroy");
4677
4678	isc_refcount_destroy(&fctx->references);
4679
4680	/*
4681	 * Free bad.
4682	 */
4683	for (sa = ISC_LIST_HEAD(fctx->bad); sa != NULL; sa = next_sa) {
4684		next_sa = ISC_LIST_NEXT(sa, link);
4685		ISC_LIST_UNLINK(fctx->bad, sa, link);
4686		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
4687	}
4688
4689	for (tried = ISC_LIST_HEAD(fctx->edns); tried != NULL;
4690	     tried = ISC_LIST_HEAD(fctx->edns))
4691	{
4692		ISC_LIST_UNLINK(fctx->edns, tried, link);
4693		isc_mem_put(fctx->mctx, tried, sizeof(*tried));
4694	}
4695
4696	for (tried = ISC_LIST_HEAD(fctx->edns512); tried != NULL;
4697	     tried = ISC_LIST_HEAD(fctx->edns512))
4698	{
4699		ISC_LIST_UNLINK(fctx->edns512, tried, link);
4700		isc_mem_put(fctx->mctx, tried, sizeof(*tried));
4701	}
4702
4703	for (sa = ISC_LIST_HEAD(fctx->bad_edns); sa != NULL; sa = next_sa) {
4704		next_sa = ISC_LIST_NEXT(sa, link);
4705		ISC_LIST_UNLINK(fctx->bad_edns, sa, link);
4706		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
4707	}
4708
4709	isc_counter_detach(&fctx->qc);
4710	fcount_decr(fctx);
4711	isc_timer_destroy(&fctx->timer);
4712	if (fctx->timer_try_stale != NULL) {
4713		isc_timer_destroy(&fctx->timer_try_stale);
4714	}
4715	dns_message_detach(&fctx->qmessage);
4716	if (dns_name_countlabels(&fctx->domain) > 0) {
4717		dns_name_free(&fctx->domain, fctx->mctx);
4718	}
4719	if (dns_rdataset_isassociated(&fctx->nameservers)) {
4720		dns_rdataset_disassociate(&fctx->nameservers);
4721	}
4722	dns_name_free(&fctx->name, fctx->mctx);
4723	dns_name_free(&fctx->qminname, fctx->mctx);
4724	dns_name_free(&fctx->qmindcname, fctx->mctx);
4725	dns_db_detach(&fctx->cache);
4726	dns_adb_detach(&fctx->adb);
4727	isc_mem_free(fctx->mctx, fctx->info);
4728	isc_mem_putanddetach(&fctx->mctx, fctx, sizeof(*fctx));
4729}
4730
4731/*
4732 * Fetch event handlers.
4733 */
4734
4735static void
4736fctx_timeout(isc_task_t *task, isc_event_t *event) {
4737	fetchctx_t *fctx = event->ev_arg;
4738	isc_timerevent_t *tevent = (isc_timerevent_t *)event;
4739	resquery_t *query;
4740
4741	REQUIRE(VALID_FCTX(fctx));
4742
4743	UNUSED(task);
4744
4745	FCTXTRACE("timeout");
4746
4747	inc_stats(fctx->res, dns_resstatscounter_querytimeout);
4748
4749	if (event->ev_type == ISC_TIMEREVENT_LIFE) {
4750		fctx->reason = NULL;
4751		fctx_done(fctx, ISC_R_TIMEDOUT, __LINE__);
4752	} else {
4753		isc_result_t result;
4754
4755		fctx->timeouts++;
4756		fctx->timeout = true;
4757
4758		/*
4759		 * We could cancel the running queries here, or we could let
4760		 * them keep going.  Since we normally use separate sockets for
4761		 * different queries, we adopt the former approach to reduce
4762		 * the number of open sockets: cancel the oldest query if it
4763		 * expired after the query had started (this is usually the
4764		 * case but is not always so, depending on the task schedule
4765		 * timing).
4766		 */
4767		query = ISC_LIST_HEAD(fctx->queries);
4768		if (query != NULL &&
4769		    isc_time_compare(&tevent->due, &query->start) >= 0)
4770		{
4771			FCTXTRACE("query timed out; no response");
4772			fctx_cancelquery(&query, NULL, NULL, true, false);
4773		}
4774		FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
4775
4776		/*
4777		 * Our timer has triggered.  Reestablish the fctx lifetime
4778		 * timer.
4779		 */
4780		result = fctx_starttimer(fctx);
4781		if (result != ISC_R_SUCCESS) {
4782			fctx_done(fctx, result, __LINE__);
4783		} else {
4784			/* Keep trying */
4785			fctx_try(fctx, true, false);
4786		}
4787	}
4788
4789	isc_event_free(&event);
4790}
4791
4792/*
4793 * Fetch event handlers called if stale answers are enabled
4794 * (stale-answer-enabled) and the fetch took more than
4795 * stale-answer-client-timeout to complete.
4796 */
4797static void
4798fctx_timeout_try_stale(isc_task_t *task, isc_event_t *event) {
4799	fetchctx_t *fctx = event->ev_arg;
4800	dns_fetchevent_t *dns_event, *next_event;
4801	isc_task_t *sender_task;
4802
4803	REQUIRE(VALID_FCTX(fctx));
4804
4805	UNUSED(task);
4806
4807	FCTXTRACE("timeout_try_stale");
4808
4809	if (event->ev_type != ISC_TIMEREVENT_LIFE) {
4810		return;
4811	}
4812
4813	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4814
4815	/*
4816	 * Trigger events of type DNS_EVENT_TRYSTALE.
4817	 */
4818	for (dns_event = ISC_LIST_HEAD(fctx->events); dns_event != NULL;
4819	     dns_event = next_event)
4820	{
4821		next_event = ISC_LIST_NEXT(dns_event, ev_link);
4822
4823		if (dns_event->ev_type != DNS_EVENT_TRYSTALE) {
4824			continue;
4825		}
4826
4827		ISC_LIST_UNLINK(fctx->events, dns_event, ev_link);
4828		sender_task = dns_event->ev_sender;
4829		dns_event->ev_sender = fctx;
4830		dns_event->vresult = ISC_R_TIMEDOUT;
4831		dns_event->result = ISC_R_TIMEDOUT;
4832
4833		isc_task_sendanddetach(&sender_task, ISC_EVENT_PTR(&dns_event));
4834	}
4835
4836	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4837
4838	isc_event_free(&event);
4839}
4840
4841static void
4842fctx_shutdown(fetchctx_t *fctx) {
4843	isc_event_t *cevent;
4844
4845	/*
4846	 * Start the shutdown process for fctx, if it isn't already underway.
4847	 */
4848
4849	FCTXTRACE("shutdown");
4850
4851	/*
4852	 * The caller must be holding the appropriate bucket lock.
4853	 */
4854
4855	if (fctx->want_shutdown) {
4856		return;
4857	}
4858
4859	fctx->want_shutdown = true;
4860
4861	/*
4862	 * Unless we're still initializing (in which case the
4863	 * control event is still outstanding), we need to post
4864	 * the control event to tell the fetch we want it to
4865	 * exit.
4866	 */
4867	if (fctx->state != fetchstate_init) {
4868		cevent = &fctx->control_event;
4869		isc_task_sendto(fctx->res->buckets[fctx->bucketnum].task,
4870				&cevent, fctx->bucketnum);
4871	}
4872}
4873
4874static void
4875fctx_doshutdown(isc_task_t *task, isc_event_t *event) {
4876	fetchctx_t *fctx = event->ev_arg;
4877	bool bucket_empty = false;
4878	dns_resolver_t *res;
4879	unsigned int bucketnum;
4880	dns_validator_t *validator;
4881	bool dodestroy = false;
4882
4883	REQUIRE(VALID_FCTX(fctx));
4884
4885	UNUSED(task);
4886
4887	res = fctx->res;
4888	bucketnum = fctx->bucketnum;
4889
4890	FCTXTRACE("doshutdown");
4891
4892	/*
4893	 * An fctx that is shutting down is no longer in ADDRWAIT mode.
4894	 */
4895	FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
4896
4897	/*
4898	 * Cancel all pending validators.  Note that this must be done
4899	 * without the bucket lock held, since that could cause deadlock.
4900	 */
4901	validator = ISC_LIST_HEAD(fctx->validators);
4902	while (validator != NULL) {
4903		dns_validator_cancel(validator);
4904		validator = ISC_LIST_NEXT(validator, link);
4905	}
4906
4907	if (fctx->nsfetch != NULL) {
4908		dns_resolver_cancelfetch(fctx->nsfetch);
4909	}
4910
4911	if (fctx->qminfetch != NULL) {
4912		dns_resolver_cancelfetch(fctx->qminfetch);
4913	}
4914
4915	/*
4916	 * Shut down anything still running on behalf of this
4917	 * fetch, and clean up finds and addresses.  To avoid deadlock
4918	 * with the ADB, we must do this before we lock the bucket lock.
4919	 */
4920	fctx_stopqueries(fctx, false, false);
4921	fctx_cleanupall(fctx);
4922
4923	LOCK(&res->buckets[bucketnum].lock);
4924
4925	FCTX_ATTR_SET(fctx, FCTX_ATTR_SHUTTINGDOWN);
4926
4927	INSIST(fctx->state == fetchstate_active ||
4928	       fctx->state == fetchstate_done);
4929	INSIST(fctx->want_shutdown);
4930
4931	if (fctx->state != fetchstate_done) {
4932		fctx->state = fetchstate_done;
4933		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
4934	}
4935
4936	if (isc_refcount_current(&fctx->references) == 0 &&
4937	    fctx->pending == 0 && fctx->nqueries == 0 &&
4938	    ISC_LIST_EMPTY(fctx->validators))
4939	{
4940		bucket_empty = fctx_unlink(fctx);
4941		dodestroy = true;
4942	}
4943
4944	UNLOCK(&res->buckets[bucketnum].lock);
4945
4946	if (dodestroy) {
4947		fctx_destroy(fctx);
4948		if (bucket_empty) {
4949			empty_bucket(res);
4950		}
4951	}
4952}
4953
4954static void
4955fctx_start(isc_task_t *task, isc_event_t *event) {
4956	fetchctx_t *fctx = event->ev_arg;
4957	bool done = false, bucket_empty = false;
4958	dns_resolver_t *res;
4959	unsigned int bucketnum;
4960	bool dodestroy = false;
4961
4962	REQUIRE(VALID_FCTX(fctx));
4963
4964	UNUSED(task);
4965
4966	res = fctx->res;
4967	bucketnum = fctx->bucketnum;
4968
4969	FCTXTRACE("start");
4970
4971	LOCK(&res->buckets[bucketnum].lock);
4972
4973	INSIST(fctx->state == fetchstate_init);
4974	if (fctx->want_shutdown) {
4975		/*
4976		 * We haven't started this fctx yet, and we've been requested
4977		 * to shut it down.
4978		 */
4979		FCTX_ATTR_SET(fctx, FCTX_ATTR_SHUTTINGDOWN);
4980		fctx->state = fetchstate_done;
4981		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
4982		/*
4983		 * Since we haven't started, we INSIST that we have no
4984		 * pending ADB finds and no pending validations.
4985		 */
4986		INSIST(fctx->pending == 0);
4987		INSIST(fctx->nqueries == 0);
4988		INSIST(ISC_LIST_EMPTY(fctx->validators));
4989		if (isc_refcount_current(&fctx->references) == 0) {
4990			/*
4991			 * It's now safe to destroy this fctx.
4992			 */
4993			bucket_empty = fctx_unlink(fctx);
4994			dodestroy = true;
4995		}
4996		done = true;
4997	} else {
4998		/*
4999		 * Normal fctx startup.
5000		 */
5001		fctx->state = fetchstate_active;
5002		/*
5003		 * Reset the control event for later use in shutting down
5004		 * the fctx.
5005		 */
5006		ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
5007			       DNS_EVENT_FETCHCONTROL, fctx_doshutdown, fctx,
5008			       NULL, NULL, NULL);
5009	}
5010
5011	UNLOCK(&res->buckets[bucketnum].lock);
5012
5013	if (!done) {
5014		isc_result_t result;
5015
5016		INSIST(!dodestroy);
5017
5018		/*
5019		 * All is well.  Start working on the fetch.
5020		 */
5021		result = fctx_starttimer(fctx);
5022		if (result == ISC_R_SUCCESS && fctx->timer_try_stale != NULL) {
5023			result = fctx_starttimer_trystale(fctx);
5024		}
5025		if (result != ISC_R_SUCCESS) {
5026			fctx_done(fctx, result, __LINE__);
5027		} else {
5028			fctx_try(fctx, false, false);
5029		}
5030	} else if (dodestroy) {
5031		fctx_destroy(fctx);
5032		if (bucket_empty) {
5033			empty_bucket(res);
5034		}
5035	}
5036}
5037
5038/*
5039 * Fetch Creation, Joining, and Cancellation.
5040 */
5041
5042static isc_result_t
5043fctx_join(fetchctx_t *fctx, isc_task_t *task, const isc_sockaddr_t *client,
5044	  dns_messageid_t id, isc_taskaction_t action, void *arg,
5045	  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
5046	  dns_fetch_t *fetch) {
5047	isc_task_t *tclone;
5048	dns_fetchevent_t *event;
5049
5050	FCTXTRACE("join");
5051
5052	/*
5053	 * We store the task we're going to send this event to in the
5054	 * sender field.  We'll make the fetch the sender when we actually
5055	 * send the event.
5056	 */
5057	tclone = NULL;
5058	isc_task_attach(task, &tclone);
5059	event = (dns_fetchevent_t *)isc_event_allocate(
5060		fctx->res->mctx, tclone, DNS_EVENT_FETCHDONE, action, arg,
5061		sizeof(*event));
5062	event->result = DNS_R_SERVFAIL;
5063	event->qtype = fctx->type;
5064	event->db = NULL;
5065	event->node = NULL;
5066	event->rdataset = rdataset;
5067	event->sigrdataset = sigrdataset;
5068	event->fetch = fetch;
5069	event->client = client;
5070	event->id = id;
5071	dns_fixedname_init(&event->foundname);
5072
5073	/*
5074	 * Make sure that we can store the sigrdataset in the
5075	 * first event if it is needed by any of the events.
5076	 */
5077	if (event->sigrdataset != NULL) {
5078		ISC_LIST_PREPEND(fctx->events, event, ev_link);
5079	} else {
5080		ISC_LIST_APPEND(fctx->events, event, ev_link);
5081	}
5082
5083	fctx_increference(fctx);
5084
5085	fetch->magic = DNS_FETCH_MAGIC;
5086	fetch->private = fctx;
5087
5088	return (ISC_R_SUCCESS);
5089}
5090
5091static void
5092fctx_add_event(fetchctx_t *fctx, isc_task_t *task, const isc_sockaddr_t *client,
5093	       dns_messageid_t id, isc_taskaction_t action, void *arg,
5094	       dns_fetch_t *fetch, isc_eventtype_t event_type) {
5095	isc_task_t *tclone;
5096	dns_fetchevent_t *event;
5097	/*
5098	 * We store the task we're going to send this event to in the
5099	 * sender field.  We'll make the fetch the sender when we actually
5100	 * send the event.
5101	 */
5102	tclone = NULL;
5103	isc_task_attach(task, &tclone);
5104	event = (dns_fetchevent_t *)isc_event_allocate(fctx->res->mctx, tclone,
5105						       event_type, action, arg,
5106						       sizeof(*event));
5107	event->result = DNS_R_SERVFAIL;
5108	event->qtype = fctx->type;
5109	event->db = NULL;
5110	event->node = NULL;
5111	event->rdataset = NULL;
5112	event->sigrdataset = NULL;
5113	event->fetch = fetch;
5114	event->client = client;
5115	event->id = id;
5116	ISC_LIST_APPEND(fctx->events, event, ev_link);
5117}
5118
5119static void
5120log_ns_ttl(fetchctx_t *fctx, const char *where) {
5121	char namebuf[DNS_NAME_FORMATSIZE];
5122	char domainbuf[DNS_NAME_FORMATSIZE];
5123
5124	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
5125	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
5126	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5127		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
5128		      "log_ns_ttl: fctx %p: %s: %s (in '%s'?): %u %u", fctx,
5129		      where, namebuf, domainbuf, fctx->ns_ttl_ok, fctx->ns_ttl);
5130}
5131
5132static isc_result_t
5133fctx_create(dns_resolver_t *res, const dns_name_t *name, dns_rdatatype_t type,
5134	    const dns_name_t *domain, dns_rdataset_t *nameservers,
5135	    const isc_sockaddr_t *client, dns_messageid_t id,
5136	    unsigned int options, unsigned int bucketnum, unsigned int depth,
5137	    isc_counter_t *qc, fetchctx_t **fctxp) {
5138	fetchctx_t *fctx;
5139	isc_result_t result;
5140	isc_result_t iresult;
5141	isc_interval_t interval;
5142	unsigned int findoptions = 0;
5143	char buf[DNS_NAME_FORMATSIZE + DNS_RDATATYPE_FORMATSIZE + 1];
5144	isc_mem_t *mctx;
5145	size_t p;
5146	bool try_stale;
5147
5148	/*
5149	 * Caller must be holding the lock for bucket number 'bucketnum'.
5150	 */
5151	REQUIRE(fctxp != NULL && *fctxp == NULL);
5152
5153	mctx = res->buckets[bucketnum].mctx;
5154	fctx = isc_mem_get(mctx, sizeof(*fctx));
5155
5156	fctx->qc = NULL;
5157	if (qc != NULL) {
5158		isc_counter_attach(qc, &fctx->qc);
5159	} else {
5160		result = isc_counter_create(res->mctx, res->maxqueries,
5161					    &fctx->qc);
5162		if (result != ISC_R_SUCCESS) {
5163			goto cleanup_fetch;
5164		}
5165	}
5166
5167	/*
5168	 * Make fctx->info point to a copy of a formatted string
5169	 * "name/type".
5170	 */
5171	dns_name_format(name, buf, sizeof(buf));
5172	p = strlcat(buf, "/", sizeof(buf));
5173	INSIST(p + DNS_RDATATYPE_FORMATSIZE < sizeof(buf));
5174	dns_rdatatype_format(type, buf + p, sizeof(buf) - p);
5175	fctx->info = isc_mem_strdup(mctx, buf);
5176
5177	FCTXTRACE("create");
5178	dns_name_init(&fctx->name, NULL);
5179	dns_name_dup(name, mctx, &fctx->name);
5180	dns_name_init(&fctx->qminname, NULL);
5181	dns_name_dup(name, mctx, &fctx->qminname);
5182	dns_name_init(&fctx->domain, NULL);
5183	dns_rdataset_init(&fctx->nameservers);
5184
5185	fctx->type = type;
5186	fctx->qmintype = type;
5187	fctx->options = options;
5188	/*
5189	 * Note!  We do not attach to the task.  We are relying on the
5190	 * resolver to ensure that this task doesn't go away while we are
5191	 * using it.
5192	 */
5193	fctx->res = res;
5194	isc_refcount_init(&fctx->references, 0);
5195	fctx->bucketnum = bucketnum;
5196	fctx->dbucketnum = RES_NOBUCKET;
5197	fctx->state = fetchstate_init;
5198	fctx->want_shutdown = false;
5199	fctx->cloned = false;
5200	fctx->depth = depth;
5201	fctx->minimized = false;
5202	fctx->ip6arpaskip = false;
5203	fctx->forwarding = false;
5204	fctx->qmin_labels = 1;
5205	fctx->qmin_warning = ISC_R_SUCCESS;
5206	fctx->qminfetch = NULL;
5207	dns_rdataset_init(&fctx->qminrrset);
5208	dns_name_init(&fctx->qmindcname, NULL);
5209	isc_stdtime_get(&fctx->now);
5210	ISC_LIST_INIT(fctx->queries);
5211	ISC_LIST_INIT(fctx->finds);
5212	ISC_LIST_INIT(fctx->altfinds);
5213	ISC_LIST_INIT(fctx->forwaddrs);
5214	ISC_LIST_INIT(fctx->altaddrs);
5215	ISC_LIST_INIT(fctx->forwarders);
5216	fctx->fwdpolicy = dns_fwdpolicy_none;
5217	ISC_LIST_INIT(fctx->bad);
5218	ISC_LIST_INIT(fctx->edns);
5219	ISC_LIST_INIT(fctx->edns512);
5220	ISC_LIST_INIT(fctx->bad_edns);
5221	ISC_LIST_INIT(fctx->validators);
5222	fctx->validator = NULL;
5223	fctx->find = NULL;
5224	fctx->altfind = NULL;
5225	fctx->pending = 0;
5226	fctx->restarts = 0;
5227	fctx->querysent = 0;
5228	fctx->referrals = 0;
5229
5230	fctx->fwdname = dns_fixedname_initname(&fctx->fwdfname);
5231
5232	TIME_NOW(&fctx->start);
5233	fctx->timeouts = 0;
5234	fctx->lamecount = 0;
5235	fctx->quotacount = 0;
5236	fctx->adberr = 0;
5237	fctx->neterr = 0;
5238	fctx->badresp = 0;
5239	fctx->findfail = 0;
5240	fctx->valfail = 0;
5241	fctx->result = ISC_R_FAILURE;
5242	fctx->vresult = ISC_R_SUCCESS;
5243	fctx->exitline = -1; /* sentinel */
5244	fctx->logged = false;
5245	atomic_init(&fctx->attributes, 0);
5246	fctx->spilled = false;
5247	fctx->nqueries = 0;
5248	fctx->reason = NULL;
5249	fctx->rand_buf = 0;
5250	fctx->rand_bits = 0;
5251	fctx->timeout = false;
5252	fctx->addrinfo = NULL;
5253	if (client != NULL) {
5254		isc_sockaddr_format(client, fctx->clientstr,
5255				    sizeof(fctx->clientstr));
5256	} else {
5257		strlcpy(fctx->clientstr, "<unknown>", sizeof(fctx->clientstr));
5258	}
5259	fctx->id = id;
5260	fctx->ns_ttl = 0;
5261	fctx->ns_ttl_ok = false;
5262
5263	dns_name_init(&fctx->nsname, NULL);
5264	fctx->nsfetch = NULL;
5265	dns_rdataset_init(&fctx->nsrrset);
5266
5267	if (domain == NULL) {
5268		dns_forwarders_t *forwarders = NULL;
5269		dns_fixedname_t fixed;
5270		unsigned int labels;
5271		const dns_name_t *fwdname = name;
5272		dns_name_t suffix;
5273		dns_name_t *fname;
5274
5275		/*
5276		 * DS records are found in the parent server. Strip one
5277		 * leading label from the name (to be used in finding
5278		 * the forwarder).
5279		 */
5280		if (dns_rdatatype_atparent(fctx->type) &&
5281		    dns_name_countlabels(name) > 1)
5282		{
5283			dns_name_init(&suffix, NULL);
5284			labels = dns_name_countlabels(name);
5285			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
5286			fwdname = &suffix;
5287		}
5288
5289		/* Find the forwarder for this name. */
5290		fname = dns_fixedname_initname(&fixed);
5291		result = dns_fwdtable_find(fctx->res->view->fwdtable, fwdname,
5292					   fname, &forwarders);
5293		if (result == ISC_R_SUCCESS) {
5294			fctx->fwdpolicy = forwarders->fwdpolicy;
5295			dns_name_copynf(fname, fctx->fwdname);
5296		}
5297
5298		if (fctx->fwdpolicy != dns_fwdpolicy_only) {
5299			dns_fixedname_t dcfixed;
5300			dns_name_t *dcname;
5301			dcname = dns_fixedname_initname(&dcfixed);
5302			/*
5303			 * The caller didn't supply a query domain and
5304			 * nameservers, and we're not in forward-only mode,
5305			 * so find the best nameservers to use.
5306			 */
5307			if (dns_rdatatype_atparent(fctx->type)) {
5308				findoptions |= DNS_DBFIND_NOEXACT;
5309			}
5310			result = dns_view_findzonecut(res->view, name, fname,
5311						      dcname, fctx->now,
5312						      findoptions, true, true,
5313						      &fctx->nameservers, NULL);
5314			if (result != ISC_R_SUCCESS) {
5315				goto cleanup_nameservers;
5316			}
5317
5318			dns_name_dup(fname, mctx, &fctx->domain);
5319			dns_name_dup(dcname, mctx, &fctx->qmindcname);
5320			fctx->ns_ttl = fctx->nameservers.ttl;
5321			fctx->ns_ttl_ok = true;
5322		} else {
5323			/*
5324			 * We're in forward-only mode.  Set the query domain.
5325			 */
5326			dns_name_dup(fname, mctx, &fctx->domain);
5327			dns_name_dup(fname, mctx, &fctx->qmindcname);
5328			/*
5329			 * Disable query minimization
5330			 */
5331			options &= ~DNS_FETCHOPT_QMINIMIZE;
5332		}
5333	} else {
5334		dns_name_dup(domain, mctx, &fctx->domain);
5335		dns_name_dup(domain, mctx, &fctx->qmindcname);
5336		dns_rdataset_clone(nameservers, &fctx->nameservers);
5337		fctx->ns_ttl = fctx->nameservers.ttl;
5338		fctx->ns_ttl_ok = true;
5339	}
5340
5341	/*
5342	 * Are there too many simultaneous queries for this domain?
5343	 */
5344	result = fcount_incr(fctx, false);
5345	if (result != ISC_R_SUCCESS) {
5346		result = fctx->res->quotaresp[dns_quotatype_zone];
5347		inc_stats(res, dns_resstatscounter_zonequota);
5348		goto cleanup_domain;
5349	}
5350
5351	log_ns_ttl(fctx, "fctx_create");
5352
5353	if (!dns_name_issubdomain(&fctx->name, &fctx->domain)) {
5354		dns_name_format(&fctx->domain, buf, sizeof(buf));
5355		UNEXPECTED_ERROR(__FILE__, __LINE__,
5356				 "'%s' is not subdomain of '%s'", fctx->info,
5357				 buf);
5358		result = ISC_R_UNEXPECTED;
5359		goto cleanup_fcount;
5360	}
5361
5362	fctx->qmessage = NULL;
5363	dns_message_create(mctx, DNS_MESSAGE_INTENTRENDER, &fctx->qmessage);
5364
5365	/*
5366	 * Compute an expiration time for the entire fetch.
5367	 */
5368	isc_interval_set(&interval, res->query_timeout / 1000,
5369			 res->query_timeout % 1000 * 1000000);
5370	iresult = isc_time_nowplusinterval(&fctx->expires, &interval);
5371	if (iresult != ISC_R_SUCCESS) {
5372		UNEXPECTED_ERROR(__FILE__, __LINE__,
5373				 "isc_time_nowplusinterval: %s",
5374				 isc_result_totext(iresult));
5375		result = ISC_R_UNEXPECTED;
5376		goto cleanup_qmessage;
5377	}
5378
5379	try_stale = ((options & DNS_FETCHOPT_TRYSTALE_ONTIMEOUT) != 0);
5380	if (try_stale) {
5381		INSIST(res->view->staleanswerclienttimeout <=
5382		       (res->query_timeout - 1000));
5383		/*
		 * Compute an expiration time after which we will attempt
		 * to serve stale data, if stale answers are enabled and
		 * the target RRset is available in cache.
5387		 */
5388		isc_interval_set(
5389			&interval, res->view->staleanswerclienttimeout / 1000,
5390			res->view->staleanswerclienttimeout % 1000 * 1000000);
5391		iresult = isc_time_nowplusinterval(&fctx->expires_try_stale,
5392						   &interval);
5393		if (iresult != ISC_R_SUCCESS) {
5394			UNEXPECTED_ERROR(__FILE__, __LINE__,
5395					 "isc_time_nowplusinterval: %s",
5396					 isc_result_totext(iresult));
5397			result = ISC_R_UNEXPECTED;
5398			goto cleanup_qmessage;
5399		}
5400	}
5401
5402	/*
5403	 * Default retry interval initialization.  We set the interval now
5404	 * mostly so it won't be uninitialized.  It will be set to the
5405	 * correct value before a query is issued.
5406	 */
5407	isc_interval_set(&fctx->interval, 2, 0);
5408
5409	/*
5410	 * Create an inactive timer for resolver-query-timeout. It
5411	 * will be made active when the fetch is actually started.
5412	 */
5413	fctx->timer = NULL;
5414
5415	iresult = isc_timer_create(res->timermgr, isc_timertype_inactive, NULL,
5416				   NULL, res->buckets[bucketnum].task,
5417				   fctx_timeout, fctx, &fctx->timer);
5418	if (iresult != ISC_R_SUCCESS) {
5419		UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_timer_create: %s",
5420				 isc_result_totext(iresult));
5421		result = ISC_R_UNEXPECTED;
5422		goto cleanup_qmessage;
5423	}
5424
5425	/*
5426	 * If stale answers are enabled, then create an inactive timer
5427	 * for stale-answer-client-timeout. It will be made active when
5428	 * the fetch is actually started.
5429	 */
5430	fctx->timer_try_stale = NULL;
5431	if (try_stale) {
5432		iresult = isc_timer_create(
5433			res->timermgr, isc_timertype_inactive, NULL, NULL,
5434			res->buckets[bucketnum].task, fctx_timeout_try_stale,
5435			fctx, &fctx->timer_try_stale);
5436		if (iresult != ISC_R_SUCCESS) {
5437			UNEXPECTED_ERROR(__FILE__, __LINE__,
5438					 "isc_timer_create: %s",
5439					 isc_result_totext(iresult));
5440			result = ISC_R_UNEXPECTED;
5441			goto cleanup_qmessage;
5442		}
5443	}
5444
5445	/*
5446	 * Attach to the view's cache and adb.
5447	 */
5448	fctx->cache = NULL;
5449	dns_db_attach(res->view->cachedb, &fctx->cache);
5450	fctx->adb = NULL;
5451	dns_adb_attach(res->view->adb, &fctx->adb);
5452	fctx->mctx = NULL;
5453	isc_mem_attach(mctx, &fctx->mctx);
5454
5455	ISC_LIST_INIT(fctx->events);
5456	ISC_LINK_INIT(fctx, link);
5457	fctx->magic = FCTX_MAGIC;
5458
5459	/*
5460	 * If qname minimization is enabled we need to trim
5461	 * the name in fctx to proper length.
5462	 */
5463	if ((options & DNS_FETCHOPT_QMINIMIZE) != 0) {
5464		fctx->ip6arpaskip =
5465			(options & DNS_FETCHOPT_QMIN_SKIP_IP6A) != 0 &&
5466			dns_name_issubdomain(&fctx->name, &ip6_arpa);
5467		result = fctx_minimize_qname(fctx);
5468		if (result != ISC_R_SUCCESS) {
5469			goto cleanup_mctx;
5470		}
5471	}
5472
5473	ISC_LIST_APPEND(res->buckets[bucketnum].fctxs, fctx, link);
5474
5475	INSIST(atomic_fetch_add_relaxed(&res->nfctx, 1) < UINT32_MAX);
5476
5477	inc_stats(res, dns_resstatscounter_nfetch);
5478
5479	*fctxp = fctx;
5480
5481	return (ISC_R_SUCCESS);
5482
5483cleanup_mctx:
5484	fctx->magic = 0;
5485	isc_mem_detach(&fctx->mctx);
5486	dns_adb_detach(&fctx->adb);
5487	dns_db_detach(&fctx->cache);
5488	isc_timer_destroy(&fctx->timer);
5489	isc_timer_destroy(&fctx->timer_try_stale);
5490
5491cleanup_qmessage:
5492	dns_message_detach(&fctx->qmessage);
5493
5494cleanup_fcount:
5495	fcount_decr(fctx);
5496
5497cleanup_domain:
5498	if (dns_name_countlabels(&fctx->domain) > 0) {
5499		dns_name_free(&fctx->domain, mctx);
5500	}
5501	if (dns_name_countlabels(&fctx->qmindcname) > 0) {
5502		dns_name_free(&fctx->qmindcname, mctx);
5503	}
5504
5505cleanup_nameservers:
5506	if (dns_rdataset_isassociated(&fctx->nameservers)) {
5507		dns_rdataset_disassociate(&fctx->nameservers);
5508	}
5509	dns_name_free(&fctx->name, mctx);
5510	dns_name_free(&fctx->qminname, mctx);
5511	isc_mem_free(mctx, fctx->info);
5512	isc_counter_detach(&fctx->qc);
5513
5514cleanup_fetch:
5515	isc_mem_put(mctx, fctx, sizeof(*fctx));
5516
5517	return (result);
5518}
5519
5520/*
5521 * Handle Responses
5522 */
5523static bool
5524is_lame(fetchctx_t *fctx, dns_message_t *message) {
5525	dns_name_t *name;
5526	dns_rdataset_t *rdataset;
5527	isc_result_t result;
5528
5529	if (message->rcode != dns_rcode_noerror &&
5530	    message->rcode != dns_rcode_yxdomain &&
5531	    message->rcode != dns_rcode_nxdomain)
5532	{
5533		return (false);
5534	}
5535
5536	if (message->counts[DNS_SECTION_ANSWER] != 0) {
5537		return (false);
5538	}
5539
5540	if (message->counts[DNS_SECTION_AUTHORITY] == 0) {
5541		return (false);
5542	}
5543
5544	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
5545	while (result == ISC_R_SUCCESS) {
5546		name = NULL;
5547		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
5548		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
5549		     rdataset = ISC_LIST_NEXT(rdataset, link))
5550		{
5551			dns_namereln_t namereln;
5552			int order;
5553			unsigned int labels;
5554			if (rdataset->type != dns_rdatatype_ns) {
5555				continue;
5556			}
5557			namereln = dns_name_fullcompare(name, &fctx->domain,
5558							&order, &labels);
5559			if (namereln == dns_namereln_equal &&
5560			    (message->flags & DNS_MESSAGEFLAG_AA) != 0)
5561			{
5562				return (false);
5563			}
5564			if (namereln == dns_namereln_subdomain) {
5565				return (false);
5566			}
5567			return (true);
5568		}
5569		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
5570	}
5571
5572	return (false);
5573}
5574
5575static void
5576log_lame(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo) {
5577	char namebuf[DNS_NAME_FORMATSIZE];
5578	char domainbuf[DNS_NAME_FORMATSIZE];
5579	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
5580
5581	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
5582	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
5583	isc_sockaddr_format(&addrinfo->sockaddr, addrbuf, sizeof(addrbuf));
5584	isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
5585		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
5586		      "lame server resolving '%s' (in '%s'?): %s", namebuf,
5587		      domainbuf, addrbuf);
5588}
5589
5590static void
5591log_formerr(fetchctx_t *fctx, const char *format, ...) {
5592	char nsbuf[ISC_SOCKADDR_FORMATSIZE];
5593	char msgbuf[2048];
5594	va_list args;
5595
5596	va_start(args, format);
5597	vsnprintf(msgbuf, sizeof(msgbuf), format, args);
5598	va_end(args);
5599
5600	isc_sockaddr_format(&fctx->addrinfo->sockaddr, nsbuf, sizeof(nsbuf));
5601
5602	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5603		      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5604		      "DNS format error from %s resolving %s for %s: %s", nsbuf,
5605		      fctx->info, fctx->clientstr, msgbuf);
5606}
5607
5608static isc_result_t
5609same_question(fetchctx_t *fctx, dns_message_t *message) {
5610	isc_result_t result;
5611	dns_name_t *name;
5612	dns_rdataset_t *rdataset;
5613
5614	/*
5615	 * Caller must be holding the fctx lock.
5616	 */
5617
5618	/*
5619	 * XXXRTH  Currently we support only one question.
5620	 */
5621	if (ISC_UNLIKELY(message->counts[DNS_SECTION_QUESTION] == 0)) {
5622		if ((message->flags & DNS_MESSAGEFLAG_TC) != 0) {
5623			/*
5624			 * If TC=1 and the question section is empty, we
5625			 * accept the reply message as a truncated
5626			 * answer, to be retried over TCP.
5627			 *
5628			 * It is really a FORMERR condition, but this is
5629			 * a workaround to accept replies from some
5630			 * implementations.
5631			 *
5632			 * Because the question section matching is not
5633			 * performed, the worst that could happen is
5634			 * that an attacker who gets past the ID and
5635			 * source port checks can force the use of
5636			 * TCP. This is considered an acceptable risk.
5637			 */
5638			log_formerr(fctx, "empty question section, "
5639					  "accepting it anyway as TC=1");
5640			return (ISC_R_SUCCESS);
5641		} else {
5642			log_formerr(fctx, "empty question section");
5643			return (DNS_R_FORMERR);
5644		}
5645	} else if (ISC_UNLIKELY(message->counts[DNS_SECTION_QUESTION] > 1)) {
5646		log_formerr(fctx, "too many questions");
5647		return (DNS_R_FORMERR);
5648	}
5649
5650	result = dns_message_firstname(message, DNS_SECTION_QUESTION);
5651	if (result != ISC_R_SUCCESS) {
5652		return (result);
5653	}
5654	name = NULL;
5655	dns_message_currentname(message, DNS_SECTION_QUESTION, &name);
5656	rdataset = ISC_LIST_HEAD(name->list);
5657	INSIST(rdataset != NULL);
5658	INSIST(ISC_LIST_NEXT(rdataset, link) == NULL);
5659
5660	if (fctx->type != rdataset->type ||
5661	    fctx->res->rdclass != rdataset->rdclass ||
5662	    !dns_name_equal(&fctx->name, name))
5663	{
5664		char namebuf[DNS_NAME_FORMATSIZE];
5665		char classbuf[DNS_RDATACLASS_FORMATSIZE];
5666		char typebuf[DNS_RDATATYPE_FORMATSIZE];
5667
5668		dns_name_format(name, namebuf, sizeof(namebuf));
5669		dns_rdataclass_format(rdataset->rdclass, classbuf,
5670				      sizeof(classbuf));
5671		dns_rdatatype_format(rdataset->type, typebuf, sizeof(typebuf));
5672		log_formerr(fctx, "question section mismatch: got %s/%s/%s",
5673			    namebuf, classbuf, typebuf);
5674		return (DNS_R_FORMERR);
5675	}
5676
5677	return (ISC_R_SUCCESS);
5678}
5679
5680static void
5681clone_results(fetchctx_t *fctx) {
5682	dns_fetchevent_t *event, *hevent;
5683	dns_name_t *name, *hname;
5684
5685	FCTXTRACE("clone_results");
5686
5687	/*
5688	 * Set up any other events to have the same data as the first
5689	 * event.
5690	 *
5691	 * Caller must be holding the appropriate lock.
5692	 */
5693
5694	fctx->cloned = true;
5695	hevent = ISC_LIST_HEAD(fctx->events);
5696	if (hevent == NULL) {
5697		return;
5698	}
5699	hname = dns_fixedname_name(&hevent->foundname);
5700	for (event = ISC_LIST_NEXT(hevent, ev_link); event != NULL;
5701	     event = ISC_LIST_NEXT(event, ev_link))
5702	{
5703		if (event->ev_type == DNS_EVENT_TRYSTALE) {
5704			/*
5705			 * We don't need to clone resulting data to this
5706			 * type of event, as its associated callback is only
5707			 * called when stale-answer-client-timeout triggers,
5708			 * and the logic in there doesn't expect any result
5709			 * as input, as it will itself lookup for stale data
5710			 * in cache to use as result, if any is available.
5711			 *
5712			 * Also, if we reached this point, then the whole fetch
5713			 * context is done, it will cancel timers, process
5714			 * associated callbacks of type DNS_EVENT_FETCHDONE, and
5715			 * silently remove/free events of type
5716			 * DNS_EVENT_TRYSTALE.
5717			 */
5718			continue;
5719		}
5720		name = dns_fixedname_name(&event->foundname);
5721		dns_name_copynf(hname, name);
5722		event->result = hevent->result;
5723		dns_db_attach(hevent->db, &event->db);
5724		dns_db_attachnode(hevent->db, hevent->node, &event->node);
5725		INSIST(hevent->rdataset != NULL);
5726		INSIST(event->rdataset != NULL);
5727		if (dns_rdataset_isassociated(hevent->rdataset)) {
5728			dns_rdataset_clone(hevent->rdataset, event->rdataset);
5729		}
5730		INSIST(!(hevent->sigrdataset == NULL &&
5731			 event->sigrdataset != NULL));
5732		if (hevent->sigrdataset != NULL &&
5733		    dns_rdataset_isassociated(hevent->sigrdataset) &&
5734		    event->sigrdataset != NULL)
5735		{
5736			dns_rdataset_clone(hevent->sigrdataset,
5737					   event->sigrdataset);
5738		}
5739	}
5740}
5741
/*
 * Predicates on an rdataset's 'attributes' bit field.  'r' is a pointer
 * to an object with an 'attributes' member; each macro evaluates to true
 * when the corresponding DNS_RDATASETATTR_* bit is set.
 */
#define CACHE(r)      (((r)->attributes & DNS_RDATASETATTR_CACHE) != 0)
#define ANSWER(r)     (((r)->attributes & DNS_RDATASETATTR_ANSWER) != 0)
#define ANSWERSIG(r)  (((r)->attributes & DNS_RDATASETATTR_ANSWERSIG) != 0)
#define EXTERNAL(r)   (((r)->attributes & DNS_RDATASETATTR_EXTERNAL) != 0)
#define CHAINING(r)   (((r)->attributes & DNS_RDATASETATTR_CHAINING) != 0)
#define CHASE(r)      (((r)->attributes & DNS_RDATASETATTR_CHASE) != 0)
#define CHECKNAMES(r) (((r)->attributes & DNS_RDATASETATTR_CHECKNAMES) != 0)
5749
5750/*
5751 * The validator has finished.
5752 */
5753static void
5754validated(isc_task_t *task, isc_event_t *event) {
5755	dns_adbaddrinfo_t *addrinfo;
5756	dns_dbnode_t *node = NULL;
5757	dns_dbnode_t *nsnode = NULL;
5758	dns_fetchevent_t *hevent;
5759	dns_name_t *name;
5760	dns_rdataset_t *ardataset = NULL;
5761	dns_rdataset_t *asigrdataset = NULL;
5762	dns_rdataset_t *rdataset;
5763	dns_rdataset_t *sigrdataset;
5764	dns_resolver_t *res;
5765	dns_valarg_t *valarg = event->ev_arg;
5766	dns_validatorevent_t *vevent;
5767	fetchctx_t *fctx;
5768	bool chaining;
5769	bool negative;
5770	bool sentresponse;
5771	bool bucket_empty;
5772	isc_result_t eresult = ISC_R_SUCCESS;
5773	isc_result_t result = ISC_R_SUCCESS;
5774	isc_stdtime_t now;
5775	uint32_t ttl;
5776	unsigned options;
5777	uint32_t bucketnum;
5778	dns_fixedname_t fwild;
5779	dns_name_t *wild = NULL;
5780	dns_message_t *message = NULL;
5781
5782	UNUSED(task); /* for now */
5783
5784	REQUIRE(event->ev_type == DNS_EVENT_VALIDATORDONE);
5785	REQUIRE(VALID_FCTX(valarg->fctx));
5786	REQUIRE(!ISC_LIST_EMPTY(valarg->fctx->validators));
5787
5788	fctx = valarg->fctx;
5789	fctx_increference(fctx);
5790	dns_message_attach(valarg->message, &message);
5791
5792	res = fctx->res;
5793	addrinfo = valarg->addrinfo;
5794
5795	vevent = (dns_validatorevent_t *)event;
5796	fctx->vresult = vevent->result;
5797
5798	FCTXTRACE("received validation completion event");
5799
5800	bucketnum = fctx->bucketnum;
5801	LOCK(&res->buckets[bucketnum].lock);
5802
5803	ISC_LIST_UNLINK(fctx->validators, vevent->validator, link);
5804	fctx->validator = NULL;
5805	UNLOCK(&res->buckets[bucketnum].lock);
5806
5807	/*
5808	 * Destroy the validator early so that we can
5809	 * destroy the fctx if necessary.  Save the wildcard name.
5810	 */
5811	if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL) {
5812		wild = dns_fixedname_initname(&fwild);
5813		dns_name_copynf(dns_fixedname_name(&vevent->validator->wild),
5814				wild);
5815	}
5816	dns_validator_destroy(&vevent->validator);
5817	dns_message_detach(&valarg->message);
5818	isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
5819
5820	negative = (vevent->rdataset == NULL);
5821
5822	LOCK(&res->buckets[bucketnum].lock);
5823	sentresponse = ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0);
5824
5825	/*
5826	 * If shutting down, ignore the results.
5827	 */
5828	if (SHUTTINGDOWN(fctx) && !sentresponse) {
5829		UNLOCK(&res->buckets[bucketnum].lock);
5830		goto cleanup_event;
5831	}
5832
5833	isc_stdtime_get(&now);
5834
5835	/*
5836	 * If chaining, we need to make sure that the right result code is
5837	 * returned, and that the rdatasets are bound.
5838	 */
5839	if (vevent->result == ISC_R_SUCCESS && !negative &&
5840	    vevent->rdataset != NULL && CHAINING(vevent->rdataset))
5841	{
5842		if (vevent->rdataset->type == dns_rdatatype_cname) {
5843			eresult = DNS_R_CNAME;
5844		} else {
5845			INSIST(vevent->rdataset->type == dns_rdatatype_dname);
5846			eresult = DNS_R_DNAME;
5847		}
5848		chaining = true;
5849	} else {
5850		chaining = false;
5851	}
5852
5853	/*
5854	 * Either we're not shutting down, or we are shutting down but want
5855	 * to cache the result anyway (if this was a validation started by
5856	 * a query with cd set)
5857	 */
5858
5859	hevent = ISC_LIST_HEAD(fctx->events);
5860	if (hevent != NULL) {
5861		if (!negative && !chaining &&
5862		    (fctx->type == dns_rdatatype_any ||
5863		     fctx->type == dns_rdatatype_rrsig ||
5864		     fctx->type == dns_rdatatype_sig))
5865		{
5866			/*
5867			 * Don't bind rdatasets; the caller
5868			 * will iterate the node.
5869			 */
5870		} else {
5871			ardataset = hevent->rdataset;
5872			asigrdataset = hevent->sigrdataset;
5873		}
5874	}
5875
5876	if (vevent->result != ISC_R_SUCCESS) {
5877		FCTXTRACE("validation failed");
5878		inc_stats(res, dns_resstatscounter_valfail);
5879		fctx->valfail++;
5880		fctx->vresult = vevent->result;
5881		if (fctx->vresult != DNS_R_BROKENCHAIN) {
5882			result = ISC_R_NOTFOUND;
5883			if (vevent->rdataset != NULL) {
5884				result = dns_db_findnode(
5885					fctx->cache, vevent->name, true, &node);
5886			}
5887			if (result == ISC_R_SUCCESS) {
5888				(void)dns_db_deleterdataset(fctx->cache, node,
5889							    NULL, vevent->type,
5890							    0);
5891				if (vevent->sigrdataset != NULL) {
5892					(void)dns_db_deleterdataset(
5893						fctx->cache, node, NULL,
5894						dns_rdatatype_rrsig,
5895						vevent->type);
5896				}
5897				dns_db_detachnode(fctx->cache, &node);
5898			}
5899		} else if (!negative) {
5900			/*
5901			 * Cache the data as pending for later validation.
5902			 */
5903			result = ISC_R_NOTFOUND;
5904			if (vevent->rdataset != NULL) {
5905				result = dns_db_findnode(
5906					fctx->cache, vevent->name, true, &node);
5907			}
5908			if (result == ISC_R_SUCCESS) {
5909				(void)dns_db_addrdataset(
5910					fctx->cache, node, NULL, now,
5911					vevent->rdataset, 0, NULL);
5912				if (vevent->sigrdataset != NULL) {
5913					(void)dns_db_addrdataset(
5914						fctx->cache, node, NULL, now,
5915						vevent->sigrdataset, 0, NULL);
5916				}
5917				dns_db_detachnode(fctx->cache, &node);
5918			}
5919		}
5920		result = fctx->vresult;
5921		add_bad(fctx, message, addrinfo, result, badns_validation);
5922		UNLOCK(&res->buckets[bucketnum].lock);
5923		INSIST(fctx->validator == NULL);
5924		fctx->validator = ISC_LIST_HEAD(fctx->validators);
5925		if (fctx->validator != NULL) {
5926			dns_validator_send(fctx->validator);
5927		} else if (sentresponse) {
5928			fctx_done(fctx, result, __LINE__); /* Locks bucket. */
5929		} else if (result == DNS_R_BROKENCHAIN) {
5930			isc_result_t tresult;
5931			isc_time_t expire;
5932			isc_interval_t i;
5933
5934			isc_interval_set(&i, DNS_RESOLVER_BADCACHETTL(fctx), 0);
5935			tresult = isc_time_nowplusinterval(&expire, &i);
5936			if (negative &&
5937			    (fctx->type == dns_rdatatype_dnskey ||
5938			     fctx->type == dns_rdatatype_ds) &&
5939			    tresult == ISC_R_SUCCESS)
5940			{
5941				dns_resolver_addbadcache(res, &fctx->name,
5942							 fctx->type, &expire);
5943			}
5944			fctx_done(fctx, result, __LINE__); /* Locks bucket. */
5945		} else {
5946			fctx_try(fctx, true, true); /* Locks bucket. */
5947		}
5948
5949		goto cleanup_event;
5950	}
5951
5952	if (negative) {
5953		dns_rdatatype_t covers;
5954		FCTXTRACE("nonexistence validation OK");
5955
5956		inc_stats(res, dns_resstatscounter_valnegsuccess);
5957
5958		/*
5959		 * Cache DS NXDOMAIN separately to other types.
5960		 */
5961		if (message->rcode == dns_rcode_nxdomain &&
5962		    fctx->type != dns_rdatatype_ds)
5963		{
5964			covers = dns_rdatatype_any;
5965		} else {
5966			covers = fctx->type;
5967		}
5968
5969		result = dns_db_findnode(fctx->cache, vevent->name, true,
5970					 &node);
5971		if (result != ISC_R_SUCCESS) {
5972			goto noanswer_response;
5973		}
5974
5975		/*
5976		 * If we are asking for a SOA record set the cache time
5977		 * to zero to facilitate locating the containing zone of
5978		 * a arbitrary zone.
5979		 */
5980		ttl = res->view->maxncachettl;
5981		if (fctx->type == dns_rdatatype_soa &&
5982		    covers == dns_rdatatype_any && res->zero_no_soa_ttl)
5983		{
5984			ttl = 0;
5985		}
5986
5987		result = ncache_adderesult(message, fctx->cache, node, covers,
5988					   now, fctx->res->view->minncachettl,
5989					   ttl, vevent->optout, vevent->secure,
5990					   ardataset, &eresult);
5991		if (result != ISC_R_SUCCESS) {
5992			goto noanswer_response;
5993		}
5994		goto answer_response;
5995	} else {
5996		inc_stats(res, dns_resstatscounter_valsuccess);
5997	}
5998
5999	FCTXTRACE("validation OK");
6000
6001	if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL) {
6002		result = dns_rdataset_addnoqname(
6003			vevent->rdataset,
6004			vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF]);
6005		RUNTIME_CHECK(result == ISC_R_SUCCESS);
6006		INSIST(vevent->sigrdataset != NULL);
6007		vevent->sigrdataset->ttl = vevent->rdataset->ttl;
6008		if (vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER] != NULL) {
6009			result = dns_rdataset_addclosest(
6010				vevent->rdataset,
6011				vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER]);
6012			RUNTIME_CHECK(result == ISC_R_SUCCESS);
6013		}
6014	} else if (vevent->rdataset->trust == dns_trust_answer &&
6015		   vevent->rdataset->type != dns_rdatatype_rrsig)
6016	{
6017		isc_result_t tresult;
6018		dns_name_t *noqname = NULL;
6019		tresult = findnoqname(fctx, message, vevent->name,
6020				      vevent->rdataset->type, &noqname);
6021		if (tresult == ISC_R_SUCCESS && noqname != NULL) {
6022			tresult = dns_rdataset_addnoqname(vevent->rdataset,
6023							  noqname);
6024			RUNTIME_CHECK(tresult == ISC_R_SUCCESS);
6025		}
6026	}
6027
6028	/*
6029	 * The data was already cached as pending data.
6030	 * Re-cache it as secure and bind the cached
6031	 * rdatasets to the first event on the fetch
6032	 * event list.
6033	 */
6034	result = dns_db_findnode(fctx->cache, vevent->name, true, &node);
6035	if (result != ISC_R_SUCCESS) {
6036		goto noanswer_response;
6037	}
6038
6039	options = 0;
6040	if ((fctx->options & DNS_FETCHOPT_PREFETCH) != 0) {
6041		options = DNS_DBADD_PREFETCH;
6042	}
6043	result = dns_db_addrdataset(fctx->cache, node, NULL, now,
6044				    vevent->rdataset, options, ardataset);
6045	if (result != ISC_R_SUCCESS && result != DNS_R_UNCHANGED) {
6046		goto noanswer_response;
6047	}
6048	if (ardataset != NULL && NEGATIVE(ardataset)) {
6049		if (NXDOMAIN(ardataset)) {
6050			eresult = DNS_R_NCACHENXDOMAIN;
6051		} else {
6052			eresult = DNS_R_NCACHENXRRSET;
6053		}
6054	} else if (vevent->sigrdataset != NULL) {
6055		result = dns_db_addrdataset(fctx->cache, node, NULL, now,
6056					    vevent->sigrdataset, options,
6057					    asigrdataset);
6058		if (result != ISC_R_SUCCESS && result != DNS_R_UNCHANGED) {
6059			goto noanswer_response;
6060		}
6061	}
6062
6063	if (sentresponse) {
6064		/*
6065		 * If we only deferred the destroy because we wanted to cache
6066		 * the data, destroy now.
6067		 */
6068		dns_db_detachnode(fctx->cache, &node);
6069		UNLOCK(&res->buckets[bucketnum].lock);
6070		goto cleanup_event;
6071	}
6072
6073	if (!ISC_LIST_EMPTY(fctx->validators)) {
6074		INSIST(!negative);
6075		INSIST(fctx->type == dns_rdatatype_any ||
6076		       fctx->type == dns_rdatatype_rrsig ||
6077		       fctx->type == dns_rdatatype_sig);
6078		/*
6079		 * Don't send a response yet - we have
6080		 * more rdatasets that still need to
6081		 * be validated.
6082		 */
6083		dns_db_detachnode(fctx->cache, &node);
6084		UNLOCK(&res->buckets[bucketnum].lock);
6085		dns_validator_send(ISC_LIST_HEAD(fctx->validators));
6086		goto cleanup_event;
6087	}
6088
6089answer_response:
6090	/*
6091	 * Cache any SOA/NS/NSEC records that happened to be validated.
6092	 */
6093	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
6094	while (result == ISC_R_SUCCESS) {
6095		name = NULL;
6096		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
6097		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
6098		     rdataset = ISC_LIST_NEXT(rdataset, link))
6099		{
6100			if ((rdataset->type != dns_rdatatype_ns &&
6101			     rdataset->type != dns_rdatatype_soa &&
6102			     rdataset->type != dns_rdatatype_nsec) ||
6103			    rdataset->trust != dns_trust_secure)
6104			{
6105				continue;
6106			}
6107			for (sigrdataset = ISC_LIST_HEAD(name->list);
6108			     sigrdataset != NULL;
6109			     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
6110			{
6111				if (sigrdataset->type != dns_rdatatype_rrsig ||
6112				    sigrdataset->covers != rdataset->type)
6113				{
6114					continue;
6115				}
6116				break;
6117			}
6118			if (sigrdataset == NULL ||
6119			    sigrdataset->trust != dns_trust_secure)
6120			{
6121				continue;
6122			}
6123			result = dns_db_findnode(fctx->cache, name, true,
6124						 &nsnode);
6125			if (result != ISC_R_SUCCESS) {
6126				continue;
6127			}
6128
6129			result = dns_db_addrdataset(fctx->cache, nsnode, NULL,
6130						    now, rdataset, 0, NULL);
6131			if (result == ISC_R_SUCCESS) {
6132				result = dns_db_addrdataset(
6133					fctx->cache, nsnode, NULL, now,
6134					sigrdataset, 0, NULL);
6135			}
6136			dns_db_detachnode(fctx->cache, &nsnode);
6137			if (result != ISC_R_SUCCESS) {
6138				continue;
6139			}
6140		}
6141		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
6142	}
6143
6144	/*
6145	 * Add the wild card entry.
6146	 */
6147	if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL &&
6148	    vevent->rdataset != NULL &&
6149	    dns_rdataset_isassociated(vevent->rdataset) &&
6150	    vevent->rdataset->trust == dns_trust_secure &&
6151	    vevent->sigrdataset != NULL &&
6152	    dns_rdataset_isassociated(vevent->sigrdataset) &&
6153	    vevent->sigrdataset->trust == dns_trust_secure && wild != NULL)
6154	{
6155		dns_dbnode_t *wnode = NULL;
6156
6157		result = dns_db_findnode(fctx->cache, wild, true, &wnode);
6158		if (result == ISC_R_SUCCESS) {
6159			result = dns_db_addrdataset(fctx->cache, wnode, NULL,
6160						    now, vevent->rdataset, 0,
6161						    NULL);
6162		}
6163		if (result == ISC_R_SUCCESS) {
6164			(void)dns_db_addrdataset(fctx->cache, wnode, NULL, now,
6165						 vevent->sigrdataset, 0, NULL);
6166		}
6167		if (wnode != NULL) {
6168			dns_db_detachnode(fctx->cache, &wnode);
6169		}
6170	}
6171
6172	result = ISC_R_SUCCESS;
6173
6174	/*
6175	 * Respond with an answer, positive or negative,
6176	 * as opposed to an error.  'node' must be non-NULL.
6177	 */
6178
6179	FCTX_ATTR_SET(fctx, FCTX_ATTR_HAVEANSWER);
6180
6181	if (hevent != NULL) {
6182		/*
6183		 * Negative results must be indicated in event->result.
6184		 */
6185		INSIST(hevent->rdataset != NULL);
6186		if (dns_rdataset_isassociated(hevent->rdataset) &&
6187		    NEGATIVE(hevent->rdataset))
6188		{
6189			INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
6190			       eresult == DNS_R_NCACHENXRRSET);
6191		}
6192		hevent->result = eresult;
6193		dns_name_copynf(vevent->name,
6194				dns_fixedname_name(&hevent->foundname));
6195		dns_db_attach(fctx->cache, &hevent->db);
6196		dns_db_transfernode(fctx->cache, &node, &hevent->node);
6197		clone_results(fctx);
6198	}
6199
6200noanswer_response:
6201	if (node != NULL) {
6202		dns_db_detachnode(fctx->cache, &node);
6203	}
6204
6205	UNLOCK(&res->buckets[bucketnum].lock);
6206	fctx_done(fctx, result, __LINE__); /* Locks bucket. */
6207
6208cleanup_event:
6209	INSIST(node == NULL);
6210	dns_message_detach(&message);
6211	isc_event_free(&event);
6212
6213	LOCK(&res->buckets[bucketnum].lock);
6214	bucket_empty = fctx_decreference(fctx);
6215	UNLOCK(&res->buckets[bucketnum].lock);
6216	if (bucket_empty) {
6217		empty_bucket(res);
6218	}
6219}
6220
6221static void
6222fctx_log(void *arg, int level, const char *fmt, ...) {
6223	char msgbuf[2048];
6224	va_list args;
6225	fetchctx_t *fctx = arg;
6226
6227	va_start(args, fmt);
6228	vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
6229	va_end(args);
6230
6231	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6232		      DNS_LOGMODULE_RESOLVER, level, "fctx %p(%s): %s", fctx,
6233		      fctx->info, msgbuf);
6234}
6235
6236static isc_result_t
6237findnoqname(fetchctx_t *fctx, dns_message_t *message, dns_name_t *name,
6238	    dns_rdatatype_t type, dns_name_t **noqnamep) {
6239	dns_rdataset_t *nrdataset, *next, *sigrdataset;
6240	dns_rdata_rrsig_t rrsig;
6241	isc_result_t result;
6242	unsigned int labels;
6243	dns_section_t section;
6244	dns_name_t *zonename;
6245	dns_fixedname_t fzonename;
6246	dns_name_t *closest;
6247	dns_fixedname_t fclosest;
6248	dns_name_t *nearest;
6249	dns_fixedname_t fnearest;
6250	dns_rdatatype_t found = dns_rdatatype_none;
6251	dns_name_t *noqname = NULL;
6252
6253	FCTXTRACE("findnoqname");
6254
6255	REQUIRE(noqnamep != NULL && *noqnamep == NULL);
6256
6257	/*
6258	 * Find the SIG for this rdataset, if we have it.
6259	 */
6260	for (sigrdataset = ISC_LIST_HEAD(name->list); sigrdataset != NULL;
6261	     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
6262	{
6263		if (sigrdataset->type == dns_rdatatype_rrsig &&
6264		    sigrdataset->covers == type)
6265		{
6266			break;
6267		}
6268	}
6269
6270	if (sigrdataset == NULL) {
6271		return (ISC_R_NOTFOUND);
6272	}
6273
6274	labels = dns_name_countlabels(name);
6275
6276	for (result = dns_rdataset_first(sigrdataset); result == ISC_R_SUCCESS;
6277	     result = dns_rdataset_next(sigrdataset))
6278	{
6279		dns_rdata_t rdata = DNS_RDATA_INIT;
6280		dns_rdataset_current(sigrdataset, &rdata);
6281		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
6282		RUNTIME_CHECK(result == ISC_R_SUCCESS);
6283		/* Wildcard has rrsig.labels < labels - 1. */
6284		if (rrsig.labels + 1U >= labels) {
6285			continue;
6286		}
6287		break;
6288	}
6289
6290	if (result == ISC_R_NOMORE) {
6291		return (ISC_R_NOTFOUND);
6292	}
6293	if (result != ISC_R_SUCCESS) {
6294		return (result);
6295	}
6296
6297	zonename = dns_fixedname_initname(&fzonename);
6298	closest = dns_fixedname_initname(&fclosest);
6299	nearest = dns_fixedname_initname(&fnearest);
6300
6301#define NXND(x) ((x) == ISC_R_SUCCESS)
6302
6303	section = DNS_SECTION_AUTHORITY;
6304	for (result = dns_message_firstname(message, section);
6305	     result == ISC_R_SUCCESS;
6306	     result = dns_message_nextname(message, section))
6307	{
6308		dns_name_t *nsec = NULL;
6309		dns_message_currentname(message, section, &nsec);
6310		for (nrdataset = ISC_LIST_HEAD(nsec->list); nrdataset != NULL;
6311		     nrdataset = next)
6312		{
6313			bool data = false, exists = false;
6314			bool optout = false, unknown = false;
6315			bool setclosest = false;
6316			bool setnearest = false;
6317
6318			next = ISC_LIST_NEXT(nrdataset, link);
6319			if (nrdataset->type != dns_rdatatype_nsec &&
6320			    nrdataset->type != dns_rdatatype_nsec3)
6321			{
6322				continue;
6323			}
6324
6325			if (nrdataset->type == dns_rdatatype_nsec &&
6326			    NXND(dns_nsec_noexistnodata(
6327				    type, name, nsec, nrdataset, &exists, &data,
6328				    NULL, fctx_log, fctx)))
6329			{
6330				if (!exists) {
6331					noqname = nsec;
6332					found = dns_rdatatype_nsec;
6333				}
6334			}
6335
6336			if (nrdataset->type == dns_rdatatype_nsec3 &&
6337			    NXND(dns_nsec3_noexistnodata(
6338				    type, name, nsec, nrdataset, zonename,
6339				    &exists, &data, &optout, &unknown,
6340				    &setclosest, &setnearest, closest, nearest,
6341				    fctx_log, fctx)))
6342			{
6343				if (!exists && setnearest) {
6344					noqname = nsec;
6345					found = dns_rdatatype_nsec3;
6346				}
6347			}
6348		}
6349	}
6350	if (result == ISC_R_NOMORE) {
6351		result = ISC_R_SUCCESS;
6352	}
6353	if (noqname != NULL) {
6354		for (sigrdataset = ISC_LIST_HEAD(noqname->list);
6355		     sigrdataset != NULL;
6356		     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
6357		{
6358			if (sigrdataset->type == dns_rdatatype_rrsig &&
6359			    sigrdataset->covers == found)
6360			{
6361				break;
6362			}
6363		}
6364		if (sigrdataset != NULL) {
6365			*noqnamep = noqname;
6366		}
6367	}
6368	return (result);
6369}
6370
6371static isc_result_t
6372cache_name(fetchctx_t *fctx, dns_name_t *name, dns_message_t *message,
6373	   dns_adbaddrinfo_t *addrinfo, isc_stdtime_t now) {
6374	dns_rdataset_t *rdataset = NULL, *sigrdataset = NULL;
6375	dns_rdataset_t *addedrdataset = NULL;
6376	dns_rdataset_t *ardataset = NULL, *asigrdataset = NULL;
6377	dns_rdataset_t *valrdataset = NULL, *valsigrdataset = NULL;
6378	dns_dbnode_t *node = NULL, **anodep = NULL;
6379	dns_db_t **adbp = NULL;
6380	dns_name_t *aname = NULL;
6381	dns_resolver_t *res = fctx->res;
6382	bool need_validation = false;
6383	bool secure_domain = false;
6384	bool have_answer = false;
6385	isc_result_t result, eresult = ISC_R_SUCCESS;
6386	dns_fetchevent_t *event = NULL;
6387	unsigned int options;
6388	isc_task_t *task;
6389	bool fail;
6390	unsigned int valoptions = 0;
6391	bool checknta = true;
6392
6393	FCTXTRACE("cache_name");
6394
6395	/*
6396	 * The appropriate bucket lock must be held.
6397	 */
6398	task = res->buckets[fctx->bucketnum].task;
6399
6400	/*
6401	 * Is DNSSEC validation required for this name?
6402	 */
6403	if ((fctx->options & DNS_FETCHOPT_NONTA) != 0) {
6404		valoptions |= DNS_VALIDATOR_NONTA;
6405		checknta = false;
6406	}
6407
6408	if (res->view->enablevalidation) {
6409		result = issecuredomain(res->view, name, fctx->type, now,
6410					checknta, NULL, &secure_domain);
6411		if (result != ISC_R_SUCCESS) {
6412			return (result);
6413		}
6414	}
6415
6416	if ((fctx->options & DNS_FETCHOPT_NOCDFLAG) != 0) {
6417		valoptions |= DNS_VALIDATOR_NOCDFLAG;
6418	}
6419
6420	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
6421		need_validation = false;
6422	} else {
6423		need_validation = secure_domain;
6424	}
6425
6426	if (((name->attributes & DNS_NAMEATTR_ANSWER) != 0) &&
6427	    (!need_validation))
6428	{
6429		have_answer = true;
6430		event = ISC_LIST_HEAD(fctx->events);
6431
6432		if (event != NULL) {
6433			adbp = &event->db;
6434			aname = dns_fixedname_name(&event->foundname);
6435			dns_name_copynf(name, aname);
6436			anodep = &event->node;
6437			/*
6438			 * If this is an ANY, SIG or RRSIG query, we're not
6439			 * going to return any rdatasets, unless we encountered
6440			 * a CNAME or DNAME as "the answer".  In this case,
6441			 * we're going to return DNS_R_CNAME or DNS_R_DNAME
6442			 * and we must set up the rdatasets.
6443			 */
6444			if ((fctx->type != dns_rdatatype_any &&
6445			     fctx->type != dns_rdatatype_rrsig &&
6446			     fctx->type != dns_rdatatype_sig) ||
6447			    (name->attributes & DNS_NAMEATTR_CHAINING) != 0)
6448			{
6449				ardataset = event->rdataset;
6450				asigrdataset = event->sigrdataset;
6451			}
6452		}
6453	}
6454
6455	/*
6456	 * Find or create the cache node.
6457	 */
6458	node = NULL;
6459	result = dns_db_findnode(fctx->cache, name, true, &node);
6460	if (result != ISC_R_SUCCESS) {
6461		return (result);
6462	}
6463
6464	/*
6465	 * Cache or validate each cacheable rdataset.
6466	 */
6467	fail = ((fctx->res->options & DNS_RESOLVER_CHECKNAMESFAIL) != 0);
6468	for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
6469	     rdataset = ISC_LIST_NEXT(rdataset, link))
6470	{
6471		if (!CACHE(rdataset)) {
6472			continue;
6473		}
6474		if (CHECKNAMES(rdataset)) {
6475			char namebuf[DNS_NAME_FORMATSIZE];
6476			char typebuf[DNS_RDATATYPE_FORMATSIZE];
6477			char classbuf[DNS_RDATATYPE_FORMATSIZE];
6478
6479			dns_name_format(name, namebuf, sizeof(namebuf));
6480			dns_rdatatype_format(rdataset->type, typebuf,
6481					     sizeof(typebuf));
6482			dns_rdataclass_format(rdataset->rdclass, classbuf,
6483					      sizeof(classbuf));
6484			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6485				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
6486				      "check-names %s %s/%s/%s",
6487				      fail ? "failure" : "warning", namebuf,
6488				      typebuf, classbuf);
6489			if (fail) {
6490				if (ANSWER(rdataset)) {
6491					dns_db_detachnode(fctx->cache, &node);
6492					return (DNS_R_BADNAME);
6493				}
6494				continue;
6495			}
6496		}
6497
6498		/*
6499		 * Enforce the configure maximum cache TTL.
6500		 */
6501		if (rdataset->ttl > res->view->maxcachettl) {
6502			rdataset->ttl = res->view->maxcachettl;
6503		}
6504
6505		/*
6506		 * Enforce configured minimum cache TTL.
6507		 */
6508		if (rdataset->ttl < res->view->mincachettl) {
6509			rdataset->ttl = res->view->mincachettl;
6510		}
6511
6512		/*
6513		 * Mark the rdataset as being prefetch eligible.
6514		 */
6515		if (rdataset->ttl >= fctx->res->view->prefetch_eligible) {
6516			rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
6517		}
6518
6519		/*
6520		 * Find the SIG for this rdataset, if we have it.
6521		 */
6522		for (sigrdataset = ISC_LIST_HEAD(name->list);
6523		     sigrdataset != NULL;
6524		     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
6525		{
6526			if (sigrdataset->type == dns_rdatatype_rrsig &&
6527			    sigrdataset->covers == rdataset->type)
6528			{
6529				break;
6530			}
6531		}
6532
6533		/*
6534		 * If this RRset is in a secure domain, is in bailiwick,
6535		 * and is not glue, attempt DNSSEC validation.	(We do not
6536		 * attempt to validate glue or out-of-bailiwick data--even
6537		 * though there might be some performance benefit to doing
6538		 * so--because it makes it simpler and safer to ensure that
6539		 * records from a secure domain are only cached if validated
6540		 * within the context of a query to the domain that owns
6541		 * them.)
6542		 */
6543		if (secure_domain && rdataset->trust != dns_trust_glue &&
6544		    !EXTERNAL(rdataset))
6545		{
6546			dns_trust_t trust;
6547
6548			/*
6549			 * RRSIGs are validated as part of validating the
6550			 * type they cover.
6551			 */
6552			if (rdataset->type == dns_rdatatype_rrsig) {
6553				continue;
6554			}
6555
6556			if (sigrdataset == NULL && need_validation &&
6557			    !ANSWER(rdataset))
6558			{
6559				/*
6560				 * Ignore unrelated non-answer
6561				 * rdatasets that are missing signatures.
6562				 */
6563				continue;
6564			}
6565
6566			/*
6567			 * Normalize the rdataset and sigrdataset TTLs.
6568			 */
6569			if (sigrdataset != NULL) {
6570				rdataset->ttl = ISC_MIN(rdataset->ttl,
6571							sigrdataset->ttl);
6572				sigrdataset->ttl = rdataset->ttl;
6573			}
6574
6575			/*
6576			 * Mark the rdataset as being prefetch eligible.
6577			 */
6578			if (rdataset->ttl >= fctx->res->view->prefetch_eligible)
6579			{
6580				rdataset->attributes |=
6581					DNS_RDATASETATTR_PREFETCH;
6582			}
6583
6584			/*
6585			 * Cache this rdataset/sigrdataset pair as
6586			 * pending data.  Track whether it was additional
6587			 * or not. If this was a priming query, additional
6588			 * should be cached as glue.
6589			 */
6590			if (rdataset->trust == dns_trust_additional) {
6591				trust = dns_trust_pending_additional;
6592			} else {
6593				trust = dns_trust_pending_answer;
6594			}
6595
6596			rdataset->trust = trust;
6597			if (sigrdataset != NULL) {
6598				sigrdataset->trust = trust;
6599			}
6600			if (!need_validation || !ANSWER(rdataset)) {
6601				options = 0;
6602				if (ANSWER(rdataset) &&
6603				    rdataset->type != dns_rdatatype_rrsig)
6604				{
6605					isc_result_t tresult;
6606					dns_name_t *noqname = NULL;
6607					tresult = findnoqname(
6608						fctx, message, name,
6609						rdataset->type, &noqname);
6610					if (tresult == ISC_R_SUCCESS &&
6611					    noqname != NULL)
6612					{
6613						(void)dns_rdataset_addnoqname(
6614							rdataset, noqname);
6615					}
6616				}
6617				if ((fctx->options & DNS_FETCHOPT_PREFETCH) !=
6618				    0)
6619				{
6620					options = DNS_DBADD_PREFETCH;
6621				}
6622				if ((fctx->options & DNS_FETCHOPT_NOCACHED) !=
6623				    0)
6624				{
6625					options |= DNS_DBADD_FORCE;
6626				}
6627				addedrdataset = ardataset;
6628				result = dns_db_addrdataset(
6629					fctx->cache, node, NULL, now, rdataset,
6630					options, addedrdataset);
6631				if (result == DNS_R_UNCHANGED) {
6632					result = ISC_R_SUCCESS;
6633					if (!need_validation &&
6634					    ardataset != NULL &&
6635					    NEGATIVE(ardataset))
6636					{
6637						/*
6638						 * The answer in the cache is
6639						 * better than the answer we
6640						 * found, and is a negative
6641						 * cache entry, so we must set
6642						 * eresult appropriately.
6643						 */
6644						if (NXDOMAIN(ardataset)) {
6645							eresult =
6646								DNS_R_NCACHENXDOMAIN;
6647						} else {
6648							eresult =
6649								DNS_R_NCACHENXRRSET;
6650						}
6651						/*
6652						 * We have a negative response
6653						 * from the cache so don't
6654						 * attempt to add the RRSIG
6655						 * rrset.
6656						 */
6657						continue;
6658					}
6659				}
6660				if (result != ISC_R_SUCCESS) {
6661					break;
6662				}
6663				if (sigrdataset != NULL) {
6664					addedrdataset = asigrdataset;
6665					result = dns_db_addrdataset(
6666						fctx->cache, node, NULL, now,
6667						sigrdataset, options,
6668						addedrdataset);
6669					if (result == DNS_R_UNCHANGED) {
6670						result = ISC_R_SUCCESS;
6671					}
6672					if (result != ISC_R_SUCCESS) {
6673						break;
6674					}
6675				} else if (!ANSWER(rdataset)) {
6676					continue;
6677				}
6678			}
6679
6680			if (ANSWER(rdataset) && need_validation) {
6681				if (fctx->type != dns_rdatatype_any &&
6682				    fctx->type != dns_rdatatype_rrsig &&
6683				    fctx->type != dns_rdatatype_sig)
6684				{
6685					/*
6686					 * This is The Answer.  We will
6687					 * validate it, but first we cache
6688					 * the rest of the response - it may
6689					 * contain useful keys.
6690					 */
6691					INSIST(valrdataset == NULL &&
6692					       valsigrdataset == NULL);
6693					valrdataset = rdataset;
6694					valsigrdataset = sigrdataset;
6695				} else {
6696					/*
6697					 * This is one of (potentially)
6698					 * multiple answers to an ANY
6699					 * or SIG query.  To keep things
6700					 * simple, we just start the
6701					 * validator right away rather
6702					 * than caching first and
6703					 * having to remember which
6704					 * rdatasets needed validation.
6705					 */
6706					result = valcreate(
6707						fctx, message, addrinfo, name,
6708						rdataset->type, rdataset,
6709						sigrdataset, valoptions, task);
6710				}
6711			} else if (CHAINING(rdataset)) {
6712				if (rdataset->type == dns_rdatatype_cname) {
6713					eresult = DNS_R_CNAME;
6714				} else {
6715					INSIST(rdataset->type ==
6716					       dns_rdatatype_dname);
6717					eresult = DNS_R_DNAME;
6718				}
6719			}
6720		} else if (!EXTERNAL(rdataset)) {
6721			/*
6722			 * It's OK to cache this rdataset now.
6723			 */
6724			if (ANSWER(rdataset)) {
6725				addedrdataset = ardataset;
6726			} else if (ANSWERSIG(rdataset)) {
6727				addedrdataset = asigrdataset;
6728			} else {
6729				addedrdataset = NULL;
6730			}
6731			if (CHAINING(rdataset)) {
6732				if (rdataset->type == dns_rdatatype_cname) {
6733					eresult = DNS_R_CNAME;
6734				} else {
6735					INSIST(rdataset->type ==
6736					       dns_rdatatype_dname);
6737					eresult = DNS_R_DNAME;
6738				}
6739			}
6740			if (rdataset->trust == dns_trust_glue &&
6741			    (rdataset->type == dns_rdatatype_ns ||
6742			     (rdataset->type == dns_rdatatype_rrsig &&
6743			      rdataset->covers == dns_rdatatype_ns)))
6744			{
6745				/*
6746				 * If the trust level is 'dns_trust_glue'
6747				 * then we are adding data from a referral
6748				 * we got while executing the search algorithm.
6749				 * New referral data always takes precedence
6750				 * over the existing cache contents.
6751				 */
6752				options = DNS_DBADD_FORCE;
6753			} else if ((fctx->options & DNS_FETCHOPT_PREFETCH) != 0)
6754			{
6755				options = DNS_DBADD_PREFETCH;
6756			} else {
6757				options = 0;
6758			}
6759
6760			if (ANSWER(rdataset) &&
6761			    rdataset->type != dns_rdatatype_rrsig)
6762			{
6763				isc_result_t tresult;
6764				dns_name_t *noqname = NULL;
6765				tresult = findnoqname(fctx, message, name,
6766						      rdataset->type, &noqname);
6767				if (tresult == ISC_R_SUCCESS && noqname != NULL)
6768				{
6769					(void)dns_rdataset_addnoqname(rdataset,
6770								      noqname);
6771				}
6772			}
6773
6774			/*
6775			 * Now we can add the rdataset.
6776			 */
6777			result = dns_db_addrdataset(fctx->cache, node, NULL,
6778						    now, rdataset, options,
6779						    addedrdataset);
6780
6781			if (result == DNS_R_UNCHANGED) {
6782				if (ANSWER(rdataset) && ardataset != NULL &&
6783				    NEGATIVE(ardataset))
6784				{
6785					/*
6786					 * The answer in the cache is better
6787					 * than the answer we found, and is
6788					 * a negative cache entry, so we
6789					 * must set eresult appropriately.
6790					 */
6791					if (NXDOMAIN(ardataset)) {
6792						eresult = DNS_R_NCACHENXDOMAIN;
6793					} else {
6794						eresult = DNS_R_NCACHENXRRSET;
6795					}
6796				}
6797				result = ISC_R_SUCCESS;
6798			} else if (result != ISC_R_SUCCESS) {
6799				break;
6800			}
6801		}
6802	}
6803
6804	if (valrdataset != NULL) {
6805		dns_rdatatype_t vtype = fctx->type;
6806		if (CHAINING(valrdataset)) {
6807			if (valrdataset->type == dns_rdatatype_cname) {
6808				vtype = dns_rdatatype_cname;
6809			} else {
6810				vtype = dns_rdatatype_dname;
6811			}
6812		}
6813
6814		result = valcreate(fctx, message, addrinfo, name, vtype,
6815				   valrdataset, valsigrdataset, valoptions,
6816				   task);
6817	}
6818
6819	if (result == ISC_R_SUCCESS && have_answer) {
6820		FCTX_ATTR_SET(fctx, FCTX_ATTR_HAVEANSWER);
6821		if (event != NULL) {
6822			/*
6823			 * Negative results must be indicated in event->result.
6824			 */
6825			if (dns_rdataset_isassociated(event->rdataset) &&
6826			    NEGATIVE(event->rdataset))
6827			{
6828				INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
6829				       eresult == DNS_R_NCACHENXRRSET);
6830			}
6831			event->result = eresult;
6832			if (adbp != NULL && *adbp != NULL) {
6833				if (anodep != NULL && *anodep != NULL) {
6834					dns_db_detachnode(*adbp, anodep);
6835				}
6836				dns_db_detach(adbp);
6837			}
6838			dns_db_attach(fctx->cache, adbp);
6839			dns_db_transfernode(fctx->cache, &node, anodep);
6840			clone_results(fctx);
6841		}
6842	}
6843
6844	if (node != NULL) {
6845		dns_db_detachnode(fctx->cache, &node);
6846	}
6847
6848	return (result);
6849}
6850
6851static isc_result_t
6852cache_message(fetchctx_t *fctx, dns_message_t *message,
6853	      dns_adbaddrinfo_t *addrinfo, isc_stdtime_t now) {
6854	isc_result_t result;
6855	dns_section_t section;
6856	dns_name_t *name;
6857
6858	FCTXTRACE("cache_message");
6859
6860	FCTX_ATTR_CLR(fctx, FCTX_ATTR_WANTCACHE);
6861
6862	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
6863
6864	for (section = DNS_SECTION_ANSWER; section <= DNS_SECTION_ADDITIONAL;
6865	     section++)
6866	{
6867		result = dns_message_firstname(message, section);
6868		while (result == ISC_R_SUCCESS) {
6869			name = NULL;
6870			dns_message_currentname(message, section, &name);
6871			if ((name->attributes & DNS_NAMEATTR_CACHE) != 0) {
6872				result = cache_name(fctx, name, message,
6873						    addrinfo, now);
6874				if (result != ISC_R_SUCCESS) {
6875					break;
6876				}
6877			}
6878			result = dns_message_nextname(message, section);
6879		}
6880		if (result != ISC_R_NOMORE) {
6881			break;
6882		}
6883	}
6884	if (result == ISC_R_NOMORE) {
6885		result = ISC_R_SUCCESS;
6886	}
6887
6888	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
6889
6890	return (result);
6891}
6892
6893/*
6894 * Do what dns_ncache_addoptout() does, and then compute an appropriate eresult.
6895 */
6896static isc_result_t
6897ncache_adderesult(dns_message_t *message, dns_db_t *cache, dns_dbnode_t *node,
6898		  dns_rdatatype_t covers, isc_stdtime_t now, dns_ttl_t minttl,
6899		  dns_ttl_t maxttl, bool optout, bool secure,
6900		  dns_rdataset_t *ardataset, isc_result_t *eresultp) {
6901	isc_result_t result;
6902	dns_rdataset_t rdataset;
6903
6904	if (ardataset == NULL) {
6905		dns_rdataset_init(&rdataset);
6906		ardataset = &rdataset;
6907	}
6908	if (secure) {
6909		result = dns_ncache_addoptout(message, cache, node, covers, now,
6910					      minttl, maxttl, optout,
6911					      ardataset);
6912	} else {
6913		result = dns_ncache_add(message, cache, node, covers, now,
6914					minttl, maxttl, ardataset);
6915	}
6916	if (result == DNS_R_UNCHANGED || result == ISC_R_SUCCESS) {
6917		/*
6918		 * If the cache now contains a negative entry and we
6919		 * care about whether it is DNS_R_NCACHENXDOMAIN or
6920		 * DNS_R_NCACHENXRRSET then extract it.
6921		 */
6922		if (NEGATIVE(ardataset)) {
6923			/*
6924			 * The cache data is a negative cache entry.
6925			 */
6926			if (NXDOMAIN(ardataset)) {
6927				*eresultp = DNS_R_NCACHENXDOMAIN;
6928			} else {
6929				*eresultp = DNS_R_NCACHENXRRSET;
6930			}
6931		} else {
6932			/*
6933			 * Either we don't care about the nature of the
6934			 * cache rdataset (because no fetch is interested
6935			 * in the outcome), or the cache rdataset is not
6936			 * a negative cache entry.  Whichever case it is,
6937			 * we can return success.
6938			 *
6939			 * XXXRTH  There's a CNAME/DNAME problem here.
6940			 */
6941			*eresultp = ISC_R_SUCCESS;
6942		}
6943		result = ISC_R_SUCCESS;
6944	}
6945	if (ardataset == &rdataset && dns_rdataset_isassociated(ardataset)) {
6946		dns_rdataset_disassociate(ardataset);
6947	}
6948
6949	return (result);
6950}
6951
6952static isc_result_t
6953ncache_message(fetchctx_t *fctx, dns_message_t *message,
6954	       dns_adbaddrinfo_t *addrinfo, dns_rdatatype_t covers,
6955	       isc_stdtime_t now) {
6956	isc_result_t result, eresult;
6957	dns_name_t *name;
6958	dns_resolver_t *res;
6959	dns_db_t **adbp;
6960	dns_dbnode_t *node, **anodep;
6961	dns_rdataset_t *ardataset;
6962	bool need_validation, secure_domain;
6963	dns_name_t *aname;
6964	dns_fetchevent_t *event;
6965	uint32_t ttl;
6966	unsigned int valoptions = 0;
6967	bool checknta = true;
6968
6969	FCTXTRACE("ncache_message");
6970
6971	FCTX_ATTR_CLR(fctx, FCTX_ATTR_WANTNCACHE);
6972
6973	res = fctx->res;
6974	need_validation = false;
6975	POST(need_validation);
6976	secure_domain = false;
6977	eresult = ISC_R_SUCCESS;
6978	name = &fctx->name;
6979	node = NULL;
6980
6981	/*
6982	 * XXXMPA remove when we follow cnames and adjust the setting
6983	 * of FCTX_ATTR_WANTNCACHE in rctx_answer_none().
6984	 */
6985	INSIST(message->counts[DNS_SECTION_ANSWER] == 0);
6986
6987	/*
6988	 * Is DNSSEC validation required for this name?
6989	 */
6990	if ((fctx->options & DNS_FETCHOPT_NONTA) != 0) {
6991		valoptions |= DNS_VALIDATOR_NONTA;
6992		checknta = false;
6993	}
6994
6995	if (fctx->res->view->enablevalidation) {
6996		result = issecuredomain(res->view, name, fctx->type, now,
6997					checknta, NULL, &secure_domain);
6998		if (result != ISC_R_SUCCESS) {
6999			return (result);
7000		}
7001	}
7002
7003	if ((fctx->options & DNS_FETCHOPT_NOCDFLAG) != 0) {
7004		valoptions |= DNS_VALIDATOR_NOCDFLAG;
7005	}
7006
7007	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
7008		need_validation = false;
7009	} else {
7010		need_validation = secure_domain;
7011	}
7012
7013	if (secure_domain) {
7014		/*
7015		 * Mark all rdatasets as pending.
7016		 */
7017		dns_rdataset_t *trdataset;
7018		dns_name_t *tname;
7019
7020		result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
7021		while (result == ISC_R_SUCCESS) {
7022			tname = NULL;
7023			dns_message_currentname(message, DNS_SECTION_AUTHORITY,
7024						&tname);
7025			for (trdataset = ISC_LIST_HEAD(tname->list);
7026			     trdataset != NULL;
7027			     trdataset = ISC_LIST_NEXT(trdataset, link))
7028			{
7029				trdataset->trust = dns_trust_pending_answer;
7030			}
7031			result = dns_message_nextname(message,
7032						      DNS_SECTION_AUTHORITY);
7033		}
7034		if (result != ISC_R_NOMORE) {
7035			return (result);
7036		}
7037	}
7038
7039	if (need_validation) {
7040		/*
7041		 * Do negative response validation.
7042		 */
7043		result = valcreate(fctx, message, addrinfo, name, fctx->type,
7044				   NULL, NULL, valoptions,
7045				   res->buckets[fctx->bucketnum].task);
7046		/*
7047		 * If validation is necessary, return now.  Otherwise continue
7048		 * to process the message, letting the validation complete
7049		 * in its own good time.
7050		 */
7051		return (result);
7052	}
7053
7054	LOCK(&res->buckets[fctx->bucketnum].lock);
7055
7056	adbp = NULL;
7057	aname = NULL;
7058	anodep = NULL;
7059	ardataset = NULL;
7060	if (!HAVE_ANSWER(fctx)) {
7061		event = ISC_LIST_HEAD(fctx->events);
7062		if (event != NULL) {
7063			adbp = &event->db;
7064			aname = dns_fixedname_name(&event->foundname);
7065			dns_name_copynf(name, aname);
7066			anodep = &event->node;
7067			ardataset = event->rdataset;
7068		}
7069	} else {
7070		event = NULL;
7071	}
7072
7073	result = dns_db_findnode(fctx->cache, name, true, &node);
7074	if (result != ISC_R_SUCCESS) {
7075		goto unlock;
7076	}
7077
7078	/*
7079	 * If we are asking for a SOA record set the cache time
7080	 * to zero to facilitate locating the containing zone of
7081	 * a arbitrary zone.
7082	 */
7083	ttl = fctx->res->view->maxncachettl;
7084	if (fctx->type == dns_rdatatype_soa && covers == dns_rdatatype_any &&
7085	    fctx->res->zero_no_soa_ttl)
7086	{
7087		ttl = 0;
7088	}
7089
7090	result = ncache_adderesult(message, fctx->cache, node, covers, now,
7091				   fctx->res->view->minncachettl, ttl, false,
7092				   false, ardataset, &eresult);
7093	if (result != ISC_R_SUCCESS) {
7094		goto unlock;
7095	}
7096
7097	if (!HAVE_ANSWER(fctx)) {
7098		FCTX_ATTR_SET(fctx, FCTX_ATTR_HAVEANSWER);
7099		if (event != NULL) {
7100			event->result = eresult;
7101			if (adbp != NULL && *adbp != NULL) {
7102				if (anodep != NULL && *anodep != NULL) {
7103					dns_db_detachnode(*adbp, anodep);
7104				}
7105				dns_db_detach(adbp);
7106			}
7107			dns_db_attach(fctx->cache, adbp);
7108			dns_db_transfernode(fctx->cache, &node, anodep);
7109			clone_results(fctx);
7110		}
7111	}
7112
7113unlock:
7114	UNLOCK(&res->buckets[fctx->bucketnum].lock);
7115
7116	if (node != NULL) {
7117		dns_db_detachnode(fctx->cache, &node);
7118	}
7119
7120	return (result);
7121}
7122
7123static void
7124mark_related(dns_name_t *name, dns_rdataset_t *rdataset, bool external,
7125	     bool gluing) {
7126	name->attributes |= DNS_NAMEATTR_CACHE;
7127	if (gluing) {
7128		rdataset->trust = dns_trust_glue;
7129		/*
7130		 * Glue with 0 TTL causes problems.  We force the TTL to
7131		 * 1 second to prevent this.
7132		 */
7133		if (rdataset->ttl == 0) {
7134			rdataset->ttl = 1;
7135		}
7136	} else {
7137		rdataset->trust = dns_trust_additional;
7138	}
7139	/*
7140	 * Avoid infinite loops by only marking new rdatasets.
7141	 */
7142	if (!CACHE(rdataset)) {
7143		name->attributes |= DNS_NAMEATTR_CHASE;
7144		rdataset->attributes |= DNS_RDATASETATTR_CHASE;
7145	}
7146	rdataset->attributes |= DNS_RDATASETATTR_CACHE;
7147	if (external) {
7148		rdataset->attributes |= DNS_RDATASETATTR_EXTERNAL;
7149	}
7150}
7151
7152/*
7153 * Returns true if 'name' is external to the namespace for which
7154 * the server being queried can answer, either because it's not a
7155 * subdomain or because it's below a forward declaration or a
7156 * locally served zone.
7157 */
7158static bool
7159name_external(const dns_name_t *name, dns_rdatatype_t type, fetchctx_t *fctx) {
7160	isc_result_t result;
7161	dns_forwarders_t *forwarders = NULL;
7162	dns_fixedname_t fixed, zfixed;
7163	dns_name_t *fname = dns_fixedname_initname(&fixed);
7164	dns_name_t *zfname = dns_fixedname_initname(&zfixed);
7165	dns_name_t *apex = NULL;
7166	dns_name_t suffix;
7167	dns_zone_t *zone = NULL;
7168	unsigned int labels;
7169	dns_namereln_t rel;
7170
7171	apex = (ISDUALSTACK(fctx->addrinfo) || !ISFORWARDER(fctx->addrinfo))
7172		       ? &fctx->domain
7173		       : fctx->fwdname;
7174
7175	/*
7176	 * The name is outside the queried namespace.
7177	 */
7178	rel = dns_name_fullcompare(name, apex, &(int){ 0 },
7179				   &(unsigned int){ 0U });
7180	if (rel != dns_namereln_subdomain && rel != dns_namereln_equal) {
7181		return (true);
7182	}
7183
7184	/*
7185	 * If the record lives in the parent zone, adjust the name so we
7186	 * look for the correct zone or forward clause.
7187	 */
7188	labels = dns_name_countlabels(name);
7189	if (dns_rdatatype_atparent(type) && labels > 1U) {
7190		dns_name_init(&suffix, NULL);
7191		dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
7192		name = &suffix;
7193	} else if (rel == dns_namereln_equal) {
7194		/* If 'name' is 'apex', no further checking is needed. */
7195		return (false);
7196	}
7197
7198	/*
7199	 * If there is a locally served zone between 'apex' and 'name'
7200	 * then don't cache.
7201	 */
7202	LOCK(&fctx->res->view->lock);
7203	if (fctx->res->view->zonetable != NULL) {
7204		unsigned int options = DNS_ZTFIND_NOEXACT | DNS_ZTFIND_MIRROR;
7205		result = dns_zt_find(fctx->res->view->zonetable, name, options,
7206				     zfname, &zone);
7207		if (zone != NULL) {
7208			dns_zone_detach(&zone);
7209		}
7210		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7211			if (dns_name_fullcompare(zfname, apex, &(int){ 0 },
7212						 &(unsigned int){ 0U }) ==
7213			    dns_namereln_subdomain)
7214			{
7215				UNLOCK(&fctx->res->view->lock);
7216				return (true);
7217			}
7218		}
7219	}
7220	UNLOCK(&fctx->res->view->lock);
7221
7222	/*
7223	 * Look for a forward declaration below 'name'.
7224	 */
7225	result = dns_fwdtable_find(fctx->res->view->fwdtable, name, fname,
7226				   &forwarders);
7227
7228	if (ISFORWARDER(fctx->addrinfo)) {
7229		/*
7230		 * See if the forwarder declaration is better.
7231		 */
7232		if (result == ISC_R_SUCCESS) {
7233			return (!dns_name_equal(fname, fctx->fwdname));
7234		}
7235
7236		/*
7237		 * If the lookup failed, the configuration must have
7238		 * changed: play it safe and don't cache.
7239		 */
7240		return (true);
7241	} else if (result == ISC_R_SUCCESS &&
7242		   forwarders->fwdpolicy == dns_fwdpolicy_only &&
7243		   !ISC_LIST_EMPTY(forwarders->fwdrs))
7244	{
7245		/*
7246		 * If 'name' is covered by a 'forward only' clause then we
7247		 * can't cache this repsonse.
7248		 */
7249		return (true);
7250	}
7251
7252	return (false);
7253}
7254
7255static isc_result_t
7256check_section(void *arg, const dns_name_t *addname, dns_rdatatype_t type,
7257	      dns_section_t section) {
7258	respctx_t *rctx = arg;
7259	fetchctx_t *fctx = rctx->fctx;
7260	isc_result_t result;
7261	dns_name_t *name = NULL;
7262	dns_rdataset_t *rdataset = NULL;
7263	bool external;
7264	dns_rdatatype_t rtype;
7265	bool gluing;
7266
7267	REQUIRE(VALID_FCTX(fctx));
7268
7269#if CHECK_FOR_GLUE_IN_ANSWER
7270	if (section == DNS_SECTION_ANSWER && type != dns_rdatatype_a) {
7271		return (ISC_R_SUCCESS);
7272	}
7273#endif /* if CHECK_FOR_GLUE_IN_ANSWER */
7274
7275	gluing = (GLUING(fctx) || (fctx->type == dns_rdatatype_ns &&
7276				   dns_name_equal(&fctx->name, dns_rootname)));
7277
7278	result = dns_message_findname(rctx->query->rmessage, section, addname,
7279				      dns_rdatatype_any, 0, &name, NULL);
7280	if (result == ISC_R_SUCCESS) {
7281		external = name_external(name, type, fctx);
7282		if (type == dns_rdatatype_a) {
7283			for (rdataset = ISC_LIST_HEAD(name->list);
7284			     rdataset != NULL;
7285			     rdataset = ISC_LIST_NEXT(rdataset, link))
7286			{
7287				if (rdataset->type == dns_rdatatype_rrsig) {
7288					rtype = rdataset->covers;
7289				} else {
7290					rtype = rdataset->type;
7291				}
7292				if (rtype == dns_rdatatype_a ||
7293				    rtype == dns_rdatatype_aaaa)
7294				{
7295					mark_related(name, rdataset, external,
7296						     gluing);
7297				}
7298			}
7299		} else {
7300			result = dns_message_findtype(name, type, 0, &rdataset);
7301			if (result == ISC_R_SUCCESS) {
7302				mark_related(name, rdataset, external, gluing);
7303				/*
7304				 * Do we have its SIG too?
7305				 */
7306				rdataset = NULL;
7307				result = dns_message_findtype(
7308					name, dns_rdatatype_rrsig, type,
7309					&rdataset);
7310				if (result == ISC_R_SUCCESS) {
7311					mark_related(name, rdataset, external,
7312						     gluing);
7313				}
7314			}
7315		}
7316	}
7317
7318	return (ISC_R_SUCCESS);
7319}
7320
7321static isc_result_t
7322check_related(void *arg, const dns_name_t *addname, dns_rdatatype_t type) {
7323	return (check_section(arg, addname, type, DNS_SECTION_ADDITIONAL));
7324}
7325
/*
 * Checking the answer section for glue is off unless
 * CHECK_FOR_GLUE_IN_ANSWER has already been defined.
 */
#ifndef CHECK_FOR_GLUE_IN_ANSWER
#define CHECK_FOR_GLUE_IN_ANSWER 0
#endif /* ifndef CHECK_FOR_GLUE_IN_ANSWER */

#if CHECK_FOR_GLUE_IN_ANSWER
/*
 * Adapter around check_section() with the section fixed to
 * DNS_SECTION_ANSWER.
 */
static isc_result_t
check_answer(void *arg, const dns_name_t *addname, dns_rdatatype_t type) {
	const dns_section_t section = DNS_SECTION_ANSWER;

	return (check_section(arg, addname, type, section));
}
#endif /* if CHECK_FOR_GLUE_IN_ANSWER */
7336
7337static bool
7338is_answeraddress_allowed(dns_view_t *view, dns_name_t *name,
7339			 dns_rdataset_t *rdataset) {
7340	isc_result_t result;
7341	dns_rdata_t rdata = DNS_RDATA_INIT;
7342	struct in_addr ina;
7343	struct in6_addr in6a;
7344	isc_netaddr_t netaddr;
7345	char addrbuf[ISC_NETADDR_FORMATSIZE];
7346	char namebuf[DNS_NAME_FORMATSIZE];
7347	char classbuf[64];
7348	char typebuf[64];
7349	int match;
7350
7351	/* By default, we allow any addresses. */
7352	if (view->denyansweracl == NULL) {
7353		return (true);
7354	}
7355
7356	/*
7357	 * If the owner name matches one in the exclusion list, either exactly
7358	 * or partially, allow it.
7359	 */
7360	if (view->answeracl_exclude != NULL) {
7361		dns_rbtnode_t *node = NULL;
7362
7363		result = dns_rbt_findnode(view->answeracl_exclude, name, NULL,
7364					  &node, NULL, 0, NULL, NULL);
7365
7366		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7367			return (true);
7368		}
7369	}
7370
7371	/*
7372	 * Otherwise, search the filter list for a match for each address
7373	 * record.  If a match is found, the address should be filtered,
7374	 * so should the entire answer.
7375	 */
7376	for (result = dns_rdataset_first(rdataset); result == ISC_R_SUCCESS;
7377	     result = dns_rdataset_next(rdataset))
7378	{
7379		dns_rdata_reset(&rdata);
7380		dns_rdataset_current(rdataset, &rdata);
7381		if (rdataset->type == dns_rdatatype_a) {
7382			INSIST(rdata.length == sizeof(ina.s_addr));
7383			memmove(&ina.s_addr, rdata.data, sizeof(ina.s_addr));
7384			isc_netaddr_fromin(&netaddr, &ina);
7385		} else {
7386			INSIST(rdata.length == sizeof(in6a.s6_addr));
7387			memmove(in6a.s6_addr, rdata.data, sizeof(in6a.s6_addr));
7388			isc_netaddr_fromin6(&netaddr, &in6a);
7389		}
7390
7391		result = dns_acl_match(&netaddr, NULL, view->denyansweracl,
7392				       &view->aclenv, &match, NULL);
7393		if (result == ISC_R_SUCCESS && match > 0) {
7394			isc_netaddr_format(&netaddr, addrbuf, sizeof(addrbuf));
7395			dns_name_format(name, namebuf, sizeof(namebuf));
7396			dns_rdatatype_format(rdataset->type, typebuf,
7397					     sizeof(typebuf));
7398			dns_rdataclass_format(rdataset->rdclass, classbuf,
7399					      sizeof(classbuf));
7400			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7401				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
7402				      "answer address %s denied for %s/%s/%s",
7403				      addrbuf, namebuf, typebuf, classbuf);
7404			return (false);
7405		}
7406	}
7407
7408	return (true);
7409}
7410
7411static bool
7412is_answertarget_allowed(fetchctx_t *fctx, dns_name_t *qname, dns_name_t *rname,
7413			dns_rdataset_t *rdataset, bool *chainingp) {
7414	isc_result_t result;
7415	dns_rbtnode_t *node = NULL;
7416	char qnamebuf[DNS_NAME_FORMATSIZE];
7417	char tnamebuf[DNS_NAME_FORMATSIZE];
7418	char classbuf[64];
7419	char typebuf[64];
7420	dns_name_t *tname = NULL;
7421	dns_rdata_cname_t cname;
7422	dns_rdata_dname_t dname;
7423	dns_view_t *view = fctx->res->view;
7424	dns_rdata_t rdata = DNS_RDATA_INIT;
7425	unsigned int nlabels;
7426	dns_fixedname_t fixed;
7427	dns_name_t prefix;
7428	int order;
7429
7430	REQUIRE(rdataset != NULL);
7431	REQUIRE(rdataset->type == dns_rdatatype_cname ||
7432		rdataset->type == dns_rdatatype_dname);
7433
7434	/*
7435	 * By default, we allow any target name.
7436	 * If newqname != NULL we also need to extract the newqname.
7437	 */
7438	if (chainingp == NULL && view->denyanswernames == NULL) {
7439		return (true);
7440	}
7441
7442	result = dns_rdataset_first(rdataset);
7443	RUNTIME_CHECK(result == ISC_R_SUCCESS);
7444	dns_rdataset_current(rdataset, &rdata);
7445	switch (rdataset->type) {
7446	case dns_rdatatype_cname:
7447		result = dns_rdata_tostruct(&rdata, &cname, NULL);
7448		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7449		tname = &cname.cname;
7450		break;
7451	case dns_rdatatype_dname:
7452		if (dns_name_fullcompare(qname, rname, &order, &nlabels) !=
7453		    dns_namereln_subdomain)
7454		{
7455			return (true);
7456		}
7457		result = dns_rdata_tostruct(&rdata, &dname, NULL);
7458		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7459		dns_name_init(&prefix, NULL);
7460		tname = dns_fixedname_initname(&fixed);
7461		nlabels = dns_name_countlabels(rname);
7462		dns_name_split(qname, nlabels, &prefix, NULL);
7463		result = dns_name_concatenate(&prefix, &dname.dname, tname,
7464					      NULL);
7465		if (result == DNS_R_NAMETOOLONG) {
7466			if (chainingp != NULL) {
7467				*chainingp = true;
7468			}
7469			return (true);
7470		}
7471		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7472		break;
7473	default:
7474		UNREACHABLE();
7475	}
7476
7477	if (chainingp != NULL) {
7478		*chainingp = true;
7479	}
7480
7481	if (view->denyanswernames == NULL) {
7482		return (true);
7483	}
7484
7485	/*
7486	 * If the owner name matches one in the exclusion list, either exactly
7487	 * or partially, allow it.
7488	 */
7489	if (view->answernames_exclude != NULL) {
7490		result = dns_rbt_findnode(view->answernames_exclude, qname,
7491					  NULL, &node, NULL, 0, NULL, NULL);
7492		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7493			return (true);
7494		}
7495	}
7496
7497	/*
7498	 * If the target name is a subdomain of the search domain, allow it.
7499	 *
7500	 * Note that if BIND is configured as a forwarding DNS server, the
7501	 * search domain will always match the root domain ("."), so we
7502	 * must also check whether forwarding is enabled so that filters
7503	 * can be applied; see GL #1574.
7504	 */
7505	if (!fctx->forwarding && dns_name_issubdomain(tname, &fctx->domain)) {
7506		return (true);
7507	}
7508
7509	/*
7510	 * Otherwise, apply filters.
7511	 */
7512	result = dns_rbt_findnode(view->denyanswernames, tname, NULL, &node,
7513				  NULL, 0, NULL, NULL);
7514	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7515		dns_name_format(qname, qnamebuf, sizeof(qnamebuf));
7516		dns_name_format(tname, tnamebuf, sizeof(tnamebuf));
7517		dns_rdatatype_format(rdataset->type, typebuf, sizeof(typebuf));
7518		dns_rdataclass_format(view->rdclass, classbuf,
7519				      sizeof(classbuf));
7520		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7521			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
7522			      "%s target %s denied for %s/%s", typebuf,
7523			      tnamebuf, qnamebuf, classbuf);
7524		return (false);
7525	}
7526
7527	return (true);
7528}
7529
7530static void
7531trim_ns_ttl(fetchctx_t *fctx, dns_name_t *name, dns_rdataset_t *rdataset) {
7532	char ns_namebuf[DNS_NAME_FORMATSIZE];
7533	char namebuf[DNS_NAME_FORMATSIZE];
7534	char tbuf[DNS_RDATATYPE_FORMATSIZE];
7535
7536	if (fctx->ns_ttl_ok && rdataset->ttl > fctx->ns_ttl) {
7537		dns_name_format(name, ns_namebuf, sizeof(ns_namebuf));
7538		dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
7539		dns_rdatatype_format(fctx->type, tbuf, sizeof(tbuf));
7540
7541		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7542			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
7543			      "fctx %p: trimming ttl of %s/NS for %s/%s: "
7544			      "%u -> %u",
7545			      fctx, ns_namebuf, namebuf, tbuf, rdataset->ttl,
7546			      fctx->ns_ttl);
7547		rdataset->ttl = fctx->ns_ttl;
7548	}
7549}
7550
7551static bool
7552validinanswer(dns_rdataset_t *rdataset, fetchctx_t *fctx) {
7553	if (rdataset->type == dns_rdatatype_nsec3) {
7554		/*
7555		 * NSEC3 records are not allowed to
7556		 * appear in the answer section.
7557		 */
7558		log_formerr(fctx, "NSEC3 in answer");
7559		return (false);
7560	}
7561	if (rdataset->type == dns_rdatatype_tkey) {
7562		/*
7563		 * TKEY is not a valid record in a
7564		 * response to any query we can make.
7565		 */
7566		log_formerr(fctx, "TKEY in answer");
7567		return (false);
7568	}
7569	if (rdataset->rdclass != fctx->res->rdclass) {
7570		log_formerr(fctx, "Mismatched class in answer");
7571		return (false);
7572	}
7573	return (true);
7574}
7575
/*
 * Take one additional reference on 'fctx'.
 *
 * 'fctx' must be a valid fetch context.  The counter is bumped with
 * isc_refcount_increment0(); presumably that variant is used because the
 * count may be zero at this point -- TODO confirm against the
 * isc_refcount API.
 */
static void
fctx_increference(fetchctx_t *fctx) {
	REQUIRE(VALID_FCTX(fctx));

	isc_refcount_increment0(&fctx->references);
}
7582
7583/*
7584 * Requires bucket lock to be held.
7585 */
7586static bool
7587fctx_decreference(fetchctx_t *fctx) {
7588	bool bucket_empty = false;
7589
7590	REQUIRE(VALID_FCTX(fctx));
7591
7592	if (isc_refcount_decrement(&fctx->references) == 1) {
7593		/*
7594		 * No one cares about the result of this fetch anymore.
7595		 */
7596		if (fctx->pending == 0 && fctx->nqueries == 0 &&
7597		    ISC_LIST_EMPTY(fctx->validators) && SHUTTINGDOWN(fctx))
7598		{
7599			/*
7600			 * This fctx is already shutdown; we were just
7601			 * waiting for the last reference to go away.
7602			 */
7603			bucket_empty = fctx_unlink(fctx);
7604			fctx_destroy(fctx);
7605		} else {
7606			/*
7607			 * Initiate shutdown.
7608			 */
7609			fctx_shutdown(fctx);
7610		}
7611	}
7612	return (bucket_empty);
7613}
7614
/*
 * Fetch-done event handler for the NS fetch stored in fctx->nsfetch.
 * 'event' is a DNS_EVENT_FETCHDONE event whose ev_arg is the fetch context;
 * 'task' is only used when a follow-up fetch is created.
 *
 * Depending on fevent->result:
 *  - ISC_R_CANCELED: the fetch context is finished with ISC_R_CANCELED.
 *  - ISC_R_SUCCESS: the fetched NS rdataset becomes fctx->nameservers,
 *    fctx->domain is replaced by a copy of fctx->nsname, and the fetch is
 *    retried through fctx_try().
 *  - anything else: if fctx->nsname already equals the failed fetch's
 *    domain the fetch context is finished with DNS_R_SERVFAIL; otherwise
 *    fctx->nsname is shortened and another NS fetch, handled by this same
 *    function, is started.
 *
 * On every path the event is freed (and 'fevent' cleared) before the fetch
 * context is resumed, and one reference on 'fctx' is dropped under the
 * bucket lock at 'cleanup'.  A fresh reference is taken first when a new
 * NS fetch was created successfully.
 */
static void
resume_dslookup(isc_task_t *task, isc_event_t *event) {
	dns_fetchevent_t *fevent;
	dns_resolver_t *res;
	fetchctx_t *fctx;
	isc_result_t result;
	uint32_t bucketnum;
	bool bucket_empty;
	dns_rdataset_t nameservers;
	dns_fixedname_t fixed;
	dns_name_t *domain;

	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
	fevent = (dns_fetchevent_t *)event;
	fctx = event->ev_arg;
	REQUIRE(VALID_FCTX(fctx));
	/*
	 * Remember the resolver and bucket number now; they are needed at
	 * 'cleanup' to take the bucket lock.
	 */
	res = fctx->res;
	bucketnum = fctx->bucketnum;

	UNUSED(task);
	FCTXTRACE("resume_dslookup");

	/*
	 * The database node and database handed back with the event are
	 * not used here; release them right away.
	 */
	if (fevent->node != NULL) {
		dns_db_detachnode(fevent->db, &fevent->node);
	}
	if (fevent->db != NULL) {
		dns_db_detach(&fevent->db);
	}

	/*
	 * Local scratch rdataset; only associated in the failure branch
	 * below and always disassociated at 'cleanup'.
	 */
	dns_rdataset_init(&nameservers);

	/*
	 * Note: fevent->rdataset must be disassociated and
	 * isc_event_free(&event) be called before resuming
	 * processing of the 'fctx' to prevent use-after-free.
	 * 'fevent' is set to NULL so as to not have a dangling
	 * pointer.
	 */
	if (fevent->result == ISC_R_CANCELED) {
		if (dns_rdataset_isassociated(fevent->rdataset)) {
			dns_rdataset_disassociate(fevent->rdataset);
		}
		fevent = NULL;
		isc_event_free(&event);

		dns_resolver_destroyfetch(&fctx->nsfetch);
		fctx_done(fctx, ISC_R_CANCELED, __LINE__);
	} else if (fevent->result == ISC_R_SUCCESS) {
		FCTXTRACE("resuming DS lookup");

		dns_resolver_destroyfetch(&fctx->nsfetch);
		/*
		 * Replace the fetch context's nameserver set with the NS
		 * rdataset just fetched and record its TTL.
		 */
		if (dns_rdataset_isassociated(&fctx->nameservers)) {
			dns_rdataset_disassociate(&fctx->nameservers);
		}
		dns_rdataset_clone(fevent->rdataset, &fctx->nameservers);
		fctx->ns_ttl = fctx->nameservers.ttl;
		fctx->ns_ttl_ok = true;
		log_ns_ttl(fctx, "resume_dslookup");

		if (dns_rdataset_isassociated(fevent->rdataset)) {
			dns_rdataset_disassociate(fevent->rdataset);
		}
		fevent = NULL;
		isc_event_free(&event);

		/*
		 * Switch fctx->domain over to fctx->nsname.  The
		 * fcount_decr()/fcount_incr() pair brackets the change;
		 * presumably the counter is keyed on the domain name --
		 * TODO confirm against fcount_incr().
		 */
		fcount_decr(fctx);
		dns_name_free(&fctx->domain, fctx->mctx);
		dns_name_init(&fctx->domain, NULL);
		dns_name_dup(&fctx->nsname, fctx->mctx, &fctx->domain);
		result = fcount_incr(fctx, true);
		if (result != ISC_R_SUCCESS) {
			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
			goto cleanup;
		}
		/*
		 * Try again.
		 */
		fctx_try(fctx, true, false);
	} else {
		unsigned int n;
		dns_rdataset_t *nsrdataset = NULL;

		/*
		 * Retrieve state from fctx->nsfetch before we destroy it.
		 */
		domain = dns_fixedname_initname(&fixed);
		dns_name_copynf(&fctx->nsfetch->private->domain, domain);
		/*
		 * If the name we asked about is already the failed fetch's
		 * domain, give up with SERVFAIL instead of starting another
		 * fetch.
		 */
		if (dns_name_equal(&fctx->nsname, domain)) {
			if (dns_rdataset_isassociated(fevent->rdataset)) {
				dns_rdataset_disassociate(fevent->rdataset);
			}
			fevent = NULL;
			isc_event_free(&event);

			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
			dns_resolver_destroyfetch(&fctx->nsfetch);
			goto cleanup;
		}
		/*
		 * Carry the failed fetch's nameservers over to the next
		 * fetch when it has any; otherwise pass neither a domain
		 * nor nameservers.
		 */
		if (dns_rdataset_isassociated(
			    &fctx->nsfetch->private->nameservers))
		{
			dns_rdataset_clone(&fctx->nsfetch->private->nameservers,
					   &nameservers);
			nsrdataset = &nameservers;
		} else {
			domain = NULL;
		}
		dns_resolver_destroyfetch(&fctx->nsfetch);
		/*
		 * Keep labels 1..n-1 of fctx->nsname, i.e. drop its first
		 * label (presumably yielding the parent name, as the trace
		 * message below suggests).
		 */
		n = dns_name_countlabels(&fctx->nsname);
		dns_name_getlabelsequence(&fctx->nsname, 1, n - 1,
					  &fctx->nsname);

		if (dns_rdataset_isassociated(fevent->rdataset)) {
			dns_rdataset_disassociate(fevent->rdataset);
		}
		fevent = NULL;
		isc_event_free(&event);

		FCTXTRACE("continuing to look for parent's NS records");

		result = dns_resolver_createfetch(
			fctx->res, &fctx->nsname, dns_rdatatype_ns, domain,
			nsrdataset, NULL, NULL, 0, fctx->options, 0, NULL, task,
			resume_dslookup, fctx, &fctx->nsrrset, NULL,
			&fctx->nsfetch);
		/*
		 * fevent->rdataset (a.k.a. fctx->nsrrset) must not be
		 * accessed below this point to prevent races with
		 * another thread concurrently processing the fetch.
		 */
		if (result != ISC_R_SUCCESS) {
			if (result == DNS_R_DUPLICATE) {
				result = DNS_R_SERVFAIL;
			}
			fctx_done(fctx, result, __LINE__);
		} else {
			/*
			 * The new fetch will call back into this function,
			 * which drops a reference at 'cleanup'; take one
			 * now to balance that.
			 */
			fctx_increference(fctx);
		}
	}

cleanup:
	/*
	 * Every branch above must have freed the event by now.
	 */
	INSIST(event == NULL);
	INSIST(fevent == NULL);
	if (dns_rdataset_isassociated(&nameservers)) {
		dns_rdataset_disassociate(&nameservers);
	}
	LOCK(&res->buckets[bucketnum].lock);
	bucket_empty = fctx_decreference(fctx);
	UNLOCK(&res->buckets[bucketnum].lock);
	if (bucket_empty) {
		empty_bucket(res);
	}
}
7768
7769static void
7770checknamessection(dns_message_t *message, dns_section_t section) {
7771	isc_result_t result;
7772	dns_name_t *name;
7773	dns_rdata_t rdata = DNS_RDATA_INIT;
7774	dns_rdataset_t *rdataset;
7775
7776	for (result = dns_message_firstname(message, section);
7777	     result == ISC_R_SUCCESS;
7778	     result = dns_message_nextname(message, section))
7779	{
7780		name = NULL;
7781		dns_message_currentname(message, section, &name);
7782		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
7783		     rdataset = ISC_LIST_NEXT(rdataset, link))
7784		{
7785			for (result = dns_rdataset_first(rdataset);
7786			     result == ISC_R_SUCCESS;
7787			     result = dns_rdataset_next(rdataset))
7788			{
7789				dns_rdataset_current(rdataset, &rdata);
7790				if (!dns_rdata_checkowner(name, rdata.rdclass,
7791							  rdata.type, false) ||
7792				    !dns_rdata_checknames(&rdata, name, NULL))
7793				{
7794					rdataset->attributes |=
7795						DNS_RDATASETATTR_CHECKNAMES;
7796				}
7797				dns_rdata_reset(&rdata);
7798			}
7799		}
7800	}
7801}
7802
/*
 * Run checknamessection() over the answer, authority and additional
 * sections of 'message', in that order.
 */
static void
checknames(dns_message_t *message) {
	checknamessection(message, DNS_SECTION_ANSWER);
	checknamessection(message, DNS_SECTION_AUTHORITY);
	checknamessection(message, DNS_SECTION_ADDITIONAL);
}
7809
7810/*
7811 * Log server NSID at log level 'level'
7812 */
7813static void
7814log_nsid(isc_buffer_t *opt, size_t nsid_len, resquery_t *query, int level,
7815	 isc_mem_t *mctx) {
7816	static const char hex[17] = "0123456789abcdef";
7817	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
7818	size_t buflen;
7819	unsigned char *p, *nsid;
7820	unsigned char *buf = NULL, *pbuf = NULL;
7821
7822	REQUIRE(nsid_len <= UINT16_MAX);
7823
7824	/* Allocate buffer for storing hex version of the NSID */
7825	buflen = nsid_len * 2 + 1;
7826	buf = isc_mem_get(mctx, buflen);
7827	pbuf = isc_mem_get(mctx, nsid_len + 1);
7828
7829	/* Convert to hex */
7830	p = buf;
7831	nsid = isc_buffer_current(opt);
7832	for (size_t i = 0; i < nsid_len; i++) {
7833		*p++ = hex[(nsid[i] >> 4) & 0xf];
7834		*p++ = hex[nsid[i] & 0xf];
7835	}
7836	*p = '\0';
7837
7838	/* Make printable version */
7839	p = pbuf;
7840	for (size_t i = 0; i < nsid_len; i++) {
7841		*p++ = isprint(nsid[i]) ? nsid[i] : '.';
7842	}
7843	*p = '\0';
7844
7845	isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
7846			    sizeof(addrbuf));
7847	isc_log_write(dns_lctx, DNS_LOGCATEGORY_NSID, DNS_LOGMODULE_RESOLVER,
7848		      level, "received NSID %s (\"%s\") from %s", buf, pbuf,
7849		      addrbuf);
7850
7851	isc_mem_put(mctx, pbuf, nsid_len + 1);
7852	isc_mem_put(mctx, buf, buflen);
7853}
7854
7855static bool
7856iscname(dns_message_t *message, dns_name_t *name) {
7857	isc_result_t result;
7858
7859	result = dns_message_findname(message, DNS_SECTION_ANSWER, name,
7860				      dns_rdatatype_cname, 0, NULL, NULL);
7861	return (result == ISC_R_SUCCESS ? true : false);
7862}
7863
7864static bool
7865betterreferral(respctx_t *rctx) {
7866	isc_result_t result;
7867	dns_name_t *name;
7868	dns_rdataset_t *rdataset;
7869
7870	for (result = dns_message_firstname(rctx->query->rmessage,
7871					    DNS_SECTION_AUTHORITY);
7872	     result == ISC_R_SUCCESS;
7873	     result = dns_message_nextname(rctx->query->rmessage,
7874					   DNS_SECTION_AUTHORITY))
7875	{
7876		name = NULL;
7877		dns_message_currentname(rctx->query->rmessage,
7878					DNS_SECTION_AUTHORITY, &name);
7879		if (!isstrictsubdomain(name, &rctx->fctx->domain)) {
7880			continue;
7881		}
7882		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
7883		     rdataset = ISC_LIST_NEXT(rdataset, link))
7884		{
7885			if (rdataset->type == dns_rdatatype_ns) {
7886				return (true);
7887			}
7888		}
7889	}
7890	return (false);
7891}
7892
/*
 * resquery_response():
 * Handles responses received in response to iterative queries sent by
 * resquery_send(). Sets up a response context (respctx_t).
 *
 * 'event' is a DNS_EVENT_DISPATCH event whose ev_arg is the resquery_t the
 * response belongs to.  The response context lives on this function's
 * stack and is handed to the rctx_*() helpers.
 *
 * Every exit path finishes the context through rctx_done(), either here or
 * inside a helper.  rctx_dispfail() and rctx_parse() return
 * ISC_R_COMPLETE after calling rctx_done() themselves; the other helpers
 * checked for ISC_R_COMPLETE below presumably follow the same convention.
 * Before finishing, the code records what should happen next in the
 * context's flags (resend, nextitem, next_server, broken_server).
 */
static void
resquery_response(isc_task_t *task, isc_event_t *event) {
	isc_result_t result = ISC_R_SUCCESS;
	resquery_t *query = event->ev_arg;
	dns_dispatchevent_t *devent = (dns_dispatchevent_t *)event;
	fetchctx_t *fctx;
	respctx_t rctx;

	REQUIRE(VALID_QUERY(query));
	fctx = query->fctx;
	REQUIRE(VALID_FCTX(fctx));
	REQUIRE(event->ev_type == DNS_EVENT_DISPATCH);

	QTRACE("response");

	/*
	 * Count the response by the address family of the server.
	 */
	if (isc_sockaddr_pf(&query->addrinfo->sockaddr) == PF_INET) {
		inc_stats(fctx->res, dns_resstatscounter_responsev4);
	} else {
		inc_stats(fctx->res, dns_resstatscounter_responsev6);
	}

	(void)isc_timer_touch(fctx->timer);

	rctx_respinit(task, devent, query, fctx, &rctx);

	if (atomic_load_acquire(&fctx->res->exiting)) {
		result = ISC_R_SHUTTINGDOWN;
		FCTXTRACE("resolver shutting down");
		rctx_done(&rctx, result);
		return;
	}

	/*
	 * A response arrived: reset the timeout bookkeeping and remember
	 * which address it came from.
	 */
	fctx->timeouts = 0;
	fctx->timeout = false;
	fctx->addrinfo = query->addrinfo;

	/*
	 * Check whether the dispatcher has failed; if so we're done
	 */
	result = rctx_dispfail(&rctx);
	if (result == ISC_R_COMPLETE) {
		return;
	}

	/*
	 * Give the response message the TSIG state of the query so that
	 * the signature check further down has what it needs.
	 */
	if (query->tsig != NULL) {
		result = dns_message_setquerytsig(query->rmessage, query->tsig);
		if (result != ISC_R_SUCCESS) {
			FCTXTRACE3("unable to set query tsig", result);
			rctx_done(&rctx, result);
			return;
		}
	}

	if (query->tsigkey) {
		result = dns_message_settsigkey(query->rmessage,
						query->tsigkey);
		if (result != ISC_R_SUCCESS) {
			FCTXTRACE3("unable to set tsig key", result);
			rctx_done(&rctx, result);
			return;
		}
	}

	dns_message_setclass(query->rmessage, fctx->res->rdclass);

	/*
	 * For non-TCP queries, tell the ADB about the response: its size
	 * when EDNS0 was in use, or that a plain response was received
	 * when it was not.
	 */
	if ((rctx.retryopts & DNS_FETCHOPT_TCP) == 0) {
		if ((rctx.retryopts & DNS_FETCHOPT_NOEDNS0) == 0) {
			dns_adb_setudpsize(
				fctx->adb, query->addrinfo,
				isc_buffer_usedlength(&devent->buffer));
		} else {
			dns_adb_plainresponse(fctx->adb, query->addrinfo);
		}
	}

	/*
	 * Parse response message.
	 */
	result = rctx_parse(&rctx);
	if (result == ISC_R_COMPLETE) {
		return;
	}

	/*
	 * Log the incoming packet.
	 */
	rctx_logpacket(&rctx);

	/*
	 * A response in the wrong class causes the query to be resent.
	 * 'result' is still ISC_R_SUCCESS here, as that is the only
	 * value rctx_parse() returns besides ISC_R_COMPLETE.
	 */
	if (query->rmessage->rdclass != fctx->res->rdclass) {
		rctx.resend = true;
		FCTXTRACE("bad class");
		rctx_done(&rctx, result);
		return;
	}

	/*
	 * Process receive opt record.
	 */
	rctx.opt = dns_message_getopt(query->rmessage);
	if (rctx.opt != NULL) {
		rctx_opt(&rctx);
	}

	if (query->rmessage->cc_bad && (rctx.retryopts & DNS_FETCHOPT_TCP) == 0)
	{
		/*
		 * If the COOKIE is bad, assume it is an attack and
		 * keep listening for a good answer.
		 */
		rctx.nextitem = true;
		if (isc_log_wouldlog(dns_lctx, ISC_LOG_INFO)) {
			char addrbuf[ISC_SOCKADDR_FORMATSIZE];
			isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
					    sizeof(addrbuf));
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
				      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
				      "bad cookie from %s", addrbuf);
		}
		rctx_done(&rctx, result);
		return;
	}

	/*
	 * Is the question the same as the one we asked?
	 * NOERROR/NXDOMAIN/YXDOMAIN/REFUSED/SERVFAIL/BADCOOKIE must have
	 * the same question.
	 * FORMERR/NOTIMP if they have a question section then it must match.
	 *
	 * All labels below share the 'default' body; the only rcodes that
	 * can skip the comparison are NOTIMP and FORMERR with an empty
	 * question section.
	 */
	switch (query->rmessage->rcode) {
	case dns_rcode_notimp:
	case dns_rcode_formerr:
		if (query->rmessage->counts[DNS_SECTION_QUESTION] == 0) {
			break;
		}
		FALLTHROUGH;
	case dns_rcode_nxrrset: /* Not expected. */
	case dns_rcode_badcookie:
	case dns_rcode_noerror:
	case dns_rcode_nxdomain:
	case dns_rcode_yxdomain:
	case dns_rcode_refused:
	case dns_rcode_servfail:
	default:
		result = same_question(fctx, query->rmessage);
		if (result != ISC_R_SUCCESS) {
			FCTXTRACE3("response did not match question", result);
			rctx.nextitem = true;
			rctx_done(&rctx, result);
			return;
		}
		break;
	}

	/*
	 * If the message is signed, check the signature.  If not, this
	 * returns success anyway.
	 */
	result = dns_message_checksig(query->rmessage, fctx->res->view);
	if (result != ISC_R_SUCCESS) {
		FCTXTRACE3("signature check failed", result);
		/*
		 * For a TSIG that is unexpectedly present or missing,
		 * keep waiting for another response instead.
		 */
		if (result == DNS_R_UNEXPECTEDTSIG ||
		    result == DNS_R_EXPECTEDTSIG)
		{
			rctx.nextitem = true;
		}
		rctx_done(&rctx, result);
		return;
	}

	/*
	 * The dispatcher should ensure we only get responses with QR set.
	 */
	INSIST((query->rmessage->flags & DNS_MESSAGEFLAG_QR) != 0);
	/*
	 * INSIST() that the message comes from the place we sent it to,
	 * since the dispatch code should ensure this.
	 *
	 * INSIST() that the message id is correct (this should also be
	 * ensured by the dispatch code).
	 */

	/*
	 * If we have had a server cookie and don't get one retry over TCP.
	 * This may be a misconfigured anycast server or an attempt to send
	 * a spoofed response.  Skip if we have a valid tsig.
	 */
	if (dns_message_gettsig(query->rmessage, NULL) == NULL &&
	    !query->rmessage->cc_ok && !query->rmessage->cc_bad &&
	    (rctx.retryopts & DNS_FETCHOPT_TCP) == 0)
	{
		unsigned char cookie[COOKIE_BUFFER_SIZE];
		/*
		 * A stored cookie longer than CLIENT_COOKIE_SIZE is taken
		 * to mean this server has sent a server cookie before.
		 */
		if (dns_adb_getcookie(fctx->adb, query->addrinfo, cookie,
				      sizeof(cookie)) > CLIENT_COOKIE_SIZE)
		{
			if (isc_log_wouldlog(dns_lctx, ISC_LOG_INFO)) {
				char addrbuf[ISC_SOCKADDR_FORMATSIZE];
				isc_sockaddr_format(&query->addrinfo->sockaddr,
						    addrbuf, sizeof(addrbuf));
				isc_log_write(
					dns_lctx, DNS_LOGCATEGORY_RESOLVER,
					DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
					"missing expected cookie from %s",
					addrbuf);
			}
			rctx.retryopts |= DNS_FETCHOPT_TCP;
			rctx.resend = true;
			rctx_done(&rctx, result);
			return;
		}
		/*
		 * XXXMPA When support for DNS COOKIE becomes ubiquitous, fall
		 * back to TCP for all non-COOKIE responses.
		 */
	}

	rctx_edns(&rctx);

	/*
	 * Deal with truncated responses by retrying using TCP.
	 * (rctx.truncated may already have been set by rctx_parse().)
	 */
	if ((query->rmessage->flags & DNS_MESSAGEFLAG_TC) != 0) {
		rctx.truncated = true;
	}

	if (rctx.truncated) {
		inc_stats(fctx->res, dns_resstatscounter_truncated);
		if ((rctx.retryopts & DNS_FETCHOPT_TCP) != 0) {
			/*
			 * Truncation over TCP: treat the server as broken
			 * and move on to the next one.
			 */
			rctx.broken_server = DNS_R_TRUNCATEDTCP;
			rctx.next_server = true;
		} else {
			rctx.retryopts |= DNS_FETCHOPT_TCP;
			rctx.resend = true;
		}
		FCTXTRACE3("message truncated", result);
		rctx_done(&rctx, result);
		return;
	}

	/*
	 * Is it a query response?
	 */
	if (query->rmessage->opcode != dns_opcode_query) {
		rctx.broken_server = DNS_R_UNEXPECTEDOPCODE;
		rctx.next_server = true;
		FCTXTRACE("invalid message opcode");
		rctx_done(&rctx, result);
		return;
	}

	/*
	 * Update statistics about erroneous responses.
	 */
	switch (query->rmessage->rcode) {
	case dns_rcode_noerror:
		/* no error */
		break;
	case dns_rcode_nxdomain:
		inc_stats(fctx->res, dns_resstatscounter_nxdomain);
		break;
	case dns_rcode_servfail:
		inc_stats(fctx->res, dns_resstatscounter_servfail);
		break;
	case dns_rcode_formerr:
		inc_stats(fctx->res, dns_resstatscounter_formerr);
		break;
	case dns_rcode_refused:
		inc_stats(fctx->res, dns_resstatscounter_refused);
		break;
	case dns_rcode_badvers:
		inc_stats(fctx->res, dns_resstatscounter_badvers);
		break;
	case dns_rcode_badcookie:
		inc_stats(fctx->res, dns_resstatscounter_badcookie);
		break;
	default:
		inc_stats(fctx->res, dns_resstatscounter_othererror);
		break;
	}

	/*
	 * Bad server?
	 */
	result = rctx_badserver(&rctx, result);
	if (result == ISC_R_COMPLETE) {
		return;
	}

	/*
	 * Lame server?
	 */
	result = rctx_lameserver(&rctx);
	if (result == ISC_R_COMPLETE) {
		return;
	}

	/*
	 * Handle delegation-only zones like NET or COM.
	 */
	rctx_delonly_zone(&rctx);

	/*
	 * Optionally call dns_rdata_checkowner() and dns_rdata_checknames()
	 * to validate the names in the response message.
	 */
	if ((fctx->res->options & DNS_RESOLVER_CHECKNAMES) != 0) {
		checknames(query->rmessage);
	}

	/*
	 * Clear cache bits.
	 */
	FCTX_ATTR_CLR(fctx, (FCTX_ATTR_WANTNCACHE | FCTX_ATTR_WANTCACHE));

	/*
	 * Did we get any answers?
	 */
	if (query->rmessage->counts[DNS_SECTION_ANSWER] > 0 &&
	    (query->rmessage->rcode == dns_rcode_noerror ||
	     query->rmessage->rcode == dns_rcode_yxdomain ||
	     query->rmessage->rcode == dns_rcode_nxdomain))
	{
		result = rctx_answer(&rctx);
		if (result == ISC_R_COMPLETE) {
			return;
		}
	} else if (query->rmessage->counts[DNS_SECTION_AUTHORITY] > 0 ||
		   query->rmessage->rcode == dns_rcode_noerror ||
		   query->rmessage->rcode == dns_rcode_nxdomain)
	{
		/*
		 * This might be an NXDOMAIN, NXRRSET, or referral.
		 * Call rctx_answer_none() to determine which it is.
		 */
		result = rctx_answer_none(&rctx);
		switch (result) {
		case ISC_R_SUCCESS:
		case DNS_R_CHASEDSSERVERS:
			break;
		case DNS_R_DELEGATION:
			/* With NOFOLLOW we want to pass the result code */
			if ((fctx->options & DNS_FETCHOPT_NOFOLLOW) == 0) {
				result = ISC_R_SUCCESS;
			}
			break;
		default:
			/*
			 * Something has gone wrong.
			 */
			if (result == DNS_R_FORMERR) {
				rctx.next_server = true;
			}
			FCTXTRACE3("rctx_answer_none", result);
			rctx_done(&rctx, result);
			return;
		}
	} else {
		/*
		 * The server is insane.
		 */
		/* XXXRTH Log */
		rctx.broken_server = DNS_R_UNEXPECTEDRCODE;
		rctx.next_server = true;
		FCTXTRACE("broken server: unexpected rcode");
		rctx_done(&rctx, result);
		return;
	}

	/*
	 * Follow additional section data chains.
	 */
	rctx_additional(&rctx);

	/*
	 * Cache the cacheable parts of the message.  This may also cause
	 * work to be queued to the DNSSEC validator.
	 *
	 * A separate 'tresult' is used so that a successful caching step
	 * does not overwrite the 'result' passed to rctx_done() at the end.
	 */
	if (WANTCACHE(fctx)) {
		isc_result_t tresult;
		tresult = cache_message(fctx, query->rmessage, query->addrinfo,
					rctx.now);
		if (tresult != ISC_R_SUCCESS) {
			FCTXTRACE3("cache_message complete", tresult);
			rctx_done(&rctx, tresult);
			return;
		}
	}

	/*
	 * Negative caching
	 */
	rctx_ncache(&rctx);

	rctx_done(&rctx, result);
}
8292
8293/*
8294 * rctx_respinit():
8295 * Initialize the response context structure 'rctx' to all zeroes, then set
8296 * the task, event, query and fctx information from resquery_response().
8297 */
8298static void
8299rctx_respinit(isc_task_t *task, dns_dispatchevent_t *devent, resquery_t *query,
8300	      fetchctx_t *fctx, respctx_t *rctx) {
8301	memset(rctx, 0, sizeof(*rctx));
8302
8303	rctx->task = task;
8304	rctx->devent = devent;
8305	rctx->query = query;
8306	rctx->fctx = fctx;
8307	rctx->broken_type = badns_response;
8308	rctx->retryopts = query->options;
8309
8310	/*
8311	 * XXXRTH  We should really get the current time just once.  We
8312	 *		need a routine to convert from an isc_time_t to an
8313	 *		isc_stdtime_t.
8314	 */
8315	TIME_NOW(&rctx->tnow);
8316	rctx->finish = &rctx->tnow;
8317	isc_stdtime_get(&rctx->now);
8318}
8319
8320/*
8321 * rctx_answer_init():
8322 * Clear and reinitialize those portions of 'rctx' that will be needed
8323 * when scanning the answer section of the response message. This can be
8324 * called more than once if scanning needs to be restarted (though currently
8325 * there are no cases in which this occurs).
8326 */
8327static void
8328rctx_answer_init(respctx_t *rctx) {
8329	fetchctx_t *fctx = rctx->fctx;
8330
8331	rctx->aa = ((rctx->query->rmessage->flags & DNS_MESSAGEFLAG_AA) != 0);
8332	if (rctx->aa) {
8333		rctx->trust = dns_trust_authanswer;
8334	} else {
8335		rctx->trust = dns_trust_answer;
8336	}
8337
8338	/*
8339	 * There can be multiple RRSIG and SIG records at a name so
8340	 * we treat these types as a subset of ANY.
8341	 */
8342	rctx->type = fctx->type;
8343	if (rctx->type == dns_rdatatype_rrsig ||
8344	    rctx->type == dns_rdatatype_sig)
8345	{
8346		rctx->type = dns_rdatatype_any;
8347	}
8348
8349	/*
8350	 * Bigger than any valid DNAME label count.
8351	 */
8352	rctx->dname_labels = dns_name_countlabels(&fctx->name);
8353	rctx->domain_labels = dns_name_countlabels(&fctx->domain);
8354
8355	rctx->found_type = dns_rdatatype_none;
8356
8357	rctx->aname = NULL;
8358	rctx->ardataset = NULL;
8359
8360	rctx->cname = NULL;
8361	rctx->crdataset = NULL;
8362
8363	rctx->dname = NULL;
8364	rctx->drdataset = NULL;
8365
8366	rctx->ns_name = NULL;
8367	rctx->ns_rdataset = NULL;
8368
8369	rctx->soa_name = NULL;
8370	rctx->ds_name = NULL;
8371	rctx->found_name = NULL;
8372}
8373
8374/*
8375 * rctx_dispfail():
8376 * Handle the case where the dispatcher failed
8377 */
8378static isc_result_t
8379rctx_dispfail(respctx_t *rctx) {
8380	dns_dispatchevent_t *devent = rctx->devent;
8381	fetchctx_t *fctx = rctx->fctx;
8382	resquery_t *query = rctx->query;
8383
8384	if (devent->result == ISC_R_SUCCESS) {
8385		return (ISC_R_SUCCESS);
8386	}
8387
8388	if (devent->result == ISC_R_EOF &&
8389	    (rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0)
8390	{
8391		/*
8392		 * The problem might be that they don't understand EDNS0.
8393		 * Turn it off and try again.
8394		 */
8395		rctx->retryopts |= DNS_FETCHOPT_NOEDNS0;
8396		rctx->resend = true;
8397		add_bad_edns(fctx, &query->addrinfo->sockaddr);
8398	} else {
8399		/*
8400		 * There's no hope for this response.
8401		 */
8402		rctx->next_server = true;
8403
8404		/*
8405		 * If this is a network error on an exclusive query
8406		 * socket, mark the server as bad so that we won't try
8407		 * it for this fetch again.  Also adjust finish and
8408		 * no_response so that we penalize this address in SRTT
8409		 * adjustment later.
8410		 */
8411		if (query->exclusivesocket &&
8412		    (devent->result == ISC_R_HOSTUNREACH ||
8413		     devent->result == ISC_R_NETUNREACH ||
8414		     devent->result == ISC_R_CONNREFUSED ||
8415		     devent->result == ISC_R_CANCELED))
8416		{
8417			rctx->broken_server = devent->result;
8418			rctx->broken_type = badns_unreachable;
8419			rctx->finish = NULL;
8420			rctx->no_response = true;
8421		}
8422	}
8423	FCTXTRACE3("dispatcher failure", devent->result);
8424	rctx_done(rctx, ISC_R_SUCCESS);
8425	return (ISC_R_COMPLETE);
8426}
8427
8428/*
8429 * rctx_parse():
8430 * Parse the response message.
8431 */
8432static isc_result_t
8433rctx_parse(respctx_t *rctx) {
8434	isc_result_t result;
8435	fetchctx_t *fctx = rctx->fctx;
8436	resquery_t *query = rctx->query;
8437
8438	result = dns_message_parse(query->rmessage, &rctx->devent->buffer, 0);
8439	if (result == ISC_R_SUCCESS) {
8440		return (ISC_R_SUCCESS);
8441	}
8442
8443	FCTXTRACE3("message failed to parse", result);
8444
8445	switch (result) {
8446	case ISC_R_UNEXPECTEDEND:
8447		if (query->rmessage->question_ok &&
8448		    (query->rmessage->flags & DNS_MESSAGEFLAG_TC) != 0 &&
8449		    (rctx->retryopts & DNS_FETCHOPT_TCP) == 0)
8450		{
8451			/*
8452			 * We defer retrying via TCP for a bit so we can
8453			 * check out this message further.
8454			 */
8455			rctx->truncated = true;
8456			return (ISC_R_SUCCESS);
8457		}
8458
8459		/*
8460		 * Either the message ended prematurely,
8461		 * and/or wasn't marked as being truncated,
8462		 * and/or this is a response to a query we
8463		 * sent over TCP.  In all of these cases,
8464		 * something is wrong with the remote
8465		 * server and we don't want to retry using
8466		 * TCP.
8467		 */
8468		if ((rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0) {
8469			/*
8470			 * The problem might be that they
8471			 * don't understand EDNS0.  Turn it
8472			 * off and try again.
8473			 */
8474			rctx->retryopts |= DNS_FETCHOPT_NOEDNS0;
8475			rctx->resend = true;
8476			add_bad_edns(fctx, &query->addrinfo->sockaddr);
8477			inc_stats(fctx->res, dns_resstatscounter_edns0fail);
8478		} else {
8479			rctx->broken_server = result;
8480			rctx->next_server = true;
8481		}
8482
8483		rctx_done(rctx, result);
8484		break;
8485	case DNS_R_FORMERR:
8486		if ((rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0) {
8487			/*
8488			 * The problem might be that they
8489			 * don't understand EDNS0.  Turn it
8490			 * off and try again.
8491			 */
8492			rctx->retryopts |= DNS_FETCHOPT_NOEDNS0;
8493			rctx->resend = true;
8494			add_bad_edns(fctx, &query->addrinfo->sockaddr);
8495			inc_stats(fctx->res, dns_resstatscounter_edns0fail);
8496		} else {
8497			rctx->broken_server = DNS_R_UNEXPECTEDRCODE;
8498			rctx->next_server = true;
8499		}
8500
8501		rctx_done(rctx, result);
8502		break;
8503	default:
8504		/*
8505		 * Something bad has happened.
8506		 */
8507		rctx_done(rctx, result);
8508		break;
8509	}
8510
8511	return (ISC_R_COMPLETE);
8512}
8513
8514/*
8515 * rctx_opt():
8516 * Process the OPT record in the response.
8517 */
8518static void
8519rctx_opt(respctx_t *rctx) {
8520	resquery_t *query = rctx->query;
8521	fetchctx_t *fctx = rctx->fctx;
8522	dns_rdata_t rdata;
8523	isc_buffer_t optbuf;
8524	isc_result_t result;
8525	uint16_t optcode;
8526	uint16_t optlen;
8527	unsigned char *optvalue;
8528	dns_adbaddrinfo_t *addrinfo;
8529	unsigned char cookie[CLIENT_COOKIE_SIZE];
8530	bool seen_cookie = false;
8531	bool seen_nsid = false;
8532
8533	result = dns_rdataset_first(rctx->opt);
8534	if (result == ISC_R_SUCCESS) {
8535		dns_rdata_init(&rdata);
8536		dns_rdataset_current(rctx->opt, &rdata);
8537		isc_buffer_init(&optbuf, rdata.data, rdata.length);
8538		isc_buffer_add(&optbuf, rdata.length);
8539		while (isc_buffer_remaininglength(&optbuf) >= 4) {
8540			optcode = isc_buffer_getuint16(&optbuf);
8541			optlen = isc_buffer_getuint16(&optbuf);
8542			INSIST(optlen <= isc_buffer_remaininglength(&optbuf));
8543			switch (optcode) {
8544			case DNS_OPT_NSID:
8545				if (!seen_nsid && (query->options &
8546						   DNS_FETCHOPT_WANTNSID) != 0)
8547				{
8548					log_nsid(&optbuf, optlen, query,
8549						 ISC_LOG_INFO, fctx->res->mctx);
8550				}
8551				isc_buffer_forward(&optbuf, optlen);
8552				seen_nsid = true;
8553				break;
8554			case DNS_OPT_COOKIE:
8555				/*
8556				 * Only process the first cookie option.
8557				 */
8558				if (seen_cookie) {
8559					isc_buffer_forward(&optbuf, optlen);
8560					break;
8561				}
8562				optvalue = isc_buffer_current(&optbuf);
8563				compute_cc(query, cookie, sizeof(cookie));
8564				INSIST(query->rmessage->cc_bad == 0 &&
8565				       query->rmessage->cc_ok == 0);
8566				if (optlen >= CLIENT_COOKIE_SIZE &&
8567				    memcmp(cookie, optvalue,
8568					   CLIENT_COOKIE_SIZE) == 0)
8569				{
8570					query->rmessage->cc_ok = 1;
8571					inc_stats(fctx->res,
8572						  dns_resstatscounter_cookieok);
8573					addrinfo = query->addrinfo;
8574					dns_adb_setcookie(fctx->adb, addrinfo,
8575							  optvalue, optlen);
8576				} else {
8577					query->rmessage->cc_bad = 1;
8578				}
8579				isc_buffer_forward(&optbuf, optlen);
8580				inc_stats(fctx->res,
8581					  dns_resstatscounter_cookiein);
8582				seen_cookie = true;
8583				break;
8584			default:
8585				isc_buffer_forward(&optbuf, optlen);
8586				break;
8587			}
8588		}
8589		INSIST(isc_buffer_remaininglength(&optbuf) == 0U);
8590	}
8591}
8592
8593/*
8594 * rctx_edns():
8595 * Determine whether the remote server is using EDNS correctly or
8596 * incorrectly and record that information if needed.
8597 */
8598static void
8599rctx_edns(respctx_t *rctx) {
8600	resquery_t *query = rctx->query;
8601	fetchctx_t *fctx = rctx->fctx;
8602
8603	/*
8604	 * We have an affirmative response to the query and we have
8605	 * previously got a response from this server which indicated
8606	 * EDNS may not be supported so we can now cache the lack of
8607	 * EDNS support.
8608	 */
8609	if (rctx->opt == NULL && !EDNSOK(query->addrinfo) &&
8610	    (query->rmessage->rcode == dns_rcode_noerror ||
8611	     query->rmessage->rcode == dns_rcode_nxdomain ||
8612	     query->rmessage->rcode == dns_rcode_refused ||
8613	     query->rmessage->rcode == dns_rcode_yxdomain) &&
8614	    bad_edns(fctx, &query->addrinfo->sockaddr))
8615	{
8616		dns_message_logpacket(
8617			query->rmessage, "received packet (bad edns) from",
8618			&query->addrinfo->sockaddr, DNS_LOGCATEGORY_RESOLVER,
8619			DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
8620			fctx->res->mctx);
8621		dns_adb_changeflags(fctx->adb, query->addrinfo,
8622				    DNS_FETCHOPT_NOEDNS0, DNS_FETCHOPT_NOEDNS0);
8623	} else if (rctx->opt == NULL &&
8624		   (query->rmessage->flags & DNS_MESSAGEFLAG_TC) == 0 &&
8625		   !EDNSOK(query->addrinfo) &&
8626		   (query->rmessage->rcode == dns_rcode_noerror ||
8627		    query->rmessage->rcode == dns_rcode_nxdomain) &&
8628		   (rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0)
8629	{
8630		/*
8631		 * We didn't get a OPT record in response to a EDNS query.
8632		 *
8633		 * Old versions of named incorrectly drop the OPT record
8634		 * when there is a signed, truncated response so we check
8635		 * that TC is not set.
8636		 *
8637		 * Record that the server is not talking EDNS.  While this
8638		 * should be safe to do for any rcode we limit it to NOERROR
8639		 * and NXDOMAIN.
8640		 */
8641		dns_message_logpacket(
8642			query->rmessage, "received packet (no opt) from",
8643			&query->addrinfo->sockaddr, DNS_LOGCATEGORY_RESOLVER,
8644			DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
8645			fctx->res->mctx);
8646		dns_adb_changeflags(fctx->adb, query->addrinfo,
8647				    DNS_FETCHOPT_NOEDNS0, DNS_FETCHOPT_NOEDNS0);
8648	}
8649
8650	/*
8651	 * If we get a non error EDNS response record the fact so we
8652	 * won't fallback to plain DNS in the future for this server.
8653	 */
8654	if (rctx->opt != NULL && !EDNSOK(query->addrinfo) &&
8655	    (rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0 &&
8656	    (query->rmessage->rcode == dns_rcode_noerror ||
8657	     query->rmessage->rcode == dns_rcode_nxdomain ||
8658	     query->rmessage->rcode == dns_rcode_refused ||
8659	     query->rmessage->rcode == dns_rcode_yxdomain))
8660	{
8661		dns_adb_changeflags(fctx->adb, query->addrinfo,
8662				    FCTX_ADDRINFO_EDNSOK, FCTX_ADDRINFO_EDNSOK);
8663	}
8664}
8665
8666/*
8667 * rctx_answer():
8668 * We might have answers, or we might have a malformed delegation with
8669 * records in the answer section. Call rctx_answer_positive() or
8670 * rctx_answer_none() as appropriate.
8671 */
8672static isc_result_t
8673rctx_answer(respctx_t *rctx) {
8674	isc_result_t result;
8675	fetchctx_t *fctx = rctx->fctx;
8676	resquery_t *query = rctx->query;
8677
8678	if ((query->rmessage->flags & DNS_MESSAGEFLAG_AA) != 0 ||
8679	    ISFORWARDER(query->addrinfo))
8680	{
8681		result = rctx_answer_positive(rctx);
8682		if (result != ISC_R_SUCCESS) {
8683			FCTXTRACE3("rctx_answer_positive (AA/fwd)", result);
8684		}
8685	} else if (iscname(query->rmessage, &fctx->name) &&
8686		   fctx->type != dns_rdatatype_any &&
8687		   fctx->type != dns_rdatatype_cname)
8688	{
8689		/*
8690		 * A BIND8 server could return a non-authoritative
8691		 * answer when a CNAME is followed.  We should treat
8692		 * it as a valid answer.
8693		 */
8694		result = rctx_answer_positive(rctx);
8695		if (result != ISC_R_SUCCESS) {
8696			FCTXTRACE3("rctx_answer_positive (!ANY/!CNAME)",
8697				   result);
8698		}
8699	} else if (fctx->type != dns_rdatatype_ns && !betterreferral(rctx)) {
8700		result = rctx_answer_positive(rctx);
8701		if (result != ISC_R_SUCCESS) {
8702			FCTXTRACE3("rctx_answer_positive (!NS)", result);
8703		}
8704	} else {
8705		/*
8706		 * This may be a delegation. First let's check for
8707		 */
8708
8709		if (fctx->type == dns_rdatatype_ns) {
8710			/*
8711			 * A BIND 8 server could incorrectly return a
8712			 * non-authoritative answer to an NS query
8713			 * instead of a referral. Since this answer
8714			 * lacks the SIGs necessary to do DNSSEC
8715			 * validation, we must invoke the following
8716			 * special kludge to treat it as a referral.
8717			 */
8718			rctx->ns_in_answer = true;
8719			result = rctx_answer_none(rctx);
8720			if (result != ISC_R_SUCCESS) {
8721				FCTXTRACE3("rctx_answer_none (NS)", result);
8722			}
8723		} else {
8724			/*
8725			 * Some other servers may still somehow include
8726			 * an answer when it should return a referral
8727			 * with an empty answer.  Check to see if we can
8728			 * treat this as a referral by ignoring the
8729			 * answer.  Further more, there may be an
8730			 * implementation that moves A/AAAA glue records
8731			 * to the answer section for that type of
8732			 * delegation when the query is for that glue
8733			 * record. glue_in_answer will handle
8734			 * such a corner case.
8735			 */
8736			rctx->glue_in_answer = true;
8737			result = rctx_answer_none(rctx);
8738			if (result != ISC_R_SUCCESS) {
8739				FCTXTRACE3("rctx_answer_none", result);
8740			}
8741		}
8742
8743		if (result == DNS_R_DELEGATION) {
8744			result = ISC_R_SUCCESS;
8745		} else {
8746			/*
8747			 * At this point, AA is not set, the response
8748			 * is not a referral, and the server is not a
8749			 * forwarder.  It is technically lame and it's
8750			 * easier to treat it as such than to figure out
8751			 * some more elaborate course of action.
8752			 */
8753			rctx->broken_server = DNS_R_LAME;
8754			rctx->next_server = true;
8755			rctx_done(rctx, result);
8756			return (ISC_R_COMPLETE);
8757		}
8758	}
8759
8760	if (result != ISC_R_SUCCESS) {
8761		if (result == DNS_R_FORMERR) {
8762			rctx->next_server = true;
8763		}
8764		rctx_done(rctx, result);
8765		return (ISC_R_COMPLETE);
8766	}
8767
8768	return (ISC_R_SUCCESS);
8769}
8770
8771/*
8772 * rctx_answer_positive():
8773 * Handles positive responses. Depending which type of answer this is
8774 * (matching QNAME/QTYPE, CNAME, DNAME, ANY) calls the proper routine
8775 * to handle it (rctx_answer_match(), rctx_answer_cname(),
8776 * rctx_answer_dname(), rctx_answer_any()).
8777 */
8778static isc_result_t
8779rctx_answer_positive(respctx_t *rctx) {
8780	isc_result_t result;
8781	fetchctx_t *fctx = rctx->fctx;
8782
8783	FCTXTRACE("rctx_answer");
8784
8785	rctx_answer_init(rctx);
8786	rctx_answer_scan(rctx);
8787
8788	/*
8789	 * Determine which type of positive answer this is:
8790	 * type ANY, CNAME, DNAME, or an answer matching QNAME/QTYPE.
8791	 * Call the appropriate routine to handle the answer type.
8792	 */
8793	if (rctx->aname != NULL && rctx->type == dns_rdatatype_any) {
8794		result = rctx_answer_any(rctx);
8795		if (result == ISC_R_COMPLETE) {
8796			return (rctx->result);
8797		}
8798	} else if (rctx->aname != NULL) {
8799		result = rctx_answer_match(rctx);
8800		if (result == ISC_R_COMPLETE) {
8801			return (rctx->result);
8802		}
8803	} else if (rctx->cname != NULL) {
8804		result = rctx_answer_cname(rctx);
8805		if (result == ISC_R_COMPLETE) {
8806			return (rctx->result);
8807		}
8808	} else if (rctx->dname != NULL) {
8809		result = rctx_answer_dname(rctx);
8810		if (result == ISC_R_COMPLETE) {
8811			return (rctx->result);
8812		}
8813	} else {
8814		log_formerr(fctx, "reply has no answer");
8815		return (DNS_R_FORMERR);
8816	}
8817
8818	/*
8819	 * This response is now potentially cacheable.
8820	 */
8821	FCTX_ATTR_SET(fctx, FCTX_ATTR_WANTCACHE);
8822
8823	/*
8824	 * Did chaining end before we got the final answer?
8825	 */
8826	if (rctx->chaining) {
8827		return (ISC_R_SUCCESS);
8828	}
8829
8830	/*
8831	 * We didn't end with an incomplete chain, so the rcode should be
8832	 * "no error".
8833	 */
8834	if (rctx->query->rmessage->rcode != dns_rcode_noerror) {
8835		log_formerr(fctx, "CNAME/DNAME chain complete, but RCODE "
8836				  "indicates error");
8837		return (DNS_R_FORMERR);
8838	}
8839
8840	/*
8841	 * Cache records in the authority section, if
8842	 * there are any suitable for caching.
8843	 */
8844	rctx_authority_positive(rctx);
8845
8846	log_ns_ttl(fctx, "rctx_answer");
8847
8848	if (rctx->ns_rdataset != NULL &&
8849	    dns_name_equal(&fctx->domain, rctx->ns_name) &&
8850	    !dns_name_equal(rctx->ns_name, dns_rootname))
8851	{
8852		trim_ns_ttl(fctx, rctx->ns_name, rctx->ns_rdataset);
8853	}
8854
8855	return (ISC_R_SUCCESS);
8856}
8857
8858/*
8859 * rctx_answer_scan():
8860 * Perform a single pass over the answer section of a response, looking
8861 * for an answer that matches QNAME/QTYPE, or a CNAME matching QNAME, or a
8862 * covering DNAME. If more than one rdataset is found matching these
8863 * criteria, then only one is kept. Order of preference is 1) the
8864 * shortest DNAME, 2) the first matching answer, or 3) the first CNAME.
8865 */
8866static void
8867rctx_answer_scan(respctx_t *rctx) {
8868	isc_result_t result;
8869	fetchctx_t *fctx = rctx->fctx;
8870	dns_rdataset_t *rdataset = NULL;
8871
8872	for (result = dns_message_firstname(rctx->query->rmessage,
8873					    DNS_SECTION_ANSWER);
8874	     result == ISC_R_SUCCESS;
8875	     result = dns_message_nextname(rctx->query->rmessage,
8876					   DNS_SECTION_ANSWER))
8877	{
8878		int order;
8879		unsigned int nlabels;
8880		dns_namereln_t namereln;
8881		dns_name_t *name = NULL;
8882
8883		dns_message_currentname(rctx->query->rmessage,
8884					DNS_SECTION_ANSWER, &name);
8885		namereln = dns_name_fullcompare(&fctx->name, name, &order,
8886						&nlabels);
8887		switch (namereln) {
8888		case dns_namereln_equal:
8889			for (rdataset = ISC_LIST_HEAD(name->list);
8890			     rdataset != NULL;
8891			     rdataset = ISC_LIST_NEXT(rdataset, link))
8892			{
8893				if (rdataset->type == rctx->type ||
8894				    rctx->type == dns_rdatatype_any)
8895				{
8896					rctx->aname = name;
8897					if (rctx->type != dns_rdatatype_any) {
8898						rctx->ardataset = rdataset;
8899					}
8900					break;
8901				}
8902				if (rdataset->type == dns_rdatatype_cname) {
8903					rctx->cname = name;
8904					rctx->crdataset = rdataset;
8905					break;
8906				}
8907			}
8908			break;
8909
8910		case dns_namereln_subdomain:
8911			/*
8912			 * Don't accept DNAME from parent namespace.
8913			 */
8914			if (name_external(name, dns_rdatatype_dname, fctx)) {
8915				continue;
8916			}
8917
8918			/*
8919			 * In-scope DNAME records must have at least
8920			 * as many labels as the domain being queried.
8921			 * They also must be less that qname's labels
8922			 * and any previously found dname.
8923			 */
8924			if (nlabels >= rctx->dname_labels ||
8925			    nlabels < rctx->domain_labels)
8926			{
8927				continue;
8928			}
8929
8930			/*
8931			 * We are looking for the shortest DNAME if there
8932			 * are multiple ones (which there shouldn't be).
8933			 */
8934			for (rdataset = ISC_LIST_HEAD(name->list);
8935			     rdataset != NULL;
8936			     rdataset = ISC_LIST_NEXT(rdataset, link))
8937			{
8938				if (rdataset->type != dns_rdatatype_dname) {
8939					continue;
8940				}
8941				rctx->dname = name;
8942				rctx->drdataset = rdataset;
8943				rctx->dname_labels = nlabels;
8944				break;
8945			}
8946			break;
8947		default:
8948			break;
8949		}
8950	}
8951
8952	/*
8953	 * If a DNAME was found, then any CNAME or other answer matching
8954	 * QNAME that may also have been found must be ignored.  Similarly,
8955	 * if a matching answer was found along with a CNAME, the CNAME
8956	 * must be ignored.
8957	 */
8958	if (rctx->dname != NULL) {
8959		rctx->aname = NULL;
8960		rctx->ardataset = NULL;
8961		rctx->cname = NULL;
8962		rctx->crdataset = NULL;
8963	} else if (rctx->aname != NULL) {
8964		rctx->cname = NULL;
8965		rctx->crdataset = NULL;
8966	}
8967}
8968
8969/*
8970 * rctx_answer_any():
8971 * Handle responses to queries of type ANY. Scan the answer section,
8972 * and as long as each RRset is of a type that is valid in the answer
8973 * section, and the rdata isn't filtered, cache it.
8974 */
8975static isc_result_t
8976rctx_answer_any(respctx_t *rctx) {
8977	dns_rdataset_t *rdataset = NULL;
8978	fetchctx_t *fctx = rctx->fctx;
8979
8980	for (rdataset = ISC_LIST_HEAD(rctx->aname->list); rdataset != NULL;
8981	     rdataset = ISC_LIST_NEXT(rdataset, link))
8982	{
8983		if (!validinanswer(rdataset, fctx)) {
8984			rctx->result = DNS_R_FORMERR;
8985			return (ISC_R_COMPLETE);
8986		}
8987
8988		if ((fctx->type == dns_rdatatype_sig ||
8989		     fctx->type == dns_rdatatype_rrsig) &&
8990		    rdataset->type != fctx->type)
8991		{
8992			continue;
8993		}
8994
8995		if ((rdataset->type == dns_rdatatype_a ||
8996		     rdataset->type == dns_rdatatype_aaaa) &&
8997		    !is_answeraddress_allowed(fctx->res->view, rctx->aname,
8998					      rdataset))
8999		{
9000			rctx->result = DNS_R_SERVFAIL;
9001			return (ISC_R_COMPLETE);
9002		}
9003
9004		if ((rdataset->type == dns_rdatatype_cname ||
9005		     rdataset->type == dns_rdatatype_dname) &&
9006		    !is_answertarget_allowed(fctx, &fctx->name, rctx->aname,
9007					     rdataset, NULL))
9008		{
9009			rctx->result = DNS_R_SERVFAIL;
9010			return (ISC_R_COMPLETE);
9011		}
9012
9013		rctx->aname->attributes |= DNS_NAMEATTR_CACHE;
9014		rctx->aname->attributes |= DNS_NAMEATTR_ANSWER;
9015		rdataset->attributes |= DNS_RDATASETATTR_ANSWER;
9016		rdataset->attributes |= DNS_RDATASETATTR_CACHE;
9017		rdataset->trust = rctx->trust;
9018
9019		(void)dns_rdataset_additionaldata(rdataset, check_related,
9020						  rctx);
9021	}
9022
9023	return (ISC_R_SUCCESS);
9024}
9025
9026/*
9027 * rctx_answer_match():
9028 * Handle responses that match the QNAME/QTYPE of the resolver query.
9029 * If QTYPE is valid in the answer section and the rdata isn't filtered,
9030 * the answer can be cached. If there is additional section data related
9031 * to the answer, it can be cached as well.
9032 */
9033static isc_result_t
9034rctx_answer_match(respctx_t *rctx) {
9035	dns_rdataset_t *sigrdataset = NULL;
9036	fetchctx_t *fctx = rctx->fctx;
9037
9038	if (!validinanswer(rctx->ardataset, fctx)) {
9039		rctx->result = DNS_R_FORMERR;
9040		return (ISC_R_COMPLETE);
9041	}
9042
9043	if ((rctx->ardataset->type == dns_rdatatype_a ||
9044	     rctx->ardataset->type == dns_rdatatype_aaaa) &&
9045	    !is_answeraddress_allowed(fctx->res->view, rctx->aname,
9046				      rctx->ardataset))
9047	{
9048		rctx->result = DNS_R_SERVFAIL;
9049		return (ISC_R_COMPLETE);
9050	}
9051	if ((rctx->ardataset->type == dns_rdatatype_cname ||
9052	     rctx->ardataset->type == dns_rdatatype_dname) &&
9053	    rctx->type != rctx->ardataset->type &&
9054	    rctx->type != dns_rdatatype_any &&
9055	    !is_answertarget_allowed(fctx, &fctx->name, rctx->aname,
9056				     rctx->ardataset, NULL))
9057	{
9058		rctx->result = DNS_R_SERVFAIL;
9059		return (ISC_R_COMPLETE);
9060	}
9061
9062	rctx->aname->attributes |= DNS_NAMEATTR_CACHE;
9063	rctx->aname->attributes |= DNS_NAMEATTR_ANSWER;
9064	rctx->ardataset->attributes |= DNS_RDATASETATTR_ANSWER;
9065	rctx->ardataset->attributes |= DNS_RDATASETATTR_CACHE;
9066	rctx->ardataset->trust = rctx->trust;
9067	(void)dns_rdataset_additionaldata(rctx->ardataset, check_related, rctx);
9068
9069	for (sigrdataset = ISC_LIST_HEAD(rctx->aname->list);
9070	     sigrdataset != NULL;
9071	     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
9072	{
9073		if (!validinanswer(sigrdataset, fctx)) {
9074			rctx->result = DNS_R_FORMERR;
9075			return (ISC_R_COMPLETE);
9076		}
9077
9078		if (sigrdataset->type != dns_rdatatype_rrsig ||
9079		    sigrdataset->covers != rctx->type)
9080		{
9081			continue;
9082		}
9083
9084		sigrdataset->attributes |= DNS_RDATASETATTR_ANSWERSIG;
9085		sigrdataset->attributes |= DNS_RDATASETATTR_CACHE;
9086		sigrdataset->trust = rctx->trust;
9087		break;
9088	}
9089
9090	return (ISC_R_SUCCESS);
9091}
9092
9093/*
9094 * rctx_answer_cname():
9095 * Handle answers containing a CNAME. Cache the CNAME, and flag that
9096 * there may be additional chain answers to find.
9097 */
9098static isc_result_t
9099rctx_answer_cname(respctx_t *rctx) {
9100	dns_rdataset_t *sigrdataset = NULL;
9101	fetchctx_t *fctx = rctx->fctx;
9102
9103	if (!validinanswer(rctx->crdataset, fctx)) {
9104		rctx->result = DNS_R_FORMERR;
9105		return (ISC_R_COMPLETE);
9106	}
9107
9108	if (rctx->type == dns_rdatatype_rrsig ||
9109	    rctx->type == dns_rdatatype_key || rctx->type == dns_rdatatype_nsec)
9110	{
9111		char buf[DNS_RDATATYPE_FORMATSIZE];
9112		dns_rdatatype_format(rctx->type, buf, sizeof(buf));
9113		log_formerr(fctx, "CNAME response for %s RR", buf);
9114		rctx->result = DNS_R_FORMERR;
9115		return (ISC_R_COMPLETE);
9116	}
9117
9118	if (!is_answertarget_allowed(fctx, &fctx->name, rctx->cname,
9119				     rctx->crdataset, NULL))
9120	{
9121		rctx->result = DNS_R_SERVFAIL;
9122		return (ISC_R_COMPLETE);
9123	}
9124
9125	rctx->cname->attributes |= DNS_NAMEATTR_CACHE;
9126	rctx->cname->attributes |= DNS_NAMEATTR_ANSWER;
9127	rctx->cname->attributes |= DNS_NAMEATTR_CHAINING;
9128	rctx->crdataset->attributes |= DNS_RDATASETATTR_ANSWER;
9129	rctx->crdataset->attributes |= DNS_RDATASETATTR_CACHE;
9130	rctx->crdataset->attributes |= DNS_RDATASETATTR_CHAINING;
9131	rctx->crdataset->trust = rctx->trust;
9132
9133	for (sigrdataset = ISC_LIST_HEAD(rctx->cname->list);
9134	     sigrdataset != NULL;
9135	     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
9136	{
9137		if (!validinanswer(sigrdataset, fctx)) {
9138			rctx->result = DNS_R_FORMERR;
9139			return (ISC_R_COMPLETE);
9140		}
9141
9142		if (sigrdataset->type != dns_rdatatype_rrsig ||
9143		    sigrdataset->covers != dns_rdatatype_cname)
9144		{
9145			continue;
9146		}
9147
9148		sigrdataset->attributes |= DNS_RDATASETATTR_ANSWERSIG;
9149		sigrdataset->attributes |= DNS_RDATASETATTR_CACHE;
9150		sigrdataset->trust = rctx->trust;
9151		break;
9152	}
9153
9154	rctx->chaining = true;
9155	return (ISC_R_SUCCESS);
9156}
9157
9158/*
9159 * rctx_answer_dname():
9160 * Handle responses with covering DNAME records.
9161 */
9162static isc_result_t
9163rctx_answer_dname(respctx_t *rctx) {
9164	dns_rdataset_t *sigrdataset = NULL;
9165	fetchctx_t *fctx = rctx->fctx;
9166
9167	if (!validinanswer(rctx->drdataset, fctx)) {
9168		rctx->result = DNS_R_FORMERR;
9169		return (ISC_R_COMPLETE);
9170	}
9171
9172	if (!is_answertarget_allowed(fctx, &fctx->name, rctx->dname,
9173				     rctx->drdataset, &rctx->chaining))
9174	{
9175		rctx->result = DNS_R_SERVFAIL;
9176		return (ISC_R_COMPLETE);
9177	}
9178
9179	rctx->dname->attributes |= DNS_NAMEATTR_CACHE;
9180	rctx->dname->attributes |= DNS_NAMEATTR_ANSWER;
9181	rctx->dname->attributes |= DNS_NAMEATTR_CHAINING;
9182	rctx->drdataset->attributes |= DNS_RDATASETATTR_ANSWER;
9183	rctx->drdataset->attributes |= DNS_RDATASETATTR_CACHE;
9184	rctx->drdataset->attributes |= DNS_RDATASETATTR_CHAINING;
9185	rctx->drdataset->trust = rctx->trust;
9186
9187	for (sigrdataset = ISC_LIST_HEAD(rctx->dname->list);
9188	     sigrdataset != NULL;
9189	     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
9190	{
9191		if (!validinanswer(sigrdataset, fctx)) {
9192			rctx->result = DNS_R_FORMERR;
9193			return (ISC_R_COMPLETE);
9194		}
9195
9196		if (sigrdataset->type != dns_rdatatype_rrsig ||
9197		    sigrdataset->covers != dns_rdatatype_dname)
9198		{
9199			continue;
9200		}
9201
9202		sigrdataset->attributes |= DNS_RDATASETATTR_ANSWERSIG;
9203		sigrdataset->attributes |= DNS_RDATASETATTR_CACHE;
9204		sigrdataset->trust = rctx->trust;
9205		break;
9206	}
9207
9208	return (ISC_R_SUCCESS);
9209}
9210
9211/*
9212 * rctx_authority_positive():
9213 * Examine the records in the authority section (if there are any) for a
9214 * positive answer.  We expect the names for all rdatasets in this section
9215 * to be subdomains of the domain being queried; any that are not are
9216 * skipped.  We expect to find only *one* owner name; any names
9217 * after the first one processed are ignored. We expect to find only
9218 * rdatasets of type NS, RRSIG, or SIG; all others are ignored. Whatever
9219 * remains can be cached at trust level authauthority or additional
9220 * (depending on whether the AA bit was set on the answer).
9221 */
9222static void
9223rctx_authority_positive(respctx_t *rctx) {
9224	fetchctx_t *fctx = rctx->fctx;
9225	bool done = false;
9226	isc_result_t result;
9227
9228	result = dns_message_firstname(rctx->query->rmessage,
9229				       DNS_SECTION_AUTHORITY);
9230	while (!done && result == ISC_R_SUCCESS) {
9231		dns_name_t *name = NULL;
9232
9233		dns_message_currentname(rctx->query->rmessage,
9234					DNS_SECTION_AUTHORITY, &name);
9235
9236		if (!name_external(name, dns_rdatatype_ns, fctx)) {
9237			dns_rdataset_t *rdataset = NULL;
9238
9239			/*
9240			 * We expect to find NS or SIG NS rdatasets, and
9241			 * nothing else.
9242			 */
9243			for (rdataset = ISC_LIST_HEAD(name->list);
9244			     rdataset != NULL;
9245			     rdataset = ISC_LIST_NEXT(rdataset, link))
9246			{
9247				if (rdataset->type == dns_rdatatype_ns ||
9248				    (rdataset->type == dns_rdatatype_rrsig &&
9249				     rdataset->covers == dns_rdatatype_ns))
9250				{
9251					name->attributes |= DNS_NAMEATTR_CACHE;
9252					rdataset->attributes |=
9253						DNS_RDATASETATTR_CACHE;
9254
9255					if (rctx->aa) {
9256						rdataset->trust =
9257							dns_trust_authauthority;
9258					} else {
9259						rdataset->trust =
9260							dns_trust_additional;
9261					}
9262
9263					if (rdataset->type == dns_rdatatype_ns)
9264					{
9265						rctx->ns_name = name;
9266						rctx->ns_rdataset = rdataset;
9267					}
9268					/*
9269					 * Mark any additional data related
9270					 * to this rdataset.
9271					 */
9272					(void)dns_rdataset_additionaldata(
9273						rdataset, check_related, rctx);
9274					done = true;
9275				}
9276			}
9277		}
9278
9279		result = dns_message_nextname(rctx->query->rmessage,
9280					      DNS_SECTION_AUTHORITY);
9281	}
9282}
9283
9284/*
9285 * rctx_answer_none():
9286 * Handles a response without an answer: this is either a negative
9287 * response (NXDOMAIN or NXRRSET) or a referral. Determine which it is,
9288 * then either scan the authority section for negative caching and
9289 * DNSSEC proof of nonexistence, or else call rctx_referral().
9290 */
9291static isc_result_t
9292rctx_answer_none(respctx_t *rctx) {
9293	isc_result_t result;
9294	fetchctx_t *fctx = rctx->fctx;
9295
9296	FCTXTRACE("rctx_answer_none");
9297
9298	rctx_answer_init(rctx);
9299
9300	/*
9301	 * Sometimes we can tell if its a negative response by looking at
9302	 * the message header.
9303	 */
9304	if (rctx->query->rmessage->rcode == dns_rcode_nxdomain ||
9305	    (rctx->query->rmessage->counts[DNS_SECTION_ANSWER] == 0 &&
9306	     rctx->query->rmessage->counts[DNS_SECTION_AUTHORITY] == 0))
9307	{
9308		rctx->negative = true;
9309	}
9310
9311	/*
9312	 * Process the authority section
9313	 */
9314	result = rctx_authority_negative(rctx);
9315	if (result == ISC_R_COMPLETE) {
9316		return (rctx->result);
9317	}
9318
9319	log_ns_ttl(fctx, "rctx_answer_none");
9320
9321	if (rctx->ns_rdataset != NULL &&
9322	    dns_name_equal(&fctx->domain, rctx->ns_name) &&
9323	    !dns_name_equal(rctx->ns_name, dns_rootname))
9324	{
9325		trim_ns_ttl(fctx, rctx->ns_name, rctx->ns_rdataset);
9326	}
9327
9328	/*
9329	 * A negative response has a SOA record (Type 2)
9330	 * and a optional NS RRset (Type 1) or it has neither
9331	 * a SOA or a NS RRset (Type 3, handled above) or
9332	 * rcode is NXDOMAIN (handled above) in which case
9333	 * the NS RRset is allowed (Type 4).
9334	 */
9335	if (rctx->soa_name != NULL) {
9336		rctx->negative = true;
9337	}
9338
9339	if (!rctx->ns_in_answer && !rctx->glue_in_answer) {
9340		/*
9341		 * Process DNSSEC records in the authority section.
9342		 */
9343		result = rctx_authority_dnssec(rctx);
9344		if (result == ISC_R_COMPLETE) {
9345			return (rctx->result);
9346		}
9347	}
9348
9349	/*
9350	 * Trigger lookups for DNS nameservers.
9351	 */
9352	if (rctx->negative &&
9353	    rctx->query->rmessage->rcode == dns_rcode_noerror &&
9354	    fctx->type == dns_rdatatype_ds && rctx->soa_name != NULL &&
9355	    dns_name_equal(rctx->soa_name, &fctx->name) &&
9356	    !dns_name_equal(&fctx->name, dns_rootname))
9357	{
9358		return (DNS_R_CHASEDSSERVERS);
9359	}
9360
9361	/*
9362	 * Did we find anything?
9363	 */
9364	if (!rctx->negative && rctx->ns_name == NULL) {
9365		/*
9366		 * The responder is insane.
9367		 */
9368		if (rctx->found_name == NULL) {
9369			log_formerr(fctx, "invalid response");
9370			return (DNS_R_FORMERR);
9371		}
9372		if (!dns_name_issubdomain(rctx->found_name, &fctx->domain)) {
9373			char nbuf[DNS_NAME_FORMATSIZE];
9374			char dbuf[DNS_NAME_FORMATSIZE];
9375			char tbuf[DNS_RDATATYPE_FORMATSIZE];
9376
9377			dns_rdatatype_format(rctx->found_type, tbuf,
9378					     sizeof(tbuf));
9379			dns_name_format(rctx->found_name, nbuf, sizeof(nbuf));
9380			dns_name_format(&fctx->domain, dbuf, sizeof(dbuf));
9381
9382			log_formerr(fctx,
9383				    "Name %s (%s) not subdomain"
9384				    " of zone %s -- invalid response",
9385				    nbuf, tbuf, dbuf);
9386		} else {
9387			log_formerr(fctx, "invalid response");
9388		}
9389		return (DNS_R_FORMERR);
9390	}
9391
9392	/*
9393	 * If we found both NS and SOA, they should be the same name.
9394	 */
9395	if (rctx->ns_name != NULL && rctx->soa_name != NULL &&
9396	    rctx->ns_name != rctx->soa_name)
9397	{
9398		log_formerr(fctx, "NS/SOA mismatch");
9399		return (DNS_R_FORMERR);
9400	}
9401
9402	/*
9403	 * Handle a referral.
9404	 */
9405	result = rctx_referral(rctx);
9406	if (result == ISC_R_COMPLETE) {
9407		return (rctx->result);
9408	}
9409
9410	/*
9411	 * Since we're not doing a referral, we don't want to cache any
9412	 * NS RRs we may have found.
9413	 */
9414	if (rctx->ns_name != NULL) {
9415		rctx->ns_name->attributes &= ~DNS_NAMEATTR_CACHE;
9416	}
9417
9418	if (rctx->negative) {
9419		FCTX_ATTR_SET(fctx, FCTX_ATTR_WANTNCACHE);
9420	}
9421
9422	return (ISC_R_SUCCESS);
9423}
9424
9425/*
9426 * rctx_authority_negative():
9427 * Scan the authority section of a negative answer, handling
9428 * NS and SOA records. (Note that this function does *not* handle
9429 * DNSSEC records; those are addressed separately in
9430 * rctx_authority_dnssec() below.)
9431 */
9432static isc_result_t
9433rctx_authority_negative(respctx_t *rctx) {
9434	isc_result_t result;
9435	fetchctx_t *fctx = rctx->fctx;
9436	dns_section_t section;
9437	dns_rdataset_t *rdataset = NULL;
9438	bool finished = false;
9439
9440	if (rctx->ns_in_answer) {
9441		INSIST(fctx->type == dns_rdatatype_ns);
9442		section = DNS_SECTION_ANSWER;
9443	} else {
9444		section = DNS_SECTION_AUTHORITY;
9445	}
9446
9447	result = dns_message_firstname(rctx->query->rmessage, section);
9448	if (result != ISC_R_SUCCESS) {
9449		return (ISC_R_SUCCESS);
9450	}
9451
9452	while (!finished) {
9453		dns_name_t *name = NULL;
9454
9455		dns_message_currentname(rctx->query->rmessage, section, &name);
9456		result = dns_message_nextname(rctx->query->rmessage, section);
9457		if (result != ISC_R_SUCCESS) {
9458			finished = true;
9459		}
9460
9461		if (!dns_name_issubdomain(name, &fctx->domain)) {
9462			continue;
9463		}
9464
9465		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
9466		     rdataset = ISC_LIST_NEXT(rdataset, link))
9467		{
9468			dns_rdatatype_t type = rdataset->type;
9469			if (type == dns_rdatatype_rrsig) {
9470				type = rdataset->covers;
9471			}
9472			if (((type == dns_rdatatype_ns ||
9473			      type == dns_rdatatype_soa) &&
9474			     !dns_name_issubdomain(&fctx->name, name)))
9475			{
9476				char qbuf[DNS_NAME_FORMATSIZE];
9477				char nbuf[DNS_NAME_FORMATSIZE];
9478				char tbuf[DNS_RDATATYPE_FORMATSIZE];
9479				dns_rdatatype_format(type, tbuf, sizeof(tbuf));
9480				dns_name_format(name, nbuf, sizeof(nbuf));
9481				dns_name_format(&fctx->name, qbuf,
9482						sizeof(qbuf));
9483				log_formerr(fctx,
9484					    "unrelated %s %s in "
9485					    "%s authority section",
9486					    tbuf, nbuf, qbuf);
9487				break;
9488			}
9489
9490			switch (type) {
9491			case dns_rdatatype_ns:
9492				/*
9493				 * NS or RRSIG NS.
9494				 *
9495				 * Only one set of NS RRs is allowed.
9496				 */
9497				if (rdataset->type == dns_rdatatype_ns) {
9498					if (rctx->ns_name != NULL &&
9499					    name != rctx->ns_name)
9500					{
9501						log_formerr(fctx, "multiple NS "
9502								  "RRsets "
9503								  "in "
9504								  "authority "
9505								  "section");
9506						rctx->result = DNS_R_FORMERR;
9507						return (ISC_R_COMPLETE);
9508					}
9509					rctx->ns_name = name;
9510					rctx->ns_rdataset = rdataset;
9511				}
9512				name->attributes |= DNS_NAMEATTR_CACHE;
9513				rdataset->attributes |= DNS_RDATASETATTR_CACHE;
9514				rdataset->trust = dns_trust_glue;
9515				break;
9516			case dns_rdatatype_soa:
9517				/*
9518				 * SOA, or RRSIG SOA.
9519				 *
9520				 * Only one SOA is allowed.
9521				 */
9522				if (rdataset->type == dns_rdatatype_soa) {
9523					if (rctx->soa_name != NULL &&
9524					    name != rctx->soa_name)
9525					{
9526						log_formerr(fctx, "multiple "
9527								  "SOA RRs "
9528								  "in "
9529								  "authority "
9530								  "section");
9531						rctx->result = DNS_R_FORMERR;
9532						return (ISC_R_COMPLETE);
9533					}
9534					rctx->soa_name = name;
9535				}
9536				name->attributes |= DNS_NAMEATTR_NCACHE;
9537				rdataset->attributes |= DNS_RDATASETATTR_NCACHE;
9538				if (rctx->aa) {
9539					rdataset->trust =
9540						dns_trust_authauthority;
9541				} else if (ISFORWARDER(fctx->addrinfo)) {
9542					rdataset->trust = dns_trust_answer;
9543				} else {
9544					rdataset->trust = dns_trust_additional;
9545				}
9546				break;
9547			default:
9548				continue;
9549			}
9550		}
9551	}
9552
9553	return (ISC_R_SUCCESS);
9554}
9555
9556/*
9557 * rctx_ncache():
9558 * Cache the negatively cacheable parts of the message.  This may
9559 * also cause work to be queued to the DNSSEC validator.
9560 */
9561static void
9562rctx_ncache(respctx_t *rctx) {
9563	isc_result_t result;
9564	dns_rdatatype_t covers;
9565	fetchctx_t *fctx = rctx->fctx;
9566
9567	if (!WANTNCACHE(fctx)) {
9568		return;
9569	}
9570
9571	/*
9572	 * Cache DS NXDOMAIN separately to other types.
9573	 */
9574	if (rctx->query->rmessage->rcode == dns_rcode_nxdomain &&
9575	    fctx->type != dns_rdatatype_ds)
9576	{
9577		covers = dns_rdatatype_any;
9578	} else {
9579		covers = fctx->type;
9580	}
9581
9582	/*
9583	 * Cache any negative cache entries in the message.
9584	 */
9585	result = ncache_message(fctx, rctx->query->rmessage,
9586				rctx->query->addrinfo, covers, rctx->now);
9587	if (result != ISC_R_SUCCESS) {
9588		FCTXTRACE3("ncache_message complete", result);
9589	}
9590}
9591
9592/*
9593 * rctx_authority_dnssec():
9594 *
9595 * Scan the authority section of a negative answer or referral,
9596 * handling DNSSEC records (i.e. NSEC, NSEC3, DS).
9597 */
9598static isc_result_t
9599rctx_authority_dnssec(respctx_t *rctx) {
9600	isc_result_t result;
9601	fetchctx_t *fctx = rctx->fctx;
9602	dns_rdataset_t *rdataset = NULL;
9603	bool finished = false;
9604
9605	REQUIRE(!rctx->ns_in_answer && !rctx->glue_in_answer);
9606
9607	result = dns_message_firstname(rctx->query->rmessage,
9608				       DNS_SECTION_AUTHORITY);
9609	if (result != ISC_R_SUCCESS) {
9610		return (ISC_R_SUCCESS);
9611	}
9612
9613	while (!finished) {
9614		dns_name_t *name = NULL;
9615
9616		dns_message_currentname(rctx->query->rmessage,
9617					DNS_SECTION_AUTHORITY, &name);
9618		result = dns_message_nextname(rctx->query->rmessage,
9619					      DNS_SECTION_AUTHORITY);
9620		if (result != ISC_R_SUCCESS) {
9621			finished = true;
9622		}
9623
9624		if (!dns_name_issubdomain(name, &fctx->domain)) {
9625			/*
9626			 * Invalid name found; preserve it for logging
9627			 * later.
9628			 */
9629			rctx->found_name = name;
9630			rctx->found_type = ISC_LIST_HEAD(name->list)->type;
9631			continue;
9632		}
9633
9634		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
9635		     rdataset = ISC_LIST_NEXT(rdataset, link))
9636		{
9637			bool checknta = true;
9638			bool secure_domain = false;
9639			dns_rdatatype_t type = rdataset->type;
9640
9641			if (type == dns_rdatatype_rrsig) {
9642				type = rdataset->covers;
9643			}
9644
9645			switch (type) {
9646			case dns_rdatatype_nsec:
9647			case dns_rdatatype_nsec3:
9648				if (rctx->negative) {
9649					name->attributes |= DNS_NAMEATTR_NCACHE;
9650					rdataset->attributes |=
9651						DNS_RDATASETATTR_NCACHE;
9652				} else if (type == dns_rdatatype_nsec) {
9653					name->attributes |= DNS_NAMEATTR_CACHE;
9654					rdataset->attributes |=
9655						DNS_RDATASETATTR_CACHE;
9656				}
9657
9658				if (rctx->aa) {
9659					rdataset->trust =
9660						dns_trust_authauthority;
9661				} else if (ISFORWARDER(fctx->addrinfo)) {
9662					rdataset->trust = dns_trust_answer;
9663				} else {
9664					rdataset->trust = dns_trust_additional;
9665				}
9666				/*
9667				 * No additional data needs to be marked.
9668				 */
9669				break;
9670			case dns_rdatatype_ds:
9671				/*
9672				 * DS or SIG DS.
9673				 *
9674				 * These should only be here if this is a
9675				 * referral, and there should only be one
9676				 * DS RRset.
9677				 */
9678				if (rctx->ns_name == NULL) {
9679					log_formerr(fctx, "DS with no "
9680							  "referral");
9681					rctx->result = DNS_R_FORMERR;
9682					return (ISC_R_COMPLETE);
9683				}
9684
9685				if (rdataset->type == dns_rdatatype_ds) {
9686					if (rctx->ds_name != NULL &&
9687					    name != rctx->ds_name)
9688					{
9689						log_formerr(fctx, "DS doesn't "
9690								  "match "
9691								  "referral "
9692								  "(NS)");
9693						rctx->result = DNS_R_FORMERR;
9694						return (ISC_R_COMPLETE);
9695					}
9696					rctx->ds_name = name;
9697				}
9698
9699				name->attributes |= DNS_NAMEATTR_CACHE;
9700				rdataset->attributes |= DNS_RDATASETATTR_CACHE;
9701
9702				if ((fctx->options & DNS_FETCHOPT_NONTA) != 0) {
9703					checknta = false;
9704				}
9705				if (fctx->res->view->enablevalidation) {
9706					result = issecuredomain(
9707						fctx->res->view, name,
9708						dns_rdatatype_ds, fctx->now,
9709						checknta, NULL, &secure_domain);
9710					if (result != ISC_R_SUCCESS) {
9711						return (result);
9712					}
9713				}
9714				if (secure_domain) {
9715					rdataset->trust =
9716						dns_trust_pending_answer;
9717				} else if (rctx->aa) {
9718					rdataset->trust =
9719						dns_trust_authauthority;
9720				} else if (ISFORWARDER(fctx->addrinfo)) {
9721					rdataset->trust = dns_trust_answer;
9722				} else {
9723					rdataset->trust = dns_trust_additional;
9724				}
9725				break;
9726			default:
9727				continue;
9728			}
9729		}
9730	}
9731
9732	return (ISC_R_SUCCESS);
9733}
9734
9735/*
9736 * rctx_referral():
9737 * Handles referral responses. Check for sanity, find glue as needed,
9738 * and update the fetch context to follow the delegation.
9739 */
9740static isc_result_t
9741rctx_referral(respctx_t *rctx) {
9742	isc_result_t result;
9743	fetchctx_t *fctx = rctx->fctx;
9744
9745	if (rctx->negative || rctx->ns_name == NULL) {
9746		return (ISC_R_SUCCESS);
9747	}
9748
9749	/*
9750	 * We already know ns_name is a subdomain of fctx->domain.
9751	 * If ns_name is equal to fctx->domain, we're not making
9752	 * progress.  We return DNS_R_FORMERR so that we'll keep
9753	 * trying other servers.
9754	 */
9755	if (dns_name_equal(rctx->ns_name, &fctx->domain)) {
9756		log_formerr(fctx, "non-improving referral");
9757		rctx->result = DNS_R_FORMERR;
9758		return (ISC_R_COMPLETE);
9759	}
9760
9761	/*
9762	 * If the referral name is not a parent of the query
9763	 * name, consider the responder insane.
9764	 */
9765	if (!dns_name_issubdomain(&fctx->name, rctx->ns_name)) {
9766		/* Logged twice */
9767		log_formerr(fctx, "referral to non-parent");
9768		FCTXTRACE("referral to non-parent");
9769		rctx->result = DNS_R_FORMERR;
9770		return (ISC_R_COMPLETE);
9771	}
9772
9773	/*
9774	 * Mark any additional data related to this rdataset.
9775	 * It's important that we do this before we change the
9776	 * query domain.
9777	 */
9778	INSIST(rctx->ns_rdataset != NULL);
9779	FCTX_ATTR_SET(fctx, FCTX_ATTR_GLUING);
9780	(void)dns_rdataset_additionaldata(rctx->ns_rdataset, check_related,
9781					  rctx);
9782#if CHECK_FOR_GLUE_IN_ANSWER
9783	/*
9784	 * Look in the answer section for "glue" that is incorrectly
9785	 * returned as a answer.  This is needed if the server also
9786	 * minimizes the response size by not adding records to the
9787	 * additional section that are in the answer section or if
9788	 * the record gets dropped due to message size constraints.
9789	 */
9790	if (rctx->glue_in_answer &&
9791	    (fctx->type == dns_rdatatype_aaaa || fctx->type == dns_rdatatype_a))
9792	{
9793		(void)dns_rdataset_additionaldata(rctx->ns_rdataset,
9794						  check_answer, fctx);
9795	}
9796#endif /* if CHECK_FOR_GLUE_IN_ANSWER */
9797	FCTX_ATTR_CLR(fctx, FCTX_ATTR_GLUING);
9798
9799	/*
9800	 * NS rdatasets with 0 TTL cause problems.
9801	 * dns_view_findzonecut() will not find them when we
9802	 * try to follow the referral, and we'll SERVFAIL
9803	 * because the best nameservers are now above QDOMAIN.
9804	 * We force the TTL to 1 second to prevent this.
9805	 */
9806	if (rctx->ns_rdataset->ttl == 0) {
9807		rctx->ns_rdataset->ttl = 1;
9808	}
9809
9810	/*
9811	 * Set the current query domain to the referral name.
9812	 *
9813	 * XXXRTH  We should check if we're in forward-only mode, and
9814	 *		if so we should bail out.
9815	 */
9816	INSIST(dns_name_countlabels(&fctx->domain) > 0);
9817	fcount_decr(fctx);
9818
9819	dns_name_free(&fctx->domain, fctx->mctx);
9820	if (dns_rdataset_isassociated(&fctx->nameservers)) {
9821		dns_rdataset_disassociate(&fctx->nameservers);
9822	}
9823
9824	dns_name_init(&fctx->domain, NULL);
9825	dns_name_dup(rctx->ns_name, fctx->mctx, &fctx->domain);
9826
9827	if ((fctx->options & DNS_FETCHOPT_QMINIMIZE) != 0) {
9828		dns_name_free(&fctx->qmindcname, fctx->mctx);
9829		dns_name_init(&fctx->qmindcname, NULL);
9830		dns_name_dup(rctx->ns_name, fctx->mctx, &fctx->qmindcname);
9831
9832		result = fctx_minimize_qname(fctx);
9833		if (result != ISC_R_SUCCESS) {
9834			rctx->result = result;
9835			return (ISC_R_COMPLETE);
9836		}
9837	}
9838
9839	result = fcount_incr(fctx, true);
9840	if (result != ISC_R_SUCCESS) {
9841		rctx->result = result;
9842		return (ISC_R_COMPLETE);
9843	}
9844
9845	FCTX_ATTR_SET(fctx, FCTX_ATTR_WANTCACHE);
9846	fctx->ns_ttl_ok = false;
9847	log_ns_ttl(fctx, "DELEGATION");
9848	rctx->result = DNS_R_DELEGATION;
9849
9850	/*
9851	 * Reinitialize 'rctx' to prepare for following the delegation:
9852	 * set the get_nameservers and next_server flags appropriately and
9853	 * reset the fetch context counters.
9854	 *
9855	 */
9856	if ((rctx->fctx->options & DNS_FETCHOPT_NOFOLLOW) == 0) {
9857		rctx->get_nameservers = true;
9858		rctx->next_server = true;
9859		rctx->fctx->restarts = 0;
9860		rctx->fctx->referrals++;
9861		rctx->fctx->querysent = 0;
9862		rctx->fctx->lamecount = 0;
9863		rctx->fctx->quotacount = 0;
9864		rctx->fctx->neterr = 0;
9865		rctx->fctx->badresp = 0;
9866		rctx->fctx->adberr = 0;
9867	}
9868
9869	return (ISC_R_COMPLETE);
9870}
9871
9872/*
9873 * rctx_additional():
9874 * Scan the additional section of a response to find records related
9875 * to answers we were interested in.
9876 */
9877static void
9878rctx_additional(respctx_t *rctx) {
9879	bool rescan;
9880	dns_section_t section = DNS_SECTION_ADDITIONAL;
9881	isc_result_t result;
9882
9883again:
9884	rescan = false;
9885
9886	for (result = dns_message_firstname(rctx->query->rmessage, section);
9887	     result == ISC_R_SUCCESS;
9888	     result = dns_message_nextname(rctx->query->rmessage, section))
9889	{
9890		dns_name_t *name = NULL;
9891		dns_rdataset_t *rdataset;
9892		dns_message_currentname(rctx->query->rmessage,
9893					DNS_SECTION_ADDITIONAL, &name);
9894		if ((name->attributes & DNS_NAMEATTR_CHASE) == 0) {
9895			continue;
9896		}
9897		name->attributes &= ~DNS_NAMEATTR_CHASE;
9898		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
9899		     rdataset = ISC_LIST_NEXT(rdataset, link))
9900		{
9901			if (CHASE(rdataset)) {
9902				rdataset->attributes &= ~DNS_RDATASETATTR_CHASE;
9903				(void)dns_rdataset_additionaldata(
9904					rdataset, check_related, rctx);
9905				rescan = true;
9906			}
9907		}
9908	}
9909	if (rescan) {
9910		goto again;
9911	}
9912}
9913
9914/*
9915 * rctx_nextserver():
9916 * We found something wrong with the remote server, but it may be
9917 * useful to try another one.
9918 */
9919static void
9920rctx_nextserver(respctx_t *rctx, dns_message_t *message,
9921		dns_adbaddrinfo_t *addrinfo, isc_result_t result) {
9922	fetchctx_t *fctx = rctx->fctx;
9923
9924	if (result == DNS_R_FORMERR) {
9925		rctx->broken_server = DNS_R_FORMERR;
9926	}
9927	if (rctx->broken_server != ISC_R_SUCCESS) {
9928		/*
9929		 * Add this server to the list of bad servers for
9930		 * this fctx.
9931		 */
9932		add_bad(fctx, message, addrinfo, rctx->broken_server,
9933			rctx->broken_type);
9934	}
9935
9936	if (rctx->get_nameservers) {
9937		dns_fixedname_t foundname, founddc;
9938		dns_name_t *name, *fname, *dcname;
9939		unsigned int findoptions = 0;
9940
9941		fname = dns_fixedname_initname(&foundname);
9942		dcname = dns_fixedname_initname(&founddc);
9943
9944		if (result != ISC_R_SUCCESS) {
9945			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
9946			return;
9947		}
9948		if (dns_rdatatype_atparent(fctx->type)) {
9949			findoptions |= DNS_DBFIND_NOEXACT;
9950		}
9951		if ((rctx->retryopts & DNS_FETCHOPT_UNSHARED) == 0) {
9952			name = &fctx->name;
9953		} else {
9954			name = &fctx->domain;
9955		}
9956		result = dns_view_findzonecut(
9957			fctx->res->view, name, fname, dcname, fctx->now,
9958			findoptions, true, true, &fctx->nameservers, NULL);
9959		if (result != ISC_R_SUCCESS) {
9960			FCTXTRACE("couldn't find a zonecut");
9961			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
9962			return;
9963		}
9964		if (!dns_name_issubdomain(fname, &fctx->domain)) {
9965			/*
9966			 * The best nameservers are now above our QDOMAIN.
9967			 */
9968			FCTXTRACE("nameservers now above QDOMAIN");
9969			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
9970			return;
9971		}
9972
9973		fcount_decr(fctx);
9974
9975		dns_name_free(&fctx->domain, fctx->mctx);
9976		dns_name_init(&fctx->domain, NULL);
9977		dns_name_dup(fname, fctx->mctx, &fctx->domain);
9978		dns_name_free(&fctx->qmindcname, fctx->mctx);
9979		dns_name_init(&fctx->qmindcname, NULL);
9980		dns_name_dup(dcname, fctx->mctx, &fctx->qmindcname);
9981
9982		result = fcount_incr(fctx, true);
9983		if (result != ISC_R_SUCCESS) {
9984			fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
9985			return;
9986		}
9987		fctx->ns_ttl = fctx->nameservers.ttl;
9988		fctx->ns_ttl_ok = true;
9989		fctx_cancelqueries(fctx, true, false);
9990		fctx_cleanupall(fctx);
9991	}
9992
9993	/*
9994	 * Try again.
9995	 */
9996	fctx_try(fctx, !rctx->get_nameservers, false);
9997}
9998
9999/*
10000 * rctx_resend():
10001 *
10002 * Resend the query, probably with the options changed. Calls fctx_query(),
10003 * passing rctx->retryopts (which is based on query->options, but may have been
10004 * updated since the last time fctx_query() was called).
10005 */
10006static void
10007rctx_resend(respctx_t *rctx, dns_adbaddrinfo_t *addrinfo) {
10008	isc_result_t result;
10009	fetchctx_t *fctx = rctx->fctx;
10010	bool bucket_empty;
10011	dns_resolver_t *res = fctx->res;
10012	unsigned int bucketnum;
10013
10014	FCTXTRACE("resend");
10015	inc_stats(fctx->res, dns_resstatscounter_retry);
10016	fctx_increference(fctx);
10017	result = fctx_query(fctx, addrinfo, rctx->retryopts);
10018	if (result == ISC_R_SUCCESS) {
10019		return;
10020	}
10021
10022	bucketnum = fctx->bucketnum;
10023	fctx_done(fctx, result, __LINE__);
10024	LOCK(&res->buckets[bucketnum].lock);
10025	bucket_empty = fctx_decreference(fctx);
10026	UNLOCK(&res->buckets[bucketnum].lock);
10027	if (bucket_empty) {
10028		empty_bucket(res);
10029	}
10030}
10031
10032/*
10033 * rctx_next():
10034 * We got what appeared to be a response but it didn't match the question
10035 * or the cookie; it may have been meant for someone else, or it may be a
10036 * spoofing attack. Drop it and continue listening for the response we
10037 * wanted.
10038 */
10039static void
10040rctx_next(respctx_t *rctx) {
10041#ifdef WANT_QUERYTRACE
10042	fetchctx_t *fctx = rctx->fctx;
10043#endif /* ifdef WANT_QUERYTRACE */
10044	isc_result_t result;
10045
10046	FCTXTRACE("nextitem");
10047	inc_stats(rctx->fctx->res, dns_resstatscounter_nextitem);
10048	INSIST(rctx->query->dispentry != NULL);
10049	dns_message_reset(rctx->query->rmessage, DNS_MESSAGE_INTENTPARSE);
10050	result = dns_dispatch_getnext(rctx->query->dispentry, &rctx->devent);
10051	if (result != ISC_R_SUCCESS) {
10052		fctx_done(rctx->fctx, result, __LINE__);
10053	}
10054}
10055
10056/*
10057 * rctx_chaseds():
10058 * Look up the parent zone's NS records so that DS records can be fetched.
10059 */
10060static void
10061rctx_chaseds(respctx_t *rctx, dns_message_t *message,
10062	     dns_adbaddrinfo_t *addrinfo, isc_result_t result) {
10063	fetchctx_t *fctx = rctx->fctx;
10064	unsigned int n;
10065
10066	add_bad(fctx, message, addrinfo, result, rctx->broken_type);
10067	fctx_cancelqueries(fctx, true, false);
10068	fctx_cleanupfinds(fctx);
10069	fctx_cleanupforwaddrs(fctx);
10070
10071	n = dns_name_countlabels(&fctx->name);
10072	dns_name_getlabelsequence(&fctx->name, 1, n - 1, &fctx->nsname);
10073
10074	FCTXTRACE("suspending DS lookup to find parent's NS records");
10075
10076	result = dns_resolver_createfetch(
10077		fctx->res, &fctx->nsname, dns_rdatatype_ns, NULL, NULL, NULL,
10078		NULL, 0, fctx->options, 0, NULL, rctx->task, resume_dslookup,
10079		fctx, &fctx->nsrrset, NULL, &fctx->nsfetch);
10080	if (result != ISC_R_SUCCESS) {
10081		if (result == DNS_R_DUPLICATE) {
10082			result = DNS_R_SERVFAIL;
10083		}
10084		fctx_done(fctx, result, __LINE__);
10085	} else {
10086		fctx_increference(fctx);
10087		result = fctx_stopidletimer(fctx);
10088		if (result != ISC_R_SUCCESS) {
10089			fctx_done(fctx, result, __LINE__);
10090		}
10091	}
10092}
10093
10094/*
10095 * rctx_done():
10096 * This resolver query response is finished, either because we encountered
10097 * a problem or because we've gotten all the information from it that we
10098 * can.  We either wait for another response, resend the query to the
10099 * same server, resend to a new server, or clean up and shut down the fetch.
10100 */
10101static void
10102rctx_done(respctx_t *rctx, isc_result_t result) {
10103	resquery_t *query = rctx->query;
10104	fetchctx_t *fctx = rctx->fctx;
10105	dns_adbaddrinfo_t *addrinfo = query->addrinfo;
10106	/*
10107	 * Need to attach to the message until the scope
10108	 * of this function ends, since there are many places
10109	 * where te message is used and/or may be destroyed
10110	 * before this function ends.
10111	 */
10112	dns_message_t *message = NULL;
10113	dns_message_attach(query->rmessage, &message);
10114
10115	FCTXTRACE4("query canceled in response(); ",
10116		   rctx->no_response ? "no response" : "responding", result);
10117
10118	/*
10119	 * Cancel the query.
10120	 *
10121	 * XXXRTH  Don't cancel the query if waiting for validation?
10122	 */
10123	if (!rctx->nextitem) {
10124		fctx_cancelquery(&query, &rctx->devent, rctx->finish,
10125				 rctx->no_response, false);
10126	}
10127
10128#ifdef ENABLE_AFL
10129	if (dns_fuzzing_resolver &&
10130	    (rctx->next_server || rctx->resend || rctx->nextitem))
10131	{
10132		if (rctx->nextitem) {
10133			fctx_cancelquery(&query, &rctx->devent, rctx->finish,
10134					 rctx->no_response, false);
10135		}
10136		fctx_done(fctx, DNS_R_SERVFAIL, __LINE__);
10137		return;
10138	} else
10139#endif /* ifdef ENABLE_AFL */
10140		if (rctx->next_server) {
10141			rctx_nextserver(rctx, message, addrinfo, result);
10142		} else if (rctx->resend) {
10143			rctx_resend(rctx, addrinfo);
10144		} else if (rctx->nextitem) {
10145			rctx_next(rctx);
10146		} else if (result == DNS_R_CHASEDSSERVERS) {
10147			rctx_chaseds(rctx, message, addrinfo, result);
10148		} else if (result == ISC_R_SUCCESS && !HAVE_ANSWER(fctx)) {
10149			/*
10150			 * All has gone well so far, but we are waiting for the
10151			 * DNSSEC validator to validate the answer.
10152			 */
10153			FCTXTRACE("wait for validator");
10154			fctx_cancelqueries(fctx, true, false);
10155			/*
10156			 * We must not retransmit while the validator is
10157			 * working; it has references to the current rmessage.
10158			 */
10159			result = fctx_stopidletimer(fctx);
10160			if (result != ISC_R_SUCCESS) {
10161				fctx_done(fctx, result, __LINE__);
10162			}
10163		} else {
10164			/*
10165			 * We're done.
10166			 */
10167			fctx_done(fctx, result, __LINE__);
10168		}
10169
10170	dns_message_detach(&message);
10171}
10172
10173/*
10174 * rctx_logpacket():
10175 * Log the incoming packet; also log to DNSTAP if configured.
10176 */
10177static void
10178rctx_logpacket(respctx_t *rctx) {
10179#ifdef HAVE_DNSTAP
10180	isc_result_t result;
10181	fetchctx_t *fctx = rctx->fctx;
10182	isc_socket_t *sock = NULL;
10183	isc_sockaddr_t localaddr, *la = NULL;
10184	unsigned char zone[DNS_NAME_MAXWIRE];
10185	dns_dtmsgtype_t dtmsgtype;
10186	dns_compress_t cctx;
10187	isc_region_t zr;
10188	isc_buffer_t zb;
10189#endif /* HAVE_DNSTAP */
10190
10191	dns_message_logfmtpacket(
10192		rctx->query->rmessage, "received packet from",
10193		&rctx->query->addrinfo->sockaddr, DNS_LOGCATEGORY_RESOLVER,
10194		DNS_LOGMODULE_PACKETS, &dns_master_style_comment,
10195		ISC_LOG_DEBUG(10), rctx->fctx->res->mctx);
10196
10197#ifdef HAVE_DNSTAP
10198	/*
10199	 * Log the response via dnstap.
10200	 */
10201	memset(&zr, 0, sizeof(zr));
10202	result = dns_compress_init(&cctx, -1, fctx->res->mctx);
10203	if (result == ISC_R_SUCCESS) {
10204		isc_buffer_init(&zb, zone, sizeof(zone));
10205		dns_compress_setmethods(&cctx, DNS_COMPRESS_NONE);
10206		result = dns_name_towire(&fctx->domain, &cctx, &zb);
10207		if (result == ISC_R_SUCCESS) {
10208			isc_buffer_usedregion(&zb, &zr);
10209		}
10210		dns_compress_invalidate(&cctx);
10211	}
10212
10213	if ((fctx->qmessage->flags & DNS_MESSAGEFLAG_RD) != 0) {
10214		dtmsgtype = DNS_DTTYPE_FR;
10215	} else {
10216		dtmsgtype = DNS_DTTYPE_RR;
10217	}
10218
10219	sock = query2sock(rctx->query);
10220
10221	if (sock != NULL) {
10222		result = isc_socket_getsockname(sock, &localaddr);
10223		if (result == ISC_R_SUCCESS) {
10224			la = &localaddr;
10225		}
10226	}
10227
10228	dns_dt_send(fctx->res->view, dtmsgtype, la,
10229		    &rctx->query->addrinfo->sockaddr,
10230		    ((rctx->query->options & DNS_FETCHOPT_TCP) != 0), &zr,
10231		    &rctx->query->start, NULL, &rctx->devent->buffer);
10232#endif /* HAVE_DNSTAP */
10233}
10234
10235/*
10236 * rctx_badserver():
10237 * Is the remote server broken, or does it dislike us?
10238 */
10239static isc_result_t
10240rctx_badserver(respctx_t *rctx, isc_result_t result) {
10241	fetchctx_t *fctx = rctx->fctx;
10242	resquery_t *query = rctx->query;
10243	isc_buffer_t b;
10244	char code[64];
10245	dns_rcode_t rcode = rctx->query->rmessage->rcode;
10246
10247	if (rcode == dns_rcode_noerror || rcode == dns_rcode_yxdomain ||
10248	    rcode == dns_rcode_nxdomain)
10249	{
10250		return (ISC_R_SUCCESS);
10251	}
10252
10253	if ((rcode == dns_rcode_formerr) &&
10254	    (rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0)
10255	{
10256		/*
10257		 * It's very likely they don't like EDNS0.
10258		 * If the response code is SERVFAIL, also check if the
10259		 * response contains an OPT RR and don't cache the
10260		 * failure since it can be returned for various other
10261		 * reasons.
10262		 *
10263		 * XXXRTH  We should check if the question
10264		 *		we're asking requires EDNS0, and
10265		 *		if so, we should bail out.
10266		 */
10267		rctx->retryopts |= DNS_FETCHOPT_NOEDNS0;
10268		rctx->resend = true;
10269		/*
10270		 * Remember that they may not like EDNS0.
10271		 */
10272		add_bad_edns(fctx, &query->addrinfo->sockaddr);
10273		inc_stats(fctx->res, dns_resstatscounter_edns0fail);
10274	} else if (rcode == dns_rcode_formerr) {
10275		if (ISFORWARDER(query->addrinfo)) {
10276			/*
10277			 * This forwarder doesn't understand us,
10278			 * but other forwarders might.  Keep trying.
10279			 */
10280			rctx->broken_server = DNS_R_REMOTEFORMERR;
10281			rctx->next_server = true;
10282		} else {
10283			/*
10284			 * The server doesn't understand us.  Since
10285			 * all servers for a zone need similar
10286			 * capabilities, we assume that we will get
10287			 * FORMERR from all servers, and thus we
10288			 * cannot make any more progress with this
10289			 * fetch.
10290			 */
10291			log_formerr(fctx, "server sent FORMERR");
10292			result = DNS_R_FORMERR;
10293		}
10294	} else if (rcode == dns_rcode_badvers) {
10295		unsigned int version;
10296#if DNS_EDNS_VERSION > 0
10297		unsigned int flags, mask;
10298#endif /* if DNS_EDNS_VERSION > 0 */
10299
10300		INSIST(rctx->opt != NULL);
10301		version = (rctx->opt->ttl >> 16) & 0xff;
10302#if DNS_EDNS_VERSION > 0
10303		flags = (version << DNS_FETCHOPT_EDNSVERSIONSHIFT) |
10304			DNS_FETCHOPT_EDNSVERSIONSET;
10305		mask = DNS_FETCHOPT_EDNSVERSIONMASK |
10306		       DNS_FETCHOPT_EDNSVERSIONSET;
10307#endif /* if DNS_EDNS_VERSION > 0 */
10308
10309		/*
10310		 * Record that we got a good EDNS response.
10311		 */
10312		if (query->ednsversion > (int)version &&
10313		    !EDNSOK(query->addrinfo))
10314		{
10315			dns_adb_changeflags(fctx->adb, query->addrinfo,
10316					    FCTX_ADDRINFO_EDNSOK,
10317					    FCTX_ADDRINFO_EDNSOK);
10318		}
10319
10320		/*
10321		 * RFC 2671 was not clear that unknown options should
10322		 * be ignored.  RFC 6891 is clear that that they
10323		 * should be ignored. If we are supporting the
10324		 * experimental EDNS > 0 then perform strict
10325		 * version checking of badvers responses.  We won't
10326		 * be sending COOKIE etc. in that case.
10327		 */
10328#if DNS_EDNS_VERSION > 0
10329		if ((int)version < query->ednsversion) {
10330			dns_adb_changeflags(fctx->adb, query->addrinfo, flags,
10331					    mask);
10332			rctx->resend = true;
10333		} else {
10334			rctx->broken_server = DNS_R_BADVERS;
10335			rctx->next_server = true;
10336		}
10337#else  /* if DNS_EDNS_VERSION > 0 */
10338		rctx->broken_server = DNS_R_BADVERS;
10339		rctx->next_server = true;
10340#endif /* if DNS_EDNS_VERSION > 0 */
10341	} else if (rcode == dns_rcode_badcookie && rctx->query->rmessage->cc_ok)
10342	{
10343		/*
10344		 * We have recorded the new cookie.
10345		 */
10346		if (BADCOOKIE(query->addrinfo)) {
10347			rctx->retryopts |= DNS_FETCHOPT_TCP;
10348		}
10349		query->addrinfo->flags |= FCTX_ADDRINFO_BADCOOKIE;
10350		rctx->resend = true;
10351	} else {
10352		rctx->broken_server = DNS_R_UNEXPECTEDRCODE;
10353		rctx->next_server = true;
10354	}
10355
10356	isc_buffer_init(&b, code, sizeof(code) - 1);
10357	dns_rcode_totext(rcode, &b);
10358	code[isc_buffer_usedlength(&b)] = '\0';
10359	FCTXTRACE2("remote server broken: returned ", code);
10360	rctx_done(rctx, result);
10361
10362	return (ISC_R_COMPLETE);
10363}
10364
10365/*
10366 * rctx_lameserver():
10367 * Is the server lame?
10368 */
10369static isc_result_t
10370rctx_lameserver(respctx_t *rctx) {
10371	isc_result_t result = ISC_R_SUCCESS;
10372	fetchctx_t *fctx = rctx->fctx;
10373	resquery_t *query = rctx->query;
10374
10375	if (ISFORWARDER(query->addrinfo) || !is_lame(fctx, query->rmessage)) {
10376		return (ISC_R_SUCCESS);
10377	}
10378
10379	inc_stats(fctx->res, dns_resstatscounter_lame);
10380	log_lame(fctx, query->addrinfo);
10381	if (fctx->res->lame_ttl != 0) {
10382		result = dns_adb_marklame(fctx->adb, query->addrinfo,
10383					  &fctx->name, fctx->type,
10384					  rctx->now + fctx->res->lame_ttl);
10385		if (result != ISC_R_SUCCESS) {
10386			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
10387				      DNS_LOGMODULE_RESOLVER, ISC_LOG_ERROR,
10388				      "could not mark server as lame: %s",
10389				      isc_result_totext(result));
10390		}
10391	}
10392	rctx->broken_server = DNS_R_LAME;
10393	rctx->next_server = true;
10394	FCTXTRACE("lame server");
10395	rctx_done(rctx, result);
10396
10397	return (ISC_R_COMPLETE);
10398}
10399
10400/*
10401 * rctx_delonly_zone():
10402 * Handle delegation-only zones like NET and COM.
10403 */
10404static void
10405rctx_delonly_zone(respctx_t *rctx) {
10406	fetchctx_t *fctx = rctx->fctx;
10407	char namebuf[DNS_NAME_FORMATSIZE];
10408	char domainbuf[DNS_NAME_FORMATSIZE];
10409	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
10410	char classbuf[64];
10411	char typebuf[64];
10412
10413	if (ISFORWARDER(rctx->query->addrinfo) ||
10414	    !dns_view_isdelegationonly(fctx->res->view, &fctx->domain) ||
10415	    dns_name_equal(&fctx->domain, &fctx->name) ||
10416	    !fix_mustbedelegationornxdomain(rctx->query->rmessage, fctx))
10417	{
10418		return;
10419	}
10420
10421	dns_name_format(&fctx->name, namebuf, sizeof(namebuf));
10422	dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
10423	dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
10424	dns_rdataclass_format(fctx->res->rdclass, classbuf, sizeof(classbuf));
10425	isc_sockaddr_format(&rctx->query->addrinfo->sockaddr, addrbuf,
10426			    sizeof(addrbuf));
10427
10428	isc_log_write(dns_lctx, DNS_LOGCATEGORY_DELEGATION_ONLY,
10429		      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
10430		      "enforced delegation-only for '%s' (%s/%s/%s) from %s",
10431		      domainbuf, namebuf, typebuf, classbuf, addrbuf);
10432}
10433
10434/***
10435 *** Resolver Methods
10436 ***/
10437static void
10438destroy(dns_resolver_t *res) {
10439	unsigned int i;
10440	alternate_t *a;
10441
10442	isc_refcount_destroy(&res->references);
10443	REQUIRE(!atomic_load_acquire(&res->priming));
10444	REQUIRE(res->primefetch == NULL);
10445
10446	RTRACE("destroy");
10447
10448	REQUIRE(atomic_load_acquire(&res->nfctx) == 0);
10449
10450	isc_mutex_destroy(&res->primelock);
10451	isc_mutex_destroy(&res->lock);
10452	for (i = 0; i < res->nbuckets; i++) {
10453		INSIST(ISC_LIST_EMPTY(res->buckets[i].fctxs));
10454		isc_task_shutdown(res->buckets[i].task);
10455		isc_task_detach(&res->buckets[i].task);
10456		isc_mutex_destroy(&res->buckets[i].lock);
10457		isc_mem_detach(&res->buckets[i].mctx);
10458	}
10459	isc_mem_put(res->mctx, res->buckets,
10460		    res->nbuckets * sizeof(fctxbucket_t));
10461	for (i = 0; i < RES_DOMAIN_BUCKETS; i++) {
10462		INSIST(ISC_LIST_EMPTY(res->dbuckets[i].list));
10463		isc_mem_detach(&res->dbuckets[i].mctx);
10464		isc_mutex_destroy(&res->dbuckets[i].lock);
10465	}
10466	isc_mem_put(res->mctx, res->dbuckets,
10467		    RES_DOMAIN_BUCKETS * sizeof(zonebucket_t));
10468	if (res->dispatches4 != NULL) {
10469		dns_dispatchset_destroy(&res->dispatches4);
10470	}
10471	if (res->dispatches6 != NULL) {
10472		dns_dispatchset_destroy(&res->dispatches6);
10473	}
10474	while ((a = ISC_LIST_HEAD(res->alternates)) != NULL) {
10475		ISC_LIST_UNLINK(res->alternates, a, link);
10476		if (!a->isaddress) {
10477			dns_name_free(&a->_u._n.name, res->mctx);
10478		}
10479		isc_mem_put(res->mctx, a, sizeof(*a));
10480	}
10481	dns_resolver_reset_algorithms(res);
10482	dns_resolver_reset_ds_digests(res);
10483	dns_badcache_destroy(&res->badcache);
10484	dns_resolver_resetmustbesecure(res);
10485#if USE_ALGLOCK
10486	isc_rwlock_destroy(&res->alglock);
10487#endif /* if USE_ALGLOCK */
10488#if USE_MBSLOCK
10489	isc_rwlock_destroy(&res->mbslock);
10490#endif /* if USE_MBSLOCK */
10491	isc_timer_destroy(&res->spillattimer);
10492	res->magic = 0;
10493	isc_mem_put(res->mctx, res, sizeof(*res));
10494}
10495
10496static void
10497send_shutdown_events(dns_resolver_t *res) {
10498	isc_event_t *event, *next_event;
10499	isc_task_t *etask;
10500
10501	/*
10502	 * Caller must be holding the resolver lock.
10503	 */
10504
10505	for (event = ISC_LIST_HEAD(res->whenshutdown); event != NULL;
10506	     event = next_event)
10507	{
10508		next_event = ISC_LIST_NEXT(event, ev_link);
10509		ISC_LIST_UNLINK(res->whenshutdown, event, ev_link);
10510		etask = event->ev_sender;
10511		event->ev_sender = res;
10512		isc_task_sendanddetach(&etask, &event);
10513	}
10514}
10515
10516static void
10517empty_bucket(dns_resolver_t *res) {
10518	RTRACE("empty_bucket");
10519
10520	LOCK(&res->lock);
10521
10522	INSIST(res->activebuckets > 0);
10523	res->activebuckets--;
10524	if (res->activebuckets == 0) {
10525		send_shutdown_events(res);
10526	}
10527
10528	UNLOCK(&res->lock);
10529}
10530
10531static void
10532spillattimer_countdown(isc_task_t *task, isc_event_t *event) {
10533	dns_resolver_t *res = event->ev_arg;
10534	isc_result_t result;
10535	unsigned int count;
10536	bool logit = false;
10537
10538	REQUIRE(VALID_RESOLVER(res));
10539
10540	UNUSED(task);
10541
10542	LOCK(&res->lock);
10543	INSIST(!atomic_load_acquire(&res->exiting));
10544	if (res->spillat > res->spillatmin) {
10545		res->spillat--;
10546		logit = true;
10547	}
10548	if (res->spillat <= res->spillatmin) {
10549		result = isc_timer_reset(res->spillattimer,
10550					 isc_timertype_inactive, NULL, NULL,
10551					 true);
10552		RUNTIME_CHECK(result == ISC_R_SUCCESS);
10553	}
10554	count = res->spillat;
10555	UNLOCK(&res->lock);
10556	if (logit) {
10557		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
10558			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
10559			      "clients-per-query decreased to %u", count);
10560	}
10561
10562	isc_event_free(&event);
10563}
10564
10565isc_result_t
10566dns_resolver_create(dns_view_t *view, isc_taskmgr_t *taskmgr,
10567		    unsigned int ntasks, unsigned int ndisp,
10568		    isc_socketmgr_t *socketmgr, isc_timermgr_t *timermgr,
10569		    unsigned int options, dns_dispatchmgr_t *dispatchmgr,
10570		    dns_dispatch_t *dispatchv4, dns_dispatch_t *dispatchv6,
10571		    dns_resolver_t **resp) {
10572	dns_resolver_t *res;
10573	isc_result_t result = ISC_R_SUCCESS;
10574	unsigned int i, buckets_created = 0, dbuckets_created = 0;
10575	isc_task_t *task = NULL;
10576	char name[16];
10577	unsigned dispattr;
10578
10579	/*
10580	 * Create a resolver.
10581	 */
10582
10583	REQUIRE(DNS_VIEW_VALID(view));
10584	REQUIRE(ntasks > 0);
10585	REQUIRE(ndisp > 0);
10586	REQUIRE(resp != NULL && *resp == NULL);
10587	REQUIRE(dispatchmgr != NULL);
10588	REQUIRE(dispatchv4 != NULL || dispatchv6 != NULL);
10589
10590	res = isc_mem_get(view->mctx, sizeof(*res));
10591	RTRACE("create");
10592	res->mctx = view->mctx;
10593	res->rdclass = view->rdclass;
10594	res->socketmgr = socketmgr;
10595	res->timermgr = timermgr;
10596	res->taskmgr = taskmgr;
10597	res->dispatchmgr = dispatchmgr;
10598	res->view = view;
10599	res->options = options;
10600	res->lame_ttl = 0;
10601	ISC_LIST_INIT(res->alternates);
10602	res->udpsize = RECV_BUFFER_SIZE;
10603	res->algorithms = NULL;
10604	res->digests = NULL;
10605	res->badcache = NULL;
10606	result = dns_badcache_init(res->mctx, DNS_RESOLVER_BADCACHESIZE,
10607				   &res->badcache);
10608	if (result != ISC_R_SUCCESS) {
10609		goto cleanup_res;
10610	}
10611	res->mustbesecure = NULL;
10612	res->spillatmin = res->spillat = 10;
10613	res->spillatmax = 100;
10614	res->spillattimer = NULL;
10615	atomic_init(&res->zspill, 0);
10616	res->zero_no_soa_ttl = false;
10617	res->retryinterval = 30000;
10618	res->nonbackofftries = 3;
10619	res->query_timeout = DEFAULT_QUERY_TIMEOUT;
10620	res->maxdepth = DEFAULT_RECURSION_DEPTH;
10621	res->maxqueries = DEFAULT_MAX_QUERIES;
10622	res->quotaresp[dns_quotatype_zone] = DNS_R_DROP;
10623	res->quotaresp[dns_quotatype_server] = DNS_R_SERVFAIL;
10624	res->nbuckets = ntasks;
10625	if (view->resstats != NULL) {
10626		isc_stats_set(view->resstats, ntasks,
10627			      dns_resstatscounter_buckets);
10628	}
10629	res->activebuckets = ntasks;
10630	res->buckets = isc_mem_get(view->mctx, ntasks * sizeof(fctxbucket_t));
10631	for (i = 0; i < ntasks; i++) {
10632		isc_mutex_init(&res->buckets[i].lock);
10633
10634		res->buckets[i].task = NULL;
10635		/*
10636		 * Since we have a pool of tasks we bind them to task queues
10637		 * to spread the load evenly
10638		 */
10639		result = isc_task_create_bound(
10640			taskmgr, 0, &res->buckets[i].task, ISC_NM_TASK_SLOW(i));
10641		if (result != ISC_R_SUCCESS) {
10642			isc_mutex_destroy(&res->buckets[i].lock);
10643			goto cleanup_buckets;
10644		}
10645		res->buckets[i].mctx = NULL;
10646		snprintf(name, sizeof(name), "res%u", i);
10647		/*
10648		 * Use a separate memory context for each bucket to reduce
10649		 * contention among multiple threads.  Do this only when
10650		 * enabling threads because it will be require more memory.
10651		 */
10652		isc_mem_create(&res->buckets[i].mctx);
10653		isc_mem_setname(res->buckets[i].mctx, name, NULL);
10654		isc_task_setname(res->buckets[i].task, name, res);
10655		ISC_LIST_INIT(res->buckets[i].fctxs);
10656		atomic_init(&res->buckets[i].exiting, false);
10657		buckets_created++;
10658	}
10659
10660	res->dbuckets = isc_mem_get(view->mctx,
10661				    RES_DOMAIN_BUCKETS * sizeof(zonebucket_t));
10662	for (i = 0; i < RES_DOMAIN_BUCKETS; i++) {
10663		ISC_LIST_INIT(res->dbuckets[i].list);
10664		res->dbuckets[i].mctx = NULL;
10665		isc_mem_attach(view->mctx, &res->dbuckets[i].mctx);
10666		isc_mutex_init(&res->dbuckets[i].lock);
10667		dbuckets_created++;
10668	}
10669
10670	res->dispatches4 = NULL;
10671	if (dispatchv4 != NULL) {
10672		dns_dispatchset_create(view->mctx, socketmgr, taskmgr,
10673				       dispatchv4, &res->dispatches4, ndisp);
10674		dispattr = dns_dispatch_getattributes(dispatchv4);
10675		res->exclusivev4 = (dispattr & DNS_DISPATCHATTR_EXCLUSIVE);
10676	}
10677
10678	res->dispatches6 = NULL;
10679	if (dispatchv6 != NULL) {
10680		dns_dispatchset_create(view->mctx, socketmgr, taskmgr,
10681				       dispatchv6, &res->dispatches6, ndisp);
10682		dispattr = dns_dispatch_getattributes(dispatchv6);
10683		res->exclusivev6 = (dispattr & DNS_DISPATCHATTR_EXCLUSIVE);
10684	}
10685
10686	res->querydscp4 = -1;
10687	res->querydscp6 = -1;
10688	isc_refcount_init(&res->references, 1);
10689	atomic_init(&res->exiting, false);
10690	res->frozen = false;
10691	ISC_LIST_INIT(res->whenshutdown);
10692	atomic_init(&res->priming, false);
10693	res->primefetch = NULL;
10694
10695	atomic_init(&res->nfctx, 0);
10696
10697	isc_mutex_init(&res->lock);
10698	isc_mutex_init(&res->primelock);
10699
10700	task = NULL;
10701	result = isc_task_create(taskmgr, 0, &task);
10702	if (result != ISC_R_SUCCESS) {
10703		goto cleanup_primelock;
10704	}
10705	isc_task_setname(task, "resolver_task", NULL);
10706
10707	result = isc_timer_create(timermgr, isc_timertype_inactive, NULL, NULL,
10708				  task, spillattimer_countdown, res,
10709				  &res->spillattimer);
10710	isc_task_detach(&task);
10711	if (result != ISC_R_SUCCESS) {
10712		goto cleanup_primelock;
10713	}
10714
10715#if USE_ALGLOCK
10716	isc_rwlock_init(&res->alglock, 0, 0);
10717#endif /* if USE_ALGLOCK */
10718#if USE_MBSLOCK
10719	isc_rwlock_init(&res->mbslock, 0, 0);
10720#endif /* if USE_MBSLOCK */
10721
10722	res->magic = RES_MAGIC;
10723
10724	*resp = res;
10725
10726	return (ISC_R_SUCCESS);
10727
10728cleanup_primelock:
10729	isc_mutex_destroy(&res->primelock);
10730	isc_mutex_destroy(&res->lock);
10731
10732	if (res->dispatches6 != NULL) {
10733		dns_dispatchset_destroy(&res->dispatches6);
10734	}
10735	if (res->dispatches4 != NULL) {
10736		dns_dispatchset_destroy(&res->dispatches4);
10737	}
10738
10739	for (i = 0; i < dbuckets_created; i++) {
10740		isc_mutex_destroy(&res->dbuckets[i].lock);
10741		isc_mem_detach(&res->dbuckets[i].mctx);
10742	}
10743	isc_mem_put(view->mctx, res->dbuckets,
10744		    RES_DOMAIN_BUCKETS * sizeof(zonebucket_t));
10745
10746cleanup_buckets:
10747	for (i = 0; i < buckets_created; i++) {
10748		isc_mem_detach(&res->buckets[i].mctx);
10749		isc_mutex_destroy(&res->buckets[i].lock);
10750		isc_task_shutdown(res->buckets[i].task);
10751		isc_task_detach(&res->buckets[i].task);
10752	}
10753	isc_mem_put(view->mctx, res->buckets,
10754		    res->nbuckets * sizeof(fctxbucket_t));
10755
10756	dns_badcache_destroy(&res->badcache);
10757
10758cleanup_res:
10759	isc_mem_put(view->mctx, res, sizeof(*res));
10760
10761	return (result);
10762}
10763
10764static void
10765prime_done(isc_task_t *task, isc_event_t *event) {
10766	dns_resolver_t *res;
10767	dns_fetchevent_t *fevent;
10768	dns_fetch_t *fetch;
10769	dns_db_t *db = NULL;
10770
10771	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
10772	fevent = (dns_fetchevent_t *)event;
10773	res = event->ev_arg;
10774	REQUIRE(VALID_RESOLVER(res));
10775
10776	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
10777		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
10778		      "resolver priming query complete");
10779
10780	UNUSED(task);
10781
10782	LOCK(&res->primelock);
10783	fetch = res->primefetch;
10784	res->primefetch = NULL;
10785	UNLOCK(&res->primelock);
10786
10787	INSIST(atomic_compare_exchange_strong_acq_rel(&res->priming,
10788						      &(bool){ true }, false));
10789
10790	if (fevent->result == ISC_R_SUCCESS && res->view->cache != NULL &&
10791	    res->view->hints != NULL)
10792	{
10793		dns_cache_attachdb(res->view->cache, &db);
10794		dns_root_checkhints(res->view, res->view->hints, db);
10795		dns_db_detach(&db);
10796	}
10797
10798	if (fevent->node != NULL) {
10799		dns_db_detachnode(fevent->db, &fevent->node);
10800	}
10801	if (fevent->db != NULL) {
10802		dns_db_detach(&fevent->db);
10803	}
10804	if (dns_rdataset_isassociated(fevent->rdataset)) {
10805		dns_rdataset_disassociate(fevent->rdataset);
10806	}
10807	INSIST(fevent->sigrdataset == NULL);
10808
10809	isc_mem_put(res->mctx, fevent->rdataset, sizeof(*fevent->rdataset));
10810
10811	isc_event_free(&event);
10812	dns_resolver_destroyfetch(&fetch);
10813}
10814
10815void
10816dns_resolver_prime(dns_resolver_t *res) {
10817	bool want_priming = false;
10818	dns_rdataset_t *rdataset;
10819	isc_result_t result;
10820
10821	REQUIRE(VALID_RESOLVER(res));
10822	REQUIRE(res->frozen);
10823
10824	RTRACE("dns_resolver_prime");
10825
10826	if (!atomic_load_acquire(&res->exiting)) {
10827		want_priming = atomic_compare_exchange_strong_acq_rel(
10828			&res->priming, &(bool){ false }, true);
10829	}
10830
10831	if (want_priming) {
10832		/*
10833		 * To avoid any possible recursive locking problems, we
10834		 * start the priming fetch like any other fetch, and holding
10835		 * no resolver locks.  No one else will try to start it
10836		 * because we're the ones who set res->priming to true.
10837		 * Any other callers of dns_resolver_prime() while we're
10838		 * running will see that res->priming is already true and
10839		 * do nothing.
10840		 */
10841		RTRACE("priming");
10842		rdataset = isc_mem_get(res->mctx, sizeof(*rdataset));
10843		dns_rdataset_init(rdataset);
10844
10845		LOCK(&res->primelock);
10846		INSIST(res->primefetch == NULL);
10847		result = dns_resolver_createfetch(
10848			res, dns_rootname, dns_rdatatype_ns, NULL, NULL, NULL,
10849			NULL, 0, DNS_FETCHOPT_NOFORWARD, 0, NULL,
10850			res->buckets[0].task, prime_done, res, rdataset, NULL,
10851			&res->primefetch);
10852		UNLOCK(&res->primelock);
10853
10854		if (result != ISC_R_SUCCESS) {
10855			isc_mem_put(res->mctx, rdataset, sizeof(*rdataset));
10856			INSIST(atomic_compare_exchange_strong_acq_rel(
10857				&res->priming, &(bool){ true }, false));
10858		}
10859		inc_stats(res, dns_resstatscounter_priming);
10860	}
10861}
10862
/*
 * Mark the resolver as frozen.
 *
 * dns_resolver_prime() and dns_resolver_createfetch() require a frozen
 * resolver, while dns_resolver_addalternate() requires one that has not
 * been frozen yet.
 */
void
dns_resolver_freeze(dns_resolver_t *res) {
	/*
	 * Freeze resolver.
	 */

	REQUIRE(VALID_RESOLVER(res));

	res->frozen = true;
}
10873
/*
 * Take an additional reference to 'source' and store it in '*targetp'.
 *
 * Requires:
 *	'source' is a valid resolver that is not exiting.
 *	'targetp' != NULL && '*targetp' == NULL.
 */
void
dns_resolver_attach(dns_resolver_t *source, dns_resolver_t **targetp) {
	REQUIRE(VALID_RESOLVER(source));
	REQUIRE(targetp != NULL && *targetp == NULL);

	RRTRACE(source, "attach");

	/* The exiting check and the increment happen under the resolver lock. */
	LOCK(&source->lock);
	REQUIRE(!atomic_load_acquire(&source->exiting));
	isc_refcount_increment(&source->references);
	UNLOCK(&source->lock);

	*targetp = source;
}
10888
10889void
10890dns_resolver_whenshutdown(dns_resolver_t *res, isc_task_t *task,
10891			  isc_event_t **eventp) {
10892	isc_task_t *tclone;
10893	isc_event_t *event;
10894
10895	REQUIRE(VALID_RESOLVER(res));
10896	REQUIRE(eventp != NULL);
10897
10898	event = *eventp;
10899	*eventp = NULL;
10900
10901	LOCK(&res->lock);
10902
10903	if (atomic_load_acquire(&res->exiting) && res->activebuckets == 0) {
10904		/*
10905		 * We're already shutdown.  Send the event.
10906		 */
10907		event->ev_sender = res;
10908		isc_task_send(task, &event);
10909	} else {
10910		tclone = NULL;
10911		isc_task_attach(task, &tclone);
10912		event->ev_sender = tclone;
10913		ISC_LIST_APPEND(res->whenshutdown, event, ev_link);
10914	}
10915
10916	UNLOCK(&res->lock);
10917}
10918
10919void
10920dns_resolver_shutdown(dns_resolver_t *res) {
10921	unsigned int i;
10922	fetchctx_t *fctx;
10923	isc_result_t result;
10924	bool is_false = false;
10925
10926	REQUIRE(VALID_RESOLVER(res));
10927
10928	RTRACE("shutdown");
10929
10930	LOCK(&res->lock);
10931	if (atomic_compare_exchange_strong(&res->exiting, &is_false, true)) {
10932		RTRACE("exiting");
10933
10934		for (i = 0; i < res->nbuckets; i++) {
10935			LOCK(&res->buckets[i].lock);
10936			for (fctx = ISC_LIST_HEAD(res->buckets[i].fctxs);
10937			     fctx != NULL; fctx = ISC_LIST_NEXT(fctx, link))
10938			{
10939				fctx_shutdown(fctx);
10940			}
10941			if (res->dispatches4 != NULL && !res->exclusivev4) {
10942				dns_dispatchset_cancelall(res->dispatches4,
10943							  res->buckets[i].task);
10944			}
10945			if (res->dispatches6 != NULL && !res->exclusivev6) {
10946				dns_dispatchset_cancelall(res->dispatches6,
10947							  res->buckets[i].task);
10948			}
10949			atomic_store(&res->buckets[i].exiting, true);
10950			if (ISC_LIST_EMPTY(res->buckets[i].fctxs)) {
10951				INSIST(res->activebuckets > 0);
10952				res->activebuckets--;
10953			}
10954			UNLOCK(&res->buckets[i].lock);
10955		}
10956		if (res->activebuckets == 0) {
10957			send_shutdown_events(res);
10958		}
10959		result = isc_timer_reset(res->spillattimer,
10960					 isc_timertype_inactive, NULL, NULL,
10961					 true);
10962		RUNTIME_CHECK(result == ISC_R_SUCCESS);
10963	}
10964	UNLOCK(&res->lock);
10965}
10966
/*
 * Release the reference held in '*resp' and set '*resp' to NULL.
 *
 * When the last reference is dropped the resolver is destroyed; at that
 * point it must already be exiting and have no active buckets.
 */
void
dns_resolver_detach(dns_resolver_t **resp) {
	dns_resolver_t *res;

	REQUIRE(resp != NULL);
	res = *resp;
	*resp = NULL;
	REQUIRE(VALID_RESOLVER(res));

	RTRACE("detach");

	/* A previous count of 1 means this was the final reference. */
	if (isc_refcount_decrement(&res->references) == 1) {
		LOCK(&res->lock);
		INSIST(atomic_load_acquire(&res->exiting));
		INSIST(res->activebuckets == 0);
		UNLOCK(&res->lock);
		destroy(res);
	}
}
10986
10987static bool
10988fctx_match(fetchctx_t *fctx, const dns_name_t *name, dns_rdatatype_t type,
10989	   unsigned int options) {
10990	/*
10991	 * Don't match fetch contexts that are shutting down.
10992	 */
10993	if (fctx->cloned || fctx->state == fetchstate_done ||
10994	    ISC_LIST_EMPTY(fctx->events))
10995	{
10996		return (false);
10997	}
10998
10999	if (fctx->type != type || fctx->options != options) {
11000		return (false);
11001	}
11002	return (dns_name_equal(&fctx->name, name));
11003}
11004
11005static void
11006log_fetch(const dns_name_t *name, dns_rdatatype_t type) {
11007	char namebuf[DNS_NAME_FORMATSIZE];
11008	char typebuf[DNS_RDATATYPE_FORMATSIZE];
11009	int level = ISC_LOG_DEBUG(1);
11010
11011	/*
11012	 * If there's no chance of logging it, don't render (format) the
11013	 * name and RDATA type (further below), and return early.
11014	 */
11015	if (!isc_log_wouldlog(dns_lctx, level)) {
11016		return;
11017	}
11018
11019	dns_name_format(name, namebuf, sizeof(namebuf));
11020	dns_rdatatype_format(type, typebuf, sizeof(typebuf));
11021
11022	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
11023		      DNS_LOGMODULE_RESOLVER, level, "fetch: %s/%s", namebuf,
11024		      typebuf);
11025}
11026
11027static isc_result_t
11028fctx_minimize_qname(fetchctx_t *fctx) {
11029	isc_result_t result = ISC_R_SUCCESS;
11030	unsigned int dlabels, nlabels;
11031
11032	REQUIRE(VALID_FCTX(fctx));
11033
11034	dlabels = dns_name_countlabels(&fctx->qmindcname);
11035	nlabels = dns_name_countlabels(&fctx->name);
11036	dns_name_free(&fctx->qminname, fctx->mctx);
11037	dns_name_init(&fctx->qminname, NULL);
11038
11039	if (dlabels > fctx->qmin_labels) {
11040		fctx->qmin_labels = dlabels + 1;
11041	} else {
11042		fctx->qmin_labels++;
11043	}
11044
11045	if (fctx->ip6arpaskip) {
11046		/*
11047		 * For ip6.arpa we want to skip some of the labels, with
11048		 * boundaries at /16, /32, /48, /56, /64 and /128
11049		 * In 'label count' terms that's equal to
11050		 *    7    11   15   17   19      35
11051		 * We fix fctx->qmin_labels to point to the nearest boundary
11052		 */
11053		if (fctx->qmin_labels < 7) {
11054			fctx->qmin_labels = 7;
11055		} else if (fctx->qmin_labels < 11) {
11056			fctx->qmin_labels = 11;
11057		} else if (fctx->qmin_labels < 15) {
11058			fctx->qmin_labels = 15;
11059		} else if (fctx->qmin_labels < 17) {
11060			fctx->qmin_labels = 17;
11061		} else if (fctx->qmin_labels < 19) {
11062			fctx->qmin_labels = 19;
11063		} else if (fctx->qmin_labels < 35) {
11064			fctx->qmin_labels = 35;
11065		} else {
11066			fctx->qmin_labels = nlabels;
11067		}
11068	} else if (fctx->qmin_labels > DNS_QMIN_MAXLABELS) {
11069		fctx->qmin_labels = DNS_MAX_LABELS + 1;
11070	}
11071
11072	if (fctx->qmin_labels < nlabels) {
11073		/*
11074		 * We want to query for qmin_labels from fctx->name
11075		 */
11076		dns_fixedname_t fname;
11077		dns_name_t *name = dns_fixedname_initname(&fname);
11078		dns_name_split(&fctx->name, fctx->qmin_labels, NULL,
11079			       dns_fixedname_name(&fname));
11080		if ((fctx->options & DNS_FETCHOPT_QMIN_USE_A) != 0) {
11081			isc_buffer_t dbuf;
11082			dns_fixedname_t tmpname;
11083			dns_name_t *tname = dns_fixedname_initname(&tmpname);
11084			char ndata[DNS_NAME_MAXWIRE];
11085
11086			isc_buffer_init(&dbuf, ndata, DNS_NAME_MAXWIRE);
11087			dns_fixedname_init(&tmpname);
11088			result = dns_name_concatenate(&underscore_name, name,
11089						      tname, &dbuf);
11090			if (result == ISC_R_SUCCESS) {
11091				dns_name_dup(tname, fctx->mctx,
11092					     &fctx->qminname);
11093			}
11094			fctx->qmintype = dns_rdatatype_a;
11095		} else {
11096			dns_name_dup(dns_fixedname_name(&fname), fctx->mctx,
11097				     &fctx->qminname);
11098			fctx->qmintype = dns_rdatatype_ns;
11099		}
11100		fctx->minimized = true;
11101	} else {
11102		/* Minimization is done, we'll ask for whole qname */
11103		fctx->qmintype = fctx->type;
11104		dns_name_dup(&fctx->name, fctx->mctx, &fctx->qminname);
11105		fctx->minimized = false;
11106	}
11107
11108	char domainbuf[DNS_NAME_FORMATSIZE];
11109	dns_name_format(&fctx->qminname, domainbuf, sizeof(domainbuf));
11110	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
11111		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(5),
11112		      "QNAME minimization - %s minimized, qmintype %d "
11113		      "qminname %s",
11114		      fctx->minimized ? "" : "not", fctx->qmintype, domainbuf);
11115
11116	return (result);
11117}
11118
/*
 * Start a fetch of 'name'/'type', or join one already in progress, and
 * return a handle to it in '*fetchp'.
 *
 * Unless DNS_FETCHOPT_UNSHARED is set in 'options', an existing fetch
 * context in the same bucket that matches name, type and options is
 * reused.  Otherwise a new one is created with fctx_create() and started
 * by sending a DNS_EVENT_FETCHCONTROL event to the bucket's task.
 * 'task', 'action', 'arg', 'rdataset' and 'sigrdataset' are handed to
 * fctx_join().
 *
 * Requires:
 *	'res' is a valid, frozen resolver.
 *	If 'domain' is non-NULL, 'nameservers' is a valid NS rdataset;
 *	otherwise 'nameservers' is NULL.
 *	'forwarders' is NULL.
 *	'rdataset' (and 'sigrdataset', if non-NULL) is disassociated.
 *	'fetchp' != NULL && '*fetchp' == NULL.
 *
 * Returns:
 *	ISC_R_SUCCESS
 *	ISC_R_SHUTTINGDOWN	the bucket is exiting
 *	DNS_R_DUPLICATE		an event with the same 'client' address and
 *				'id' is already waiting on the fetch context
 *	DNS_R_DROP		the fetch context has spilled
 *	or the failure returned by fctx_create() / fctx_join().
 */
isc_result_t
dns_resolver_createfetch(dns_resolver_t *res, const dns_name_t *name,
			 dns_rdatatype_t type, const dns_name_t *domain,
			 dns_rdataset_t *nameservers,
			 dns_forwarders_t *forwarders,
			 const isc_sockaddr_t *client, dns_messageid_t id,
			 unsigned int options, unsigned int depth,
			 isc_counter_t *qc, isc_task_t *task,
			 isc_taskaction_t action, void *arg,
			 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
			 dns_fetch_t **fetchp) {
	dns_fetch_t *fetch;
	fetchctx_t *fctx = NULL;
	isc_result_t result = ISC_R_SUCCESS;
	unsigned int bucketnum;
	bool new_fctx = false;
	isc_event_t *event;
	unsigned int count = 0;
	unsigned int spillat;
	unsigned int spillatmin;
	bool dodestroy = false;

	UNUSED(forwarders);

	REQUIRE(VALID_RESOLVER(res));
	REQUIRE(res->frozen);
	/* XXXRTH  Check for meta type */
	if (domain != NULL) {
		REQUIRE(DNS_RDATASET_VALID(nameservers));
		REQUIRE(nameservers->type == dns_rdatatype_ns);
	} else {
		REQUIRE(nameservers == NULL);
	}
	REQUIRE(forwarders == NULL);
	REQUIRE(!dns_rdataset_isassociated(rdataset));
	REQUIRE(sigrdataset == NULL || !dns_rdataset_isassociated(sigrdataset));
	REQUIRE(fetchp != NULL && *fetchp == NULL);

	log_fetch(name, type);

	/*
	 * XXXRTH  use a mempool?
	 */
	fetch = isc_mem_get(res->mctx, sizeof(*fetch));
	fetch->mctx = NULL;
	isc_mem_attach(res->mctx, &fetch->mctx);

	/* The bucket is chosen from the hash of the query name. */
	bucketnum = dns_name_fullhash(name, false) % res->nbuckets;

	/*
	 * Snapshot the spill limits under the resolver lock, then release
	 * it before taking the bucket lock.
	 */
	LOCK(&res->lock);
	spillat = res->spillat;
	spillatmin = res->spillatmin;
	UNLOCK(&res->lock);
	LOCK(&res->buckets[bucketnum].lock);

	if (atomic_load(&res->buckets[bucketnum].exiting)) {
		result = ISC_R_SHUTTINGDOWN;
		goto unlock;
	}

	/* Look for an existing fetch context that can be shared. */
	if ((options & DNS_FETCHOPT_UNSHARED) == 0) {
		for (fctx = ISC_LIST_HEAD(res->buckets[bucketnum].fctxs);
		     fctx != NULL; fctx = ISC_LIST_NEXT(fctx, link))
		{
			if (fctx_match(fctx, name, type, options)) {
				break;
			}
		}
	}

	/*
	 * Is this a duplicate?
	 */
	if (fctx != NULL && client != NULL) {
		dns_fetchevent_t *fevent;
		for (fevent = ISC_LIST_HEAD(fctx->events); fevent != NULL;
		     fevent = ISC_LIST_NEXT(fevent, ev_link))
		{
			if (fevent->client != NULL && fevent->id == id &&
			    isc_sockaddr_equal(fevent->client, client))
			{
				result = DNS_R_DUPLICATE;
				goto unlock;
			}
			/* 'count' ends up as the number of events examined. */
			count++;
		}
	}
	/*
	 * 'count' is only non-zero when an existing fctx was found, hence
	 * the INSIST.  Once 'spillat' events are waiting the fctx is marked
	 * as spilled, and a spilled fctx drops further fetches for as long
	 * as at least 'spillatmin' events are waiting on it.
	 */
	if (count >= spillatmin && spillatmin != 0) {
		INSIST(fctx != NULL);
		if (count >= spillat) {
			fctx->spilled = true;
		}
		if (fctx->spilled) {
			result = DNS_R_DROP;
			goto unlock;
		}
	}

	if (fctx == NULL) {
		result = fctx_create(res, name, type, domain, nameservers,
				     client, id, options, bucketnum, depth, qc,
				     &fctx);
		if (result != ISC_R_SUCCESS) {
			goto unlock;
		}
		new_fctx = true;
	} else if (fctx->depth > depth) {
		/* A shared fctx keeps the smallest depth of its fetches. */
		fctx->depth = depth;
	}

	result = fctx_join(fctx, task, client, id, action, arg, rdataset,
			   sigrdataset, fetch);

	/* Callers asking for stale-on-timeout get an extra TRYSTALE event. */
	if (result == ISC_R_SUCCESS &&
	    ((options & DNS_FETCHOPT_TRYSTALE_ONTIMEOUT) != 0))
	{
		fctx_add_event(fctx, task, client, id, action, arg, fetch,
			       DNS_EVENT_TRYSTALE);
	}

	if (new_fctx) {
		if (result == ISC_R_SUCCESS) {
			/*
			 * Launch this fctx.
			 */
			event = &fctx->control_event;
			ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
				       DNS_EVENT_FETCHCONTROL, fctx_start, fctx,
				       NULL, NULL, NULL);
			isc_task_send(res->buckets[bucketnum].task, &event);
		} else {
			/*
			 * We don't care about the result of fctx_unlink()
			 * since we know we're not exiting.
			 */
			(void)fctx_unlink(fctx);
			dodestroy = true;
		}
	}

unlock:
	UNLOCK(&res->buckets[bucketnum].lock);

	/* The unlinked fctx is destroyed after the bucket lock is released. */
	if (dodestroy) {
		fctx_destroy(fctx);
	}

	if (result == ISC_R_SUCCESS) {
		FTRACE("created");
		*fetchp = fetch;
	} else {
		isc_mem_putanddetach(&fetch->mctx, fetch, sizeof(*fetch));
	}

	return (result);
}
11275
11276void
11277dns_resolver_cancelfetch(dns_fetch_t *fetch) {
11278	fetchctx_t *fctx;
11279	dns_resolver_t *res;
11280	dns_fetchevent_t *event = NULL;
11281	dns_fetchevent_t *event_trystale = NULL;
11282	dns_fetchevent_t *event_fetchdone = NULL;
11283
11284	REQUIRE(DNS_FETCH_VALID(fetch));
11285	fctx = fetch->private;
11286	REQUIRE(VALID_FCTX(fctx));
11287	res = fctx->res;
11288
11289	FTRACE("cancelfetch");
11290
11291	LOCK(&res->buckets[fctx->bucketnum].lock);
11292
11293	/*
11294	 * Find the events for this fetch (as opposed
11295	 * to those for other fetches that have joined the same
11296	 * fctx) and send them with result = ISC_R_CANCELED.
11297	 */
11298	if (fctx->state != fetchstate_done) {
11299		dns_fetchevent_t *next_event = NULL;
11300		for (event = ISC_LIST_HEAD(fctx->events); event != NULL;
11301		     event = next_event)
11302		{
11303			next_event = ISC_LIST_NEXT(event, ev_link);
11304			if (event->fetch == fetch) {
11305				ISC_LIST_UNLINK(fctx->events, event, ev_link);
11306				switch (event->ev_type) {
11307				case DNS_EVENT_TRYSTALE:
11308					INSIST(event_trystale == NULL);
11309					event_trystale = event;
11310					break;
11311				case DNS_EVENT_FETCHDONE:
11312					INSIST(event_fetchdone == NULL);
11313					event_fetchdone = event;
11314					break;
11315				default:
11316					UNREACHABLE();
11317				}
11318				if (event_trystale != NULL &&
11319				    event_fetchdone != NULL)
11320				{
11321					break;
11322				}
11323			}
11324		}
11325	}
11326	/*
11327	 * The "trystale" event must be sent before the "fetchdone" event,
11328	 * because the latter clears the "recursing" query attribute, which is
11329	 * required by both events (handled by the same callback function).
11330	 */
11331	if (event_trystale != NULL) {
11332		isc_task_t *etask = event_trystale->ev_sender;
11333		event_trystale->ev_sender = fctx;
11334		event_trystale->result = ISC_R_CANCELED;
11335		isc_task_sendanddetach(&etask, ISC_EVENT_PTR(&event_trystale));
11336	}
11337	if (event_fetchdone != NULL) {
11338		isc_task_t *etask = event_fetchdone->ev_sender;
11339		event_fetchdone->ev_sender = fctx;
11340		event_fetchdone->result = ISC_R_CANCELED;
11341		isc_task_sendanddetach(&etask, ISC_EVENT_PTR(&event_fetchdone));
11342	}
11343
11344	/*
11345	 * The fctx continues running even if no fetches remain;
11346	 * the answer is still cached.
11347	 */
11348	UNLOCK(&res->buckets[fctx->bucketnum].lock);
11349}
11350
11351void
11352dns_resolver_destroyfetch(dns_fetch_t **fetchp) {
11353	dns_fetch_t *fetch;
11354	dns_resolver_t *res;
11355	dns_fetchevent_t *event, *next_event;
11356	fetchctx_t *fctx;
11357	unsigned int bucketnum;
11358	bool bucket_empty;
11359
11360	REQUIRE(fetchp != NULL);
11361	fetch = *fetchp;
11362	*fetchp = NULL;
11363	REQUIRE(DNS_FETCH_VALID(fetch));
11364	fctx = fetch->private;
11365	REQUIRE(VALID_FCTX(fctx));
11366	res = fctx->res;
11367
11368	FTRACE("destroyfetch");
11369
11370	bucketnum = fctx->bucketnum;
11371	LOCK(&res->buckets[bucketnum].lock);
11372
11373	/*
11374	 * Sanity check: the caller should have gotten its event before
11375	 * trying to destroy the fetch.
11376	 */
11377	event = NULL;
11378	if (fctx->state != fetchstate_done) {
11379		for (event = ISC_LIST_HEAD(fctx->events); event != NULL;
11380		     event = next_event)
11381		{
11382			next_event = ISC_LIST_NEXT(event, ev_link);
11383			RUNTIME_CHECK(event->fetch != fetch);
11384		}
11385	}
11386
11387	bucket_empty = fctx_decreference(fctx);
11388	UNLOCK(&res->buckets[bucketnum].lock);
11389
11390	isc_mem_putanddetach(&fetch->mctx, fetch, sizeof(*fetch));
11391
11392	if (bucket_empty) {
11393		empty_bucket(res);
11394	}
11395}
11396
11397void
11398dns_resolver_logfetch(dns_fetch_t *fetch, isc_log_t *lctx,
11399		      isc_logcategory_t *category, isc_logmodule_t *module,
11400		      int level, bool duplicateok) {
11401	fetchctx_t *fctx;
11402	dns_resolver_t *res;
11403	char domainbuf[DNS_NAME_FORMATSIZE];
11404
11405	REQUIRE(DNS_FETCH_VALID(fetch));
11406	fctx = fetch->private;
11407	REQUIRE(VALID_FCTX(fctx));
11408	res = fctx->res;
11409
11410	LOCK(&res->buckets[fctx->bucketnum].lock);
11411
11412	INSIST(fctx->exitline >= 0);
11413	if (!fctx->logged || duplicateok) {
11414		dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf));
11415		isc_log_write(lctx, category, module, level,
11416			      "fetch completed at %s:%d for %s in "
11417			      "%" PRIu64 "."
11418			      "%06" PRIu64 ": %s/%s "
11419			      "[domain:%s,referral:%u,restart:%u,qrysent:%u,"
11420			      "timeout:%u,lame:%u,quota:%u,neterr:%u,"
11421			      "badresp:%u,adberr:%u,findfail:%u,valfail:%u]",
11422			      __FILE__, fctx->exitline, fctx->info,
11423			      fctx->duration / US_PER_SEC,
11424			      fctx->duration % US_PER_SEC,
11425			      isc_result_totext(fctx->result),
11426			      isc_result_totext(fctx->vresult), domainbuf,
11427			      fctx->referrals, fctx->restarts, fctx->querysent,
11428			      fctx->timeouts, fctx->lamecount, fctx->quotacount,
11429			      fctx->neterr, fctx->badresp, fctx->adberr,
11430			      fctx->findfail, fctx->valfail);
11431		fctx->logged = true;
11432	}
11433
11434	UNLOCK(&res->buckets[fctx->bucketnum].lock);
11435}
11436
/*
 * Return the dispatch manager stored in 'resolver'.
 */
dns_dispatchmgr_t *
dns_resolver_dispatchmgr(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->dispatchmgr);
}
11442
/*
 * Return a dispatch obtained from the resolver's IPv4 dispatch set via
 * dns_dispatchset_get().
 *
 * NOTE(review): resolver->dispatches4 stays NULL when the resolver was
 * created without an IPv4 dispatch; confirm that dns_dispatchset_get()
 * accepts a NULL set.
 */
dns_dispatch_t *
dns_resolver_dispatchv4(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (dns_dispatchset_get(resolver->dispatches4));
}
11448
/*
 * Return a dispatch obtained from the resolver's IPv6 dispatch set via
 * dns_dispatchset_get().
 *
 * NOTE(review): resolver->dispatches6 stays NULL when the resolver was
 * created without an IPv6 dispatch; confirm that dns_dispatchset_get()
 * accepts a NULL set.
 */
dns_dispatch_t *
dns_resolver_dispatchv6(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (dns_dispatchset_get(resolver->dispatches6));
}
11454
/*
 * Return the socket manager stored in 'resolver'.
 */
isc_socketmgr_t *
dns_resolver_socketmgr(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->socketmgr);
}
11460
/*
 * Return the task manager stored in 'resolver'.
 */
isc_taskmgr_t *
dns_resolver_taskmgr(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->taskmgr);
}
11466
/*
 * Return the resolver's current lame TTL setting.
 */
uint32_t
dns_resolver_getlamettl(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->lame_ttl);
}
11472
/*
 * Set the resolver's lame TTL to 'lame_ttl'.
 */
void
dns_resolver_setlamettl(dns_resolver_t *resolver, uint32_t lame_ttl) {
	REQUIRE(VALID_RESOLVER(resolver));
	resolver->lame_ttl = lame_ttl;
}
11478
11479isc_result_t
11480dns_resolver_addalternate(dns_resolver_t *resolver, const isc_sockaddr_t *alt,
11481			  const dns_name_t *name, in_port_t port) {
11482	alternate_t *a;
11483
11484	REQUIRE(VALID_RESOLVER(resolver));
11485	REQUIRE(!resolver->frozen);
11486	REQUIRE((alt == NULL) ^ (name == NULL));
11487
11488	a = isc_mem_get(resolver->mctx, sizeof(*a));
11489	if (alt != NULL) {
11490		a->isaddress = true;
11491		a->_u.addr = *alt;
11492	} else {
11493		a->isaddress = false;
11494		a->_u._n.port = port;
11495		dns_name_init(&a->_u._n.name, NULL);
11496		dns_name_dup(name, resolver->mctx, &a->_u._n.name);
11497	}
11498	ISC_LINK_INIT(a, link);
11499	ISC_LIST_APPEND(resolver->alternates, a, link);
11500
11501	return (ISC_R_SUCCESS);
11502}
11503
/*
 * Set the resolver's UDP size value to 'udpsize'.
 */
void
dns_resolver_setudpsize(dns_resolver_t *resolver, uint16_t udpsize) {
	REQUIRE(VALID_RESOLVER(resolver));
	resolver->udpsize = udpsize;
}
11509
/*
 * Return the resolver's current UDP size value.
 */
uint16_t
dns_resolver_getudpsize(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));
	return (resolver->udpsize);
}
11515
11516void
11517dns_resolver_flushbadcache(dns_resolver_t *resolver, const dns_name_t *name) {
11518	if (name != NULL) {
11519		dns_badcache_flushname(resolver->badcache, name);
11520	} else {
11521		dns_badcache_flush(resolver->badcache);
11522	}
11523}
11524
11525void
11526dns_resolver_flushbadnames(dns_resolver_t *resolver, const dns_name_t *name) {
11527	dns_badcache_flushtree(resolver->badcache, name);
11528}
11529
11530void
11531dns_resolver_addbadcache(dns_resolver_t *resolver, const dns_name_t *name,
11532			 dns_rdatatype_t type, isc_time_t *expire) {
11533#ifdef ENABLE_AFL
11534	if (!dns_fuzzing_resolver)
11535#endif /* ifdef ENABLE_AFL */
11536	{
11537		dns_badcache_add(resolver->badcache, name, type, false, 0,
11538				 expire);
11539	}
11540}
11541
11542bool
11543dns_resolver_getbadcache(dns_resolver_t *resolver, const dns_name_t *name,
11544			 dns_rdatatype_t type, isc_time_t *now) {
11545	return (dns_badcache_find(resolver->badcache, name, type, NULL, now));
11546}
11547
11548void
11549dns_resolver_printbadcache(dns_resolver_t *resolver, FILE *fp) {
11550	(void)dns_badcache_print(resolver->badcache, "Bad cache", fp);
11551}
11552
/*
 * Data deleter passed to dns_rbt_create() for the disabled-algorithms
 * tree.  'node' is the bitfield stored as node data and 'arg' is the
 * memory context it was allocated from.  The first byte of the bitfield
 * is used as the allocation size.
 */
static void
free_algorithm(void *node, void *arg) {
	unsigned char *algorithms = node;
	isc_mem_t *mctx = arg;

	isc_mem_put(mctx, algorithms, *algorithms);
}
11560
/*
 * Destroy the resolver's tree of disabled algorithms, if there is one.
 * When USE_ALGLOCK is enabled this is done under the write lock on
 * resolver->alglock.
 */
void
dns_resolver_reset_algorithms(dns_resolver_t *resolver) {
	REQUIRE(VALID_RESOLVER(resolver));

#if USE_ALGLOCK
	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
#endif /* if USE_ALGLOCK */
	if (resolver->algorithms != NULL) {
		dns_rbt_destroy(&resolver->algorithms);
	}
#if USE_ALGLOCK
	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
#endif /* if USE_ALGLOCK */
}
11575
11576isc_result_t
11577dns_resolver_disable_algorithm(dns_resolver_t *resolver, const dns_name_t *name,
11578			       unsigned int alg) {
11579	unsigned int len, mask;
11580	unsigned char *tmp;
11581	unsigned char *algorithms;
11582	isc_result_t result;
11583	dns_rbtnode_t *node = NULL;
11584
11585	/*
11586	 * Whether an algorithm is disabled (or not) is stored in a
11587	 * per-name bitfield that is stored as the node data of an
11588	 * RBT.
11589	 */
11590
11591	REQUIRE(VALID_RESOLVER(resolver));
11592	if (alg > 255) {
11593		return (ISC_R_RANGE);
11594	}
11595
11596#if USE_ALGLOCK
11597	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
11598#endif /* if USE_ALGLOCK */
11599	if (resolver->algorithms == NULL) {
11600		result = dns_rbt_create(resolver->mctx, free_algorithm,
11601					resolver->mctx, &resolver->algorithms);
11602		if (result != ISC_R_SUCCESS) {
11603			goto cleanup;
11604		}
11605	}
11606
11607	len = alg / 8 + 2;
11608	mask = 1 << (alg % 8);
11609
11610	result = dns_rbt_addnode(resolver->algorithms, name, &node);
11611
11612	if (result == ISC_R_SUCCESS || result == ISC_R_EXISTS) {
11613		algorithms = node->data;
11614		/*
11615		 * If algorithms is set, algorithms[0] contains its
11616		 * length.
11617		 */
11618		if (algorithms == NULL || len > *algorithms) {
11619			/*
11620			 * If no bitfield exists in the node data, or if
11621			 * it is not long enough, allocate a new
11622			 * bitfield and copy the old (smaller) bitfield
11623			 * into it if one exists.
11624			 */
11625			tmp = isc_mem_get(resolver->mctx, len);
11626			memset(tmp, 0, len);
11627			if (algorithms != NULL) {
11628				memmove(tmp, algorithms, *algorithms);
11629			}
11630			tmp[len - 1] |= mask;
11631			/* 'tmp[0]' should contain the length of 'tmp'. */
11632			*tmp = len;
11633			node->data = tmp;
11634			/* Free the older bitfield. */
11635			if (algorithms != NULL) {
11636				isc_mem_put(resolver->mctx, algorithms,
11637					    *algorithms);
11638			}
11639		} else {
11640			algorithms[len - 1] |= mask;
11641		}
11642	}
11643	result = ISC_R_SUCCESS;
11644cleanup:
11645#if USE_ALGLOCK
11646	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
11647#endif /* if USE_ALGLOCK */
11648	return (result);
11649}
11650
11651bool
11652dns_resolver_algorithm_supported(dns_resolver_t *resolver,
11653				 const dns_name_t *name, unsigned int alg) {
11654	unsigned int len, mask;
11655	unsigned char *algorithms;
11656	void *data = NULL;
11657	isc_result_t result;
11658	bool found = false;
11659
11660	REQUIRE(VALID_RESOLVER(resolver));
11661
11662	/*
11663	 * DH is unsupported for DNSKEYs, see RFC 4034 sec. A.1.
11664	 */
11665	if ((alg == DST_ALG_DH) || (alg == DST_ALG_INDIRECT)) {
11666		return (false);
11667	}
11668
11669#if USE_ALGLOCK
11670	RWLOCK(&resolver->alglock, isc_rwlocktype_read);
11671#endif /* if USE_ALGLOCK */
11672	if (resolver->algorithms == NULL) {
11673		goto unlock;
11674	}
11675	result = dns_rbt_findname(resolver->algorithms, name, 0, NULL, &data);
11676	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
11677		len = alg / 8 + 2;
11678		mask = 1 << (alg % 8);
11679		algorithms = data;
11680		if (len <= *algorithms && (algorithms[len - 1] & mask) != 0) {
11681			found = true;
11682		}
11683	}
11684unlock:
11685#if USE_ALGLOCK
11686	RWUNLOCK(&resolver->alglock, isc_rwlocktype_read);
11687#endif /* if USE_ALGLOCK */
11688	if (found) {
11689		return (false);
11690	}
11691
11692	return (dst_algorithm_supported(alg));
11693}
11694
11695static void
11696free_digest(void *node, void *arg) {
11697	unsigned char *digests = node;
11698	isc_mem_t *mctx = arg;
11699
11700	isc_mem_put(mctx, digests, *digests);
11701}
11702
11703void
11704dns_resolver_reset_ds_digests(dns_resolver_t *resolver) {
11705	REQUIRE(VALID_RESOLVER(resolver));
11706
11707#if USE_ALGLOCK
11708	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
11709#endif /* if USE_ALGLOCK */
11710	if (resolver->digests != NULL) {
11711		dns_rbt_destroy(&resolver->digests);
11712	}
11713#if USE_ALGLOCK
11714	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
11715#endif /* if USE_ALGLOCK */
11716}
11717
11718isc_result_t
11719dns_resolver_disable_ds_digest(dns_resolver_t *resolver, const dns_name_t *name,
11720			       unsigned int digest_type) {
11721	unsigned int len, mask;
11722	unsigned char *tmp;
11723	unsigned char *digests;
11724	isc_result_t result;
11725	dns_rbtnode_t *node = NULL;
11726
11727	/*
11728	 * Whether a digest is disabled (or not) is stored in a per-name
11729	 * bitfield that is stored as the node data of an RBT.
11730	 */
11731
11732	REQUIRE(VALID_RESOLVER(resolver));
11733	if (digest_type > 255) {
11734		return (ISC_R_RANGE);
11735	}
11736
11737#if USE_ALGLOCK
11738	RWLOCK(&resolver->alglock, isc_rwlocktype_write);
11739#endif /* if USE_ALGLOCK */
11740	if (resolver->digests == NULL) {
11741		result = dns_rbt_create(resolver->mctx, free_digest,
11742					resolver->mctx, &resolver->digests);
11743		if (result != ISC_R_SUCCESS) {
11744			goto cleanup;
11745		}
11746	}
11747
11748	len = digest_type / 8 + 2;
11749	mask = 1 << (digest_type % 8);
11750
11751	result = dns_rbt_addnode(resolver->digests, name, &node);
11752
11753	if (result == ISC_R_SUCCESS || result == ISC_R_EXISTS) {
11754		digests = node->data;
11755		/* If digests is set, digests[0] contains its length. */
11756		if (digests == NULL || len > *digests) {
11757			/*
11758			 * If no bitfield exists in the node data, or if
11759			 * it is not long enough, allocate a new
11760			 * bitfield and copy the old (smaller) bitfield
11761			 * into it if one exists.
11762			 */
11763			tmp = isc_mem_get(resolver->mctx, len);
11764			memset(tmp, 0, len);
11765			if (digests != NULL) {
11766				memmove(tmp, digests, *digests);
11767			}
11768			tmp[len - 1] |= mask;
11769			/* tmp[0] should contain the length of 'tmp'. */
11770			*tmp = len;
11771			node->data = tmp;
11772			/* Free the older bitfield. */
11773			if (digests != NULL) {
11774				isc_mem_put(resolver->mctx, digests, *digests);
11775			}
11776		} else {
11777			digests[len - 1] |= mask;
11778		}
11779	}
11780	result = ISC_R_SUCCESS;
11781cleanup:
11782#if USE_ALGLOCK
11783	RWUNLOCK(&resolver->alglock, isc_rwlocktype_write);
11784#endif /* if USE_ALGLOCK */
11785	return (result);
11786}
11787
11788bool
11789dns_resolver_ds_digest_supported(dns_resolver_t *resolver,
11790				 const dns_name_t *name,
11791				 unsigned int digest_type) {
11792	unsigned int len, mask;
11793	unsigned char *digests;
11794	void *data = NULL;
11795	isc_result_t result;
11796	bool found = false;
11797
11798	REQUIRE(VALID_RESOLVER(resolver));
11799
11800#if USE_ALGLOCK
11801	RWLOCK(&resolver->alglock, isc_rwlocktype_read);
11802#endif /* if USE_ALGLOCK */
11803	if (resolver->digests == NULL) {
11804		goto unlock;
11805	}
11806	result = dns_rbt_findname(resolver->digests, name, 0, NULL, &data);
11807	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
11808		len = digest_type / 8 + 2;
11809		mask = 1 << (digest_type % 8);
11810		digests = data;
11811		if (len <= *digests && (digests[len - 1] & mask) != 0) {
11812			found = true;
11813		}
11814	}
11815unlock:
11816#if USE_ALGLOCK
11817	RWUNLOCK(&resolver->alglock, isc_rwlocktype_read);
11818#endif /* if USE_ALGLOCK */
11819	if (found) {
11820		return (false);
11821	}
11822	return (dst_ds_digest_supported(digest_type));
11823}
11824
11825void
11826dns_resolver_resetmustbesecure(dns_resolver_t *resolver) {
11827	REQUIRE(VALID_RESOLVER(resolver));
11828
11829#if USE_MBSLOCK
11830	RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
11831#endif /* if USE_MBSLOCK */
11832	if (resolver->mustbesecure != NULL) {
11833		dns_rbt_destroy(&resolver->mustbesecure);
11834	}
11835#if USE_MBSLOCK
11836	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
11837#endif /* if USE_MBSLOCK */
11838}
11839
11840static bool yes = true, no = false;
11841
11842isc_result_t
11843dns_resolver_setmustbesecure(dns_resolver_t *resolver, const dns_name_t *name,
11844			     bool value) {
11845	isc_result_t result;
11846
11847	REQUIRE(VALID_RESOLVER(resolver));
11848
11849#if USE_MBSLOCK
11850	RWLOCK(&resolver->mbslock, isc_rwlocktype_write);
11851#endif /* if USE_MBSLOCK */
11852	if (resolver->mustbesecure == NULL) {
11853		result = dns_rbt_create(resolver->mctx, NULL, NULL,
11854					&resolver->mustbesecure);
11855		if (result != ISC_R_SUCCESS) {
11856			goto cleanup;
11857		}
11858	}
11859	result = dns_rbt_addname(resolver->mustbesecure, name,
11860				 value ? &yes : &no);
11861cleanup:
11862#if USE_MBSLOCK
11863	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_write);
11864#endif /* if USE_MBSLOCK */
11865	return (result);
11866}
11867
11868bool
11869dns_resolver_getmustbesecure(dns_resolver_t *resolver, const dns_name_t *name) {
11870	void *data = NULL;
11871	bool value = false;
11872	isc_result_t result;
11873
11874	REQUIRE(VALID_RESOLVER(resolver));
11875
11876#if USE_MBSLOCK
11877	RWLOCK(&resolver->mbslock, isc_rwlocktype_read);
11878#endif /* if USE_MBSLOCK */
11879	if (resolver->mustbesecure == NULL) {
11880		goto unlock;
11881	}
11882	result = dns_rbt_findname(resolver->mustbesecure, name, 0, NULL, &data);
11883	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
11884		value = *(bool *)data;
11885	}
11886unlock:
11887#if USE_MBSLOCK
11888	RWUNLOCK(&resolver->mbslock, isc_rwlocktype_read);
11889#endif /* if USE_MBSLOCK */
11890	return (value);
11891}
11892
11893void
11894dns_resolver_getclientsperquery(dns_resolver_t *resolver, uint32_t *cur,
11895				uint32_t *min, uint32_t *max) {
11896	REQUIRE(VALID_RESOLVER(resolver));
11897
11898	LOCK(&resolver->lock);
11899	if (cur != NULL) {
11900		*cur = resolver->spillat;
11901	}
11902	if (min != NULL) {
11903		*min = resolver->spillatmin;
11904	}
11905	if (max != NULL) {
11906		*max = resolver->spillatmax;
11907	}
11908	UNLOCK(&resolver->lock);
11909}
11910
11911void
11912dns_resolver_setclientsperquery(dns_resolver_t *resolver, uint32_t min,
11913				uint32_t max) {
11914	REQUIRE(VALID_RESOLVER(resolver));
11915
11916	LOCK(&resolver->lock);
11917	resolver->spillatmin = resolver->spillat = min;
11918	resolver->spillatmax = max;
11919	UNLOCK(&resolver->lock);
11920}
11921
11922void
11923dns_resolver_setfetchesperzone(dns_resolver_t *resolver, uint32_t clients) {
11924	REQUIRE(VALID_RESOLVER(resolver));
11925
11926	atomic_store_release(&resolver->zspill, clients);
11927}
11928
11929bool
11930dns_resolver_getzeronosoattl(dns_resolver_t *resolver) {
11931	REQUIRE(VALID_RESOLVER(resolver));
11932
11933	return (resolver->zero_no_soa_ttl);
11934}
11935
11936void
11937dns_resolver_setzeronosoattl(dns_resolver_t *resolver, bool state) {
11938	REQUIRE(VALID_RESOLVER(resolver));
11939
11940	resolver->zero_no_soa_ttl = state;
11941}
11942
11943unsigned int
11944dns_resolver_getoptions(dns_resolver_t *resolver) {
11945	REQUIRE(VALID_RESOLVER(resolver));
11946
11947	return (resolver->options);
11948}
11949
11950unsigned int
11951dns_resolver_gettimeout(dns_resolver_t *resolver) {
11952	REQUIRE(VALID_RESOLVER(resolver));
11953
11954	return (resolver->query_timeout);
11955}
11956
11957void
11958dns_resolver_settimeout(dns_resolver_t *resolver, unsigned int timeout) {
11959	REQUIRE(VALID_RESOLVER(resolver));
11960
11961	if (timeout <= 300) {
11962		timeout *= 1000;
11963	}
11964
11965	if (timeout == 0) {
11966		timeout = DEFAULT_QUERY_TIMEOUT;
11967	}
11968	if (timeout > MAXIMUM_QUERY_TIMEOUT) {
11969		timeout = MAXIMUM_QUERY_TIMEOUT;
11970	}
11971	if (timeout < MINIMUM_QUERY_TIMEOUT) {
11972		timeout = MINIMUM_QUERY_TIMEOUT;
11973	}
11974
11975	resolver->query_timeout = timeout;
11976}
11977
11978void
11979dns_resolver_setquerydscp4(dns_resolver_t *resolver, isc_dscp_t dscp) {
11980	REQUIRE(VALID_RESOLVER(resolver));
11981
11982	resolver->querydscp4 = dscp;
11983}
11984
11985isc_dscp_t
11986dns_resolver_getquerydscp4(dns_resolver_t *resolver) {
11987	REQUIRE(VALID_RESOLVER(resolver));
11988	return (resolver->querydscp4);
11989}
11990
11991void
11992dns_resolver_setquerydscp6(dns_resolver_t *resolver, isc_dscp_t dscp) {
11993	REQUIRE(VALID_RESOLVER(resolver));
11994
11995	resolver->querydscp6 = dscp;
11996}
11997
11998isc_dscp_t
11999dns_resolver_getquerydscp6(dns_resolver_t *resolver) {
12000	REQUIRE(VALID_RESOLVER(resolver));
12001	return (resolver->querydscp6);
12002}
12003
12004void
12005dns_resolver_setmaxdepth(dns_resolver_t *resolver, unsigned int maxdepth) {
12006	REQUIRE(VALID_RESOLVER(resolver));
12007	resolver->maxdepth = maxdepth;
12008}
12009
12010unsigned int
12011dns_resolver_getmaxdepth(dns_resolver_t *resolver) {
12012	REQUIRE(VALID_RESOLVER(resolver));
12013	return (resolver->maxdepth);
12014}
12015
12016void
12017dns_resolver_setmaxqueries(dns_resolver_t *resolver, unsigned int queries) {
12018	REQUIRE(VALID_RESOLVER(resolver));
12019	resolver->maxqueries = queries;
12020}
12021
12022unsigned int
12023dns_resolver_getmaxqueries(dns_resolver_t *resolver) {
12024	REQUIRE(VALID_RESOLVER(resolver));
12025	return (resolver->maxqueries);
12026}
12027
12028void
12029dns_resolver_dumpfetches(dns_resolver_t *resolver, isc_statsformat_t format,
12030			 FILE *fp) {
12031	int i;
12032
12033	REQUIRE(VALID_RESOLVER(resolver));
12034	REQUIRE(fp != NULL);
12035	REQUIRE(format == isc_statsformat_file);
12036
12037	for (i = 0; i < RES_DOMAIN_BUCKETS; i++) {
12038		fctxcount_t *fc;
12039		LOCK(&resolver->dbuckets[i].lock);
12040		for (fc = ISC_LIST_HEAD(resolver->dbuckets[i].list); fc != NULL;
12041		     fc = ISC_LIST_NEXT(fc, link))
12042		{
12043			dns_name_print(fc->domain, fp);
12044			fprintf(fp, ": %u active (%u spilled, %u allowed)\n",
12045				fc->count, fc->dropped, fc->allowed);
12046		}
12047		UNLOCK(&resolver->dbuckets[i].lock);
12048	}
12049}
12050
12051void
12052dns_resolver_setquotaresponse(dns_resolver_t *resolver, dns_quotatype_t which,
12053			      isc_result_t resp) {
12054	REQUIRE(VALID_RESOLVER(resolver));
12055	REQUIRE(which == dns_quotatype_zone || which == dns_quotatype_server);
12056	REQUIRE(resp == DNS_R_DROP || resp == DNS_R_SERVFAIL);
12057
12058	resolver->quotaresp[which] = resp;
12059}
12060
12061isc_result_t
12062dns_resolver_getquotaresponse(dns_resolver_t *resolver, dns_quotatype_t which) {
12063	REQUIRE(VALID_RESOLVER(resolver));
12064	REQUIRE(which == dns_quotatype_zone || which == dns_quotatype_server);
12065
12066	return (resolver->quotaresp[which]);
12067}
12068
12069unsigned int
12070dns_resolver_getretryinterval(dns_resolver_t *resolver) {
12071	REQUIRE(VALID_RESOLVER(resolver));
12072
12073	return (resolver->retryinterval);
12074}
12075
12076void
12077dns_resolver_setretryinterval(dns_resolver_t *resolver, unsigned int interval) {
12078	REQUIRE(VALID_RESOLVER(resolver));
12079	REQUIRE(interval > 0);
12080
12081	resolver->retryinterval = ISC_MIN(interval, 2000);
12082}
12083
12084unsigned int
12085dns_resolver_getnonbackofftries(dns_resolver_t *resolver) {
12086	REQUIRE(VALID_RESOLVER(resolver));
12087
12088	return (resolver->nonbackofftries);
12089}
12090
12091void
12092dns_resolver_setnonbackofftries(dns_resolver_t *resolver, unsigned int tries) {
12093	REQUIRE(VALID_RESOLVER(resolver));
12094	REQUIRE(tries > 0);
12095
12096	resolver->nonbackofftries = tries;
12097}
12098