1/*	$NetBSD: resolver.c,v 1.19 2024/02/21 22:52:08 christos Exp $	*/
2
3/*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16/*! \file */
17
18#include <ctype.h>
19#include <inttypes.h>
20#include <stdbool.h>
21
22#include <isc/atomic.h>
23#include <isc/counter.h>
24#include <isc/log.h>
25#include <isc/print.h>
26#include <isc/random.h>
27#include <isc/refcount.h>
28#include <isc/result.h>
29#include <isc/siphash.h>
30#include <isc/stats.h>
31#include <isc/string.h>
32#include <isc/task.h>
33#include <isc/time.h>
34#include <isc/timer.h>
35#include <isc/util.h>
36
37#include <dns/acl.h>
38#include <dns/adb.h>
39#include <dns/badcache.h>
40#include <dns/cache.h>
41#include <dns/db.h>
42#include <dns/dispatch.h>
43#include <dns/dnstap.h>
44#include <dns/ds.h>
45#include <dns/edns.h>
46#include <dns/events.h>
47#include <dns/forward.h>
48#include <dns/keytable.h>
49#include <dns/log.h>
50#include <dns/message.h>
51#include <dns/ncache.h>
52#include <dns/nsec.h>
53#include <dns/nsec3.h>
54#include <dns/opcode.h>
55#include <dns/peer.h>
56#include <dns/rbt.h>
57#include <dns/rcode.h>
58#include <dns/rdata.h>
59#include <dns/rdataclass.h>
60#include <dns/rdatalist.h>
61#include <dns/rdataset.h>
62#include <dns/rdatastruct.h>
63#include <dns/rdatatype.h>
64#include <dns/resolver.h>
65#include <dns/rootns.h>
66#include <dns/stats.h>
67#include <dns/tsig.h>
68#include <dns/validator.h>
69#include <dns/zone.h>
70
71/* Detailed logging of fctx attach/detach */
72#ifndef FCTX_TRACE
73#undef FCTX_TRACE
74#endif
75
76#ifdef WANT_QUERYTRACE
77#define RTRACE(m)                                                             \
78	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                     \
79		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3), "res %p: %s", \
80		      res, (m))
81#define RRTRACE(r, m)                                                         \
82	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                     \
83		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3), "res %p: %s", \
84		      (r), (m))
85#define FCTXTRACE(m)                                            \
86	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,       \
87		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3), \
88		      "fctx %p(%s): %s", fctx, fctx->info, (m))
89#define FCTXTRACE2(m1, m2)                                      \
90	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,       \
91		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3), \
92		      "fctx %p(%s): %s %s", fctx, fctx->info, (m1), (m2))
93#define FCTXTRACE3(m, res)                                              \
94	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,               \
95		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),         \
96		      "fctx %p(%s): [result: %s] %s", fctx, fctx->info, \
97		      isc_result_totext(res), (m))
98#define FCTXTRACE4(m1, m2, res)                                            \
99	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                  \
100		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),            \
101		      "fctx %p(%s): [result: %s] %s %s", fctx, fctx->info, \
102		      isc_result_totext(res), (m1), (m2))
103#define FCTXTRACE5(m1, m2, v)                                               \
104	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                   \
105		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),             \
106		      "fctx %p(%s): %s %s%u", fctx, fctx->info, (m1), (m2), \
107		      (v))
108#define FTRACE(m)                                                          \
109	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                  \
110		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),            \
111		      "fetch %p (fctx %p(%s)): %s", fetch, fetch->private, \
112		      fetch->private->info, (m))
113#define QTRACE(m)                                                          \
114	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,                  \
115		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),            \
116		      "resquery %p (fctx %p(%s)): %s", query, query->fctx, \
117		      query->fctx->info, (m))
118#else /* ifdef WANT_QUERYTRACE */
119#define RTRACE(m)          \
120	do {               \
121		UNUSED(m); \
122	} while (0)
123#define RRTRACE(r, m)      \
124	do {               \
125		UNUSED(r); \
126		UNUSED(m); \
127	} while (0)
128#define FCTXTRACE(m)          \
129	do {                  \
130		UNUSED(fctx); \
131		UNUSED(m);    \
132	} while (0)
133#define FCTXTRACE2(m1, m2)    \
134	do {                  \
135		UNUSED(fctx); \
136		UNUSED(m1);   \
137		UNUSED(m2);   \
138	} while (0)
139#define FCTXTRACE3(m1, res)   \
140	do {                  \
141		UNUSED(fctx); \
142		UNUSED(m1);   \
143		UNUSED(res);  \
144	} while (0)
145#define FCTXTRACE4(m1, m2, res) \
146	do {                    \
147		UNUSED(fctx);   \
148		UNUSED(m1);     \
149		UNUSED(m2);     \
150		UNUSED(res);    \
151	} while (0)
152#define FCTXTRACE5(m1, m2, v) \
153	do {                  \
154		UNUSED(fctx); \
155		UNUSED(m1);   \
156		UNUSED(m2);   \
157		UNUSED(v);    \
158	} while (0)
159#define FTRACE(m)          \
160	do {               \
161		UNUSED(m); \
162	} while (0)
163#define QTRACE(m)          \
164	do {               \
165		UNUSED(m); \
166	} while (0)
167#endif /* WANT_QUERYTRACE */
168
169/*
170 * Add or remove an extra fctx reference without setting or clearing
171 * the pointer.
172 */
173#define fctx_addref(f) fctx_attach((f), &(fetchctx_t *){ NULL })
174#define fctx_unref(f)  fctx_detach(&(fetchctx_t *){ (f) })
175
176/*
 * The maximum time (in milliseconds) we will wait for a single query.
178 */
179#define MAX_SINGLE_QUERY_TIMEOUT    9000U
180#define MAX_SINGLE_QUERY_TIMEOUT_US (MAX_SINGLE_QUERY_TIMEOUT * US_PER_MS)
181
182/*
 * We need to allow an individual query time to complete / timeout.
184 */
185#define MINIMUM_QUERY_TIMEOUT (MAX_SINGLE_QUERY_TIMEOUT + 1000U)
186
/* The default time in milliseconds for the whole query to live. */
188#ifndef DEFAULT_QUERY_TIMEOUT
189#define DEFAULT_QUERY_TIMEOUT MINIMUM_QUERY_TIMEOUT
190#endif /* ifndef DEFAULT_QUERY_TIMEOUT */
191
/* The maximum time in milliseconds for the whole query to live. */
193#ifndef MAXIMUM_QUERY_TIMEOUT
194#define MAXIMUM_QUERY_TIMEOUT 30000
195#endif /* ifndef MAXIMUM_QUERY_TIMEOUT */
196
197/* The default maximum number of recursions to follow before giving up. */
198#ifndef DEFAULT_RECURSION_DEPTH
199#define DEFAULT_RECURSION_DEPTH 7
200#endif /* ifndef DEFAULT_RECURSION_DEPTH */
201
202/* The default maximum number of iterative queries to allow before giving up. */
203#ifndef DEFAULT_MAX_QUERIES
204#define DEFAULT_MAX_QUERIES 100
205#endif /* ifndef DEFAULT_MAX_QUERIES */
206
207/*
208 * After NS_FAIL_LIMIT attempts to fetch a name server address,
209 * if the number of addresses in the NS RRset exceeds NS_RR_LIMIT,
210 * stop trying to fetch, in order to avoid wasting resources.
211 */
212#define NS_FAIL_LIMIT 4
213#define NS_RR_LIMIT   5
214/*
215 * IP address lookups are performed for at most NS_PROCESSING_LIMIT NS RRs in
216 * any NS RRset encountered, to avoid excessive resource use while processing
217 * large delegations.
218 */
219#define NS_PROCESSING_LIMIT 20
220
221STATIC_ASSERT(NS_PROCESSING_LIMIT > NS_RR_LIMIT,
222	      "The maximum number of NS RRs processed for each delegation "
223	      "(NS_PROCESSING_LIMIT) must be larger than the large delegation "
224	      "threshold (NS_RR_LIMIT).");
225
226/* Hash table for zone counters */
227#ifndef RES_DOMAIN_HASH_BITS
228#define RES_DOMAIN_HASH_BITS 12
229#endif /* ifndef RES_DOMAIN_HASH_BITS */
230#define RES_NOBUCKET 0xffffffff
231
232#define GOLDEN_RATIO_32 0x61C88647
233
234#define HASHSIZE(bits) (UINT64_C(1) << (bits))
235
236#define RES_DOMAIN_MAX_BITS   32
237#define RES_DOMAIN_OVERCOMMIT 3
238
239#define RES_DOMAIN_NEXTTABLE(hindex) ((hindex == 0) ? 1 : 0)
240
241static uint32_t
242hash_32(uint32_t val, unsigned int bits) {
243	REQUIRE(bits <= RES_DOMAIN_MAX_BITS);
244	/* High bits are more random. */
245	return (val * GOLDEN_RATIO_32 >> (32 - bits));
246}
247
248/*%
249 * Maximum EDNS0 input packet size.
250 */
251#define RECV_BUFFER_SIZE 4096 /* XXXRTH  Constant. */
252
253/*%
254 * Default EDNS0 buffer size
255 */
256#define DEFAULT_EDNS_BUFSIZE 1232
257
258/*%
259 * This defines the maximum number of timeouts we will permit before we
260 * disable EDNS0 on the query.
261 */
262#define MAX_EDNS0_TIMEOUTS 3
263
264#define DNS_RESOLVER_BADCACHESIZE 1021
265#define DNS_RESOLVER_BADCACHETTL(fctx) \
266	(((fctx)->res->lame_ttl > 30) ? (fctx)->res->lame_ttl : 30)
267
268typedef struct fetchctx fetchctx_t;
269
270typedef struct query {
271	/* Locked by task event serialization. */
272	unsigned int magic;
273	isc_refcount_t references;
274	fetchctx_t *fctx;
275	dns_message_t *rmessage;
276	isc_mem_t *mctx;
277	dns_dispatchmgr_t *dispatchmgr;
278	dns_dispatch_t *dispatch;
279	dns_adbaddrinfo_t *addrinfo;
280	isc_time_t start;
281	dns_messageid_t id;
282	dns_dispentry_t *dispentry;
283	ISC_LINK(struct query) link;
284	isc_buffer_t buffer;
285	isc_buffer_t *tsig;
286	dns_tsigkey_t *tsigkey;
287	int ednsversion;
288	unsigned int options;
289	unsigned int attributes;
290	unsigned int udpsize;
291	unsigned char data[512];
292} resquery_t;
293
294struct tried {
295	isc_sockaddr_t addr;
296	unsigned int count;
297	ISC_LINK(struct tried) link;
298};
299
300#define QUERY_MAGIC	   ISC_MAGIC('Q', '!', '!', '!')
301#define VALID_QUERY(query) ISC_MAGIC_VALID(query, QUERY_MAGIC)
302
/* Bit in resquery_t 'attributes'. */
#define RESQUERY_ATTR_CANCELED 0x02

/*
 * True if RESQUERY_ATTR_CANCELED is set in the attributes of query 'q'.
 *
 * The RESQUERY_CONNECTING() and RESQUERY_SENDING() helpers that used to
 * sit next to this macro tested 'connects' and 'sends' members, which
 * resquery_t does not have; they could not be expanded without a compile
 * error and have been dropped.
 */
#define RESQUERY_CANCELED(q) (((q)->attributes & RESQUERY_ATTR_CANCELED) != 0)
308
/*
 * States recorded in the 'state' member of a fetch context.
 */
typedef enum {
	fetchstate_init = 0, /*%< Start event has not run yet. */
	fetchstate_active = 1,
	fetchstate_done = 2 /*%< FETCHDONE events posted. */
} fetchstate_t;
314
/*
 * Kinds of name server problem; passed to add_bad() and stored in the
 * 'broken_type' member of respctx_t.
 */
typedef enum {
	badns_unreachable = 0,
	badns_response = 1,
	badns_validation = 2,
	badns_forwarder = 3,
} badnstype_t;
321
322struct fetchctx {
323	/*% Not locked. */
324	unsigned int magic;
325	dns_resolver_t *res;
326	dns_fixedname_t fname;
327	dns_name_t *name;
328	dns_rdatatype_t type;
329	unsigned int options;
330	unsigned int bucketnum;
331	unsigned int dbucketnum;
332	char *info;
333	isc_mem_t *mctx;
334	isc_stdtime_t now;
335	isc_task_t *task;
336
337	/* Atomic */
338	isc_refcount_t references;
339
340	/*% Locked by appropriate bucket lock. */
341	fetchstate_t state;
342	atomic_bool want_shutdown;
343	bool cloned;
344	bool spilled;
345	isc_event_t control_event;
346	ISC_LINK(struct fetchctx) link;
347	ISC_LIST(dns_fetchevent_t) events;
348
349	/*% Locked by task event serialization. */
350	dns_fixedname_t dfname;
351	dns_name_t *domain;
352	dns_rdataset_t nameservers;
353	atomic_uint_fast32_t attributes;
354	isc_timer_t *timer;
355	isc_time_t expires;
356	isc_time_t expires_try_stale;
357	isc_time_t next_timeout;
358	isc_time_t final;
359	isc_interval_t interval;
360	dns_message_t *qmessage;
361	ISC_LIST(resquery_t) queries;
362	dns_adbfindlist_t finds;
363	dns_adbfind_t *find;
364	/*
365	 * altfinds are names and/or addresses of dual stack servers that
366	 * should be used when iterative resolution to a server is not
367	 * possible because the address family of that server is not usable.
368	 */
369	dns_adbfindlist_t altfinds;
370	dns_adbfind_t *altfind;
371	dns_adbaddrinfolist_t forwaddrs;
372	dns_adbaddrinfolist_t altaddrs;
373	dns_forwarderlist_t forwarders;
374	dns_fwdpolicy_t fwdpolicy;
375	isc_sockaddrlist_t bad;
376	ISC_LIST(struct tried) edns;
377	isc_sockaddrlist_t bad_edns;
378	dns_validator_t *validator;
379	ISC_LIST(dns_validator_t) validators;
380	dns_db_t *cache;
381	dns_adb_t *adb;
382	bool ns_ttl_ok;
383	uint32_t ns_ttl;
384	isc_counter_t *qc;
385	bool minimized;
386	unsigned int qmin_labels;
387	isc_result_t qmin_warning;
388	bool ip6arpaskip;
389	bool forwarding;
390	dns_fixedname_t qminfname;
391	dns_name_t *qminname;
392	dns_rdatatype_t qmintype;
393	dns_fetch_t *qminfetch;
394	dns_rdataset_t qminrrset;
395	dns_fixedname_t qmindcfname;
396	dns_name_t *qmindcname;
397	dns_fixedname_t fwdfname;
398	dns_name_t *fwdname;
399
400	/*%
401	 * The number of events we're waiting for.
402	 */
403	atomic_uint_fast32_t pending; /* Bucket lock. */
404
405	/*%
406	 * The number of times we've "restarted" the current
407	 * nameserver set.  This acts as a failsafe to prevent
408	 * us from pounding constantly on a particular set of
409	 * servers that, for whatever reason, are not giving
410	 * us useful responses, but are responding in such a
411	 * way that they are not marked "bad".
412	 */
413	unsigned int restarts;
414
415	/*%
416	 * The number of timeouts that have occurred since we
417	 * last successfully received a response packet.  This
418	 * is used for EDNS0 black hole detection.
419	 */
420	unsigned int timeouts;
421
422	/*%
423	 * Look aside state for DS lookups.
424	 */
425	dns_fixedname_t nsfname;
426	dns_name_t *nsname;
427
428	dns_fetch_t *nsfetch;
429	dns_rdataset_t nsrrset;
430
431	/*%
432	 * Number of queries that reference this context.
433	 */
434	atomic_uint_fast32_t nqueries; /* Bucket lock. */
435
436	/*%
437	 * Random numbers to use for mixing up server addresses.
438	 */
439	uint32_t rand_buf;
440	uint32_t rand_bits;
441
442	/*%
443	 * Fetch-local statistics for detailed logging.
444	 */
445	isc_result_t result;  /*%< fetch result */
446	isc_result_t vresult; /*%< validation result */
447	int exitline;
448	isc_time_t start;
449	uint64_t duration;
450	bool logged;
451	unsigned int querysent;
452	unsigned int referrals;
453	unsigned int lamecount;
454	unsigned int quotacount;
455	unsigned int neterr;
456	unsigned int badresp;
457	unsigned int adberr;
458	unsigned int findfail;
459	unsigned int valfail;
460	bool timeout;
461	dns_adbaddrinfo_t *addrinfo;
462	unsigned int depth;
463	char clientstr[ISC_SOCKADDR_FORMATSIZE];
464};
465
466#define FCTX_MAGIC	 ISC_MAGIC('F', '!', '!', '!')
467#define VALID_FCTX(fctx) ISC_MAGIC_VALID(fctx, FCTX_MAGIC)
468
469#define FCTX_ATTR_HAVEANSWER   0x0001
470#define FCTX_ATTR_GLUING       0x0002
471#define FCTX_ATTR_ADDRWAIT     0x0004
472#define FCTX_ATTR_SHUTTINGDOWN 0x0008 /* Bucket lock */
473#define FCTX_ATTR_WANTCACHE    0x0010
474#define FCTX_ATTR_WANTNCACHE   0x0020
475#define FCTX_ATTR_NEEDEDNS0    0x0040
476#define FCTX_ATTR_TRIEDFIND    0x0080
477#define FCTX_ATTR_TRIEDALT     0x0100
478
479#define HAVE_ANSWER(f) \
480	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_HAVEANSWER) != 0)
481#define GLUING(f) \
482	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_GLUING) != 0)
483#define ADDRWAIT(f) \
484	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_ADDRWAIT) != 0)
485#define SHUTTINGDOWN(f) \
486	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_SHUTTINGDOWN) != 0)
487#define WANTCACHE(f) \
488	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_WANTCACHE) != 0)
489#define WANTNCACHE(f) \
490	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_WANTNCACHE) != 0)
491#define NEEDEDNS0(f) \
492	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_NEEDEDNS0) != 0)
493#define TRIEDFIND(f) \
494	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_TRIEDFIND) != 0)
495#define TRIEDALT(f) \
496	((atomic_load_acquire(&(f)->attributes) & FCTX_ATTR_TRIEDALT) != 0)
497
498#define FCTX_ATTR_SET(f, a) atomic_fetch_or_release(&(f)->attributes, (a))
499#define FCTX_ATTR_CLR(f, a) atomic_fetch_and_release(&(f)->attributes, ~(a))
500
501typedef struct {
502	dns_adbaddrinfo_t *addrinfo;
503	fetchctx_t *fctx;
504	dns_message_t *message;
505} dns_valarg_t;
506
507struct dns_fetch {
508	unsigned int magic;
509	isc_mem_t *mctx;
510	dns_resolver_t *res;
511	fetchctx_t *private;
512};
513
514#define DNS_FETCH_MAGIC	       ISC_MAGIC('F', 't', 'c', 'h')
515#define DNS_FETCH_VALID(fetch) ISC_MAGIC_VALID(fetch, DNS_FETCH_MAGIC)
516
517typedef struct fctxbucket {
518	isc_task_t *task;
519	isc_mutex_t lock;
520	ISC_LIST(fetchctx_t) fctxs;
521	atomic_bool exiting;
522} fctxbucket_t;
523
524typedef struct fctxcount fctxcount_t;
525struct fctxcount {
526	dns_fixedname_t dfname;
527	dns_name_t *domain;
528	uint32_t count;
529	uint32_t allowed;
530	uint32_t dropped;
531	isc_stdtime_t logged;
532	ISC_LINK(fctxcount_t) link;
533};
534
535typedef struct zonebucket {
536	isc_mutex_t lock;
537	ISC_LIST(fctxcount_t) list;
538} zonebucket_t;
539
540typedef struct alternate {
541	bool isaddress;
542	union {
543		isc_sockaddr_t addr;
544		struct {
545			dns_name_t name;
546			in_port_t port;
547		} _n;
548	} _u;
549	ISC_LINK(struct alternate) link;
550} alternate_t;
551
552struct dns_resolver {
553	/* Unlocked. */
554	unsigned int magic;
555	isc_mem_t *mctx;
556	isc_mutex_t lock;
557	isc_mutex_t primelock;
558	dns_rdataclass_t rdclass;
559	isc_nm_t *nm;
560	isc_timermgr_t *timermgr;
561	isc_taskmgr_t *taskmgr;
562	dns_view_t *view;
563	bool frozen;
564	unsigned int options;
565	dns_dispatchmgr_t *dispatchmgr;
566	dns_dispatchset_t *dispatches4;
567	dns_dispatchset_t *dispatches6;
568	unsigned int nbuckets;
569	fctxbucket_t *buckets;
570	uint8_t dhashbits;
571	zonebucket_t *dbuckets;
572	uint32_t lame_ttl;
573	ISC_LIST(alternate_t) alternates;
574	uint16_t udpsize;
575	dns_rbt_t *algorithms;
576	dns_rbt_t *digests;
577	dns_rbt_t *mustbesecure;
578	unsigned int spillatmax;
579	unsigned int spillatmin;
580	isc_timer_t *spillattimer;
581	bool zero_no_soa_ttl;
582	unsigned int query_timeout;
583	unsigned int maxdepth;
584	unsigned int maxqueries;
585	isc_result_t quotaresp[2];
586
587	/* Additions for serve-stale feature. */
588	unsigned int retryinterval; /* in milliseconds */
589	unsigned int nonbackofftries;
590
591	/* Atomic */
592	isc_refcount_t references;
593	atomic_uint_fast32_t zspill; /* fetches-per-zone */
594	atomic_bool exiting;
595	atomic_bool priming;
596
597	/* Locked by lock. */
598	isc_eventlist_t whenshutdown;
599	isc_refcount_t activebuckets;
600	unsigned int spillat; /* clients-per-query */
601
602	dns_badcache_t *badcache; /* Bad cache. */
603
604	/* Locked by primelock. */
605	dns_fetch_t *primefetch;
606
607	/* Atomic. */
608	atomic_uint_fast32_t nfctx;
609};
610
611#define RES_MAGIC	    ISC_MAGIC('R', 'e', 's', '!')
612#define VALID_RESOLVER(res) ISC_MAGIC_VALID(res, RES_MAGIC)
613
/*%
 * Private addrinfo flags.  Each enumerator is a distinct single bit so
 * that the values can be combined in, and tested against, a 'flags'
 * word.
 */
enum {
	FCTX_ADDRINFO_MARK = 0x01,
	FCTX_ADDRINFO_FORWARDER = 0x02,
	FCTX_ADDRINFO_EDNSOK = 0x04,
	FCTX_ADDRINFO_NOCOOKIE = 0x08,
	FCTX_ADDRINFO_BADCOOKIE = 0x10,
	FCTX_ADDRINFO_DUALSTACK = 0x20,
	FCTX_ADDRINFO_NOEDNS0 = 0x40,
};
626
627#define UNMARKED(a)    (((a)->flags & FCTX_ADDRINFO_MARK) == 0)
628#define ISFORWARDER(a) (((a)->flags & FCTX_ADDRINFO_FORWARDER) != 0)
629#define NOCOOKIE(a)    (((a)->flags & FCTX_ADDRINFO_NOCOOKIE) != 0)
630#define EDNSOK(a)      (((a)->flags & FCTX_ADDRINFO_EDNSOK) != 0)
631#define BADCOOKIE(a)   (((a)->flags & FCTX_ADDRINFO_BADCOOKIE) != 0)
632#define ISDUALSTACK(a) (((a)->flags & FCTX_ADDRINFO_DUALSTACK) != 0)
633
634#define NXDOMAIN(r) (((r)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
635#define NEGATIVE(r) (((r)->attributes & DNS_RDATASETATTR_NEGATIVE) != 0)
636
637#define NXDOMAIN_RESULT(r) \
638	((r) == DNS_R_NXDOMAIN || (r) == DNS_R_NCACHENXDOMAIN)
639#define NXRRSET_RESULT(r)                                      \
640	((r) == DNS_R_NCACHENXRRSET || (r) == DNS_R_NXRRSET || \
641	 (r) == DNS_R_HINTNXRRSET)
642
643#ifdef ENABLE_AFL
/* Starts out false; dns_resolver_setfuzzing() sets it to true. */
bool dns_fuzzing_resolver = false;

/*
 * Set the dns_fuzzing_resolver flag.  Takes no arguments and returns
 * nothing.
 */
void
dns_resolver_setfuzzing(void) {
	dns_fuzzing_resolver = true;
}
649#endif /* ifdef ENABLE_AFL */
650
651static unsigned char ip6_arpa_data[] = "\003IP6\004ARPA";
652static unsigned char ip6_arpa_offsets[] = { 0, 4, 9 };
653static const dns_name_t ip6_arpa = DNS_NAME_INITABSOLUTE(ip6_arpa_data,
654							 ip6_arpa_offsets);
655
656static void
657destroy(dns_resolver_t *res);
658static isc_result_t
659resquery_send(resquery_t *query);
660static void
661resquery_response(isc_result_t eresult, isc_region_t *region, void *arg);
662static void
663resquery_connected(isc_result_t eresult, isc_region_t *region, void *arg);
664static void
665fctx_try(fetchctx_t *fctx, bool retrying, bool badcache);
666static void
667fctx_shutdown(fetchctx_t *fctx);
668static void
669fctx_minimize_qname(fetchctx_t *fctx);
670static void
671fctx_destroy(fetchctx_t *fctx, bool exiting);
672static void
673send_shutdown_events(dns_resolver_t *res);
674static isc_result_t
675ncache_adderesult(dns_message_t *message, dns_db_t *cache, dns_dbnode_t *node,
676		  dns_rdatatype_t covers, isc_stdtime_t now, dns_ttl_t minttl,
677		  dns_ttl_t maxttl, bool optout, bool secure,
678		  dns_rdataset_t *ardataset, isc_result_t *eresultp);
679static void
680validated(isc_task_t *task, isc_event_t *event);
681static void
682maybe_cancel_validators(fetchctx_t *fctx, bool locked);
683static void
684add_bad(fetchctx_t *fctx, dns_message_t *rmessage, dns_adbaddrinfo_t *addrinfo,
685	isc_result_t reason, badnstype_t badtype);
686static isc_result_t
687findnoqname(fetchctx_t *fctx, dns_message_t *message, dns_name_t *name,
688	    dns_rdatatype_t type, dns_name_t **noqname);
689
/*
 * Wrappers around fctx__attach(), fctx__detach() and fctx__done_detach()
 * that pass along the caller's file name, line number and function name.
 *
 * None of the expansions ends in a semicolon: the caller supplies it, so
 * each use is exactly one statement and stays safe inside an unbraced
 * if/else.
 */
#define fctx_attach(fctx, fctxp) \
	fctx__attach(fctx, fctxp, __FILE__, __LINE__, __func__)
#define fctx_detach(fctxp) fctx__detach(fctxp, __FILE__, __LINE__, __func__)
#define fctx_done_detach(fctxp, result) \
	fctx__done_detach(fctxp, result, __FILE__, __LINE__, __func__)
695
696static void
697fctx__attach(fetchctx_t *fctx, fetchctx_t **fctxp, const char *file,
698	     unsigned int line, const char *func);
699static void
700fctx__detach(fetchctx_t **fctxp, const char *file, unsigned int line,
701	     const char *func);
702
703static void
704fctx__done_detach(fetchctx_t **fctxp, isc_result_t result, const char *file,
705		  unsigned int line, const char *func);
706
707static void
708resume_qmin(isc_task_t *task, isc_event_t *event);
709
710/*%
711 * The structure and functions defined below implement the resolver
712 * query (resquery) response handling logic.
713 *
714 * When a resolver query is sent and a response is received, the
715 * resquery_response() event handler is run, which calls the rctx_*()
716 * functions.  The respctx_t structure maintains state from function
717 * to function.
718 *
719 * The call flow is described below:
720 *
721 * 1. resquery_response():
722 *    - Initialize a respctx_t structure (rctx_respinit()).
723 *    - Check for dispatcher failure (rctx_dispfail()).
724 *    - Parse the response (rctx_parse()).
725 *    - Log the response (rctx_logpacket()).
726 *    - Check the parsed response for an OPT record and handle
727 *      EDNS (rctx_opt(), rctx_edns()).
728 *    - Check for a bad or lame server (rctx_badserver(), rctx_lameserver()).
729 *    - Handle delegation-only zones (rctx_delonly_zone()).
 *    - If RCODE and ANCOUNT suggest this is a positive answer, call
 *      rctx_answer(): go to step 2.
732 *    - If RCODE and NSCOUNT suggest this is a negative answer or a
733 *      referral, call rctx_answer_none(): go to step 4.
734 *    - Check the additional section for data that should be cached
735 *      (rctx_additional()).
736 *    - Clean up and finish by calling rctx_done(): go to step 5.
737 *
738 * 2. rctx_answer():
739 *    - If the answer appears to be positive, call rctx_answer_positive():
740 *      go to step 3.
741 *    - If the response is a malformed delegation (with glue or NS records
742 *      in the answer section), call rctx_answer_none(): go to step 4.
743 *
744 * 3. rctx_answer_positive():
745 *    - Initialize the portions of respctx_t needed for processing an answer
746 *      (rctx_answer_init()).
747 *    - Scan the answer section to find records that are responsive to the
748 *      query (rctx_answer_scan()).
749 *    - For whichever type of response was found, call a separate routine
750 *      to handle it: matching QNAME/QTYPE (rctx_answer_match()),
751 *      CNAME (rctx_answer_cname()), covering DNAME (rctx_answer_dname()),
752 *      or any records returned in response to a query of type ANY
753 *      (rctx_answer_any()).
754 *    - Scan the authority section for NS or other records that may be
 *      included with a positive answer (rctx_authority_positive()).
756 *
757 * 4. rctx_answer_none():
758 *    - Determine whether this is an NXDOMAIN, NXRRSET, or referral.
759 *    - If referral, set up the resolver to follow the delegation
760 *      (rctx_referral()).
761 *    - If NXDOMAIN/NXRRSET, scan the authority section for NS and SOA
762 *      records included with a negative response (rctx_authority_negative()),
763 *      then for DNSSEC proof of nonexistence (rctx_authority_dnssec()).
764 *
765 * 5. rctx_done():
766 *    - Set up chasing of DS records if needed (rctx_chaseds()).
767 *    - If the response wasn't intended for us, wait for another response
768 *      from the dispatcher (rctx_next()).
769 *    - If there is a problem with the responding server, set up another
770 *      query to a different server (rctx_nextserver()).
771 *    - If there is a problem that might be temporary or dependent on
772 *      EDNS options, set up another query to the same server with changed
773 *      options (rctx_resend()).
774 *    - Shut down the fetch context.
775 */
776
777typedef struct respctx {
778	resquery_t *query;
779	fetchctx_t *fctx;
780	isc_result_t result;
781	isc_buffer_t buffer;
782	unsigned int retryopts; /* updated options to pass to
783				 * fctx_query() when resending */
784
785	dns_rdatatype_t type; /* type being sought (set to
786			       * ANY if qtype was SIG or RRSIG) */
787	bool aa;	      /* authoritative answer? */
788	dns_trust_t trust;    /* answer trust level */
789	bool chaining;	      /* CNAME/DNAME processing? */
790	bool next_server;     /* give up, try the next server
791			       * */
792
793	badnstype_t broken_type; /* type of name server problem
794				  * */
795	isc_result_t broken_server;
796
797	bool get_nameservers; /* get a new NS rrset at
798			       * zone cut? */
799	bool resend;	      /* resend this query? */
800	bool nextitem;	      /* invalid response; keep
801			       * listening for the correct one */
802	bool truncated;	      /* response was truncated */
803	bool no_response;     /* no response was received */
804	bool glue_in_answer;  /* glue may be in the answer
805			       * section */
806	bool ns_in_answer;    /* NS may be in the answer
807			       * section */
808	bool negative;	      /* is this a negative response? */
809
810	isc_stdtime_t now; /* time info */
811	isc_time_t tnow;
812	isc_time_t *finish;
813
814	unsigned int dname_labels;
815	unsigned int domain_labels; /* range of permissible number
816				     * of
817				     * labels in a DNAME */
818
819	dns_name_t *aname;	   /* answer name */
820	dns_rdataset_t *ardataset; /* answer rdataset */
821
822	dns_name_t *cname;	   /* CNAME name */
823	dns_rdataset_t *crdataset; /* CNAME rdataset */
824
825	dns_name_t *dname;	   /* DNAME name */
826	dns_rdataset_t *drdataset; /* DNAME rdataset */
827
828	dns_name_t *ns_name;	     /* NS name */
829	dns_rdataset_t *ns_rdataset; /* NS rdataset */
830
831	dns_name_t *soa_name; /* SOA name in a negative answer */
832	dns_name_t *ds_name;  /* DS name in a negative answer */
833
834	dns_name_t *found_name;	    /* invalid name in negative
835				     * response */
836	dns_rdatatype_t found_type; /* invalid type in negative
837				     * response */
838
839	dns_rdataset_t *opt; /* OPT rdataset */
840} respctx_t;
841
842static void
843rctx_respinit(resquery_t *query, fetchctx_t *fctx, isc_result_t result,
844	      isc_region_t *region, respctx_t *rctx);
845
846static void
847rctx_answer_init(respctx_t *rctx);
848
849static void
850rctx_answer_scan(respctx_t *rctx);
851
852static void
853rctx_authority_positive(respctx_t *rctx);
854
855static isc_result_t
856rctx_answer_any(respctx_t *rctx);
857
858static isc_result_t
859rctx_answer_match(respctx_t *rctx);
860
861static isc_result_t
862rctx_answer_cname(respctx_t *rctx);
863
864static isc_result_t
865rctx_answer_dname(respctx_t *rctx);
866
867static isc_result_t
868rctx_answer_positive(respctx_t *rctx);
869
870static isc_result_t
871rctx_authority_negative(respctx_t *rctx);
872
873static isc_result_t
874rctx_authority_dnssec(respctx_t *rctx);
875
876static void
877rctx_additional(respctx_t *rctx);
878
879static isc_result_t
880rctx_referral(respctx_t *rctx);
881
882static isc_result_t
883rctx_answer_none(respctx_t *rctx);
884
885static void
886rctx_nextserver(respctx_t *rctx, dns_message_t *message,
887		dns_adbaddrinfo_t *addrinfo, isc_result_t result);
888
889static void
890rctx_resend(respctx_t *rctx, dns_adbaddrinfo_t *addrinfo);
891
892static isc_result_t
893rctx_next(respctx_t *rctx);
894
895static void
896rctx_chaseds(respctx_t *rctx, dns_message_t *message,
897	     dns_adbaddrinfo_t *addrinfo, isc_result_t result);
898
899static void
900rctx_done(respctx_t *rctx, isc_result_t result);
901
902static void
903rctx_logpacket(respctx_t *rctx);
904
905static void
906rctx_opt(respctx_t *rctx);
907
908static void
909rctx_edns(respctx_t *rctx);
910
911static isc_result_t
912rctx_parse(respctx_t *rctx);
913
914static isc_result_t
915rctx_badserver(respctx_t *rctx, isc_result_t result);
916
917static isc_result_t
918rctx_answer(respctx_t *rctx);
919
920static isc_result_t
921rctx_lameserver(respctx_t *rctx);
922
923static isc_result_t
924rctx_dispfail(respctx_t *rctx);
925
926static isc_result_t
927rctx_timedout(respctx_t *rctx);
928
929static void
930rctx_delonly_zone(respctx_t *rctx);
931
932static void
933rctx_ncache(respctx_t *rctx);
934
935/*%
936 * Increment resolver-related statistics counters.
937 */
938static void
939inc_stats(dns_resolver_t *res, isc_statscounter_t counter) {
940	if (res->view->resstats != NULL) {
941		isc_stats_increment(res->view->resstats, counter);
942	}
943}
944
945static void
946dec_stats(dns_resolver_t *res, isc_statscounter_t counter) {
947	if (res->view->resstats != NULL) {
948		isc_stats_decrement(res->view->resstats, counter);
949	}
950}
951
952static isc_result_t
953valcreate(fetchctx_t *fctx, dns_message_t *message, dns_adbaddrinfo_t *addrinfo,
954	  dns_name_t *name, dns_rdatatype_t type, dns_rdataset_t *rdataset,
955	  dns_rdataset_t *sigrdataset, unsigned int valoptions,
956	  isc_task_t *task) {
957	dns_validator_t *validator = NULL;
958	dns_valarg_t *valarg;
959	isc_result_t result;
960
961	valarg = isc_mem_get(fctx->mctx, sizeof(*valarg));
962
963	*valarg = (dns_valarg_t){
964		.addrinfo = addrinfo,
965	};
966
967	fctx_attach(fctx, &valarg->fctx);
968	dns_message_attach(message, &valarg->message);
969
970	if (!ISC_LIST_EMPTY(fctx->validators)) {
971		valoptions |= DNS_VALIDATOR_DEFER;
972	} else {
973		valoptions &= ~DNS_VALIDATOR_DEFER;
974	}
975
976	result = dns_validator_create(fctx->res->view, name, type, rdataset,
977				      sigrdataset, message, valoptions, task,
978				      validated, valarg, &validator);
979	RUNTIME_CHECK(result == ISC_R_SUCCESS);
980	if (result == ISC_R_SUCCESS) {
981		inc_stats(fctx->res, dns_resstatscounter_val);
982		if ((valoptions & DNS_VALIDATOR_DEFER) == 0) {
983			INSIST(fctx->validator == NULL);
984			fctx->validator = validator;
985		}
986		ISC_LIST_APPEND(fctx->validators, validator, link);
987	} else {
988		dns_message_detach(&valarg->message);
989		fctx_detach(&valarg->fctx);
990		isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));
991	}
992	return (result);
993}
994
995static bool
996rrsig_fromchildzone(fetchctx_t *fctx, dns_rdataset_t *rdataset) {
997	dns_namereln_t namereln;
998	dns_rdata_rrsig_t rrsig;
999	dns_rdata_t rdata = DNS_RDATA_INIT;
1000	int order;
1001	isc_result_t result;
1002	unsigned int labels;
1003
1004	for (result = dns_rdataset_first(rdataset); result == ISC_R_SUCCESS;
1005	     result = dns_rdataset_next(rdataset))
1006	{
1007		dns_rdataset_current(rdataset, &rdata);
1008		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
1009		RUNTIME_CHECK(result == ISC_R_SUCCESS);
1010		namereln = dns_name_fullcompare(&rrsig.signer, fctx->domain,
1011						&order, &labels);
1012		if (namereln == dns_namereln_subdomain) {
1013			return (true);
1014		}
1015		dns_rdata_reset(&rdata);
1016	}
1017	return (false);
1018}
1019
1020static bool
1021fix_mustbedelegationornxdomain(dns_message_t *message, fetchctx_t *fctx) {
1022	dns_name_t *name;
1023	dns_name_t *domain = fctx->domain;
1024	dns_rdataset_t *rdataset;
1025	dns_rdatatype_t type;
1026	isc_result_t result;
1027	bool keep_auth = false;
1028
1029	if (message->rcode == dns_rcode_nxdomain) {
1030		return (false);
1031	}
1032
1033	/*
1034	 * A DS RRset can appear anywhere in a zone, even for a delegation-only
1035	 * zone.  So a response to an explicit query for this type should be
1036	 * excluded from delegation-only fixup.
1037	 *
1038	 * SOA, NS, and DNSKEY can only exist at a zone apex, so a positive
1039	 * response to a query for these types can never violate the
1040	 * delegation-only assumption: if the query name is below a
1041	 * zone cut, the response should normally be a referral, which should
1042	 * be accepted; if the query name is below a zone cut but the server
1043	 * happens to have authority for the zone of the query name, the
1044	 * response is a (non-referral) answer.  But this does not violate
1045	 * delegation-only because the query name must be in a different zone
1046	 * due to the "apex-only" nature of these types.  Note that if the
1047	 * remote server happens to have authority for a child zone of a
1048	 * delegation-only zone, we may still incorrectly "fix" the response
1049	 * with NXDOMAIN for queries for other types.  Unfortunately it's
1050	 * generally impossible to differentiate this case from violation of
1051	 * the delegation-only assumption.  Once the resolver learns the
1052	 * correct zone cut, possibly via a separate query for an "apex-only"
1053	 * type, queries for other types will be resolved correctly.
1054	 *
1055	 * A query for type ANY will be accepted if it hits an exceptional
1056	 * type above in the answer section as it should be from a child
1057	 * zone.
1058	 *
1059	 * Also accept answers with RRSIG records from the child zone.
1060	 * Direct queries for RRSIG records should not be answered from
1061	 * the parent zone.
1062	 */
1063
1064	if (message->counts[DNS_SECTION_ANSWER] != 0 &&
1065	    (fctx->type == dns_rdatatype_ns || fctx->type == dns_rdatatype_ds ||
1066	     fctx->type == dns_rdatatype_soa ||
1067	     fctx->type == dns_rdatatype_any ||
1068	     fctx->type == dns_rdatatype_rrsig ||
1069	     fctx->type == dns_rdatatype_dnskey))
1070	{
1071		result = dns_message_firstname(message, DNS_SECTION_ANSWER);
1072		while (result == ISC_R_SUCCESS) {
1073			name = NULL;
1074			dns_message_currentname(message, DNS_SECTION_ANSWER,
1075						&name);
1076			for (rdataset = ISC_LIST_HEAD(name->list);
1077			     rdataset != NULL;
1078			     rdataset = ISC_LIST_NEXT(rdataset, link))
1079			{
1080				if (!dns_name_equal(name, fctx->name)) {
1081					continue;
1082				}
1083				type = rdataset->type;
1084				/*
1085				 * RRsig from child?
1086				 */
1087				if (type == dns_rdatatype_rrsig &&
1088				    rrsig_fromchildzone(fctx, rdataset))
1089				{
1090					return (false);
1091				}
1092				/*
1093				 * Direct query for apex records or DS.
1094				 */
1095				if (fctx->type == type &&
1096				    (type == dns_rdatatype_ds ||
1097				     type == dns_rdatatype_ns ||
1098				     type == dns_rdatatype_soa ||
1099				     type == dns_rdatatype_dnskey))
1100				{
1101					return (false);
1102				}
1103				/*
1104				 * Indirect query for apex records or DS.
1105				 */
1106				if (fctx->type == dns_rdatatype_any &&
1107				    (type == dns_rdatatype_ns ||
1108				     type == dns_rdatatype_ds ||
1109				     type == dns_rdatatype_soa ||
1110				     type == dns_rdatatype_dnskey))
1111				{
1112					return (false);
1113				}
1114			}
1115			result = dns_message_nextname(message,
1116						      DNS_SECTION_ANSWER);
1117		}
1118	}
1119
1120	/*
1121	 * A NODATA response to a DS query?
1122	 */
1123	if (fctx->type == dns_rdatatype_ds &&
1124	    message->counts[DNS_SECTION_ANSWER] == 0)
1125	{
1126		return (false);
1127	}
1128
1129	/* Look for referral or indication of answer from child zone? */
1130	if (message->counts[DNS_SECTION_AUTHORITY] == 0) {
1131		goto munge;
1132	}
1133
1134	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
1135	while (result == ISC_R_SUCCESS) {
1136		name = NULL;
1137		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
1138		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
1139		     rdataset = ISC_LIST_NEXT(rdataset, link))
1140		{
1141			type = rdataset->type;
1142			if (type == dns_rdatatype_soa &&
1143			    dns_name_equal(name, domain))
1144			{
1145				keep_auth = true;
1146			}
1147
1148			if (type != dns_rdatatype_ns &&
1149			    type != dns_rdatatype_soa &&
1150			    type != dns_rdatatype_rrsig)
1151			{
1152				continue;
1153			}
1154
1155			if (type == dns_rdatatype_rrsig) {
1156				if (rrsig_fromchildzone(fctx, rdataset)) {
1157					return (false);
1158				} else {
1159					continue;
1160				}
1161			}
1162
1163			/* NS or SOA records. */
1164			if (dns_name_equal(name, domain)) {
1165				/*
1166				 * If a query for ANY causes a negative
1167				 * response, we can be sure that this is
1168				 * an empty node.  For other type of queries
1169				 * we cannot differentiate an empty node
1170				 * from a node that just doesn't have that
1171				 * type of record.  We only accept the former
1172				 * case.
1173				 */
1174				if (message->counts[DNS_SECTION_ANSWER] == 0 &&
1175				    fctx->type == dns_rdatatype_any)
1176				{
1177					return (false);
1178				}
1179			} else if (dns_name_issubdomain(name, domain)) {
1180				/* Referral or answer from child zone. */
1181				return (false);
1182			}
1183		}
1184		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
1185	}
1186
1187munge:
1188	message->rcode = dns_rcode_nxdomain;
1189	message->counts[DNS_SECTION_ANSWER] = 0;
1190	if (!keep_auth) {
1191		message->counts[DNS_SECTION_AUTHORITY] = 0;
1192	}
1193	message->counts[DNS_SECTION_ADDITIONAL] = 0;
1194	return (true);
1195}
1196
1197static void
1198resquery_destroy(resquery_t *query) {
1199	fetchctx_t *fctx = query->fctx;
1200	dns_resolver_t *res = fctx->res;
1201	unsigned int bucket = fctx->bucketnum;
1202
1203	if (ISC_LINK_LINKED(query, link)) {
1204		ISC_LIST_UNLINK(fctx->queries, query, link);
1205	}
1206
1207	if (query->tsig != NULL) {
1208		isc_buffer_free(&query->tsig);
1209	}
1210
1211	if (query->tsigkey != NULL) {
1212		dns_tsigkey_detach(&query->tsigkey);
1213	}
1214
1215	if (query->dispentry != NULL) {
1216		dns_dispatch_done(&query->dispentry);
1217	}
1218
1219	if (query->dispatch != NULL) {
1220		dns_dispatch_detach(&query->dispatch);
1221	}
1222
1223	isc_refcount_destroy(&query->references);
1224
1225	LOCK(&res->buckets[bucket].lock);
1226	atomic_fetch_sub_release(&fctx->nqueries, 1);
1227	UNLOCK(&res->buckets[bucket].lock);
1228	fctx_detach(&query->fctx);
1229
1230	if (query->rmessage != NULL) {
1231		dns_message_detach(&query->rmessage);
1232	}
1233
1234	query->magic = 0;
1235	isc_mem_put(query->mctx, query, sizeof(*query));
1236}
1237
1238static void
1239resquery_attach(resquery_t *source, resquery_t **targetp) {
1240	REQUIRE(VALID_QUERY(source));
1241	REQUIRE(targetp != NULL && *targetp == NULL);
1242
1243	isc_refcount_increment(&source->references);
1244
1245	*targetp = source;
1246}
1247
1248static void
1249resquery_detach(resquery_t **queryp) {
1250	uint_fast32_t ref;
1251	resquery_t *query = NULL;
1252
1253	REQUIRE(queryp != NULL && VALID_QUERY(*queryp));
1254
1255	query = *queryp;
1256	*queryp = NULL;
1257
1258	ref = isc_refcount_decrement(&query->references);
1259	if (ref == 1) {
1260		resquery_destroy(query);
1261	}
1262}
1263
1264/*%
1265 * Update EDNS statistics for a server after not getting a response to a UDP
1266 * query sent to it.
1267 */
1268static void
1269update_edns_stats(resquery_t *query) {
1270	fetchctx_t *fctx = query->fctx;
1271
1272	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
1273		return;
1274	}
1275
1276	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
1277		dns_adb_ednsto(fctx->adb, query->addrinfo);
1278	} else {
1279		dns_adb_timeout(fctx->adb, query->addrinfo);
1280	}
1281}
1282
1283/*
1284 * Start the maximum lifetime timer for the fetch. This will
1285 * trigger if, for example, some ADB or validator dependency
1286 * loop occurs and causes a fetch to hang.
1287 */
1288static isc_result_t
1289fctx_starttimer(fetchctx_t *fctx) {
1290	return (isc_timer_reset(fctx->timer, isc_timertype_once, &fctx->final,
1291				NULL, true));
1292}
1293
1294static void
1295fctx_stoptimer(fetchctx_t *fctx) {
1296	isc_result_t result;
1297
1298	/*
1299	 * We don't return a result if resetting the timer to inactive fails
1300	 * since there's nothing to be done about it.  Resetting to inactive
1301	 * should never fail anyway, since the code as currently written
1302	 * cannot fail in that case.
1303	 */
1304	result = isc_timer_reset(fctx->timer, isc_timertype_inactive, NULL,
1305				 NULL, true);
1306	if (result != ISC_R_SUCCESS) {
1307		UNEXPECTED_ERROR("isc_timer_reset(): %s",
1308				 isc_result_totext(result));
1309	}
1310}
1311
1312static void
1313fctx_cancelquery(resquery_t **queryp, isc_time_t *finish, bool no_response,
1314		 bool age_untried) {
1315	resquery_t *query = NULL;
1316	fetchctx_t *fctx = NULL;
1317	unsigned int rtt, rttms;
1318	unsigned int factor;
1319	dns_adbfind_t *find = NULL;
1320	dns_adbaddrinfo_t *addrinfo;
1321	isc_stdtime_t now;
1322
1323	REQUIRE(queryp != NULL);
1324
1325	query = *queryp;
1326	fctx = query->fctx;
1327
1328	if (RESQUERY_CANCELED(query)) {
1329		return;
1330	}
1331
1332	FCTXTRACE("cancelquery");
1333
1334	query->attributes |= RESQUERY_ATTR_CANCELED;
1335
1336	/*
1337	 * Should we update the RTT?
1338	 */
1339	if (finish != NULL || no_response) {
1340		if (finish != NULL) {
1341			/*
1342			 * We have both the start and finish times for this
1343			 * packet, so we can compute a real RTT.
1344			 */
1345			rtt = (unsigned int)isc_time_microdiff(finish,
1346							       &query->start);
1347			factor = DNS_ADB_RTTADJDEFAULT;
1348
1349			rttms = rtt / US_PER_MS;
1350			if (rttms < DNS_RESOLVER_QRYRTTCLASS0) {
1351				inc_stats(fctx->res,
1352					  dns_resstatscounter_queryrtt0);
1353			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS1) {
1354				inc_stats(fctx->res,
1355					  dns_resstatscounter_queryrtt1);
1356			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS2) {
1357				inc_stats(fctx->res,
1358					  dns_resstatscounter_queryrtt2);
1359			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS3) {
1360				inc_stats(fctx->res,
1361					  dns_resstatscounter_queryrtt3);
1362			} else if (rttms < DNS_RESOLVER_QRYRTTCLASS4) {
1363				inc_stats(fctx->res,
1364					  dns_resstatscounter_queryrtt4);
1365			} else {
1366				inc_stats(fctx->res,
1367					  dns_resstatscounter_queryrtt5);
1368			}
1369		} else {
1370			uint32_t value;
1371			uint32_t mask;
1372
1373			update_edns_stats(query);
1374
1375			/*
1376			 * If "forward first;" is used and a forwarder timed
1377			 * out, do not attempt to query it again in this fetch
1378			 * context.
1379			 */
1380			if (fctx->fwdpolicy == dns_fwdpolicy_first &&
1381			    ISFORWARDER(query->addrinfo))
1382			{
1383				add_bad(fctx, query->rmessage, query->addrinfo,
1384					ISC_R_TIMEDOUT, badns_forwarder);
1385			}
1386
1387			/*
1388			 * We don't have an RTT for this query.  Maybe the
1389			 * packet was lost, or maybe this server is very
1390			 * slow.  We don't know.  Increase the RTT.
1391			 */
1392			INSIST(no_response);
1393			value = isc_random32();
1394			if (query->addrinfo->srtt > 800000) {
1395				mask = 0x3fff;
1396			} else if (query->addrinfo->srtt > 400000) {
1397				mask = 0x7fff;
1398			} else if (query->addrinfo->srtt > 200000) {
1399				mask = 0xffff;
1400			} else if (query->addrinfo->srtt > 100000) {
1401				mask = 0x1ffff;
1402			} else if (query->addrinfo->srtt > 50000) {
1403				mask = 0x3ffff;
1404			} else if (query->addrinfo->srtt > 25000) {
1405				mask = 0x7ffff;
1406			} else {
1407				mask = 0xfffff;
1408			}
1409
1410			/*
1411			 * Don't adjust timeout on EDNS queries unless we have
1412			 * seen a EDNS response.
1413			 */
1414			if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0 &&
1415			    !EDNSOK(query->addrinfo))
1416			{
1417				mask >>= 2;
1418			}
1419
1420			rtt = query->addrinfo->srtt + (value & mask);
1421			if (rtt > MAX_SINGLE_QUERY_TIMEOUT_US) {
1422				rtt = MAX_SINGLE_QUERY_TIMEOUT_US;
1423			}
1424
1425			/*
1426			 * Replace the current RTT with our value.
1427			 */
1428			factor = DNS_ADB_RTTADJREPLACE;
1429		}
1430
1431		dns_adb_adjustsrtt(fctx->adb, query->addrinfo, rtt, factor);
1432	}
1433
1434	if ((query->options & DNS_FETCHOPT_TCP) == 0) {
1435		/* Inform the ADB that we're ending a UDP fetch */
1436		dns_adb_endudpfetch(fctx->adb, query->addrinfo);
1437	}
1438
1439	/*
1440	 * Age RTTs of servers not tried.
1441	 */
1442	isc_stdtime_get(&now);
1443	if (finish != NULL || age_untried) {
1444		for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs);
1445		     addrinfo != NULL;
1446		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
1447		{
1448			if (UNMARKED(addrinfo)) {
1449				dns_adb_agesrtt(fctx->adb, addrinfo, now);
1450			}
1451		}
1452	}
1453
1454	if ((finish != NULL || age_untried) && TRIEDFIND(fctx)) {
1455		for (find = ISC_LIST_HEAD(fctx->finds); find != NULL;
1456		     find = ISC_LIST_NEXT(find, publink))
1457		{
1458			for (addrinfo = ISC_LIST_HEAD(find->list);
1459			     addrinfo != NULL;
1460			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
1461			{
1462				if (UNMARKED(addrinfo)) {
1463					dns_adb_agesrtt(fctx->adb, addrinfo,
1464							now);
1465				}
1466			}
1467		}
1468	}
1469
1470	if ((finish != NULL || age_untried) && TRIEDALT(fctx)) {
1471		for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs); addrinfo != NULL;
1472		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
1473		{
1474			if (UNMARKED(addrinfo)) {
1475				dns_adb_agesrtt(fctx->adb, addrinfo, now);
1476			}
1477		}
1478		for (find = ISC_LIST_HEAD(fctx->altfinds); find != NULL;
1479		     find = ISC_LIST_NEXT(find, publink))
1480		{
1481			for (addrinfo = ISC_LIST_HEAD(find->list);
1482			     addrinfo != NULL;
1483			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
1484			{
1485				if (UNMARKED(addrinfo)) {
1486					dns_adb_agesrtt(fctx->adb, addrinfo,
1487							now);
1488				}
1489			}
1490		}
1491	}
1492
1493	/*
1494	 * Check for any outstanding dispatch responses and if they
1495	 * exist, cancel them.
1496	 */
1497	if (query->dispentry != NULL) {
1498		dns_dispatch_done(&query->dispentry);
1499	}
1500
1501	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
1502	if (ISC_LINK_LINKED(query, link)) {
1503		ISC_LIST_UNLINK(fctx->queries, query, link);
1504	}
1505	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
1506
1507	resquery_detach(queryp);
1508}
1509
1510static void
1511fctx_cleanup(fetchctx_t *fctx) {
1512	dns_adbfind_t *find = NULL, *next_find = NULL;
1513	dns_adbaddrinfo_t *addr = NULL, *next_addr = NULL;
1514
1515	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
1516
1517	for (find = ISC_LIST_HEAD(fctx->finds); find != NULL; find = next_find)
1518	{
1519		next_find = ISC_LIST_NEXT(find, publink);
1520		ISC_LIST_UNLINK(fctx->finds, find, publink);
1521		dns_adb_destroyfind(&find);
1522		fctx_unref(fctx);
1523	}
1524	fctx->find = NULL;
1525
1526	for (find = ISC_LIST_HEAD(fctx->altfinds); find != NULL;
1527	     find = next_find)
1528	{
1529		next_find = ISC_LIST_NEXT(find, publink);
1530		ISC_LIST_UNLINK(fctx->altfinds, find, publink);
1531		dns_adb_destroyfind(&find);
1532		fctx_unref(fctx);
1533	}
1534	fctx->altfind = NULL;
1535
1536	for (addr = ISC_LIST_HEAD(fctx->forwaddrs); addr != NULL;
1537	     addr = next_addr)
1538	{
1539		next_addr = ISC_LIST_NEXT(addr, publink);
1540		ISC_LIST_UNLINK(fctx->forwaddrs, addr, publink);
1541		dns_adb_freeaddrinfo(fctx->adb, &addr);
1542	}
1543
1544	for (addr = ISC_LIST_HEAD(fctx->altaddrs); addr != NULL;
1545	     addr = next_addr)
1546	{
1547		next_addr = ISC_LIST_NEXT(addr, publink);
1548		ISC_LIST_UNLINK(fctx->altaddrs, addr, publink);
1549		dns_adb_freeaddrinfo(fctx->adb, &addr);
1550	}
1551}
1552
1553static void
1554fctx_cancelqueries(fetchctx_t *fctx, bool no_response, bool age_untried) {
1555	resquery_t *query = NULL, *next_query = NULL;
1556	ISC_LIST(resquery_t) queries;
1557
1558	FCTXTRACE("cancelqueries");
1559
1560	ISC_LIST_INIT(queries);
1561
1562	/*
1563	 * Move the queries to a local list so we can cancel
1564	 * them without holding the lock.
1565	 */
1566	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
1567	ISC_LIST_MOVE(queries, fctx->queries);
1568	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
1569
1570	for (query = ISC_LIST_HEAD(queries); query != NULL; query = next_query)
1571	{
1572		next_query = ISC_LIST_NEXT(query, link);
1573
1574		/*
1575		 * Note that we have to unlink the query here,
1576		 * because if it's still linked in fctx_cancelquery(),
1577		 * then it will try to unlink it from fctx->queries.
1578		 */
1579		ISC_LIST_UNLINK(queries, query, link);
1580		fctx_cancelquery(&query, NULL, no_response, age_untried);
1581	}
1582}
1583
1584static void
1585fcount_logspill(fetchctx_t *fctx, fctxcount_t *counter, bool final) {
1586	char dbuf[DNS_NAME_FORMATSIZE];
1587	isc_stdtime_t now;
1588
1589	if (!isc_log_wouldlog(dns_lctx, ISC_LOG_INFO)) {
1590		return;
1591	}
1592
1593	/* Do not log a message if there were no dropped fetches. */
1594	if (counter->dropped == 0) {
1595		return;
1596	}
1597
1598	/* Do not log the cumulative message if the previous log is recent. */
1599	isc_stdtime_get(&now);
1600	if (!final && counter->logged > now - 60) {
1601		return;
1602	}
1603
1604	dns_name_format(fctx->domain, dbuf, sizeof(dbuf));
1605
1606	if (!final) {
1607		isc_log_write(dns_lctx, DNS_LOGCATEGORY_SPILL,
1608			      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1609			      "too many simultaneous fetches for %s "
1610			      "(allowed %d spilled %d)",
1611			      dbuf, counter->allowed, counter->dropped);
1612	} else {
1613		isc_log_write(dns_lctx, DNS_LOGCATEGORY_SPILL,
1614			      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1615			      "fetch counters for %s now being discarded "
1616			      "(allowed %d spilled %d; cumulative since "
1617			      "initial trigger event)",
1618			      dbuf, counter->allowed, counter->dropped);
1619	}
1620
1621	counter->logged = now;
1622}
1623
1624static isc_result_t
1625fcount_incr(fetchctx_t *fctx, bool force) {
1626	isc_result_t result = ISC_R_SUCCESS;
1627	zonebucket_t *dbucket = NULL;
1628	fctxcount_t *counter = NULL;
1629	uint32_t hashval;
1630	uint32_t dbucketnum;
1631
1632	REQUIRE(fctx != NULL);
1633	REQUIRE(fctx->res != NULL);
1634
1635	INSIST(fctx->dbucketnum == RES_NOBUCKET);
1636	hashval = dns_name_fullhash(fctx->domain, false);
1637	dbucketnum = hash_32(hashval, fctx->res->dhashbits);
1638
1639	dbucket = &fctx->res->dbuckets[dbucketnum];
1640
1641	LOCK(&dbucket->lock);
1642	for (counter = ISC_LIST_HEAD(dbucket->list); counter != NULL;
1643	     counter = ISC_LIST_NEXT(counter, link))
1644	{
1645		if (dns_name_equal(counter->domain, fctx->domain)) {
1646			break;
1647		}
1648	}
1649
1650	if (counter == NULL) {
1651		counter = isc_mem_get(fctx->res->mctx, sizeof(*counter));
1652		*counter = (fctxcount_t){
1653			.count = 1,
1654			.allowed = 1,
1655		};
1656
1657		counter->domain = dns_fixedname_initname(&counter->dfname);
1658		ISC_LINK_INIT(counter, link);
1659		dns_name_copy(fctx->domain, counter->domain);
1660		ISC_LIST_APPEND(dbucket->list, counter, link);
1661	} else {
1662		uint_fast32_t spill = atomic_load_acquire(&fctx->res->zspill);
1663		if (!force && spill != 0 && counter->count >= spill) {
1664			counter->dropped++;
1665			fcount_logspill(fctx, counter, false);
1666			result = ISC_R_QUOTA;
1667		} else {
1668			counter->count++;
1669			counter->allowed++;
1670		}
1671	}
1672	UNLOCK(&dbucket->lock);
1673
1674	if (result == ISC_R_SUCCESS) {
1675		fctx->dbucketnum = dbucketnum;
1676	}
1677
1678	return (result);
1679}
1680
1681static void
1682fcount_decr(fetchctx_t *fctx) {
1683	zonebucket_t *dbucket = NULL;
1684	fctxcount_t *counter = NULL;
1685
1686	REQUIRE(fctx != NULL);
1687
1688	if (fctx->dbucketnum == RES_NOBUCKET) {
1689		return;
1690	}
1691
1692	dbucket = &fctx->res->dbuckets[fctx->dbucketnum];
1693
1694	LOCK(&dbucket->lock);
1695	for (counter = ISC_LIST_HEAD(dbucket->list); counter != NULL;
1696	     counter = ISC_LIST_NEXT(counter, link))
1697	{
1698		if (dns_name_equal(counter->domain, fctx->domain)) {
1699			break;
1700		}
1701	}
1702
1703	if (counter != NULL) {
1704		INSIST(counter->count != 0);
1705		counter->count--;
1706		fctx->dbucketnum = RES_NOBUCKET;
1707
1708		if (counter->count == 0) {
1709			fcount_logspill(fctx, counter, true);
1710			ISC_LIST_UNLINK(dbucket->list, counter, link);
1711			isc_mem_put(fctx->res->mctx, counter, sizeof(*counter));
1712		}
1713	}
1714
1715	UNLOCK(&dbucket->lock);
1716}
1717
1718static void
1719fctx_sendevents(fetchctx_t *fctx, isc_result_t result, int line) {
1720	dns_fetchevent_t *event, *next_event;
1721	isc_task_t *task;
1722	unsigned int count = 0;
1723	isc_interval_t i;
1724	bool logit = false;
1725	isc_time_t now;
1726	unsigned int old_spillat;
1727	unsigned int new_spillat = 0; /* initialized to silence
1728				       * compiler warnings */
1729
1730	/*
1731	 * Caller must be holding the appropriate bucket lock.
1732	 */
1733	REQUIRE(fctx->state == fetchstate_done);
1734
1735	FCTXTRACE("sendevents");
1736
1737	/*
1738	 * Keep some record of fetch result for logging later (if required).
1739	 */
1740	fctx->result = result;
1741	fctx->exitline = line;
1742	TIME_NOW(&now);
1743	fctx->duration = isc_time_microdiff(&now, &fctx->start);
1744
1745	for (event = ISC_LIST_HEAD(fctx->events); event != NULL;
1746	     event = next_event)
1747	{
1748		next_event = ISC_LIST_NEXT(event, ev_link);
1749		ISC_LIST_UNLINK(fctx->events, event, ev_link);
1750
1751		/*
1752		 * Only the regular fetch events should be counted for the
1753		 * clients-per-query limit, in case if there are multiple events
1754		 * registered for a single client.
1755		 */
1756		if (event->ev_type == DNS_EVENT_FETCHDONE) {
1757			count++;
1758		}
1759
1760		if (event->ev_type == DNS_EVENT_TRYSTALE) {
1761			/*
1762			 * Not applicable to TRY STALE events, this function is
1763			 * called when the fetch has either completed or timed
1764			 * out due to resolver-query-timeout being reached.
1765			 */
1766			isc_task_detach((isc_task_t **)&event->ev_sender);
1767			isc_event_free((isc_event_t **)&event);
1768			continue;
1769		}
1770		task = event->ev_sender;
1771		event->ev_sender = fctx;
1772		event->vresult = fctx->vresult;
1773		if (!HAVE_ANSWER(fctx)) {
1774			event->result = result;
1775		}
1776
1777		INSIST(event->result != ISC_R_SUCCESS ||
1778		       dns_rdataset_isassociated(event->rdataset) ||
1779		       fctx->type == dns_rdatatype_any ||
1780		       fctx->type == dns_rdatatype_rrsig ||
1781		       fctx->type == dns_rdatatype_sig);
1782
1783		/*
1784		 * Negative results must be indicated in event->result.
1785		 */
1786		if (dns_rdataset_isassociated(event->rdataset) &&
1787		    NEGATIVE(event->rdataset))
1788		{
1789			INSIST(event->result == DNS_R_NCACHENXDOMAIN ||
1790			       event->result == DNS_R_NCACHENXRRSET);
1791		}
1792
1793		FCTXTRACE("event");
1794		isc_task_sendanddetach(&task, ISC_EVENT_PTR(&event));
1795	}
1796
1797	if (HAVE_ANSWER(fctx) && fctx->spilled &&
1798	    (count < fctx->res->spillatmax || fctx->res->spillatmax == 0))
1799	{
1800		LOCK(&fctx->res->lock);
1801		if (count == fctx->res->spillat &&
1802		    !atomic_load_acquire(&fctx->res->exiting))
1803		{
1804			old_spillat = fctx->res->spillat;
1805			fctx->res->spillat += 5;
1806			if (fctx->res->spillat > fctx->res->spillatmax &&
1807			    fctx->res->spillatmax != 0)
1808			{
1809				fctx->res->spillat = fctx->res->spillatmax;
1810			}
1811			new_spillat = fctx->res->spillat;
1812			if (new_spillat != old_spillat) {
1813				logit = true;
1814			}
1815			isc_interval_set(&i, 20 * 60, 0);
1816			result = isc_timer_reset(fctx->res->spillattimer,
1817						 isc_timertype_ticker, NULL, &i,
1818						 true);
1819			RUNTIME_CHECK(result == ISC_R_SUCCESS);
1820		}
1821		UNLOCK(&fctx->res->lock);
1822		if (logit) {
1823			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
1824				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
1825				      "clients-per-query increased to %u",
1826				      new_spillat);
1827		}
1828	}
1829}
1830
1831static void
1832fctx__done_detach(fetchctx_t **fctxp, isc_result_t result, const char *file,
1833		  unsigned int line, const char *func) {
1834	fetchctx_t *fctx = NULL;
1835	dns_resolver_t *res = NULL;
1836	bool no_response = false;
1837	bool age_untried = false;
1838
1839	REQUIRE(fctxp != NULL && VALID_FCTX(*fctxp));
1840
1841	fctx = *fctxp;
1842	res = fctx->res;
1843
1844	FCTXTRACE("done");
1845
1846#ifdef FCTX_TRACE
1847	fprintf(stderr, "%s:%s:%u:%s(%p, %p): %s\n", func, file, line, __func__,
1848		fctx, fctxp, isc_result_totext(result));
1849#else
1850	UNUSED(file);
1851	UNUSED(line);
1852	UNUSED(func);
1853#endif
1854
1855	LOCK(&res->buckets[fctx->bucketnum].lock);
1856	INSIST(fctx->state != fetchstate_done);
1857	fctx->state = fetchstate_done;
1858	UNLOCK(&res->buckets[fctx->bucketnum].lock);
1859
1860	if (result == ISC_R_SUCCESS) {
1861		if (fctx->qmin_warning != ISC_R_SUCCESS) {
1862			isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
1863				      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
1864				      "success resolving '%s' "
1865				      "after disabling qname minimization due "
1866				      "to '%s'",
1867				      fctx->info,
1868				      isc_result_totext(fctx->qmin_warning));
1869		}
1870
1871		/*
1872		 * A success result indicates we got a response to a
1873		 * query. That query should be canceled already. If
1874		 * there still are any outstanding queries attached to the
1875		 * same fctx, then those have *not* gotten a response,
1876		 * so we set 'no_response' to true here: that way, when
1877		 * we run fctx_cancelqueries() below, the SRTTs will
1878		 * be adjusted.
1879		 */
1880		no_response = true;
1881	} else if (result == ISC_R_TIMEDOUT) {
1882		age_untried = true;
1883	}
1884
1885	fctx->qmin_warning = ISC_R_SUCCESS;
1886
1887	fctx_cancelqueries(fctx, no_response, age_untried);
1888	fctx_stoptimer(fctx);
1889
1890	LOCK(&res->buckets[fctx->bucketnum].lock);
1891	FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
1892	fctx_sendevents(fctx, result, line);
1893	fctx_shutdown(fctx);
1894	UNLOCK(&res->buckets[fctx->bucketnum].lock);
1895
1896	fctx_detach(fctxp);
1897}
1898
1899static void
1900resquery_senddone(isc_result_t eresult, isc_region_t *region, void *arg) {
1901	resquery_t *query = (resquery_t *)arg;
1902	resquery_t *copy = query;
1903	fetchctx_t *fctx = NULL;
1904
1905	QTRACE("senddone");
1906
1907	UNUSED(region);
1908
1909	fctx = query->fctx;
1910
1911	if (RESQUERY_CANCELED(query)) {
1912		goto detach;
1913	}
1914
1915	/*
1916	 * See the note in resquery_connected() about reference
1917	 * counting on error conditions.
1918	 */
1919	switch (eresult) {
1920	case ISC_R_SUCCESS:
1921	case ISC_R_CANCELED:
1922	case ISC_R_SHUTTINGDOWN:
1923		break;
1924
1925	case ISC_R_HOSTUNREACH:
1926	case ISC_R_NETUNREACH:
1927	case ISC_R_NOPERM:
1928	case ISC_R_ADDRNOTAVAIL:
1929	case ISC_R_CONNREFUSED:
1930		/* No route to remote. */
1931		FCTXTRACE3("query canceled in resquery_senddone(): "
1932			   "no route to host; no response",
1933			   eresult);
1934		add_bad(fctx, query->rmessage, query->addrinfo, eresult,
1935			badns_unreachable);
1936		fctx_cancelquery(&copy, NULL, true, false);
1937		FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
1938		fctx_try(fctx, true, false);
1939		break;
1940
1941	default:
1942		FCTXTRACE3("query canceled in resquery_senddone() "
1943			   "due to unexpected result; responding",
1944			   eresult);
1945		fctx_cancelquery(&copy, NULL, false, false);
1946		fctx_done_detach(&fctx, eresult);
1947		break;
1948	}
1949
1950detach:
1951	resquery_detach(&query);
1952}
1953
1954static isc_result_t
1955fctx_addopt(dns_message_t *message, unsigned int version, uint16_t udpsize,
1956	    dns_ednsopt_t *ednsopts, size_t count) {
1957	dns_rdataset_t *rdataset = NULL;
1958	isc_result_t result;
1959
1960	result = dns_message_buildopt(message, &rdataset, version, udpsize,
1961				      DNS_MESSAGEEXTFLAG_DO, ednsopts, count);
1962	if (result != ISC_R_SUCCESS) {
1963		return (result);
1964	}
1965	return (dns_message_setopt(message, rdataset));
1966}
1967
1968static void
1969fctx_setretryinterval(fetchctx_t *fctx, unsigned int rtt) {
1970	unsigned int seconds, us;
1971	uint64_t limit;
1972	isc_time_t now;
1973
1974	/*
1975	 * Has this fetch already expired?
1976	 */
1977	isc_time_now(&now);
1978	limit = isc_time_microdiff(&fctx->expires, &now);
1979	if (limit < US_PER_MS) {
1980		FCTXTRACE("fetch already expired");
1981		isc_interval_set(&fctx->interval, 0, 0);
1982		return;
1983	}
1984
1985	us = fctx->res->retryinterval * US_PER_MS;
1986
1987	/*
1988	 * Exponential backoff after the first few tries.
1989	 */
1990	if (fctx->restarts > fctx->res->nonbackofftries) {
1991		int shift = fctx->restarts - fctx->res->nonbackofftries;
1992		if (shift > 6) {
1993			shift = 6;
1994		}
1995		us <<= shift;
1996	}
1997
1998	/*
1999	 * Add a fudge factor to the expected rtt based on the current
2000	 * estimate.
2001	 */
2002	if (rtt < 50000) {
2003		rtt += 50000;
2004	} else if (rtt < 100000) {
2005		rtt += 100000;
2006	} else {
2007		rtt += 200000;
2008	}
2009
2010	/*
2011	 * Always wait for at least the expected rtt.
2012	 */
2013	if (us < rtt) {
2014		us = rtt;
2015	}
2016
2017	/*
2018	 * But don't wait past the stale timeout (if any), the final
2019	 * expiration of the fetch, or for more than 10 seconds total.
2020	 */
2021	if ((fctx->options & DNS_FETCHOPT_TRYSTALE_ONTIMEOUT) != 0) {
2022		uint64_t stale = isc_time_microdiff(&fctx->expires_try_stale,
2023						    &now);
2024		if (stale >= US_PER_MS && us > stale) {
2025			FCTXTRACE("setting stale timeout");
2026			us = stale;
2027		}
2028	}
2029	if (us > limit) {
2030		us = limit;
2031	}
2032	if (us > MAX_SINGLE_QUERY_TIMEOUT_US) {
2033		us = MAX_SINGLE_QUERY_TIMEOUT_US;
2034	}
2035
2036	seconds = us / US_PER_SEC;
2037	us -= seconds * US_PER_SEC;
2038	isc_interval_set(&fctx->interval, seconds, us * NS_PER_US);
2039	isc_time_nowplusinterval(&fctx->next_timeout, &fctx->interval);
2040}
2041
2042static isc_result_t
2043resquery_timeout(resquery_t *query) {
2044	fetchctx_t *fctx = query->fctx;
2045	dns_fetchevent_t *event = NULL, *next = NULL;
2046	uint64_t timeleft;
2047	isc_time_t now;
2048
2049	FCTXTRACE("timeout");
2050
2051	/*
2052	 * If not configured for serve-stale, do nothing.
2053	 */
2054	if ((fctx->options & DNS_FETCHOPT_TRYSTALE_ONTIMEOUT) == 0) {
2055		return (ISC_R_SUCCESS);
2056	}
2057
2058	/*
2059	 * If we haven't reached the serve-stale timeout, do nothing.
2060	 * (Note that netmgr timeouts have millisecond accuracy, so
2061	 * anything less than 1000 microseconds is close enough to zero.)
2062	 */
2063	isc_time_now(&now);
2064	timeleft = isc_time_microdiff(&fctx->expires_try_stale, &now);
2065	if (timeleft >= US_PER_MS) {
2066		return (ISC_R_SUCCESS);
2067	}
2068
2069	/*
2070	 * Send the TRYSTALE events.
2071	 */
2072	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
2073	for (event = ISC_LIST_HEAD(fctx->events); event != NULL; event = next) {
2074		isc_task_t *sender = NULL;
2075
2076		next = ISC_LIST_NEXT(event, ev_link);
2077		if (event->ev_type != DNS_EVENT_TRYSTALE) {
2078			continue;
2079		}
2080
2081		ISC_LIST_UNLINK(fctx->events, event, ev_link);
2082		sender = event->ev_sender;
2083		event->vresult = ISC_R_TIMEDOUT;
2084		event->result = ISC_R_TIMEDOUT;
2085		isc_task_sendanddetach(&sender, ISC_EVENT_PTR(&event));
2086	}
2087	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
2088
2089	/*
2090	 * If the next timeout is more than 1ms in the future,
2091	 * resume waiting.
2092	 */
2093	timeleft = isc_time_microdiff(&fctx->next_timeout, &now);
2094	if (timeleft >= US_PER_MS) {
2095		dns_dispatch_resume(query->dispentry, (timeleft / US_PER_MS));
2096		return (ISC_R_COMPLETE);
2097	}
2098
2099	return (ISC_R_SUCCESS);
2100}
2101
/*
 * Start a new query for 'fctx' to the server described by 'addrinfo'.
 *
 * The retry interval is derived from the server's smoothed RTT (plus one
 * second when 'options' requests TCP, and never less than one second for
 * a forwarder).  A resquery_t is then allocated and bound to a dispatch:
 * a new TCP dispatch, a new UDP dispatch when the matching peer has a
 * configured query source, or otherwise the resolver's shared IPv4/IPv6
 * dispatch.  The query is linked into fctx->queries, registered with the
 * dispatch and the connect is started.
 *
 * Returns:
 *	ISC_R_SUCCESS		the connect was started
 *	ISC_R_TIMEDOUT		the computed retry interval is zero
 *	ISC_R_QUOTA		UDP query, and the ADB entry is over quota
 *	ISC_R_NOTIMPLEMENTED	unsupported protocol family
 *	anything else		failure reported by a dispatch call
 *
 * On every failure after the allocation the query is fully torn down
 * (see the cleanup labels at the end) before returning.
 */
static isc_result_t
fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
	   unsigned int options) {
	isc_result_t result;
	dns_resolver_t *res = NULL;
	resquery_t *query = NULL;
	isc_sockaddr_t addr;
	bool have_addr = false;
	unsigned int srtt;

	FCTXTRACE("query");

	res = fctx->res;

	srtt = addrinfo->srtt;

	/*
	 * Allow an additional second for the kernel to resend the SYN
	 * (or SYN without ECN in the case of stupid firewalls blocking
	 * ECN negotiation) over the current RTT estimate.
	 *
	 * Note that this looks at the caller's 'options' only; TCP that
	 * is forced later by the peer configuration does not get the
	 * extra second.
	 */
	if ((options & DNS_FETCHOPT_TCP) != 0) {
		srtt += US_PER_SEC;
	}

	/*
	 * A forwarder needs to make multiple queries. Give it at least
	 * a second to do these in.
	 */
	if (ISFORWARDER(addrinfo) && srtt < US_PER_SEC) {
		srtt = US_PER_SEC;
	}

	/* A zero interval means there is no time left for this fetch. */
	fctx_setretryinterval(fctx, srtt);
	if (isc_interval_iszero(&fctx->interval)) {
		FCTXTRACE("fetch expired");
		return (ISC_R_TIMEDOUT);
	}

	INSIST(ISC_LIST_EMPTY(fctx->validators));

	/* All members not named here start out zeroed. */
	query = isc_mem_get(fctx->mctx, sizeof(*query));
	*query = (resquery_t){
		.mctx = fctx->mctx,
		.options = options,
		.addrinfo = addrinfo,
		.dispatchmgr = res->dispatchmgr,
		.link = ISC_LINK_INITIALIZER,
	};

	isc_refcount_init(&query->references, 1);

	/*
	 * Note that the caller MUST guarantee that 'addrinfo' will
	 * remain valid until this query is canceled.
	 */

	dns_message_create(fctx->mctx, DNS_MESSAGE_INTENTPARSE,
			   &query->rmessage);
	TIME_NOW(&query->start);

	/*
	 * Consult the per-server (peer) configuration, if any: it may
	 * supply a query source address and may force the use of TCP.
	 */
	if (res->view->peers != NULL) {
		dns_peer_t *peer = NULL;
		isc_netaddr_t dstip;
		bool usetcp = false;
		isc_netaddr_fromsockaddr(&dstip, &addrinfo->sockaddr);
		result = dns_peerlist_peerbyaddr(res->view->peers, &dstip,
						 &peer);
		if (result == ISC_R_SUCCESS) {
			result = dns_peer_getquerysource(peer, &addr);
			if (result == ISC_R_SUCCESS) {
				have_addr = true;
			}
			result = dns_peer_getforcetcp(peer, &usetcp);
			if (result == ISC_R_SUCCESS && usetcp) {
				query->options |= DNS_FETCHOPT_TCP;
			}
		}
	}

	/*
	 * If this is a TCP query, then we need to make a dispatch for
	 * it here.  Otherwise we use a UDP dispatch bound to the peer's
	 * query source, or the resolver's shared dispatch.
	 */
	if ((query->options & DNS_FETCHOPT_TCP) != 0) {
		int pf;

		pf = isc_sockaddr_pf(&addrinfo->sockaddr);
		if (!have_addr) {
			/*
			 * No peer-specific source: borrow the local
			 * address of the first dispatch of the matching
			 * address family.
			 */
			switch (pf) {
			case PF_INET:
				result = dns_dispatch_getlocaladdress(
					res->dispatches4->dispatches[0], &addr);
				break;
			case PF_INET6:
				result = dns_dispatch_getlocaladdress(
					res->dispatches6->dispatches[0], &addr);
				break;
			default:
				result = ISC_R_NOTIMPLEMENTED;
				break;
			}
			if (result != ISC_R_SUCCESS) {
				goto cleanup_query;
			}
		}
		/* Port 0: do not pin the TCP source port. */
		isc_sockaddr_setport(&addr, 0);

		result = dns_dispatch_createtcp(res->dispatchmgr, &addr,
						&addrinfo->sockaddr,
						&query->dispatch);
		if (result != ISC_R_SUCCESS) {
			goto cleanup_query;
		}

		FCTXTRACE("connecting via TCP");
	} else {
		if (have_addr) {
			result = dns_dispatch_createudp(res->dispatchmgr, &addr,
							&query->dispatch);
			if (result != ISC_R_SUCCESS) {
				goto cleanup_query;
			}
		} else {
			switch (isc_sockaddr_pf(&addrinfo->sockaddr)) {
			case PF_INET:
				dns_dispatch_attach(
					dns_resolver_dispatchv4(res),
					&query->dispatch);
				break;
			case PF_INET6:
				dns_dispatch_attach(
					dns_resolver_dispatchv6(res),
					&query->dispatch);
				break;
			default:
				result = ISC_R_NOTIMPLEMENTED;
				goto cleanup_query;
			}
		}

		/*
		 * We should always have a valid dispatcher here.  If we
		 * don't support a protocol family, then its dispatcher
		 * will be NULL, but we shouldn't be finding addresses
		 * for protocol types we don't support, so the
		 * dispatcher we found should never be NULL.
		 */
		INSIST(query->dispatch != NULL);
	}

	/* From here on the query holds a reference to the fetch context. */
	fctx_attach(fctx, &query->fctx);
	query->magic = QUERY_MAGIC;

	if ((query->options & DNS_FETCHOPT_TCP) == 0) {
		if (dns_adbentry_overquota(addrinfo->entry)) {
			result = ISC_R_QUOTA;
			goto cleanup_dispatch;
		}

		/* Inform the ADB that we're starting a UDP fetch */
		dns_adb_beginudpfetch(fctx->adb, addrinfo);
	}

	LOCK(&res->buckets[fctx->bucketnum].lock);
	ISC_LIST_APPEND(fctx->queries, query, link);
	atomic_fetch_add_relaxed(&fctx->nqueries, 1);
	UNLOCK(&res->buckets[fctx->bucketnum].lock);

	/* Set up the dispatch and set the query ID */
	result = dns_dispatch_add(
		query->dispatch, 0, isc_interval_ms(&fctx->interval),
		&query->addrinfo->sockaddr, resquery_connected,
		resquery_senddone, resquery_response, query, &query->id,
		&query->dispentry);
	if (result != ISC_R_SUCCESS) {
		goto cleanup_udpfetch;
	}

	/*
	 * Connect the socket.  The extra reference taken here is the
	 * one released by resquery_connected().
	 */
	resquery_attach(query, &(resquery_t *){ NULL });
	result = dns_dispatch_connect(query->dispentry);

	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	return (result);

	/*
	 * Error exits.  Each label undoes the steps completed before
	 * the corresponding failure point and falls through to the
	 * next one.
	 */
cleanup_udpfetch:
	if (!RESQUERY_CANCELED(query)) {
		if ((query->options & DNS_FETCHOPT_TCP) == 0) {
			/* Inform the ADB that we're ending a UDP fetch */
			dns_adb_endudpfetch(fctx->adb, addrinfo);
		}
	}

	LOCK(&res->buckets[fctx->bucketnum].lock);
	if (ISC_LINK_LINKED(query, link)) {
		atomic_fetch_sub_release(&fctx->nqueries, 1);
		ISC_LIST_UNLINK(fctx->queries, query, link);
	}
	UNLOCK(&res->buckets[fctx->bucketnum].lock);

cleanup_dispatch:
	fctx_detach(&query->fctx);

	if (query->dispatch != NULL) {
		dns_dispatch_detach(&query->dispatch);
	}

cleanup_query:
	query->magic = 0;
	dns_message_detach(&query->rmessage);
	isc_mem_put(fctx->mctx, query, sizeof(*query));

	return (result);
}
2319
2320static bool
2321bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
2322	isc_sockaddr_t *sa;
2323
2324	for (sa = ISC_LIST_HEAD(fctx->bad_edns); sa != NULL;
2325	     sa = ISC_LIST_NEXT(sa, link))
2326	{
2327		if (isc_sockaddr_equal(sa, address)) {
2328			return (true);
2329		}
2330	}
2331
2332	return (false);
2333}
2334
2335static void
2336add_bad_edns(fetchctx_t *fctx, isc_sockaddr_t *address) {
2337	isc_sockaddr_t *sa;
2338
2339#ifdef ENABLE_AFL
2340	if (dns_fuzzing_resolver) {
2341		return;
2342	}
2343#endif /* ifdef ENABLE_AFL */
2344	if (bad_edns(fctx, address)) {
2345		return;
2346	}
2347
2348	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
2349
2350	*sa = *address;
2351	ISC_LIST_INITANDAPPEND(fctx->bad_edns, sa, link);
2352}
2353
2354static struct tried *
2355triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
2356	struct tried *tried;
2357
2358	for (tried = ISC_LIST_HEAD(fctx->edns); tried != NULL;
2359	     tried = ISC_LIST_NEXT(tried, link))
2360	{
2361		if (isc_sockaddr_equal(&tried->addr, address)) {
2362			return (tried);
2363		}
2364	}
2365
2366	return (NULL);
2367}
2368
2369static void
2370add_triededns(fetchctx_t *fctx, isc_sockaddr_t *address) {
2371	struct tried *tried;
2372
2373	tried = triededns(fctx, address);
2374	if (tried != NULL) {
2375		tried->count++;
2376		return;
2377	}
2378
2379	tried = isc_mem_get(fctx->mctx, sizeof(*tried));
2380
2381	tried->addr = *address;
2382	tried->count = 1;
2383	ISC_LIST_INITANDAPPEND(fctx->edns, tried, link);
2384}
2385
2386static size_t
2387addr2buf(void *buf, const size_t bufsize, const isc_sockaddr_t *sockaddr) {
2388	isc_netaddr_t netaddr;
2389	isc_netaddr_fromsockaddr(&netaddr, sockaddr);
2390	switch (netaddr.family) {
2391	case AF_INET:
2392		INSIST(bufsize >= 4);
2393		memmove(buf, &netaddr.type.in, 4);
2394		return (4);
2395	case AF_INET6:
2396		INSIST(bufsize >= 16);
2397		memmove(buf, &netaddr.type.in6, 16);
2398		return (16);
2399	default:
2400		UNREACHABLE();
2401	}
2402	return (0);
2403}
2404
2405static size_t
2406add_serveraddr(uint8_t *buf, const size_t bufsize, const resquery_t *query) {
2407	return (addr2buf(buf, bufsize, &query->addrinfo->sockaddr));
2408}
2409
/*
 * DNS COOKIE option sizes.
 *
 * Client cookie is 8 octets.
 * Server cookie is [8..32] octets.
 *
 * COOKIE_BUFFER_SIZE therefore has room for a client cookie plus a
 * maximum-size server cookie.
 */
#define CLIENT_COOKIE_SIZE 8U
#define COOKIE_BUFFER_SIZE (8U + 32U)
2416
2417static void
2418compute_cc(const resquery_t *query, uint8_t *cookie, const size_t len) {
2419	INSIST(len >= CLIENT_COOKIE_SIZE);
2420	STATIC_ASSERT(sizeof(query->fctx->res->view->secret) >=
2421			      ISC_SIPHASH24_KEY_LENGTH,
2422		      "The view->secret size can't fit SipHash 2-4 key "
2423		      "length");
2424
2425	uint8_t buf[16] ISC_NONSTRING = { 0 };
2426	size_t buflen = add_serveraddr(buf, sizeof(buf), query);
2427
2428	uint8_t digest[ISC_SIPHASH24_TAG_LENGTH] ISC_NONSTRING = { 0 };
2429	isc_siphash24(query->fctx->res->view->secret, buf, buflen, digest);
2430	memmove(cookie, digest, CLIENT_COOKIE_SIZE);
2431}
2432
2433static isc_result_t
2434issecuredomain(dns_view_t *view, const dns_name_t *name, dns_rdatatype_t type,
2435	       isc_stdtime_t now, bool checknta, bool *ntap, bool *issecure) {
2436	dns_name_t suffix;
2437	unsigned int labels;
2438
2439	/*
2440	 * For DS variants we need to check fom the parent domain,
2441	 * since there may be a negative trust anchor for the name,
2442	 * while the enclosing domain where the DS record lives is
2443	 * under a secure entry point.
2444	 */
2445	labels = dns_name_countlabels(name);
2446	if (dns_rdatatype_atparent(type) && labels > 1) {
2447		dns_name_init(&suffix, NULL);
2448		dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
2449		name = &suffix;
2450	}
2451
2452	return (dns_view_issecuredomain(view, name, now, checknta, ntap,
2453					issecure));
2454}
2455
2456static isc_result_t
2457resquery_send(resquery_t *query) {
2458	isc_result_t result;
2459	fetchctx_t *fctx = query->fctx;
2460	dns_resolver_t *res = fctx->res;
2461	isc_buffer_t buffer;
2462	dns_name_t *qname = NULL;
2463	dns_rdataset_t *qrdataset = NULL;
2464	isc_region_t r;
2465	isc_netaddr_t ipaddr;
2466	dns_tsigkey_t *tsigkey = NULL;
2467	dns_peer_t *peer = NULL;
2468	dns_compress_t cctx;
2469	bool cleanup_cctx = false;
2470	bool useedns;
2471	bool secure_domain;
2472	bool tcp = ((query->options & DNS_FETCHOPT_TCP) != 0);
2473	dns_ednsopt_t ednsopts[DNS_EDNSOPTIONS];
2474	unsigned ednsopt = 0;
2475	uint16_t hint = 0, udpsize = 0; /* No EDNS */
2476#ifdef HAVE_DNSTAP
2477	isc_sockaddr_t localaddr, *la = NULL;
2478	unsigned char zone[DNS_NAME_MAXWIRE];
2479	dns_dtmsgtype_t dtmsgtype;
2480	isc_region_t zr;
2481	isc_buffer_t zb;
2482#endif /* HAVE_DNSTAP */
2483
2484	QTRACE("send");
2485
2486	if (atomic_load_acquire(&res->exiting)) {
2487		FCTXTRACE("resquery_send: resolver shutting down");
2488		return (ISC_R_SHUTTINGDOWN);
2489	}
2490
2491	result = dns_message_gettempname(fctx->qmessage, &qname);
2492	if (result != ISC_R_SUCCESS) {
2493		goto cleanup_temps;
2494	}
2495	result = dns_message_gettemprdataset(fctx->qmessage, &qrdataset);
2496	if (result != ISC_R_SUCCESS) {
2497		goto cleanup_temps;
2498	}
2499
2500	fctx->qmessage->opcode = dns_opcode_query;
2501
2502	/*
2503	 * Set up question.
2504	 */
2505	dns_name_clone(fctx->name, qname);
2506	dns_rdataset_makequestion(qrdataset, res->rdclass, fctx->type);
2507	ISC_LIST_APPEND(qname->list, qrdataset, link);
2508	dns_message_addname(fctx->qmessage, qname, DNS_SECTION_QUESTION);
2509	qname = NULL;
2510	qrdataset = NULL;
2511
2512	/*
2513	 * Set RD if the client has requested that we do a recursive
2514	 * query, or if we're sending to a forwarder.
2515	 */
2516	if ((query->options & DNS_FETCHOPT_RECURSIVE) != 0 ||
2517	    ISFORWARDER(query->addrinfo))
2518	{
2519		fctx->qmessage->flags |= DNS_MESSAGEFLAG_RD;
2520	}
2521
2522	/*
2523	 * Set CD if the client says not to validate, or if the
2524	 * question is under a secure entry point and this is a
2525	 * recursive/forward query -- unless the client said not to.
2526	 */
2527	if ((query->options & DNS_FETCHOPT_NOCDFLAG) != 0) {
2528		/* Do nothing */
2529	} else if ((query->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
2530		fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
2531	} else if (res->view->enablevalidation &&
2532		   ((fctx->qmessage->flags & DNS_MESSAGEFLAG_RD) != 0))
2533	{
2534		bool checknta = ((query->options & DNS_FETCHOPT_NONTA) == 0);
2535		bool ntacovered = false;
2536		result = issecuredomain(res->view, fctx->name, fctx->type,
2537					isc_time_seconds(&query->start),
2538					checknta, &ntacovered, &secure_domain);
2539		if (result != ISC_R_SUCCESS) {
2540			secure_domain = false;
2541		}
2542		if (secure_domain ||
2543		    (ISFORWARDER(query->addrinfo) && ntacovered))
2544		{
2545			fctx->qmessage->flags |= DNS_MESSAGEFLAG_CD;
2546		}
2547	}
2548
2549	/*
2550	 * We don't have to set opcode because it defaults to query.
2551	 */
2552	fctx->qmessage->id = query->id;
2553
2554	/*
2555	 * Convert the question to wire format.
2556	 */
2557	result = dns_compress_init(&cctx, -1, fctx->res->mctx);
2558	if (result != ISC_R_SUCCESS) {
2559		goto cleanup_message;
2560	}
2561	cleanup_cctx = true;
2562
2563	isc_buffer_init(&buffer, query->data, sizeof(query->data));
2564	result = dns_message_renderbegin(fctx->qmessage, &cctx, &buffer);
2565	if (result != ISC_R_SUCCESS) {
2566		goto cleanup_message;
2567	}
2568
2569	result = dns_message_rendersection(fctx->qmessage, DNS_SECTION_QUESTION,
2570					   0);
2571	if (result != ISC_R_SUCCESS) {
2572		goto cleanup_message;
2573	}
2574
2575	isc_netaddr_fromsockaddr(&ipaddr, &query->addrinfo->sockaddr);
2576	(void)dns_peerlist_peerbyaddr(fctx->res->view->peers, &ipaddr, &peer);
2577
2578	/*
2579	 * The ADB does not know about servers with "edns no".  Check
2580	 * this, and then inform the ADB for future use.
2581	 */
2582	if ((query->addrinfo->flags & FCTX_ADDRINFO_NOEDNS0) == 0 &&
2583	    peer != NULL &&
2584	    dns_peer_getsupportedns(peer, &useedns) == ISC_R_SUCCESS &&
2585	    !useedns)
2586	{
2587		query->options |= DNS_FETCHOPT_NOEDNS0;
2588		dns_adb_changeflags(fctx->adb, query->addrinfo,
2589				    FCTX_ADDRINFO_NOEDNS0,
2590				    FCTX_ADDRINFO_NOEDNS0);
2591	}
2592
2593	/* Sync NOEDNS0 flag in addrinfo->flags and options now. */
2594	if ((query->addrinfo->flags & FCTX_ADDRINFO_NOEDNS0) != 0) {
2595		query->options |= DNS_FETCHOPT_NOEDNS0;
2596	}
2597
2598	if (fctx->timeout && (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
2599		isc_sockaddr_t *sockaddr = &query->addrinfo->sockaddr;
2600		struct tried *tried;
2601
2602		/*
2603		 * If this is the first timeout for this server in this
2604		 * fetch context, try setting EDNS UDP buffer size to
2605		 * the largest UDP response size we have seen from this
2606		 * server so far.
2607		 *
2608		 * If this server has already timed out twice or more in
2609		 * this fetch context, force TCP.
2610		 */
2611		if ((tried = triededns(fctx, sockaddr)) != NULL) {
2612			if (tried->count == 1U) {
2613				hint = dns_adb_getudpsize(fctx->adb,
2614							  query->addrinfo);
2615			} else if (tried->count >= 2U) {
2616				if ((query->options & DNS_FETCHOPT_TCP) == 0) {
2617					/*
2618					 * Inform the ADB that we're ending a
2619					 * UDP fetch, and turn the query into
2620					 * a TCP query.
2621					 */
2622					dns_adb_endudpfetch(fctx->adb,
2623							    query->addrinfo);
2624					query->options |= DNS_FETCHOPT_TCP;
2625				}
2626			}
2627		}
2628	}
2629	fctx->timeout = false;
2630
2631	/*
2632	 * Use EDNS0, unless the caller doesn't want it, or we know that
2633	 * the remote server doesn't like it.
2634	 */
2635	if ((query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
2636		if ((query->addrinfo->flags & FCTX_ADDRINFO_NOEDNS0) == 0) {
2637			uint16_t peerudpsize = 0;
2638			unsigned int version = DNS_EDNS_VERSION;
2639			unsigned int flags = query->addrinfo->flags;
2640			bool reqnsid = res->view->requestnsid;
2641			bool sendcookie = res->view->sendcookie;
2642			bool tcpkeepalive = false;
2643			unsigned char cookie[COOKIE_BUFFER_SIZE];
2644			uint16_t padding = 0;
2645
2646			/*
2647			 * Set the default UDP size to what was
2648			 * configured as 'edns-buffer-size'
2649			 */
2650			udpsize = res->udpsize;
2651
2652			/*
2653			 * This server timed out for the first time in
2654			 * this fetch context and we received a response
2655			 * from it before (either in this fetch context
2656			 * or in a different one).  Set 'udpsize' to the
2657			 * size of the largest UDP response we have
2658			 * received from this server so far.
2659			 */
2660			if (hint != 0U) {
2661				udpsize = hint;
2662			}
2663
2664			/*
2665			 * If a fixed EDNS UDP buffer size is configured
2666			 * for this server, make sure we obey that.
2667			 */
2668			if (peer != NULL) {
2669				(void)dns_peer_getudpsize(peer, &peerudpsize);
2670				if (peerudpsize != 0) {
2671					udpsize = peerudpsize;
2672				}
2673			}
2674
2675			if ((flags & DNS_FETCHOPT_EDNSVERSIONSET) != 0) {
2676				version = flags & DNS_FETCHOPT_EDNSVERSIONMASK;
2677				version >>= DNS_FETCHOPT_EDNSVERSIONSHIFT;
2678			}
2679
2680			/* Request NSID/COOKIE/VERSION for current peer?
2681			 */
2682			if (peer != NULL) {
2683				uint8_t ednsversion;
2684				(void)dns_peer_getrequestnsid(peer, &reqnsid);
2685				(void)dns_peer_getsendcookie(peer, &sendcookie);
2686				result = dns_peer_getednsversion(peer,
2687								 &ednsversion);
2688				if (result == ISC_R_SUCCESS &&
2689				    ednsversion < version)
2690				{
2691					version = ednsversion;
2692				}
2693			}
2694			if (NOCOOKIE(query->addrinfo)) {
2695				sendcookie = false;
2696			}
2697			if (reqnsid) {
2698				INSIST(ednsopt < DNS_EDNSOPTIONS);
2699				ednsopts[ednsopt].code = DNS_OPT_NSID;
2700				ednsopts[ednsopt].length = 0;
2701				ednsopts[ednsopt].value = NULL;
2702				ednsopt++;
2703			}
2704			if (sendcookie) {
2705				INSIST(ednsopt < DNS_EDNSOPTIONS);
2706				ednsopts[ednsopt].code = DNS_OPT_COOKIE;
2707				ednsopts[ednsopt].length =
2708					(uint16_t)dns_adb_getcookie(
2709						fctx->adb, query->addrinfo,
2710						cookie, sizeof(cookie));
2711				if (ednsopts[ednsopt].length != 0) {
2712					ednsopts[ednsopt].value = cookie;
2713					inc_stats(
2714						fctx->res,
2715						dns_resstatscounter_cookieout);
2716				} else {
2717					compute_cc(query, cookie,
2718						   CLIENT_COOKIE_SIZE);
2719					ednsopts[ednsopt].value = cookie;
2720					ednsopts[ednsopt].length =
2721						CLIENT_COOKIE_SIZE;
2722					inc_stats(
2723						fctx->res,
2724						dns_resstatscounter_cookienew);
2725				}
2726				ednsopt++;
2727			}
2728
2729			/* Add TCP keepalive option if appropriate */
2730			if ((peer != NULL) && tcp) {
2731				(void)dns_peer_gettcpkeepalive(peer,
2732							       &tcpkeepalive);
2733			}
2734			if (tcpkeepalive) {
2735				INSIST(ednsopt < DNS_EDNSOPTIONS);
2736				ednsopts[ednsopt].code = DNS_OPT_TCP_KEEPALIVE;
2737				ednsopts[ednsopt].length = 0;
2738				ednsopts[ednsopt].value = NULL;
2739				ednsopt++;
2740			}
2741
2742			/* Add PAD for current peer? Require TCP for now
2743			 */
2744			if ((peer != NULL) && tcp) {
2745				(void)dns_peer_getpadding(peer, &padding);
2746			}
2747			if (padding != 0) {
2748				INSIST(ednsopt < DNS_EDNSOPTIONS);
2749				ednsopts[ednsopt].code = DNS_OPT_PAD;
2750				ednsopts[ednsopt].length = 0;
2751				ednsopt++;
2752				dns_message_setpadding(fctx->qmessage, padding);
2753			}
2754
2755			query->ednsversion = version;
2756			result = fctx_addopt(fctx->qmessage, version, udpsize,
2757					     ednsopts, ednsopt);
2758			if (reqnsid && result == ISC_R_SUCCESS) {
2759				query->options |= DNS_FETCHOPT_WANTNSID;
2760			} else if (result != ISC_R_SUCCESS) {
2761				/*
2762				 * We couldn't add the OPT, but we'll
2763				 * press on. We're not using EDNS0, so
2764				 * set the NOEDNS0 bit.
2765				 */
2766				query->options |= DNS_FETCHOPT_NOEDNS0;
2767				query->ednsversion = -1;
2768				udpsize = 0;
2769			}
2770		} else {
2771			/*
2772			 * We know this server doesn't like EDNS0, so we
2773			 * won't use it.  Set the NOEDNS0 bit since
2774			 * we're not using EDNS0.
2775			 */
2776			query->options |= DNS_FETCHOPT_NOEDNS0;
2777			query->ednsversion = -1;
2778		}
2779	} else {
2780		query->ednsversion = -1;
2781	}
2782
2783	/*
2784	 * Record the UDP EDNS size chosen.
2785	 */
2786	query->udpsize = udpsize;
2787
2788	/*
2789	 * If we need EDNS0 to do this query and aren't using it, we
2790	 * lose.
2791	 */
2792	if (NEEDEDNS0(fctx) && (query->options & DNS_FETCHOPT_NOEDNS0) != 0) {
2793		result = DNS_R_SERVFAIL;
2794		goto cleanup_message;
2795	}
2796
2797	add_triededns(fctx, &query->addrinfo->sockaddr);
2798
2799	/*
2800	 * Clear CD if EDNS is not in use.
2801	 */
2802	if ((query->options & DNS_FETCHOPT_NOEDNS0) != 0) {
2803		fctx->qmessage->flags &= ~DNS_MESSAGEFLAG_CD;
2804	}
2805
2806	/*
2807	 * Add TSIG record tailored to the current recipient.
2808	 */
2809	result = dns_view_getpeertsig(fctx->res->view, &ipaddr, &tsigkey);
2810	if (result != ISC_R_SUCCESS && result != ISC_R_NOTFOUND) {
2811		goto cleanup_message;
2812	}
2813
2814	if (tsigkey != NULL) {
2815		result = dns_message_settsigkey(fctx->qmessage, tsigkey);
2816		dns_tsigkey_detach(&tsigkey);
2817		if (result != ISC_R_SUCCESS) {
2818			goto cleanup_message;
2819		}
2820	}
2821
2822	result = dns_message_rendersection(fctx->qmessage,
2823					   DNS_SECTION_ADDITIONAL, 0);
2824	if (result != ISC_R_SUCCESS) {
2825		goto cleanup_message;
2826	}
2827
2828	result = dns_message_renderend(fctx->qmessage);
2829	if (result != ISC_R_SUCCESS) {
2830		goto cleanup_message;
2831	}
2832
2833#ifdef HAVE_DNSTAP
2834	memset(&zr, 0, sizeof(zr));
2835	isc_buffer_init(&zb, zone, sizeof(zone));
2836	dns_compress_setmethods(&cctx, DNS_COMPRESS_NONE);
2837	result = dns_name_towire(fctx->domain, &cctx, &zb);
2838	if (result == ISC_R_SUCCESS) {
2839		isc_buffer_usedregion(&zb, &zr);
2840	}
2841#endif /* HAVE_DNSTAP */
2842
2843	dns_compress_invalidate(&cctx);
2844	cleanup_cctx = false;
2845
2846	if (dns_message_gettsigkey(fctx->qmessage) != NULL) {
2847		dns_tsigkey_attach(dns_message_gettsigkey(fctx->qmessage),
2848				   &query->tsigkey);
2849		result = dns_message_getquerytsig(
2850			fctx->qmessage, fctx->res->mctx, &query->tsig);
2851		if (result != ISC_R_SUCCESS) {
2852			goto cleanup_message;
2853		}
2854	}
2855
2856	/*
2857	 * Log the outgoing packet.
2858	 */
2859	dns_message_logfmtpacket(
2860		fctx->qmessage, "sending packet to", &query->addrinfo->sockaddr,
2861		DNS_LOGCATEGORY_RESOLVER, DNS_LOGMODULE_PACKETS,
2862		&dns_master_style_comment, ISC_LOG_DEBUG(11), fctx->res->mctx);
2863
2864	/*
2865	 * We're now done with the query message.
2866	 */
2867	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
2868
2869	isc_buffer_usedregion(&buffer, &r);
2870
2871	resquery_attach(query, &(resquery_t *){ NULL });
2872	dns_dispatch_send(query->dispentry, &r);
2873
2874	QTRACE("sent");
2875
2876#ifdef HAVE_DNSTAP
2877	/*
2878	 * Log the outgoing query via dnstap.
2879	 */
2880	if ((fctx->qmessage->flags & DNS_MESSAGEFLAG_RD) != 0) {
2881		dtmsgtype = DNS_DTTYPE_FQ;
2882	} else {
2883		dtmsgtype = DNS_DTTYPE_RQ;
2884	}
2885
2886	result = dns_dispentry_getlocaladdress(query->dispentry, &localaddr);
2887	if (result == ISC_R_SUCCESS) {
2888		la = &localaddr;
2889	}
2890
2891	dns_dt_send(fctx->res->view, dtmsgtype, la, &query->addrinfo->sockaddr,
2892		    tcp, &zr, &query->start, NULL, &buffer);
2893#endif /* HAVE_DNSTAP */
2894
2895	return (ISC_R_SUCCESS);
2896
2897cleanup_message:
2898	if (cleanup_cctx) {
2899		dns_compress_invalidate(&cctx);
2900	}
2901
2902	dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER);
2903
2904	/*
2905	 * Stop the dispatcher from listening.
2906	 */
2907	dns_dispatch_done(&query->dispentry);
2908
2909cleanup_temps:
2910	if (qname != NULL) {
2911		dns_message_puttempname(fctx->qmessage, &qname);
2912	}
2913	if (qrdataset != NULL) {
2914		dns_message_puttemprdataset(fctx->qmessage, &qrdataset);
2915	}
2916
2917	return (result);
2918}
2919
/*
 * Connect callback for a query; fctx_query() registers it with
 * dns_dispatch_add().
 *
 * 'eresult' is the outcome of the connect and 'arg' is the resquery_t.
 * 'region' is unused.  On success the query is sent with
 * resquery_send(); otherwise the query is canceled and, depending on
 * the error, the fetch is either finished with that result or retried
 * with another server.  In every case the reference taken before
 * dns_dispatch_connect() is released on the way out.
 */
static void
resquery_connected(isc_result_t eresult, isc_region_t *region, void *arg) {
	resquery_t *query = (resquery_t *)arg;
	resquery_t *copy = query;
	isc_result_t result;
	fetchctx_t *fctx = NULL;
	dns_resolver_t *res = NULL;
	int pf;

	REQUIRE(VALID_QUERY(query));

	QTRACE("connected");

	UNUSED(region);

	fctx = query->fctx;
	res = fctx->res;

	/* Already canceled: only the connect reference is left to drop. */
	if (RESQUERY_CANCELED(query)) {
		goto detach;
	}

	/* A resolver that is exiting overrides whatever the connect said. */
	if (atomic_load_acquire(&fctx->res->exiting)) {
		eresult = ISC_R_SHUTTINGDOWN;
	}

	/*
	 * The reference counting of resquery objects is complex:
	 *
	 * 1. attached in fctx_query()
	 * 2. attached prior to dns_dispatch_connect(), detached in
	 *    resquery_connected()
	 * 3. attached prior to dns_dispatch_send(), detached in
	 *    resquery_senddone()
	 * 4. finally detached in fctx_cancelquery()
	 *
	 * On error conditions, it's necessary to call fctx_cancelquery()
	 * from resquery_connected() or _senddone(), detaching twice
	 * within the same function. To make it clear that's what's
	 * happening, we cancel-and-detach 'copy' and detach 'query',
	 * which are both pointing to the same object.
	 */
	switch (eresult) {
	case ISC_R_SUCCESS:
		/*
		 * We are connected. Send the query.
		 */

		result = resquery_send(query);
		if (result != ISC_R_SUCCESS) {
			FCTXTRACE("query canceled: resquery_send() failed; "
				  "responding");

			fctx_cancelquery(&copy, NULL, false, false);
			fctx_done_detach(&fctx, result);
			break;
		}

		fctx->querysent++;

		/* Count the query per address family and per query type. */
		pf = isc_sockaddr_pf(&query->addrinfo->sockaddr);
		if (pf == PF_INET) {
			inc_stats(res, dns_resstatscounter_queryv4);
		} else {
			inc_stats(res, dns_resstatscounter_queryv6);
		}
		if (res->view->resquerystats != NULL) {
			dns_rdatatypestats_increment(res->view->resquerystats,
						     fctx->type);
		}
		break;

	case ISC_R_CANCELED:
	case ISC_R_SHUTTINGDOWN:
		FCTXTRACE3("shutdown in resquery_connected()", eresult);
		fctx_cancelquery(&copy, NULL, true, false);
		fctx_done_detach(&fctx, eresult);
		break;

	case ISC_R_NETUNREACH:
	case ISC_R_HOSTUNREACH:
	case ISC_R_CONNREFUSED:
	case ISC_R_NOPERM:
	case ISC_R_ADDRNOTAVAIL:
	case ISC_R_CONNECTIONRESET:
	case ISC_R_TIMEDOUT:
		/*
		 * Do not query this server again in this fetch context.
		 */
		FCTXTRACE3("query failed in resquery_connected(): "
			   "no response",
			   eresult);
		add_bad(fctx, query->rmessage, query->addrinfo, eresult,
			badns_unreachable);
		fctx_cancelquery(&copy, NULL, true, false);

		/* Retry the fetch rather than finishing it. */
		FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
		fctx_try(fctx, true, false);
		break;

	default:
		FCTXTRACE3("query canceled in resquery_connected() "
			   "due to unexpected result; responding",
			   eresult);

		fctx_cancelquery(&copy, NULL, false, false);
		fctx_done_detach(&fctx, eresult);
		break;
	}

detach:
	/* Drop the reference taken before dns_dispatch_connect(). */
	resquery_detach(&query);
}
3033
3034static void
3035fctx_finddone(isc_task_t *task, isc_event_t *event) {
3036	fetchctx_t *fctx = event->ev_arg;
3037	dns_adbfind_t *find = event->ev_sender;
3038	dns_resolver_t *res;
3039	bool want_try = false;
3040	bool want_done = false;
3041	unsigned int bucketnum;
3042	uint_fast32_t pending;
3043
3044	REQUIRE(VALID_FCTX(fctx));
3045	res = fctx->res;
3046
3047	UNUSED(task);
3048
3049	FCTXTRACE("finddone");
3050
3051	bucketnum = fctx->bucketnum;
3052	LOCK(&res->buckets[bucketnum].lock);
3053
3054	pending = atomic_fetch_sub_release(&fctx->pending, 1);
3055	INSIST(pending > 0);
3056
3057	if (ADDRWAIT(fctx)) {
3058		/*
3059		 * The fetch is waiting for a name to be found.
3060		 */
3061		INSIST(!SHUTTINGDOWN(fctx));
3062		if (event->ev_type == DNS_EVENT_ADBMOREADDRESSES) {
3063			FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
3064			want_try = true;
3065		} else {
3066			fctx->findfail++;
3067			if (atomic_load_acquire(&fctx->pending) == 0) {
3068				/*
3069				 * We've got nothing else to wait for
3070				 * and don't know the answer.  There's
3071				 * nothing to do but fail the fctx.
3072				 */
3073				FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
3074				want_done = true;
3075			}
3076		}
3077	}
3078
3079	isc_event_free(&event);
3080	UNLOCK(&res->buckets[bucketnum].lock);
3081
3082	dns_adb_destroyfind(&find);
3083
3084	if (want_done) {
3085		FCTXTRACE("fetch failed in finddone(); return "
3086			  "ISC_R_FAILURE");
3087
3088		/* Detach the extra reference from findname(). */
3089		fctx_unref(fctx);
3090		fctx_done_detach(&fctx, ISC_R_FAILURE);
3091	} else if (want_try) {
3092		fctx_try(fctx, true, false);
3093		fctx_detach(&fctx);
3094	} else {
3095		fctx_detach(&fctx);
3096	}
3097}
3098
3099static bool
3100bad_server(fetchctx_t *fctx, isc_sockaddr_t *address) {
3101	isc_sockaddr_t *sa;
3102
3103	for (sa = ISC_LIST_HEAD(fctx->bad); sa != NULL;
3104	     sa = ISC_LIST_NEXT(sa, link))
3105	{
3106		if (isc_sockaddr_equal(sa, address)) {
3107			return (true);
3108		}
3109	}
3110
3111	return (false);
3112}
3113
3114static bool
3115mark_bad(fetchctx_t *fctx) {
3116	dns_adbfind_t *curr;
3117	dns_adbaddrinfo_t *addrinfo;
3118	bool all_bad = true;
3119
3120#ifdef ENABLE_AFL
3121	if (dns_fuzzing_resolver) {
3122		return (false);
3123	}
3124#endif /* ifdef ENABLE_AFL */
3125
3126	/*
3127	 * Mark all known bad servers, so we don't try to talk to them
3128	 * again.
3129	 */
3130
3131	/*
3132	 * Mark any bad nameservers.
3133	 */
3134	for (curr = ISC_LIST_HEAD(fctx->finds); curr != NULL;
3135	     curr = ISC_LIST_NEXT(curr, publink))
3136	{
3137		for (addrinfo = ISC_LIST_HEAD(curr->list); addrinfo != NULL;
3138		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
3139		{
3140			if (bad_server(fctx, &addrinfo->sockaddr)) {
3141				addrinfo->flags |= FCTX_ADDRINFO_MARK;
3142			} else {
3143				all_bad = false;
3144			}
3145		}
3146	}
3147
3148	/*
3149	 * Mark any bad forwarders.
3150	 */
3151	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs); addrinfo != NULL;
3152	     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
3153	{
3154		if (bad_server(fctx, &addrinfo->sockaddr)) {
3155			addrinfo->flags |= FCTX_ADDRINFO_MARK;
3156		} else {
3157			all_bad = false;
3158		}
3159	}
3160
3161	/*
3162	 * Mark any bad alternates.
3163	 */
3164	for (curr = ISC_LIST_HEAD(fctx->altfinds); curr != NULL;
3165	     curr = ISC_LIST_NEXT(curr, publink))
3166	{
3167		for (addrinfo = ISC_LIST_HEAD(curr->list); addrinfo != NULL;
3168		     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
3169		{
3170			if (bad_server(fctx, &addrinfo->sockaddr)) {
3171				addrinfo->flags |= FCTX_ADDRINFO_MARK;
3172			} else {
3173				all_bad = false;
3174			}
3175		}
3176	}
3177
3178	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs); addrinfo != NULL;
3179	     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
3180	{
3181		if (bad_server(fctx, &addrinfo->sockaddr)) {
3182			addrinfo->flags |= FCTX_ADDRINFO_MARK;
3183		} else {
3184			all_bad = false;
3185		}
3186	}
3187
3188	return (all_bad);
3189}
3190
3191static void
3192add_bad(fetchctx_t *fctx, dns_message_t *rmessage, dns_adbaddrinfo_t *addrinfo,
3193	isc_result_t reason, badnstype_t badtype) {
3194	char namebuf[DNS_NAME_FORMATSIZE];
3195	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
3196	char classbuf[64];
3197	char typebuf[64];
3198	char code[64];
3199	isc_buffer_t b;
3200	isc_sockaddr_t *sa;
3201	const char *spc = "";
3202	isc_sockaddr_t *address = &addrinfo->sockaddr;
3203
3204#ifdef ENABLE_AFL
3205	if (dns_fuzzing_resolver) {
3206		return;
3207	}
3208#endif /* ifdef ENABLE_AFL */
3209
3210	if (reason == DNS_R_LAME) {
3211		fctx->lamecount++;
3212	} else {
3213		switch (badtype) {
3214		case badns_unreachable:
3215			fctx->neterr++;
3216			break;
3217		case badns_response:
3218			fctx->badresp++;
3219			break;
3220		case badns_validation:
3221			break; /* counted as 'valfail' */
3222		case badns_forwarder:
3223			/*
3224			 * We were called to prevent the given forwarder
3225			 * from being used again for this fetch context.
3226			 */
3227			break;
3228		}
3229	}
3230
3231	if (bad_server(fctx, address)) {
3232		/*
3233		 * We already know this server is bad.
3234		 */
3235		return;
3236	}
3237
3238	FCTXTRACE("add_bad");
3239
3240	sa = isc_mem_get(fctx->mctx, sizeof(*sa));
3241	*sa = *address;
3242	ISC_LIST_INITANDAPPEND(fctx->bad, sa, link);
3243
3244	if (reason == DNS_R_LAME) { /* already logged */
3245		return;
3246	}
3247
3248	if (reason == DNS_R_UNEXPECTEDRCODE &&
3249	    rmessage->rcode == dns_rcode_servfail && ISFORWARDER(addrinfo))
3250	{
3251		return;
3252	}
3253
3254	if (reason == DNS_R_UNEXPECTEDRCODE) {
3255		isc_buffer_init(&b, code, sizeof(code) - 1);
3256		dns_rcode_totext(rmessage->rcode, &b);
3257		code[isc_buffer_usedlength(&b)] = '\0';
3258		spc = " ";
3259	} else if (reason == DNS_R_UNEXPECTEDOPCODE) {
3260		isc_buffer_init(&b, code, sizeof(code) - 1);
3261		dns_opcode_totext((dns_opcode_t)rmessage->opcode, &b);
3262		code[isc_buffer_usedlength(&b)] = '\0';
3263		spc = " ";
3264	} else {
3265		code[0] = '\0';
3266	}
3267	dns_name_format(fctx->name, namebuf, sizeof(namebuf));
3268	dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
3269	dns_rdataclass_format(fctx->res->rdclass, classbuf, sizeof(classbuf));
3270	isc_sockaddr_format(address, addrbuf, sizeof(addrbuf));
3271	isc_log_write(
3272		dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS, DNS_LOGMODULE_RESOLVER,
3273		ISC_LOG_INFO, "%s%s%s resolving '%s/%s/%s': %s", code, spc,
3274		isc_result_totext(reason), namebuf, typebuf, classbuf, addrbuf);
3275}
3276
3277/*
3278 * Sort addrinfo list by RTT.
3279 */
3280static void
3281sort_adbfind(dns_adbfind_t *find, unsigned int bias) {
3282	dns_adbaddrinfo_t *best, *curr;
3283	dns_adbaddrinfolist_t sorted;
3284	unsigned int best_srtt, curr_srtt;
3285
3286	/* Lame N^2 bubble sort. */
3287	ISC_LIST_INIT(sorted);
3288	while (!ISC_LIST_EMPTY(find->list)) {
3289		best = ISC_LIST_HEAD(find->list);
3290		best_srtt = best->srtt;
3291		if (isc_sockaddr_pf(&best->sockaddr) != AF_INET6) {
3292			best_srtt += bias;
3293		}
3294		curr = ISC_LIST_NEXT(best, publink);
3295		while (curr != NULL) {
3296			curr_srtt = curr->srtt;
3297			if (isc_sockaddr_pf(&curr->sockaddr) != AF_INET6) {
3298				curr_srtt += bias;
3299			}
3300			if (curr_srtt < best_srtt) {
3301				best = curr;
3302				best_srtt = curr_srtt;
3303			}
3304			curr = ISC_LIST_NEXT(curr, publink);
3305		}
3306		ISC_LIST_UNLINK(find->list, best, publink);
3307		ISC_LIST_APPEND(sorted, best, publink);
3308	}
3309	find->list = sorted;
3310}
3311
3312/*
3313 * Sort a list of finds by server RTT.
3314 */
3315static void
3316sort_finds(dns_adbfindlist_t *findlist, unsigned int bias) {
3317	dns_adbfind_t *best, *curr;
3318	dns_adbfindlist_t sorted;
3319	dns_adbaddrinfo_t *addrinfo, *bestaddrinfo;
3320	unsigned int best_srtt, curr_srtt;
3321
3322	/* Sort each find's addrinfo list by SRTT. */
3323	for (curr = ISC_LIST_HEAD(*findlist); curr != NULL;
3324	     curr = ISC_LIST_NEXT(curr, publink))
3325	{
3326		sort_adbfind(curr, bias);
3327	}
3328
3329	/* Lame N^2 bubble sort. */
3330	ISC_LIST_INIT(sorted);
3331	while (!ISC_LIST_EMPTY(*findlist)) {
3332		best = ISC_LIST_HEAD(*findlist);
3333		bestaddrinfo = ISC_LIST_HEAD(best->list);
3334		INSIST(bestaddrinfo != NULL);
3335		best_srtt = bestaddrinfo->srtt;
3336		if (isc_sockaddr_pf(&bestaddrinfo->sockaddr) != AF_INET6) {
3337			best_srtt += bias;
3338		}
3339		curr = ISC_LIST_NEXT(best, publink);
3340		while (curr != NULL) {
3341			addrinfo = ISC_LIST_HEAD(curr->list);
3342			INSIST(addrinfo != NULL);
3343			curr_srtt = addrinfo->srtt;
3344			if (isc_sockaddr_pf(&addrinfo->sockaddr) != AF_INET6) {
3345				curr_srtt += bias;
3346			}
3347			if (curr_srtt < best_srtt) {
3348				best = curr;
3349				best_srtt = curr_srtt;
3350			}
3351			curr = ISC_LIST_NEXT(curr, publink);
3352		}
3353		ISC_LIST_UNLINK(*findlist, best, publink);
3354		ISC_LIST_APPEND(sorted, best, publink);
3355	}
3356	*findlist = sorted;
3357}
3358
3359/*
3360 * Return true iff the ADB find has a pending fetch for 'type'.  This is
3361 * used to find out whether we're in a loop, where a fetch is waiting for a
3362 * find which is waiting for that same fetch.
3363 *
3364 * Note: This could be done with either an equivalence check (e.g.,
3365 * query_pending == DNS_ADBFIND_INET) or with a bit check, as below.  If
3366 * we checked for equivalence, that would mean we could only detect a loop
3367 * when there is exactly one pending fetch, and we're it. If there were
3368 * pending fetches for *both* address families, then a loop would be
3369 * undetected.
3370 *
3371 * However, using a bit check means that in theory, an ADB find might be
3372 * aborted that could have succeeded, if the other fetch had returned an
3373 * answer.
3374 *
3375 * Since there's a good chance the server is broken and won't answer either
3376 * query, and since an ADB find with two pending fetches is a very rare
 * occurrence anyway, we regard this theoretical SERVFAIL as the lesser
3378 * evil.
3379 */
3380static bool
3381waiting_for(dns_adbfind_t *find, dns_rdatatype_t type) {
3382	switch (type) {
3383	case dns_rdatatype_a:
3384		return ((find->query_pending & DNS_ADBFIND_INET) != 0);
3385	case dns_rdatatype_aaaa:
3386		return ((find->query_pending & DNS_ADBFIND_INET6) != 0);
3387	default:
3388		return (false);
3389	}
3390}
3391
3392static void
3393findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port,
3394	 unsigned int options, unsigned int flags, isc_stdtime_t now,
3395	 bool *overquota, bool *need_alternate, unsigned int *no_addresses) {
3396	dns_adbaddrinfo_t *ai = NULL;
3397	dns_adbfind_t *find = NULL;
3398	dns_resolver_t *res = fctx->res;
3399	bool unshared = ((fctx->options & DNS_FETCHOPT_UNSHARED) != 0);
3400	isc_result_t result;
3401
3402	FCTXTRACE("FINDNAME");
3403
3404	/*
3405	 * If this name is a subdomain of the query domain, tell
3406	 * the ADB to start looking using zone/hint data. This keeps us
3407	 * from getting stuck if the nameserver is beneath the zone cut
3408	 * and we don't know its address (e.g. because the A record has
3409	 * expired).
3410	 */
3411	if (dns_name_issubdomain(name, fctx->domain)) {
3412		options |= DNS_ADBFIND_STARTATZONE;
3413	}
3414	options |= DNS_ADBFIND_GLUEOK;
3415	options |= DNS_ADBFIND_HINTOK;
3416
3417	/*
3418	 * See what we know about this address.
3419	 */
3420	fctx_addref(fctx);
3421	result = dns_adb_createfind(
3422		fctx->adb, res->buckets[fctx->bucketnum].task, fctx_finddone,
3423		fctx, name, fctx->name, fctx->type, options, now, NULL,
3424		res->view->dstport, fctx->depth + 1, fctx->qc, &find);
3425
3426	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3427		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
3428		      "fctx %p(%s): createfind for %s - %s", fctx, fctx->info,
3429		      fctx->clientstr, isc_result_totext(result));
3430
3431	if (result != ISC_R_SUCCESS) {
3432		if (result == DNS_R_ALIAS) {
3433			char namebuf[DNS_NAME_FORMATSIZE];
3434
3435			/*
3436			 * XXXRTH  Follow the CNAME/DNAME chain?
3437			 */
3438			dns_adb_destroyfind(&find);
3439			fctx->adberr++;
3440			dns_name_format(name, namebuf, sizeof(namebuf));
3441			isc_log_write(dns_lctx, DNS_LOGCATEGORY_CNAME,
3442				      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
3443				      "skipping nameserver '%s' because it "
3444				      "is a CNAME, while resolving '%s'",
3445				      namebuf, fctx->info);
3446		}
3447		fctx_detach(&fctx);
3448		return;
3449	}
3450
3451	if (!ISC_LIST_EMPTY(find->list)) {
3452		/*
3453		 * We have at least some of the addresses for the
3454		 * name.
3455		 */
3456		INSIST((find->options & DNS_ADBFIND_WANTEVENT) == 0);
3457		if (flags != 0 || port != 0) {
3458			for (ai = ISC_LIST_HEAD(find->list); ai != NULL;
3459			     ai = ISC_LIST_NEXT(ai, publink))
3460			{
3461				ai->flags |= flags;
3462				if (port != 0) {
3463					isc_sockaddr_setport(&ai->sockaddr,
3464							     port);
3465				}
3466			}
3467		}
3468		if ((flags & FCTX_ADDRINFO_DUALSTACK) != 0) {
3469			ISC_LIST_APPEND(fctx->altfinds, find, publink);
3470		} else {
3471			ISC_LIST_APPEND(fctx->finds, find, publink);
3472		}
3473		return;
3474	}
3475
3476	/*
3477	 * We don't know any of the addresses for this name.
3478	 *
3479	 * The find may be waiting on a resolver fetch for a server
3480	 * address. We need to make sure it isn't waiting on *this*
3481	 * fetch, because if it is, we won't be answering it and it
3482	 * won't be answering us.
3483	 */
3484	if (waiting_for(find, fctx->type) && dns_name_equal(name, fctx->name)) {
3485		fctx->adberr++;
3486		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3487			      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
3488			      "loop detected resolving '%s'", fctx->info);
3489
3490		if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) {
3491			atomic_fetch_add_relaxed(&fctx->pending, 1);
3492			dns_adb_cancelfind(find);
3493		} else {
3494			dns_adb_destroyfind(&find);
3495			fctx_detach(&fctx);
3496		}
3497		return;
3498	}
3499
3500	/*
3501	 * We may be waiting for another fetch to complete, and
3502	 * we'll get an event later when the find has what it needs.
3503	 */
3504	if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) {
3505		atomic_fetch_add_relaxed(&fctx->pending, 1);
3506
3507		/*
3508		 * Bootstrap.
3509		 */
3510		if (need_alternate != NULL && !*need_alternate && unshared &&
3511		    ((res->dispatches4 == NULL &&
3512		      find->result_v6 != DNS_R_NXDOMAIN) ||
3513		     (res->dispatches6 == NULL &&
3514		      find->result_v4 != DNS_R_NXDOMAIN)))
3515		{
3516			*need_alternate = true;
3517		}
3518		if (no_addresses != NULL) {
3519			(*no_addresses)++;
3520		}
3521		return;
3522	}
3523
3524	/*
3525	 * No addresses and no pending events: the find failed.
3526	 */
3527	if ((find->options & DNS_ADBFIND_OVERQUOTA) != 0) {
3528		if (overquota != NULL) {
3529			*overquota = true;
3530		}
3531		fctx->quotacount++; /* quota exceeded */
3532	} else if ((find->options & DNS_ADBFIND_LAMEPRUNED) != 0) {
3533		fctx->lamecount++; /* cached lame server */
3534	} else {
3535		fctx->adberr++; /* unreachable server, etc. */
3536	}
3537
3538	/*
3539	 * If we know there are no addresses for the family we are using then
3540	 * try to add an alternative server.
3541	 */
3542	if (need_alternate != NULL && !*need_alternate &&
3543	    ((res->dispatches4 == NULL && find->result_v6 == DNS_R_NXRRSET) ||
3544	     (res->dispatches6 == NULL && find->result_v4 == DNS_R_NXRRSET)))
3545	{
3546		*need_alternate = true;
3547	}
3548	dns_adb_destroyfind(&find);
3549	fctx_detach(&fctx);
3550}
3551
3552static bool
3553isstrictsubdomain(const dns_name_t *name1, const dns_name_t *name2) {
3554	int order;
3555	unsigned int nlabels;
3556	dns_namereln_t namereln;
3557
3558	namereln = dns_name_fullcompare(name1, name2, &order, &nlabels);
3559	return (namereln == dns_namereln_subdomain);
3560}
3561
3562static isc_result_t
3563fctx_getaddresses(fetchctx_t *fctx, bool badcache) {
3564	dns_rdata_t rdata = DNS_RDATA_INIT;
3565	isc_result_t result;
3566	dns_resolver_t *res;
3567	isc_stdtime_t now;
3568	unsigned int stdoptions = 0;
3569	dns_forwarder_t *fwd;
3570	dns_adbaddrinfo_t *ai;
3571	bool all_bad;
3572	dns_rdata_ns_t ns;
3573	bool need_alternate = false;
3574	bool all_spilled = true;
3575	unsigned int no_addresses = 0;
3576	unsigned int ns_processed = 0;
3577
3578	FCTXTRACE5("getaddresses", "fctx->depth=", fctx->depth);
3579
3580	/*
3581	 * Don't pound on remote servers.  (Failsafe!)
3582	 */
3583	fctx->restarts++;
3584	if (fctx->restarts > 100) {
3585		FCTXTRACE("too many restarts");
3586		return (DNS_R_SERVFAIL);
3587	}
3588
3589	res = fctx->res;
3590
3591	if (fctx->depth > res->maxdepth) {
3592		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
3593			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
3594			      "too much NS indirection resolving '%s' "
3595			      "(depth=%u, maxdepth=%u)",
3596			      fctx->info, fctx->depth, res->maxdepth);
3597		return (DNS_R_SERVFAIL);
3598	}
3599
3600	/*
3601	 * Forwarders.
3602	 */
3603
3604	INSIST(ISC_LIST_EMPTY(fctx->forwaddrs));
3605	INSIST(ISC_LIST_EMPTY(fctx->altaddrs));
3606
3607	/*
3608	 * If we have DNS_FETCHOPT_NOFORWARD set and forwarding policy
3609	 * allows us to not forward - skip forwarders and go straight
3610	 * to NSes. This is currently used to make sure that priming
3611	 * query gets root servers' IP addresses in ADDITIONAL section.
3612	 */
3613	if ((fctx->options & DNS_FETCHOPT_NOFORWARD) != 0 &&
3614	    (fctx->fwdpolicy != dns_fwdpolicy_only))
3615	{
3616		goto normal_nses;
3617	}
3618
3619	/*
3620	 * If this fctx has forwarders, use them; otherwise use any
3621	 * selective forwarders specified in the view; otherwise use the
3622	 * resolver's forwarders (if any).
3623	 */
3624	fwd = ISC_LIST_HEAD(fctx->forwarders);
3625	if (fwd == NULL) {
3626		dns_forwarders_t *forwarders = NULL;
3627		dns_name_t *name = fctx->name;
3628		dns_name_t suffix;
3629		unsigned int labels;
3630		dns_fixedname_t fixed;
3631		dns_name_t *domain;
3632
3633		/*
3634		 * DS records are found in the parent server.
3635		 * Strip label to get the correct forwarder (if any).
3636		 */
3637		if (dns_rdatatype_atparent(fctx->type) &&
3638		    dns_name_countlabels(name) > 1)
3639		{
3640			dns_name_init(&suffix, NULL);
3641			labels = dns_name_countlabels(name);
3642			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
3643			name = &suffix;
3644		}
3645
3646		domain = dns_fixedname_initname(&fixed);
3647		result = dns_fwdtable_find(res->view->fwdtable, name, domain,
3648					   &forwarders);
3649		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
3650			fwd = ISC_LIST_HEAD(forwarders->fwdrs);
3651			fctx->fwdpolicy = forwarders->fwdpolicy;
3652			dns_name_copy(domain, fctx->fwdname);
3653			if (fctx->fwdpolicy == dns_fwdpolicy_only &&
3654			    isstrictsubdomain(domain, fctx->domain))
3655			{
3656				fcount_decr(fctx);
3657				dns_name_copy(domain, fctx->domain);
3658				result = fcount_incr(fctx, true);
3659				if (result != ISC_R_SUCCESS) {
3660					return (result);
3661				}
3662			}
3663		}
3664	}
3665
3666	while (fwd != NULL) {
3667		if ((isc_sockaddr_pf(&fwd->addr) == AF_INET &&
3668		     res->dispatches4 == NULL) ||
3669		    (isc_sockaddr_pf(&fwd->addr) == AF_INET6 &&
3670		     res->dispatches6 == NULL))
3671		{
3672			fwd = ISC_LIST_NEXT(fwd, link);
3673			continue;
3674		}
3675		ai = NULL;
3676		result = dns_adb_findaddrinfo(fctx->adb, &fwd->addr, &ai, 0);
3677		if (result == ISC_R_SUCCESS) {
3678			dns_adbaddrinfo_t *cur;
3679			ai->flags |= FCTX_ADDRINFO_FORWARDER;
3680			cur = ISC_LIST_HEAD(fctx->forwaddrs);
3681			while (cur != NULL && cur->srtt < ai->srtt) {
3682				cur = ISC_LIST_NEXT(cur, publink);
3683			}
3684			if (cur != NULL) {
3685				ISC_LIST_INSERTBEFORE(fctx->forwaddrs, cur, ai,
3686						      publink);
3687			} else {
3688				ISC_LIST_APPEND(fctx->forwaddrs, ai, publink);
3689			}
3690		}
3691		fwd = ISC_LIST_NEXT(fwd, link);
3692	}
3693
3694	/*
3695	 * If the forwarding policy is "only", we don't need the
3696	 * addresses of the nameservers.
3697	 */
3698	if (fctx->fwdpolicy == dns_fwdpolicy_only) {
3699		goto out;
3700	}
3701
3702	/*
3703	 * Normal nameservers.
3704	 */
3705normal_nses:
3706	stdoptions = DNS_ADBFIND_WANTEVENT | DNS_ADBFIND_EMPTYEVENT;
3707	if (fctx->restarts == 1) {
3708		/*
3709		 * To avoid sending out a flood of queries likely to
3710		 * result in NXRRSET, we suppress fetches for address
3711		 * families we don't have the first time through,
3712		 * provided that we have addresses in some family we
3713		 * can use.
3714		 *
3715		 * We don't want to set this option all the time, since
3716		 * if fctx->restarts > 1, we've clearly been having
3717		 * trouble with the addresses we had, so getting more
3718		 * could help.
3719		 */
3720		stdoptions |= DNS_ADBFIND_AVOIDFETCHES;
3721	}
3722	if (res->dispatches4 != NULL) {
3723		stdoptions |= DNS_ADBFIND_INET;
3724	}
3725	if (res->dispatches6 != NULL) {
3726		stdoptions |= DNS_ADBFIND_INET6;
3727	}
3728
3729	if ((stdoptions & DNS_ADBFIND_ADDRESSMASK) == 0) {
3730		return (DNS_R_SERVFAIL);
3731	}
3732
3733	isc_stdtime_get(&now);
3734
3735	INSIST(ISC_LIST_EMPTY(fctx->finds));
3736	INSIST(ISC_LIST_EMPTY(fctx->altfinds));
3737
3738	for (result = dns_rdataset_first(&fctx->nameservers);
3739	     result == ISC_R_SUCCESS;
3740	     result = dns_rdataset_next(&fctx->nameservers))
3741	{
3742		bool overquota = false;
3743
3744		dns_rdataset_current(&fctx->nameservers, &rdata);
3745		/*
3746		 * Extract the name from the NS record.
3747		 */
3748		result = dns_rdata_tostruct(&rdata, &ns, NULL);
3749		if (result != ISC_R_SUCCESS) {
3750			continue;
3751		}
3752
3753		if (no_addresses > NS_FAIL_LIMIT &&
3754		    dns_rdataset_count(&fctx->nameservers) > NS_RR_LIMIT)
3755		{
3756			stdoptions |= DNS_ADBFIND_NOFETCH;
3757		}
3758		findname(fctx, &ns.name, 0, stdoptions, 0, now, &overquota,
3759			 &need_alternate, &no_addresses);
3760
3761		if (!overquota) {
3762			all_spilled = false;
3763		}
3764
3765		dns_rdata_reset(&rdata);
3766		dns_rdata_freestruct(&ns);
3767
3768		if (++ns_processed >= NS_PROCESSING_LIMIT) {
3769			result = ISC_R_NOMORE;
3770			break;
3771		}
3772	}
3773	if (result != ISC_R_NOMORE) {
3774		return (result);
3775	}
3776
3777	/*
3778	 * Do we need to use 6 to 4?
3779	 */
3780	if (need_alternate) {
3781		int family;
3782		alternate_t *a;
3783		family = (res->dispatches6 != NULL) ? AF_INET6 : AF_INET;
3784		for (a = ISC_LIST_HEAD(res->alternates); a != NULL;
3785		     a = ISC_LIST_NEXT(a, link))
3786		{
3787			if (!a->isaddress) {
3788				findname(fctx, &a->_u._n.name, a->_u._n.port,
3789					 stdoptions, FCTX_ADDRINFO_DUALSTACK,
3790					 now, NULL, NULL, NULL);
3791				continue;
3792			}
3793			if (isc_sockaddr_pf(&a->_u.addr) != family) {
3794				continue;
3795			}
3796			ai = NULL;
3797			result = dns_adb_findaddrinfo(fctx->adb, &a->_u.addr,
3798						      &ai, 0);
3799			if (result == ISC_R_SUCCESS) {
3800				dns_adbaddrinfo_t *cur;
3801				ai->flags |= FCTX_ADDRINFO_FORWARDER;
3802				ai->flags |= FCTX_ADDRINFO_DUALSTACK;
3803				cur = ISC_LIST_HEAD(fctx->altaddrs);
3804				while (cur != NULL && cur->srtt < ai->srtt) {
3805					cur = ISC_LIST_NEXT(cur, publink);
3806				}
3807				if (cur != NULL) {
3808					ISC_LIST_INSERTBEFORE(fctx->altaddrs,
3809							      cur, ai, publink);
3810				} else {
3811					ISC_LIST_APPEND(fctx->altaddrs, ai,
3812							publink);
3813				}
3814			}
3815		}
3816	}
3817
3818out:
3819	/*
3820	 * Mark all known bad servers.
3821	 */
3822	all_bad = mark_bad(fctx);
3823
3824	/*
3825	 * How are we doing?
3826	 */
3827	if (all_bad) {
3828		/*
3829		 * We've got no addresses.
3830		 */
3831		if (atomic_load_acquire(&fctx->pending) > 0) {
3832			/*
3833			 * We're fetching the addresses, but don't have
3834			 * any yet.   Tell the caller to wait for an
3835			 * answer.
3836			 */
3837			result = DNS_R_WAIT;
3838		} else {
3839			isc_time_t expire;
3840			isc_interval_t i;
3841			/*
3842			 * We've lost completely.  We don't know any
3843			 * addresses, and the ADB has told us it can't
3844			 * get them.
3845			 */
3846			FCTXTRACE("no addresses");
3847			isc_interval_set(&i, DNS_RESOLVER_BADCACHETTL(fctx), 0);
3848			result = isc_time_nowplusinterval(&expire, &i);
3849			if (badcache &&
3850			    (fctx->type == dns_rdatatype_dnskey ||
3851			     fctx->type == dns_rdatatype_ds) &&
3852			    result == ISC_R_SUCCESS)
3853			{
3854				dns_resolver_addbadcache(res, fctx->name,
3855							 fctx->type, &expire);
3856			}
3857
3858			result = ISC_R_FAILURE;
3859
3860			/*
3861			 * If all of the addresses found were over the
3862			 * fetches-per-server quota, return the
3863			 * configured response.
3864			 */
3865			if (all_spilled) {
3866				result = res->quotaresp[dns_quotatype_server];
3867				inc_stats(res, dns_resstatscounter_serverquota);
3868			}
3869		}
3870	} else {
3871		/*
3872		 * We've found some addresses.  We might still be
3873		 * looking for more addresses.
3874		 */
3875		sort_finds(&fctx->finds, res->view->v6bias);
3876		sort_finds(&fctx->altfinds, 0);
3877		result = ISC_R_SUCCESS;
3878	}
3879
3880	return (result);
3881}
3882
3883static void
3884possibly_mark(fetchctx_t *fctx, dns_adbaddrinfo_t *addr) {
3885	isc_netaddr_t na;
3886	char buf[ISC_NETADDR_FORMATSIZE];
3887	isc_sockaddr_t *sa;
3888	bool aborted = false;
3889	bool bogus;
3890	dns_acl_t *blackhole;
3891	isc_netaddr_t ipaddr;
3892	dns_peer_t *peer = NULL;
3893	dns_resolver_t *res;
3894	const char *msg = NULL;
3895
3896	sa = &addr->sockaddr;
3897
3898	res = fctx->res;
3899	isc_netaddr_fromsockaddr(&ipaddr, sa);
3900	blackhole = dns_dispatchmgr_getblackhole(res->dispatchmgr);
3901	(void)dns_peerlist_peerbyaddr(res->view->peers, &ipaddr, &peer);
3902
3903	if (blackhole != NULL) {
3904		int match;
3905
3906		if ((dns_acl_match(&ipaddr, NULL, blackhole, res->view->aclenv,
3907				   &match, NULL) == ISC_R_SUCCESS) &&
3908		    match > 0)
3909		{
3910			aborted = true;
3911		}
3912	}
3913
3914	if (peer != NULL && dns_peer_getbogus(peer, &bogus) == ISC_R_SUCCESS &&
3915	    bogus)
3916	{
3917		aborted = true;
3918	}
3919
3920	if (aborted) {
3921		addr->flags |= FCTX_ADDRINFO_MARK;
3922		msg = "ignoring blackholed / bogus server: ";
3923	} else if (isc_sockaddr_isnetzero(sa)) {
3924		addr->flags |= FCTX_ADDRINFO_MARK;
3925		msg = "ignoring net zero address: ";
3926	} else if (isc_sockaddr_ismulticast(sa)) {
3927		addr->flags |= FCTX_ADDRINFO_MARK;
3928		msg = "ignoring multicast address: ";
3929	} else if (isc_sockaddr_isexperimental(sa)) {
3930		addr->flags |= FCTX_ADDRINFO_MARK;
3931		msg = "ignoring experimental address: ";
3932	} else if (sa->type.sa.sa_family != AF_INET6) {
3933		return;
3934	} else if (IN6_IS_ADDR_V4MAPPED(&sa->type.sin6.sin6_addr)) {
3935		addr->flags |= FCTX_ADDRINFO_MARK;
3936		msg = "ignoring IPv6 mapped IPV4 address: ";
3937	} else if (IN6_IS_ADDR_V4COMPAT(&sa->type.sin6.sin6_addr)) {
3938		addr->flags |= FCTX_ADDRINFO_MARK;
3939		msg = "ignoring IPv6 compatibility IPV4 address: ";
3940	} else {
3941		return;
3942	}
3943
3944	if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(3))) {
3945		isc_netaddr_fromsockaddr(&na, sa);
3946		isc_netaddr_format(&na, buf, sizeof(buf));
3947		FCTXTRACE2(msg, buf);
3948	}
3949}
3950
3951static dns_adbaddrinfo_t *
3952fctx_nextaddress(fetchctx_t *fctx) {
3953	dns_adbfind_t *find, *start;
3954	dns_adbaddrinfo_t *addrinfo;
3955	dns_adbaddrinfo_t *faddrinfo;
3956
3957	/*
3958	 * Return the next untried address, if any.
3959	 */
3960
3961	/*
3962	 * Find the first unmarked forwarder (if any).
3963	 */
3964	for (addrinfo = ISC_LIST_HEAD(fctx->forwaddrs); addrinfo != NULL;
3965	     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
3966	{
3967		if (!UNMARKED(addrinfo)) {
3968			continue;
3969		}
3970		possibly_mark(fctx, addrinfo);
3971		if (UNMARKED(addrinfo)) {
3972			addrinfo->flags |= FCTX_ADDRINFO_MARK;
3973			fctx->find = NULL;
3974			fctx->forwarding = true;
3975
3976			/*
3977			 * QNAME minimization is disabled when
3978			 * forwarding, and has to remain disabled if
3979			 * we switch back to normal recursion; otherwise
3980			 * forwarding could leave us in an inconsistent
3981			 * state.
3982			 */
3983			fctx->minimized = false;
3984			return (addrinfo);
3985		}
3986	}
3987
3988	/*
3989	 * No forwarders.  Move to the next find.
3990	 */
3991	fctx->forwarding = false;
3992	FCTX_ATTR_SET(fctx, FCTX_ATTR_TRIEDFIND);
3993
3994	find = fctx->find;
3995	if (find == NULL) {
3996		find = ISC_LIST_HEAD(fctx->finds);
3997	} else {
3998		find = ISC_LIST_NEXT(find, publink);
3999		if (find == NULL) {
4000			find = ISC_LIST_HEAD(fctx->finds);
4001		}
4002	}
4003
4004	/*
4005	 * Find the first unmarked addrinfo.
4006	 */
4007	addrinfo = NULL;
4008	if (find != NULL) {
4009		start = find;
4010		do {
4011			for (addrinfo = ISC_LIST_HEAD(find->list);
4012			     addrinfo != NULL;
4013			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
4014			{
4015				if (!UNMARKED(addrinfo)) {
4016					continue;
4017				}
4018				possibly_mark(fctx, addrinfo);
4019				if (UNMARKED(addrinfo)) {
4020					addrinfo->flags |= FCTX_ADDRINFO_MARK;
4021					break;
4022				}
4023			}
4024			if (addrinfo != NULL) {
4025				break;
4026			}
4027			find = ISC_LIST_NEXT(find, publink);
4028			if (find == NULL) {
4029				find = ISC_LIST_HEAD(fctx->finds);
4030			}
4031		} while (find != start);
4032	}
4033
4034	fctx->find = find;
4035	if (addrinfo != NULL) {
4036		return (addrinfo);
4037	}
4038
4039	/*
4040	 * No nameservers left.  Try alternates.
4041	 */
4042
4043	FCTX_ATTR_SET(fctx, FCTX_ATTR_TRIEDALT);
4044
4045	find = fctx->altfind;
4046	if (find == NULL) {
4047		find = ISC_LIST_HEAD(fctx->altfinds);
4048	} else {
4049		find = ISC_LIST_NEXT(find, publink);
4050		if (find == NULL) {
4051			find = ISC_LIST_HEAD(fctx->altfinds);
4052		}
4053	}
4054
4055	/*
4056	 * Find the first unmarked addrinfo.
4057	 */
4058	addrinfo = NULL;
4059	if (find != NULL) {
4060		start = find;
4061		do {
4062			for (addrinfo = ISC_LIST_HEAD(find->list);
4063			     addrinfo != NULL;
4064			     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
4065			{
4066				if (!UNMARKED(addrinfo)) {
4067					continue;
4068				}
4069				possibly_mark(fctx, addrinfo);
4070				if (UNMARKED(addrinfo)) {
4071					addrinfo->flags |= FCTX_ADDRINFO_MARK;
4072					break;
4073				}
4074			}
4075			if (addrinfo != NULL) {
4076				break;
4077			}
4078			find = ISC_LIST_NEXT(find, publink);
4079			if (find == NULL) {
4080				find = ISC_LIST_HEAD(fctx->altfinds);
4081			}
4082		} while (find != start);
4083	}
4084
4085	faddrinfo = addrinfo;
4086
4087	/*
4088	 * See if we have a better alternate server by address.
4089	 */
4090
4091	for (addrinfo = ISC_LIST_HEAD(fctx->altaddrs); addrinfo != NULL;
4092	     addrinfo = ISC_LIST_NEXT(addrinfo, publink))
4093	{
4094		if (!UNMARKED(addrinfo)) {
4095			continue;
4096		}
4097		possibly_mark(fctx, addrinfo);
4098		if (UNMARKED(addrinfo) &&
4099		    (faddrinfo == NULL || addrinfo->srtt < faddrinfo->srtt))
4100		{
4101			if (faddrinfo != NULL) {
4102				faddrinfo->flags &= ~FCTX_ADDRINFO_MARK;
4103			}
4104			addrinfo->flags |= FCTX_ADDRINFO_MARK;
4105			break;
4106		}
4107	}
4108
4109	if (addrinfo == NULL) {
4110		addrinfo = faddrinfo;
4111		fctx->altfind = find;
4112	}
4113
4114	return (addrinfo);
4115}
4116
4117static void
4118fctx_try(fetchctx_t *fctx, bool retrying, bool badcache) {
4119	isc_result_t result;
4120	dns_adbaddrinfo_t *addrinfo = NULL;
4121	dns_resolver_t *res;
4122	isc_task_t *task;
4123	unsigned int bucketnum;
4124
4125	FCTXTRACE5("try", "fctx->qc=", isc_counter_used(fctx->qc));
4126
4127	REQUIRE(!ADDRWAIT(fctx));
4128
4129	res = fctx->res;
4130	bucketnum = fctx->bucketnum;
4131
4132	/* We've already exceeded maximum query count */
4133	if (isc_counter_used(fctx->qc) > res->maxqueries) {
4134		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4135			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
4136			      "exceeded max queries resolving '%s' "
4137			      "(querycount=%u, maxqueries=%u)",
4138			      fctx->info, isc_counter_used(fctx->qc),
4139			      res->maxqueries);
4140		fctx_done_detach(&fctx, DNS_R_SERVFAIL);
4141		return;
4142	}
4143
4144	addrinfo = fctx_nextaddress(fctx);
4145
4146	/* Try to find an address that isn't over quota */
4147	while (addrinfo != NULL && dns_adbentry_overquota(addrinfo->entry)) {
4148		addrinfo = fctx_nextaddress(fctx);
4149	}
4150
4151	if (addrinfo == NULL) {
4152		/* We have no more addresses.  Start over. */
4153		fctx_cancelqueries(fctx, true, false);
4154		fctx_cleanup(fctx);
4155		result = fctx_getaddresses(fctx, badcache);
4156		if (result == DNS_R_WAIT) {
4157			/*
4158			 * Sleep waiting for addresses.
4159			 */
4160			FCTXTRACE("addrwait");
4161			FCTX_ATTR_SET(fctx, FCTX_ATTR_ADDRWAIT);
4162			return;
4163		} else if (result != ISC_R_SUCCESS) {
4164			/*
4165			 * Something bad happened.
4166			 */
4167			fctx_done_detach(&fctx, result);
4168			return;
4169		}
4170
4171		addrinfo = fctx_nextaddress(fctx);
4172
4173		while (addrinfo != NULL &&
4174		       dns_adbentry_overquota(addrinfo->entry))
4175		{
4176			addrinfo = fctx_nextaddress(fctx);
4177		}
4178
4179		/*
4180		 * While we may have addresses from the ADB, they
4181		 * might be bad ones.  In this case, return SERVFAIL.
4182		 */
4183		if (addrinfo == NULL) {
4184			fctx_done_detach(&fctx, DNS_R_SERVFAIL);
4185			return;
4186		}
4187	}
4188	/*
4189	 * We're minimizing and we're not yet at the final NS -
4190	 * we need to launch a query for NS for 'upper' domain
4191	 */
4192	if (fctx->minimized && !fctx->forwarding) {
4193		unsigned int options = fctx->options;
4194		/*
4195		 * Also clear DNS_FETCHOPT_TRYSTALE_ONTIMEOUT here,
4196		 * otherwise every query minimization step will activate
4197		 * the try-stale timer again.
4198		 */
4199		options &= ~(DNS_FETCHOPT_QMINIMIZE |
4200			     DNS_FETCHOPT_TRYSTALE_ONTIMEOUT);
4201
4202		/*
4203		 * Is another QNAME minimization fetch still running?
4204		 */
4205		if (fctx->qminfetch != NULL) {
4206			bool validfctx = (DNS_FETCH_VALID(fctx->qminfetch) &&
4207					  VALID_FCTX(fctx->qminfetch->private));
4208			char namebuf[DNS_NAME_FORMATSIZE];
4209			char typebuf[DNS_RDATATYPE_FORMATSIZE];
4210
4211			dns_name_format(fctx->qminname, namebuf,
4212					sizeof(namebuf));
4213			dns_rdatatype_format(fctx->qmintype, typebuf,
4214					     sizeof(typebuf));
4215
4216			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4217				      DNS_LOGMODULE_RESOLVER, ISC_LOG_ERROR,
4218				      "fctx %p(%s): attempting QNAME "
4219				      "minimization fetch for %s/%s but "
4220				      "fetch %p(%s) still running",
4221				      fctx, fctx->info, namebuf, typebuf,
4222				      fctx->qminfetch,
4223				      validfctx ? fctx->qminfetch->private->info
4224						: "<invalid>");
4225			fctx_done_detach(&fctx, DNS_R_SERVFAIL);
4226			return;
4227		}
4228
4229		/*
4230		 * Turn on NOFOLLOW in relaxed mode so that QNAME minimisation
4231		 * doesn't cause additional queries to resolve the target of the
4232		 * QNAME minimisation request when a referral is returned.  This
4233		 * will also reduce the impact of mis-matched NS RRsets where
4234		 * the child's NS RRset is garbage.  If a delegation is
4235		 * discovered DNS_R_DELEGATION will be returned to resume_qmin.
4236		 */
4237		if ((options & DNS_FETCHOPT_QMIN_STRICT) == 0) {
4238			options |= DNS_FETCHOPT_NOFOLLOW;
4239		}
4240		fctx_addref(fctx);
4241		task = res->buckets[bucketnum].task;
4242		result = dns_resolver_createfetch(
4243			fctx->res, fctx->qminname, fctx->qmintype, fctx->domain,
4244			&fctx->nameservers, NULL, NULL, 0, options, 0, fctx->qc,
4245			task, resume_qmin, fctx, &fctx->qminrrset, NULL,
4246			&fctx->qminfetch);
4247		if (result != ISC_R_SUCCESS) {
4248			fctx_unref(fctx);
4249			fctx_done_detach(&fctx, DNS_R_SERVFAIL);
4250		}
4251		return;
4252	}
4253
4254	result = isc_counter_increment(fctx->qc);
4255	if (result != ISC_R_SUCCESS) {
4256		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4257			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
4258			      "exceeded max queries resolving '%s'",
4259			      fctx->info);
4260		fctx_done_detach(&fctx, DNS_R_SERVFAIL);
4261		return;
4262	}
4263
4264	result = fctx_query(fctx, addrinfo, fctx->options);
4265	if (result != ISC_R_SUCCESS) {
4266		fctx_done_detach(&fctx, result);
4267	} else if (retrying) {
4268		inc_stats(res, dns_resstatscounter_retry);
4269	}
4270}
4271
/*
 * Task action run when the QNAME minimization sub-fetch
 * (fctx->qminfetch) finishes.  'event' must be a DNS_EVENT_FETCHDONE
 * event whose ev_arg is the fctx that launched the sub-fetch.
 *
 * The handler releases whatever the fetch event holds, destroys the
 * sub-fetch and then does one of:
 *
 *	- drops its fctx reference and returns, if the fctx is
 *	  shutting down;
 *	- finishes the fctx with the sub-fetch result, if that result
 *	  is ISC_R_SHUTTINGDOWN/ISC_R_CANCELED, or is one of the
 *	  listed failures while DNS_FETCHOPT_QMIN_STRICT is set;
 *	- otherwise looks up the zone cut for fctx->name again,
 *	  refreshes fctx->domain, fctx->qmindcname and the NS TTL,
 *	  calls fctx_minimize_qname() and retries via fctx_try().
 *
 * A failure of the zone cut lookup or of fcount_incr() also finishes
 * the fctx through the cleanup path.
 */
4272static void
4273resume_qmin(isc_task_t *task, isc_event_t *event) {
4274	dns_fetchevent_t *fevent = NULL;
4275	dns_resolver_t *res = NULL;
4276	fetchctx_t *fctx = NULL;
4277	isc_result_t result;
4278	unsigned int bucketnum;
4279	unsigned int findoptions = 0;
4280	dns_name_t *fname = NULL, *dcname = NULL;
4281	dns_fixedname_t ffixed, dcfixed;
4282
4283	UNUSED(task);
4284
4285	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
4286	fevent = (dns_fetchevent_t *)event;
4287	fctx = event->ev_arg;
4288	REQUIRE(VALID_FCTX(fctx));
4289	res = fctx->res;
4290
4291	FCTXTRACE("resume_qmin");
4292
4293	fname = dns_fixedname_initname(&ffixed);
4294	dcname = dns_fixedname_initname(&dcfixed);
4295
	/* Only the result of the sub-fetch is used; release its data. */
4296	if (fevent->node != NULL) {
4297		dns_db_detachnode(fevent->db, &fevent->node);
4298	}
4299	if (fevent->db != NULL) {
4300		dns_db_detach(&fevent->db);
4301	}
4302
4303	bucketnum = fctx->bucketnum;
4304
4305	if (dns_rdataset_isassociated(fevent->rdataset)) {
4306		dns_rdataset_disassociate(fevent->rdataset);
4307	}
4308
	/* Save the result before the event is freed. */
4309	result = fevent->result;
4310	isc_event_free(&event);
4311
4312	dns_resolver_destroyfetch(&fctx->qminfetch);
4313
4314	LOCK(&res->buckets[bucketnum].lock);
4315	if (SHUTTINGDOWN(fctx)) {
4316		maybe_cancel_validators(fctx, true);
4317		UNLOCK(&res->buckets[bucketnum].lock);
4318		fctx_detach(&fctx);
4319		return;
4320	}
4321	UNLOCK(&res->buckets[bucketnum].lock);
4322
4323	switch (result) {
4324	case ISC_R_SHUTTINGDOWN:
4325	case ISC_R_CANCELED:
4326		goto cleanup;
4327	case DNS_R_NXDOMAIN:
4328	case DNS_R_NCACHENXDOMAIN:
4329	case DNS_R_FORMERR:
4330	case DNS_R_REMOTEFORMERR:
4331	case ISC_R_FAILURE:
4332		if ((fctx->options & DNS_FETCHOPT_QMIN_STRICT) == 0) {
			/*
			 * NOTE(review): presumably a label count above
			 * DNS_MAX_LABELS makes fctx_minimize_qname() stop
			 * minimizing -- confirm against that function.
			 */
4333			fctx->qmin_labels = DNS_MAX_LABELS + 1;
4334			/*
4335			 * We store the result. If we succeed in the end
4336			 * we'll issue a warning that the server is
4337			 * broken.
4338			 */
4339			fctx->qmin_warning = result;
4340		} else {
4341			goto cleanup;
4342		}
4343		break;
4344	default:
4345		/*
4346		 * When DNS_FETCHOPT_NOFOLLOW is set and a delegation
4347		 * was discovered, DNS_R_DELEGATION is returned and is
4348		 * processed here.
4349		 */
4350		break;
4351	}
4352
	/* Discard the old NS set; the zone cut is looked up again below. */
4353	if (dns_rdataset_isassociated(&fctx->nameservers)) {
4354		dns_rdataset_disassociate(&fctx->nameservers);
4355	}
4356
4357	if (dns_rdatatype_atparent(fctx->type)) {
4358		findoptions |= DNS_DBFIND_NOEXACT;
4359	}
4360	result = dns_view_findzonecut(res->view, fctx->name, fname, dcname,
4361				      fctx->now, findoptions, true, true,
4362				      &fctx->nameservers, NULL);
4363
4364	/*
4365	 * DNS_R_NXDOMAIN here means we have not loaded the root zone
4366	 * mirror yet - but DNS_R_NXDOMAIN is not a valid return value
4367	 * when doing recursion, we need to patch it.
4368	 */
4369	if (result == DNS_R_NXDOMAIN) {
4370		result = DNS_R_SERVFAIL;
4371	}
4372
4373	if (result != ISC_R_SUCCESS) {
4374		goto cleanup;
4375	}
	/*
	 * fctx->domain changes here, so the fetch count is released
	 * before the copy and taken again after it.
	 * NOTE(review): presumably fcount_decr()/fcount_incr() account
	 * per fctx->domain -- confirm against those helpers.
	 */
4376	fcount_decr(fctx);
4377
4378	dns_name_copy(fname, fctx->domain);
4379
4380	result = fcount_incr(fctx, false);
4381	if (result != ISC_R_SUCCESS) {
4382		goto cleanup;
4383	}
4384
4385	dns_name_copy(dcname, fctx->qmindcname);
4386	fctx->ns_ttl = fctx->nameservers.ttl;
4387	fctx->ns_ttl_ok = true;
4388
4389	fctx_minimize_qname(fctx);
4390
4391	if (!fctx->minimized) {
4392		/*
4393		 * We have finished minimizing, but fctx->finds was
4394		 * filled at the beginning of the run - now we need to
4395		 * clear it before sending the final query to use proper
4396		 * nameservers.
4397		 */
4398		fctx_cancelqueries(fctx, false, false);
4399		fctx_cleanup(fctx);
4400	}
4401
4402	fctx_try(fctx, true, false);
	/* Drop the reference fctx_try() took when it started the sub-fetch. */
4403	fctx_detach(&fctx);
4404	return;
4405
4406cleanup:
4407	/* Detach the extra reference from fctx_try() */
4408	fctx_unref(fctx);
4409	fctx_done_detach(&fctx, result);
4410}
4411
/*
 * Tear down 'fctx' and return its memory.
 *
 * Requires a valid fctx whose event, query, find, altfind and
 * validator lists are empty, with no pending operations, and whose
 * state is not fetchstate_active.
 *
 * Under the bucket lock the fctx is unlinked from its bucket and the
 * resolver's fetch count and nfetch statistic are decremented.  When
 * 'exiting' is true and this left an exiting bucket empty,
 * res->activebuckets is decremented, and send_shutdown_events() is
 * called if isc_refcount_decrement() returned 1.
 *
 * Afterwards the per-fctx address lists are freed and every object
 * the fctx holds (query counter, fetch count, query message,
 * nameservers, cache, ADB, timer, resolver, memory context) is
 * released.
 */
4412static void
4413fctx_destroy(fetchctx_t *fctx, bool exiting) {
4414	dns_resolver_t *res = NULL;
4415	isc_sockaddr_t *sa = NULL, *next_sa = NULL;
4416	struct tried *tried = NULL;
4417	unsigned int bucketnum;
4418	bool bucket_empty = false;
4419	uint_fast32_t nfctx;
4420
4421	REQUIRE(VALID_FCTX(fctx));
4422	REQUIRE(ISC_LIST_EMPTY(fctx->events));
4423	REQUIRE(ISC_LIST_EMPTY(fctx->queries));
4424	REQUIRE(ISC_LIST_EMPTY(fctx->finds));
4425	REQUIRE(ISC_LIST_EMPTY(fctx->altfinds));
4426	REQUIRE(atomic_load_acquire(&fctx->pending) == 0);
4427	REQUIRE(ISC_LIST_EMPTY(fctx->validators));
4428
4429	FCTXTRACE("destroy");
4430
	/* Invalidate the magic so VALID_FCTX() no longer accepts it. */
4431	fctx->magic = 0;
4432
4433	res = fctx->res;
4434	bucketnum = fctx->bucketnum;
4435
4436	LOCK(&res->buckets[bucketnum].lock);
4437	REQUIRE(fctx->state != fetchstate_active);
4438
4439	ISC_LIST_UNLINK(res->buckets[bucketnum].fctxs, fctx, link);
4440
	/* The fetch-and-sub yields the value before the decrement. */
4441	nfctx = atomic_fetch_sub_release(&res->nfctx, 1);
4442	INSIST(nfctx > 0);
4443
4444	dec_stats(res, dns_resstatscounter_nfetch);
4445
4446	if (atomic_load_acquire(&res->buckets[bucketnum].exiting) &&
4447	    ISC_LIST_EMPTY(res->buckets[bucketnum].fctxs))
4448	{
4449		bucket_empty = true;
4450	}
4451	UNLOCK(&res->buckets[bucketnum].lock);
4452
	/* The shutdown notification is sent outside the bucket lock. */
4453	if (bucket_empty && exiting &&
4454	    isc_refcount_decrement(&res->activebuckets) == 1)
4455	{
4456		send_shutdown_events(res);
4457	}
4458
4459	isc_refcount_destroy(&fctx->references);
4460
4461	/*
4462	 * Free bad.
4463	 */
4464	for (sa = ISC_LIST_HEAD(fctx->bad); sa != NULL; sa = next_sa) {
4465		next_sa = ISC_LIST_NEXT(sa, link);
4466		ISC_LIST_UNLINK(fctx->bad, sa, link);
4467		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
4468	}
4469
	/*
	 * Free edns; the list head is re-read after each unlink.
	 */
4470	for (tried = ISC_LIST_HEAD(fctx->edns); tried != NULL;
4471	     tried = ISC_LIST_HEAD(fctx->edns))
4472	{
4473		ISC_LIST_UNLINK(fctx->edns, tried, link);
4474		isc_mem_put(fctx->mctx, tried, sizeof(*tried));
4475	}
4476
	/*
	 * Free bad_edns.
	 */
4477	for (sa = ISC_LIST_HEAD(fctx->bad_edns); sa != NULL; sa = next_sa) {
4478		next_sa = ISC_LIST_NEXT(sa, link);
4479		ISC_LIST_UNLINK(fctx->bad_edns, sa, link);
4480		isc_mem_put(fctx->mctx, sa, sizeof(*sa));
4481	}
4482
4483	isc_counter_detach(&fctx->qc);
4484	fcount_decr(fctx);
4485	dns_message_detach(&fctx->qmessage);
4486	if (dns_rdataset_isassociated(&fctx->nameservers)) {
4487		dns_rdataset_disassociate(&fctx->nameservers);
4488	}
4489	dns_db_detach(&fctx->cache);
4490	dns_adb_detach(&fctx->adb);
4491
4492	isc_timer_destroy(&fctx->timer);
4493
4494	dns_resolver_detach(&fctx->res);
4495
	/* fctx->mctx is still needed for these two calls, so it goes last. */
4496	isc_mem_free(fctx->mctx, fctx->info);
4497	isc_mem_putanddetach(&fctx->mctx, fctx, sizeof(*fctx));
4498}
4499
4500static void
4501fctx_shutdown(fetchctx_t *fctx) {
4502	isc_event_t *cevent = NULL;
4503
4504	FCTXTRACE("shutdown");
4505
4506	/*
4507	 * Start the shutdown process for fctx, if it isn't already
4508	 * under way.
4509	 */
4510	if (!atomic_compare_exchange_strong_acq_rel(&fctx->want_shutdown,
4511						    &(bool){ false }, true))
4512	{
4513		FCTXTRACE("already shut down");
4514		return;
4515	}
4516
4517	/*
4518	 * Unless we're still initializing (in which case the
4519	 * control event is still outstanding), we need to post
4520	 * the control event to tell the fetch we want it to
4521	 * exit.
4522	 */
4523	if (fctx->state != fetchstate_init) {
4524		FCTXTRACE("posting control event");
4525		cevent = &fctx->control_event;
4526		isc_task_sendto(fctx->res->buckets[fctx->bucketnum].task,
4527				&cevent, fctx->bucketnum);
4528	}
4529}
4530
/*
 * Control-event action that carries out the shutdown of the fctx given
 * in ev_arg.
 *
 * Without the bucket lock it clears ADDRWAIT, cancels every validator
 * on fctx->validators, cancels the NS and QNAME minimization
 * sub-fetches if present, and cancels outstanding queries and finds.
 * With the bucket lock it marks the fctx SHUTTINGDOWN and, if the fctx
 * was still active, moves it to fetchstate_done, calls
 * fctx_sendevents() with ISC_R_CANCELED and drops the reference taken
 * by dns_resolver_createfetch().  It ends by detaching from the fctx.
 */
4531static void
4532fctx_doshutdown(isc_task_t *task, isc_event_t *event) {
4533	fetchctx_t *fctx = event->ev_arg;
4534	dns_resolver_t *res = NULL;
4535	unsigned int bucketnum;
4536	dns_validator_t *validator = NULL;
4537
4538	REQUIRE(VALID_FCTX(fctx));
4539
4540	UNUSED(task);
4541
4542	res = fctx->res;
4543	bucketnum = fctx->bucketnum;
4544
4545	FCTXTRACE("doshutdown");
4546
4547	/*
4548	 * An fctx that is shutting down is no longer in ADDRWAIT mode.
4549	 */
4550	FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
4551
4552	/*
4553	 * Cancel all pending validators.  Note that this must be done
4554	 * without the bucket lock held, since that could cause
4555	 * deadlock.
4556	 */
4557	validator = ISC_LIST_HEAD(fctx->validators);
4558	while (validator != NULL) {
4559		dns_validator_cancel(validator);
4560		validator = ISC_LIST_NEXT(validator, link);
4561	}
4562
4563	if (fctx->nsfetch != NULL) {
4564		dns_resolver_cancelfetch(fctx->nsfetch);
4565	}
4566
4567	if (fctx->qminfetch != NULL) {
4568		dns_resolver_cancelfetch(fctx->qminfetch);
4569	}
4570
4571	/*
4572	 * Shut down anything still running on behalf of this
4573	 * fetch, and clean up finds and addresses.  To avoid deadlock
4574	 * with the ADB, we must do this before we lock the bucket lock.
4575	 * Increment the fctx references to avoid a race.
4576	 */
	/*
	 * NOTE(review): no reference is taken at this point in the code
	 * below; the last sentence of the comment above looks stale.
	 */
4577	fctx_cancelqueries(fctx, false, false);
4578	fctx_cleanup(fctx);
4579
4580	LOCK(&res->buckets[bucketnum].lock);
4581
4582	FCTX_ATTR_SET(fctx, FCTX_ATTR_SHUTTINGDOWN);
4583
4584	INSIST(fctx->state != fetchstate_init);
4585	INSIST(atomic_load_acquire(&fctx->want_shutdown));
4586
4587	if (fctx->state == fetchstate_active) {
4588		fctx->state = fetchstate_done;
4589
4590		fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__);
4591
4592		/* Detach the extra ref from dns_resolver_createfetch(). */
4593		fctx_unref(fctx);
4594	}
4595
4596	UNLOCK(&res->buckets[bucketnum].lock);
4597
4598	fctx_detach(&fctx);
4599}
4600
/*
 * Control-event action that starts the fctx given in ev_arg; the fctx
 * must be in fetchstate_init.
 *
 * If a shutdown was requested before startup, the fctx is marked
 * SHUTTINGDOWN and finished with ISC_R_SHUTTINGDOWN.  Otherwise the
 * state becomes fetchstate_active, the same event structure is
 * re-initialized to run fctx_doshutdown() later, the timer is started
 * and fctx_try() makes the first attempt; a timer failure finishes the
 * fctx with that error.
 */
4601static void
4602fctx_start(isc_task_t *task, isc_event_t *event) {
4603	fetchctx_t *fctx = event->ev_arg;
4604	dns_resolver_t *res = NULL;
4605	unsigned int bucketnum;
4606	isc_result_t result;
4607
4608	REQUIRE(VALID_FCTX(fctx));
4609
4610	UNUSED(task);
4611
4612	res = fctx->res;
4613	bucketnum = fctx->bucketnum;
4614
4615	FCTXTRACE("start");
4616
4617	LOCK(&res->buckets[bucketnum].lock);
4618
4619	INSIST(fctx->state == fetchstate_init);
4620	if (atomic_load_acquire(&fctx->want_shutdown)) {
4621		/*
4622		 * We haven't started this fctx yet, but we've been
4623		 * requested to shut it down. Since we haven't started,
4624		 * we INSIST that we have no pending ADB finds or
4625		 * validations.
4626		 */
4627		INSIST(atomic_load_acquire(&fctx->pending) == 0);
4628		INSIST(atomic_load_acquire(&fctx->nqueries) == 0);
4629		INSIST(ISC_LIST_EMPTY(fctx->validators));
4630		UNLOCK(&res->buckets[bucketnum].lock);
4631
4632		FCTX_ATTR_SET(fctx, FCTX_ATTR_SHUTTINGDOWN);
4633
4634		/* Detach the extra ref from dns_resolver_createfetch(). */
4635		fctx_unref(fctx);
4636		fctx_done_detach(&fctx, ISC_R_SHUTTINGDOWN);
4637		return;
4638	}
4639
4640	/*
4641	 * Normal fctx startup.
4642	 */
4643	fctx->state = fetchstate_active;
4644
4645	/*
4646	 * Reset the control event for later use in shutting
4647	 * down the fctx.
4648	 */
4649	ISC_EVENT_INIT(event, sizeof(*event), 0, NULL, DNS_EVENT_FETCHCONTROL,
4650		       fctx_doshutdown, fctx, NULL, NULL, NULL);
4651
4652	UNLOCK(&res->buckets[bucketnum].lock);
4653
	/* The timer and the first attempt are started without the lock. */
4654	result = fctx_starttimer(fctx);
4655	if (result != ISC_R_SUCCESS) {
4656		fctx_done_detach(&fctx, result);
4657	} else {
4658		fctx_try(fctx, false, false);
4659	}
4660}
4661
4662/*
4663 * Fetch Creation, Joining, and Cancellation.
4664 */
4665
4666static void
4667fctx_add_event(fetchctx_t *fctx, isc_task_t *task, const isc_sockaddr_t *client,
4668	       dns_messageid_t id, isc_taskaction_t action, void *arg,
4669	       dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
4670	       dns_fetch_t *fetch, isc_eventtype_t event_type) {
4671	dns_fetchevent_t *event = NULL;
4672
4673	FCTXTRACE("addevent");
4674
4675	/*
4676	 * We store the task we're going to send this event to in the
4677	 * sender field.  We'll make the fetch the sender when we
4678	 * actually send the event.
4679	 */
4680	isc_task_attach(task, &(isc_task_t *){ NULL });
4681	event = (dns_fetchevent_t *)isc_event_allocate(
4682		fctx->res->mctx, task, event_type, action, arg, sizeof(*event));
4683	event->result = DNS_R_SERVFAIL;
4684	event->qtype = fctx->type;
4685	event->db = NULL;
4686	event->node = NULL;
4687	event->rdataset = rdataset;
4688	event->sigrdataset = sigrdataset;
4689	event->fetch = fetch;
4690	event->client = client;
4691	event->id = id;
4692	event->foundname = dns_fixedname_initname(&event->fname);
4693
4694	/*
4695	 * Store the sigrdataset in the first event in case it is needed
4696	 * by any of the events.
4697	 */
4698	if (event->sigrdataset != NULL) {
4699		ISC_LIST_PREPEND(fctx->events, event, ev_link);
4700	} else {
4701		ISC_LIST_APPEND(fctx->events, event, ev_link);
4702	}
4703}
4704
4705static isc_result_t
4706fctx_join(fetchctx_t *fctx, isc_task_t *task, const isc_sockaddr_t *client,
4707	  dns_messageid_t id, isc_taskaction_t action, void *arg,
4708	  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
4709	  dns_fetch_t *fetch) {
4710	FCTXTRACE("join");
4711
4712	fctx_add_event(fctx, task, client, id, action, arg, rdataset,
4713		       sigrdataset, fetch, DNS_EVENT_FETCHDONE);
4714
4715	fetch->magic = DNS_FETCH_MAGIC;
4716	fctx_attach(fctx, &fetch->private);
4717
4718	return (ISC_R_SUCCESS);
4719}
4720
4721static void
4722log_ns_ttl(fetchctx_t *fctx, const char *where) {
4723	char namebuf[DNS_NAME_FORMATSIZE];
4724	char domainbuf[DNS_NAME_FORMATSIZE];
4725
4726	dns_name_format(fctx->name, namebuf, sizeof(namebuf));
4727	dns_name_format(fctx->domain, domainbuf, sizeof(domainbuf));
4728	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4729		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
4730		      "log_ns_ttl: fctx %p: %s: %s (in '%s'?): %u %u", fctx,
4731		      where, namebuf, domainbuf, fctx->ns_ttl_ok, fctx->ns_ttl);
4732}
4733
4734static void
4735fctx_expired(isc_task_t *task, isc_event_t *event) {
4736	fetchctx_t *fctx = event->ev_arg;
4737
4738	REQUIRE(VALID_FCTX(fctx));
4739
4740	UNUSED(task);
4741
4742	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
4743		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
4744		      "shut down hung fetch while resolving '%s'", fctx->info);
4745	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4746	fctx_shutdown(fctx);
4747	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
4748	isc_event_free(&event);
4749}
4750
/*
 * Allocate and initialize a fetch context for 'name'/'type' in bucket
 * 'bucketnum' of 'res', and append it to that bucket's fctx list.
 *
 * 'qc' is an optional query counter to share; when it is NULL a new
 * counter limited to res->maxqueries is created.  'client' is only
 * used to fill in fctx->clientstr.
 *
 * When 'domain' is NULL the forwarding policy for the name is looked
 * up and, unless that policy is forward-only, the query domain and
 * nameservers are taken from dns_view_findzonecut().  Otherwise
 * 'domain' and 'nameservers' are used as supplied.
 *
 * Returns ISC_R_SUCCESS with the new fctx stored in '*fctxp'.  On
 * failure everything acquired so far is released and an error is
 * returned; when fcount_incr() fails the result is the resolver's
 * configured zone-quota response.
 *
 * Requires 'fctxp' to point to a NULL pointer and the caller to hold
 * the lock for bucket 'bucketnum'.
 */
4751static isc_result_t
4752fctx_create(dns_resolver_t *res, isc_task_t *task, const dns_name_t *name,
4753	    dns_rdatatype_t type, const dns_name_t *domain,
4754	    dns_rdataset_t *nameservers, const isc_sockaddr_t *client,
4755	    unsigned int options, unsigned int bucketnum, unsigned int depth,
4756	    isc_counter_t *qc, fetchctx_t **fctxp) {
4757	fetchctx_t *fctx = NULL;
4758	isc_result_t result;
4759	isc_result_t iresult;
4760	isc_interval_t interval;
4761	unsigned int findoptions = 0;
4762	char buf[DNS_NAME_FORMATSIZE + DNS_RDATATYPE_FORMATSIZE + 1];
4763	uint_fast32_t nfctx;
4764	size_t p;
4765
4766	/*
4767	 * Caller must be holding the lock for bucket number
4768	 * 'bucketnum'.
4769	 */
4770	REQUIRE(fctxp != NULL && *fctxp == NULL);
4771
4772	fctx = isc_mem_get(res->mctx, sizeof(*fctx));
	/* Members not named in this initializer start out zeroed. */
4773	*fctx = (fetchctx_t){
4774		.type = type,
4775		.qmintype = type,
4776		.options = options,
4777		.task = task,
4778		.bucketnum = bucketnum,
4779		.dbucketnum = RES_NOBUCKET,
4780		.state = fetchstate_init,
4781		.depth = depth,
4782		.qmin_labels = 1,
4783		.fwdpolicy = dns_fwdpolicy_none,
4784		.result = ISC_R_FAILURE,
4785		.exitline = -1, /* sentinel */
4786	};
4787
4788	dns_resolver_attach(res, &fctx->res);
4789
4790	if (qc != NULL) {
4791		isc_counter_attach(qc, &fctx->qc);
4792	} else {
4793		result = isc_counter_create(res->mctx, res->maxqueries,
4794					    &fctx->qc);
4795		if (result != ISC_R_SUCCESS) {
4796			goto cleanup_fetch;
4797		}
4798	}
4799
4800	/*
4801	 * Make fctx->info point to a copy of a formatted string
4802	 * "name/type". FCTXTRACE won't work until this is done.
4803	 */
4804	dns_name_format(name, buf, sizeof(buf));
4805	p = strlcat(buf, "/", sizeof(buf));
4806	INSIST(p + DNS_RDATATYPE_FORMATSIZE < sizeof(buf));
4807	dns_rdatatype_format(type, buf + p, sizeof(buf) - p);
4808	fctx->info = isc_mem_strdup(res->mctx, buf);
4809
4810	FCTXTRACE("create");
4811
4812	isc_refcount_init(&fctx->references, 1);
4813
4814	ISC_LIST_INIT(fctx->queries);
4815	ISC_LIST_INIT(fctx->finds);
4816	ISC_LIST_INIT(fctx->altfinds);
4817	ISC_LIST_INIT(fctx->forwaddrs);
4818	ISC_LIST_INIT(fctx->altaddrs);
4819	ISC_LIST_INIT(fctx->forwarders);
4820	ISC_LIST_INIT(fctx->bad);
4821	ISC_LIST_INIT(fctx->edns);
4822	ISC_LIST_INIT(fctx->bad_edns);
4823	ISC_LIST_INIT(fctx->validators);
4824
4825	atomic_init(&fctx->attributes, 0);
4826
	/* Point each name at the fixed-name storage embedded in the fctx. */
4827	fctx->name = dns_fixedname_initname(&fctx->fname);
4828	fctx->nsname = dns_fixedname_initname(&fctx->nsfname);
4829	fctx->domain = dns_fixedname_initname(&fctx->dfname);
4830	fctx->qminname = dns_fixedname_initname(&fctx->qminfname);
4831	fctx->qmindcname = dns_fixedname_initname(&fctx->qmindcfname);
4832	fctx->fwdname = dns_fixedname_initname(&fctx->fwdfname);
4833
	/* The minimized query name starts out as the full query name. */
4834	dns_name_copy(name, fctx->name);
4835	dns_name_copy(name, fctx->qminname);
4836
4837	dns_rdataset_init(&fctx->nameservers);
4838	dns_rdataset_init(&fctx->qminrrset);
4839	dns_rdataset_init(&fctx->nsrrset);
4840
4841	TIME_NOW(&fctx->start);
4842	fctx->now = (isc_stdtime_t)fctx->start.seconds;
4843
4844	if (client != NULL) {
4845		isc_sockaddr_format(client, fctx->clientstr,
4846				    sizeof(fctx->clientstr));
4847	} else {
4848		strlcpy(fctx->clientstr, "<unknown>", sizeof(fctx->clientstr));
4849	}
4850
4851	if (domain == NULL) {
4852		dns_forwarders_t *forwarders = NULL;
4853		dns_fixedname_t fixed;
4854		dns_name_t *fname = dns_fixedname_initname(&fixed);
4855		unsigned int labels;
4856		const dns_name_t *fwdname = name;
4857		dns_name_t suffix;
4858
4859		/*
4860		 * DS records are found in the parent server. Strip one
4861		 * leading label from the name (to be used in finding
4862		 * the forwarder).
4863		 */
4864		if (dns_rdatatype_atparent(fctx->type) &&
4865		    dns_name_countlabels(name) > 1)
4866		{
4867			dns_name_init(&suffix, NULL);
4868			labels = dns_name_countlabels(name);
4869			dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
4870			fwdname = &suffix;
4871		}
4872
4873		/* Find the forwarder for this name. */
4874		result = dns_fwdtable_find(fctx->res->view->fwdtable, fwdname,
4875					   fname, &forwarders);
4876		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
4877			fctx->fwdpolicy = forwarders->fwdpolicy;
4878			dns_name_copy(fname, fctx->fwdname);
4879		}
4880
4881		if (fctx->fwdpolicy != dns_fwdpolicy_only) {
4882			dns_fixedname_t dcfixed;
4883			dns_name_t *dcname = dns_fixedname_initname(&dcfixed);
4884
4885			/*
4886			 * The caller didn't supply a query domain and
4887			 * nameservers, and we're not in forward-only
4888			 * mode, so find the best nameservers to use.
4889			 */
4890			if (dns_rdatatype_atparent(fctx->type)) {
4891				findoptions |= DNS_DBFIND_NOEXACT;
4892			}
4893			result = dns_view_findzonecut(res->view, name, fname,
4894						      dcname, fctx->now,
4895						      findoptions, true, true,
4896						      &fctx->nameservers, NULL);
4897			if (result != ISC_R_SUCCESS) {
4898				goto cleanup_nameservers;
4899			}
4900
4901			dns_name_copy(fname, fctx->domain);
4902			dns_name_copy(dcname, fctx->qmindcname);
4903			fctx->ns_ttl = fctx->nameservers.ttl;
4904			fctx->ns_ttl_ok = true;
4905		} else {
4906			/*
4907			 * We're in forward-only mode.  Set the query
4908			 * domain.
4909			 */
4910			dns_name_copy(fname, fctx->domain);
4911			dns_name_copy(fname, fctx->qmindcname);
4912			/*
4913			 * Disable query minimization
4914			 */
4915			options &= ~DNS_FETCHOPT_QMINIMIZE;
			/*
			 * NOTE(review): only the local 'options' is changed
			 * here; fctx->options was already set from the
			 * original value in the initializer above.
			 */
4916		}
4917	} else {
4918		dns_name_copy(domain, fctx->domain);
4919		dns_name_copy(domain, fctx->qmindcname);
4920		dns_rdataset_clone(nameservers, &fctx->nameservers);
4921		fctx->ns_ttl = fctx->nameservers.ttl;
4922		fctx->ns_ttl_ok = true;
4923	}
4924
4925	/*
4926	 * Are there too many simultaneous queries for this domain?
4927	 */
4928	result = fcount_incr(fctx, false);
4929	if (result != ISC_R_SUCCESS) {
4930		result = fctx->res->quotaresp[dns_quotatype_zone];
4931		inc_stats(res, dns_resstatscounter_zonequota);
4932		goto cleanup_nameservers;
4933	}
4934
4935	log_ns_ttl(fctx, "fctx_create");
4936
4937	if (!dns_name_issubdomain(fctx->name, fctx->domain)) {
4938		dns_name_format(fctx->domain, buf, sizeof(buf));
4939		UNEXPECTED_ERROR("'%s' is not subdomain of '%s'", fctx->info,
4940				 buf);
4941		result = ISC_R_UNEXPECTED;
4942		goto cleanup_fcount;
4943	}
4944
4945	dns_message_create(res->mctx, DNS_MESSAGE_INTENTRENDER,
4946			   &fctx->qmessage);
4947
4948	/*
4949	 * Compute an expiration time for the entire fetch.
4950	 */
	/*
	 * NOTE(review): the split below treats res->query_timeout as
	 * milliseconds (seconds part, then remainder scaled by 1000000
	 * for the second argument) -- confirm the unit.
	 */
4951	isc_interval_set(&interval, res->query_timeout / 1000,
4952			 res->query_timeout % 1000 * 1000000);
4953	iresult = isc_time_nowplusinterval(&fctx->expires, &interval);
4954	if (iresult != ISC_R_SUCCESS) {
4955		UNEXPECTED_ERROR("isc_time_nowplusinterval: %s",
4956				 isc_result_totext(iresult));
4957		result = ISC_R_UNEXPECTED;
4958		goto cleanup_qmessage;
4959	}
4960
4961	/*
4962	 * As a backstop, we also set a timer to stop the fetch
4963	 * if in-band netmgr timeouts don't work. It will fire two
4964	 * seconds after the fetch should have finished. (This
4965	 * should be enough of a gap to avoid the timer firing
4966	 * while a response is being processed normally.)
4967	 */
4968	isc_interval_set(&interval, 2, 0);
4969	iresult = isc_time_add(&fctx->expires, &interval, &fctx->final);
4970	if (iresult != ISC_R_SUCCESS) {
4971		UNEXPECTED_ERROR("isc_time_add: %s",
4972				 isc_result_totext(iresult));
4973		result = ISC_R_UNEXPECTED;
4974		goto cleanup_qmessage;
4975	}
4976
4977	/*
4978	 * Create an inactive timer to enforce maximum query
4979	 * lifetime. It will be made active when the fetch is
4980	 * started.
4981	 */
4982	iresult = isc_timer_create(res->timermgr, isc_timertype_inactive, NULL,
4983				   NULL, res->buckets[bucketnum].task,
4984				   fctx_expired, fctx, &fctx->timer);
4985	if (iresult != ISC_R_SUCCESS) {
4986		UNEXPECTED_ERROR("isc_timer_create: %s",
4987				 isc_result_totext(iresult));
4988		result = ISC_R_UNEXPECTED;
4989		goto cleanup_qmessage;
4990	}
4991
4992	/*
4993	 * Default retry interval initialization.  We set the interval
4994	 * now mostly so it won't be uninitialized.  It will be set to
4995	 * the correct value before a query is issued.
4996	 */
4997	isc_interval_set(&fctx->interval, 2, 0);
4998
4999	/*
5000	 * If stale answers are enabled, compute an expiration time
5001	 * after which stale data will be served, if the target RRset is
5002	 * available in cache.
5003	 */
5004	if ((options & DNS_FETCHOPT_TRYSTALE_ONTIMEOUT) != 0) {
5005		INSIST(res->view->staleanswerclienttimeout <=
5006		       (res->query_timeout - 1000));
5007		isc_interval_set(
5008			&interval, res->view->staleanswerclienttimeout / 1000,
5009			res->view->staleanswerclienttimeout % 1000 * 1000000);
5010		iresult = isc_time_nowplusinterval(&fctx->expires_try_stale,
5011						   &interval);
5012		if (iresult != ISC_R_SUCCESS) {
5013			UNEXPECTED_ERROR("isc_time_nowplusinterval: %s",
5014					 isc_result_totext(iresult));
5015			result = ISC_R_UNEXPECTED;
5016			goto cleanup_timer;
5017		}
5018	}
5019
5020	/*
5021	 * Attach to the view's cache and adb.
5022	 */
5023	dns_db_attach(res->view->cachedb, &fctx->cache);
5024	dns_adb_attach(res->view->adb, &fctx->adb);
5025	isc_mem_attach(res->mctx, &fctx->mctx);
5026
5027	ISC_LIST_INIT(fctx->events);
5028	ISC_LINK_INIT(fctx, link);
	/* No failure path remains below, so the fctx can be marked valid. */
5029	fctx->magic = FCTX_MAGIC;
5030
5031	/*
5032	 * If qname minimization is enabled we need to trim
5033	 * the name in fctx to proper length.
5034	 */
5035	if ((options & DNS_FETCHOPT_QMINIMIZE) != 0) {
5036		fctx->ip6arpaskip = (options & DNS_FETCHOPT_QMIN_SKIP_IP6A) !=
5037					    0 &&
5038				    dns_name_issubdomain(fctx->name, &ip6_arpa);
5039		fctx_minimize_qname(fctx);
5040	}
5041
5042	ISC_LIST_APPEND(res->buckets[bucketnum].fctxs, fctx, link);
5043
5044	nfctx = atomic_fetch_add_relaxed(&res->nfctx, 1);
5045	INSIST(nfctx < UINT32_MAX);
5046
5047	inc_stats(res, dns_resstatscounter_nfetch);
5048
5049	*fctxp = fctx;
5050
5051	return (ISC_R_SUCCESS);
5052
	/*
	 * Error exits: each label undoes one initialization step and
	 * falls through to the labels below it.
	 */
5053cleanup_timer:
5054	isc_timer_destroy(&fctx->timer);
5055
5056cleanup_qmessage:
5057	dns_message_detach(&fctx->qmessage);
5058
5059cleanup_fcount:
5060	fcount_decr(fctx);
5061
5062cleanup_nameservers:
5063	if (dns_rdataset_isassociated(&fctx->nameservers)) {
5064		dns_rdataset_disassociate(&fctx->nameservers);
5065	}
5066	isc_mem_free(res->mctx, fctx->info);
5067	isc_counter_detach(&fctx->qc);
5068
5069cleanup_fetch:
5070	dns_resolver_detach(&fctx->res);
5071	isc_mem_put(res->mctx, fctx, sizeof(*fctx));
5072
5073	return (result);
5074}
5075
5076/*
5077 * Handle Responses
5078 */
5079static bool
5080is_lame(fetchctx_t *fctx, dns_message_t *message) {
5081	dns_name_t *name;
5082	dns_rdataset_t *rdataset;
5083	isc_result_t result;
5084
5085	if (message->rcode != dns_rcode_noerror &&
5086	    message->rcode != dns_rcode_yxdomain &&
5087	    message->rcode != dns_rcode_nxdomain)
5088	{
5089		return (false);
5090	}
5091
5092	if (message->counts[DNS_SECTION_ANSWER] != 0) {
5093		return (false);
5094	}
5095
5096	if (message->counts[DNS_SECTION_AUTHORITY] == 0) {
5097		return (false);
5098	}
5099
5100	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
5101	while (result == ISC_R_SUCCESS) {
5102		name = NULL;
5103		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
5104		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
5105		     rdataset = ISC_LIST_NEXT(rdataset, link))
5106		{
5107			dns_namereln_t namereln;
5108			int order;
5109			unsigned int labels;
5110			if (rdataset->type != dns_rdatatype_ns) {
5111				continue;
5112			}
5113			namereln = dns_name_fullcompare(name, fctx->domain,
5114							&order, &labels);
5115			if (namereln == dns_namereln_equal &&
5116			    (message->flags & DNS_MESSAGEFLAG_AA) != 0)
5117			{
5118				return (false);
5119			}
5120			if (namereln == dns_namereln_subdomain) {
5121				return (false);
5122			}
5123			return (true);
5124		}
5125		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
5126	}
5127
5128	return (false);
5129}
5130
5131static void
5132log_lame(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo) {
5133	char namebuf[DNS_NAME_FORMATSIZE];
5134	char domainbuf[DNS_NAME_FORMATSIZE];
5135	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
5136
5137	dns_name_format(fctx->name, namebuf, sizeof(namebuf));
5138	dns_name_format(fctx->domain, domainbuf, sizeof(domainbuf));
5139	isc_sockaddr_format(&addrinfo->sockaddr, addrbuf, sizeof(addrbuf));
5140	isc_log_write(dns_lctx, DNS_LOGCATEGORY_LAME_SERVERS,
5141		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
5142		      "lame server resolving '%s' (in '%s'?): %s", namebuf,
5143		      domainbuf, addrbuf);
5144}
5145
5146static void
5147log_formerr(fetchctx_t *fctx, const char *format, ...) {
5148	char nsbuf[ISC_SOCKADDR_FORMATSIZE];
5149	char msgbuf[2048];
5150	va_list args;
5151
5152	va_start(args, format);
5153	vsnprintf(msgbuf, sizeof(msgbuf), format, args);
5154	va_end(args);
5155
5156	isc_sockaddr_format(&fctx->addrinfo->sockaddr, nsbuf, sizeof(nsbuf));
5157
5158	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5159		      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
5160		      "DNS format error from %s resolving %s for %s: %s", nsbuf,
5161		      fctx->info, fctx->clientstr, msgbuf);
5162}
5163
5164static isc_result_t
5165same_question(fetchctx_t *fctx, dns_message_t *message) {
5166	isc_result_t result;
5167	dns_name_t *name = NULL;
5168	dns_rdataset_t *rdataset = NULL;
5169
5170	/*
5171	 * Caller must be holding the fctx lock.
5172	 */
5173
5174	/*
5175	 * XXXRTH  Currently we support only one question.
5176	 */
5177	if (message->counts[DNS_SECTION_QUESTION] == 0) {
5178		if ((message->flags & DNS_MESSAGEFLAG_TC) != 0) {
5179			/*
5180			 * If TC=1 and the question section is empty, we
5181			 * accept the reply message as a truncated
5182			 * answer, to be retried over TCP.
5183			 *
5184			 * It is really a FORMERR condition, but this is
5185			 * a workaround to accept replies from some
5186			 * implementations.
5187			 *
5188			 * Because the question section matching is not
5189			 * performed, the worst that could happen is
5190			 * that an attacker who gets past the ID and
5191			 * source port checks can force the use of
5192			 * TCP. This is considered an acceptable risk.
5193			 */
5194			log_formerr(fctx, "empty question section, "
5195					  "accepting it anyway as TC=1");
5196			return (ISC_R_SUCCESS);
5197		} else {
5198			log_formerr(fctx, "empty question section");
5199			return (DNS_R_FORMERR);
5200		}
5201	} else if (message->counts[DNS_SECTION_QUESTION] > 1) {
5202		log_formerr(fctx, "too many questions");
5203		return (DNS_R_FORMERR);
5204	}
5205
5206	result = dns_message_firstname(message, DNS_SECTION_QUESTION);
5207	if (result != ISC_R_SUCCESS) {
5208		return (result);
5209	}
5210
5211	dns_message_currentname(message, DNS_SECTION_QUESTION, &name);
5212	rdataset = ISC_LIST_HEAD(name->list);
5213	INSIST(rdataset != NULL);
5214	INSIST(ISC_LIST_NEXT(rdataset, link) == NULL);
5215
5216	if (fctx->type != rdataset->type ||
5217	    fctx->res->rdclass != rdataset->rdclass ||
5218	    !dns_name_equal(fctx->name, name))
5219	{
5220		char namebuf[DNS_NAME_FORMATSIZE];
5221		char classbuf[DNS_RDATACLASS_FORMATSIZE];
5222		char typebuf[DNS_RDATATYPE_FORMATSIZE];
5223
5224		dns_name_format(name, namebuf, sizeof(namebuf));
5225		dns_rdataclass_format(rdataset->rdclass, classbuf,
5226				      sizeof(classbuf));
5227		dns_rdatatype_format(rdataset->type, typebuf, sizeof(typebuf));
5228		log_formerr(fctx, "question section mismatch: got %s/%s/%s",
5229			    namebuf, classbuf, typebuf);
5230		return (DNS_R_FORMERR);
5231	}
5232
5233	return (ISC_R_SUCCESS);
5234}
5235
5236static void
5237clone_results(fetchctx_t *fctx) {
5238	dns_fetchevent_t *event = NULL, *hevent = NULL;
5239
5240	FCTXTRACE("clone_results");
5241
5242	/*
5243	 * Set up any other events to have the same data as the first
5244	 * event.
5245	 *
5246	 * Caller must be holding the appropriate lock.
5247	 */
5248
5249	fctx->cloned = true;
5250
5251	for (event = ISC_LIST_HEAD(fctx->events); event != NULL;
5252	     event = ISC_LIST_NEXT(event, ev_link))
5253	{
5254		/* This is the the head event; keep a pointer and move
5255		 * on */
5256		if (hevent == NULL) {
5257			hevent = ISC_LIST_HEAD(fctx->events);
5258			continue;
5259		}
5260
5261		if (event->ev_type == DNS_EVENT_TRYSTALE) {
5262			/*
5263			 * We don't need to clone resulting data to this
5264			 * type of event, as its associated callback is
5265			 * only called when stale-answer-client-timeout
5266			 * triggers, and the logic in there doesn't
5267			 * expect any result as input, as it will itself
5268			 * lookup for stale data in cache to use as
5269			 * result, if any is available.
5270			 *
5271			 * Also, if we reached this point, then the
5272			 * whole fetch context is done, it will cancel
5273			 * timers, process associated callbacks of type
5274			 * DNS_EVENT_FETCHDONE, and silently remove/free
5275			 * events of type DNS_EVENT_TRYSTALE.
5276			 */
5277			continue;
5278		}
5279
5280		event->result = hevent->result;
5281		dns_name_copy(hevent->foundname, event->foundname);
5282		dns_db_attach(hevent->db, &event->db);
5283		dns_db_attachnode(hevent->db, hevent->node, &event->node);
5284
5285		INSIST(hevent->rdataset != NULL);
5286		INSIST(event->rdataset != NULL);
5287		if (dns_rdataset_isassociated(hevent->rdataset)) {
5288			dns_rdataset_clone(hevent->rdataset, event->rdataset);
5289		}
5290
5291		INSIST(!(hevent->sigrdataset == NULL &&
5292			 event->sigrdataset != NULL));
5293		if (hevent->sigrdataset != NULL &&
5294		    dns_rdataset_isassociated(hevent->sigrdataset) &&
5295		    event->sigrdataset != NULL)
5296		{
5297			dns_rdataset_clone(hevent->sigrdataset,
5298					   event->sigrdataset);
5299		}
5300	}
5301}
5302
/*
 * Predicates on an rdataset pointer 'r': each one is true when the
 * corresponding DNS_RDATASETATTR_* flag is set in r->attributes.
 */
#define CACHE(r)      (((r)->attributes & DNS_RDATASETATTR_CACHE) != 0)
#define ANSWER(r)     (((r)->attributes & DNS_RDATASETATTR_ANSWER) != 0)
#define ANSWERSIG(r)  (((r)->attributes & DNS_RDATASETATTR_ANSWERSIG) != 0)
#define EXTERNAL(r)   (((r)->attributes & DNS_RDATASETATTR_EXTERNAL) != 0)
#define CHAINING(r)   (((r)->attributes & DNS_RDATASETATTR_CHAINING) != 0)
#define CHASE(r)      (((r)->attributes & DNS_RDATASETATTR_CHASE) != 0)
#define CHECKNAMES(r) (((r)->attributes & DNS_RDATASETATTR_CHECKNAMES) != 0)
5310
5311/*
5312 * Cancel validators associated with '*fctx' if it is ready to be
5313 * destroyed (i.e., no queries waiting for it and no pending ADB finds).
5314 *
5315 * Requires:
5316 *      '*fctx' is shutting down.
5317 */
5318static void
5319maybe_cancel_validators(fetchctx_t *fctx, bool locked) {
5320	unsigned int bucketnum;
5321	dns_resolver_t *res = fctx->res;
5322	dns_validator_t *validator, *next_validator;
5323
5324	bucketnum = fctx->bucketnum;
5325	if (!locked) {
5326		LOCK(&res->buckets[bucketnum].lock);
5327	}
5328
5329	REQUIRE(SHUTTINGDOWN(fctx));
5330
5331	if (atomic_load_acquire(&fctx->pending) != 0 ||
5332	    atomic_load_acquire(&fctx->nqueries) != 0)
5333	{
5334		goto unlock;
5335	}
5336
5337	for (validator = ISC_LIST_HEAD(fctx->validators); validator != NULL;
5338	     validator = next_validator)
5339	{
5340		next_validator = ISC_LIST_NEXT(validator, link);
5341		dns_validator_cancel(validator);
5342	}
5343unlock:
5344	if (!locked) {
5345		UNLOCK(&res->buckets[bucketnum].lock);
5346	}
5347}
5348
/*
 * typemap with just RRSIG(46) and NSEC(47) bits set.
 *
 * Bitmap calculation from dns_nsec_setbit:
 *
 *					46	47
 *	shift = 7 - (type % 8);		1	0
 *	mask = 1 << shift;		0x02	0x01
 *	array[type / 8] |= mask;
 *
 * Both types land in array[5] (46 / 8 == 47 / 8 == 5), giving
 * 0x02 | 0x01 == 0x03 in the last bitmap octet.
 *
 * Window (0), bitmap length (6), and bitmap.
 */
static const unsigned char minimal_typemap[] = { 0, 6, 0, 0, 0, 0, 0, 0x03 };
5362
5363static bool
5364is_minimal_nsec(dns_rdataset_t *nsecset) {
5365	dns_rdataset_t rdataset;
5366	isc_result_t result;
5367
5368	dns_rdataset_init(&rdataset);
5369	dns_rdataset_clone(nsecset, &rdataset);
5370
5371	for (result = dns_rdataset_first(&rdataset); result == ISC_R_SUCCESS;
5372	     result = dns_rdataset_next(&rdataset))
5373	{
5374		dns_rdata_t rdata = DNS_RDATA_INIT;
5375		dns_rdata_nsec_t nsec;
5376		dns_rdataset_current(&rdataset, &rdata);
5377		result = dns_rdata_tostruct(&rdata, &nsec, NULL);
5378		RUNTIME_CHECK(result == ISC_R_SUCCESS);
5379		if (nsec.len == sizeof(minimal_typemap) &&
5380		    memcmp(nsec.typebits, minimal_typemap, nsec.len) == 0)
5381		{
5382			dns_rdataset_disassociate(&rdataset);
5383			return (true);
5384		}
5385	}
5386	dns_rdataset_disassociate(&rdataset);
5387	return (false);
5388}
5389
5390/*
5391 * If there is a SOA record in the type map then there must be a DNSKEY.
5392 */
5393static bool
5394check_soa_and_dnskey(dns_rdataset_t *nsecset) {
5395	dns_rdataset_t rdataset;
5396	isc_result_t result;
5397
5398	dns_rdataset_init(&rdataset);
5399	dns_rdataset_clone(nsecset, &rdataset);
5400
5401	for (result = dns_rdataset_first(&rdataset); result == ISC_R_SUCCESS;
5402	     result = dns_rdataset_next(&rdataset))
5403	{
5404		dns_rdata_t rdata = DNS_RDATA_INIT;
5405		dns_rdataset_current(&rdataset, &rdata);
5406		if (dns_nsec_typepresent(&rdata, dns_rdatatype_soa) &&
5407		    (!dns_nsec_typepresent(&rdata, dns_rdatatype_dnskey) ||
5408		     !dns_nsec_typepresent(&rdata, dns_rdatatype_ns)))
5409		{
5410			dns_rdataset_disassociate(&rdataset);
5411			return (false);
5412		}
5413	}
5414	dns_rdataset_disassociate(&rdataset);
5415	return (true);
5416}
5417
5418/*
5419 * Look for NSEC next name that starts with the label '\000'.
5420 */
5421static bool
5422has_000_label(dns_rdataset_t *nsecset) {
5423	dns_rdataset_t rdataset;
5424	isc_result_t result;
5425
5426	dns_rdataset_init(&rdataset);
5427	dns_rdataset_clone(nsecset, &rdataset);
5428
5429	for (result = dns_rdataset_first(&rdataset); result == ISC_R_SUCCESS;
5430	     result = dns_rdataset_next(&rdataset))
5431	{
5432		dns_rdata_t rdata = DNS_RDATA_INIT;
5433		dns_rdataset_current(&rdataset, &rdata);
5434		if (rdata.length > 1 && rdata.data[0] == 1 &&
5435		    rdata.data[1] == 0)
5436		{
5437			dns_rdataset_disassociate(&rdataset);
5438			return (true);
5439		}
5440	}
5441	dns_rdataset_disassociate(&rdataset);
5442	return (false);
5443}
5444
/*
 * The validator has finished.
 *
 * Task callback for DNS_EVENT_VALIDATORDONE.  'event' is really a
 * dns_validatorevent_t and its ev_arg is the dns_valarg_t that carries
 * the fetch context, the address info of the server that answered and
 * the response message being validated.
 *
 * Outline:
 *   - unlink and destroy the validator, free the valarg;
 *   - if the fetch is shutting down (and was not started with
 *     DNS_FETCHOPT_NOVALIDATE), drop the result;
 *   - on validation failure: update statistics, remove the data from
 *     the cache (or, for DNS_R_BROKENCHAIN on a positive answer, add it
 *     back to the cache), mark the server bad, then either start the
 *     next queued validator, finish the fetch, or try again;
 *   - on success: cache the negative or positive result, cache any
 *     secure NS/SOA/NSEC rdatasets from the authority section, add the
 *     wildcard entry if there was a noqname proof, fill in the head
 *     fetch event, clone it to the other events and finish the fetch.
 *
 * The bucket lock of the fetch context is taken and released inside
 * this function; fctx_done_detach() and fctx_try() are called with the
 * lock released because they lock the bucket themselves.
 */
static void
validated(isc_task_t *task, isc_event_t *event) {
	dns_adbaddrinfo_t *addrinfo = NULL;
	dns_dbnode_t *node = NULL;
	dns_dbnode_t *nsnode = NULL;
	dns_fetchevent_t *hevent = NULL;
	dns_name_t *name = NULL;
	dns_rdataset_t *ardataset = NULL;
	dns_rdataset_t *asigrdataset = NULL;
	dns_rdataset_t *rdataset = NULL;
	dns_rdataset_t *sigrdataset = NULL;
	dns_resolver_t *res = NULL;
	dns_valarg_t *valarg = NULL;
	dns_validatorevent_t *vevent = NULL;
	fetchctx_t *fctx = NULL;
	bool chaining;
	bool negative;
	bool sentresponse;
	isc_result_t eresult = ISC_R_SUCCESS;
	isc_result_t result = ISC_R_SUCCESS;
	isc_stdtime_t now;
	uint32_t ttl;
	unsigned options;
	uint32_t bucketnum;
	dns_fixedname_t fwild;
	dns_name_t *wild = NULL;
	dns_message_t *message = NULL;

	UNUSED(task); /* for now */

	REQUIRE(event->ev_type == DNS_EVENT_VALIDATORDONE);
	valarg = event->ev_arg;

	REQUIRE(VALID_FCTX(valarg->fctx));
	REQUIRE(!ISC_LIST_EMPTY(valarg->fctx->validators));

	/* Take over the fctx and message pointers held by the valarg. */
	fctx = valarg->fctx;
	valarg->fctx = NULL;

	FCTXTRACE("received validation completion event");

	res = fctx->res;
	addrinfo = valarg->addrinfo;

	message = valarg->message;
	valarg->message = NULL;

	vevent = (dns_validatorevent_t *)event;
	fctx->vresult = vevent->result;

	bucketnum = fctx->bucketnum;
	LOCK(&res->buckets[bucketnum].lock);
	ISC_LIST_UNLINK(fctx->validators, vevent->validator, link);
	fctx->validator = NULL;
	UNLOCK(&res->buckets[bucketnum].lock);

	/*
	 * Destroy the validator early so that we can
	 * destroy the fctx if necessary.  Save the wildcard name.
	 */
	if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL) {
		wild = dns_fixedname_initname(&fwild);
		dns_name_copy(dns_fixedname_name(&vevent->validator->wild),
			      wild);
	}
	dns_validator_destroy(&vevent->validator);
	isc_mem_put(fctx->mctx, valarg, sizeof(*valarg));

	/* No rdataset in the event means a nonexistence proof was validated. */
	negative = (vevent->rdataset == NULL);

	LOCK(&res->buckets[bucketnum].lock);
	/*
	 * 'sentresponse' is derived from DNS_FETCHOPT_NOVALIDATE: in that
	 * case the result is still wanted for the cache even when the
	 * fetch is shutting down (see below).
	 */
	sentresponse = ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0);

	/*
	 * If shutting down, ignore the results.  Check to see if we're
	 * done waiting for validator completions and ADB pending
	 * events; if so, destroy the fctx.
	 */
	if (SHUTTINGDOWN(fctx) && !sentresponse) {
		UNLOCK(&res->buckets[bucketnum].lock);
		fctx_detach(&fctx);
		goto cleanup_event;
	}

	isc_stdtime_get(&now);

	/*
	 * If chaining, we need to make sure that the right result code
	 * is returned, and that the rdatasets are bound.
	 */
	if (vevent->result == ISC_R_SUCCESS && !negative &&
	    vevent->rdataset != NULL && CHAINING(vevent->rdataset))
	{
		if (vevent->rdataset->type == dns_rdatatype_cname) {
			eresult = DNS_R_CNAME;
		} else {
			INSIST(vevent->rdataset->type == dns_rdatatype_dname);
			eresult = DNS_R_DNAME;
		}
		chaining = true;
	} else {
		chaining = false;
	}

	/*
	 * Either we're not shutting down, or we are shutting down but
	 * want to cache the result anyway (if this was a validation
	 * started by a query with cd set)
	 */

	hevent = ISC_LIST_HEAD(fctx->events);
	if (hevent != NULL) {
		if (!negative && !chaining &&
		    (fctx->type == dns_rdatatype_any ||
		     fctx->type == dns_rdatatype_rrsig ||
		     fctx->type == dns_rdatatype_sig))
		{
			/*
			 * Don't bind rdatasets; the caller
			 * will iterate the node.
			 */
		} else {
			ardataset = hevent->rdataset;
			asigrdataset = hevent->sigrdataset;
		}
	}

	if (vevent->result != ISC_R_SUCCESS) {
		FCTXTRACE("validation failed");
		inc_stats(res, dns_resstatscounter_valfail);
		fctx->valfail++;
		fctx->vresult = vevent->result;
		if (fctx->vresult != DNS_R_BROKENCHAIN) {
			/*
			 * Remove the rdataset that failed validation (and
			 * its signatures, if any) from the cache.
			 */
			result = ISC_R_NOTFOUND;
			if (vevent->rdataset != NULL) {
				result = dns_db_findnode(
					fctx->cache, vevent->name, true, &node);
			}
			if (result == ISC_R_SUCCESS) {
				(void)dns_db_deleterdataset(fctx->cache, node,
							    NULL, vevent->type,
							    0);
			}
			if (result == ISC_R_SUCCESS &&
			    vevent->sigrdataset != NULL)
			{
				(void)dns_db_deleterdataset(
					fctx->cache, node, NULL,
					dns_rdatatype_rrsig, vevent->type);
			}
			if (result == ISC_R_SUCCESS) {
				dns_db_detachnode(fctx->cache, &node);
			}
		}
		if (fctx->vresult == DNS_R_BROKENCHAIN && !negative) {
			/*
			 * Cache the data as pending for later
			 * validation.
			 */
			result = ISC_R_NOTFOUND;
			if (vevent->rdataset != NULL) {
				result = dns_db_findnode(
					fctx->cache, vevent->name, true, &node);
			}
			if (result == ISC_R_SUCCESS) {
				(void)dns_db_addrdataset(
					fctx->cache, node, NULL, now,
					vevent->rdataset, 0, NULL);
			}
			if (result == ISC_R_SUCCESS &&
			    vevent->sigrdataset != NULL)
			{
				(void)dns_db_addrdataset(
					fctx->cache, node, NULL, now,
					vevent->sigrdataset, 0, NULL);
			}
			if (result == ISC_R_SUCCESS) {
				dns_db_detachnode(fctx->cache, &node);
			}
		}
		result = fctx->vresult;
		add_bad(fctx, message, addrinfo, result, badns_validation);
		/*
		 * The message and the event are released here; 'vevent'
		 * aliases 'event' and must not be used past this point.
		 */
		dns_message_detach(&message);
		isc_event_free(&event);

		UNLOCK(&res->buckets[bucketnum].lock);
		INSIST(fctx->validator == NULL);

		/*
		 * If another validator is queued, start it; otherwise
		 * finish the fetch or try again.
		 */
		fctx->validator = ISC_LIST_HEAD(fctx->validators);
		if (fctx->validator != NULL) {
			dns_validator_send(fctx->validator);
			fctx_detach(&fctx);
		} else if (sentresponse) {
			/* Detach the extra ref that was set in valcreate() */
			fctx_unref(fctx);
			fctx_done_detach(&fctx, result); /* Locks bucket */
		} else if (result == DNS_R_BROKENCHAIN) {
			isc_result_t tresult;
			isc_time_t expire;
			isc_interval_t i;

			/*
			 * For a broken chain on a negative DNSKEY or DS
			 * response, add a bad-cache entry for the name and
			 * type, expiring after DNS_RESOLVER_BADCACHETTL.
			 */
			isc_interval_set(&i, DNS_RESOLVER_BADCACHETTL(fctx), 0);
			tresult = isc_time_nowplusinterval(&expire, &i);
			if (negative &&
			    (fctx->type == dns_rdatatype_dnskey ||
			     fctx->type == dns_rdatatype_ds) &&
			    tresult == ISC_R_SUCCESS)
			{
				dns_resolver_addbadcache(res, fctx->name,
							 fctx->type, &expire);
			}

			/* Detach the extra ref that was set in valcreate() */
			fctx_unref(fctx);
			fctx_done_detach(&fctx, result); /* Locks bucket */
		} else {
			fctx_try(fctx, true, true); /* Locks bucket */
			fctx_detach(&fctx);
		}
		return;
	}

	if (negative) {
		dns_rdatatype_t covers;
		FCTXTRACE("nonexistence validation OK");

		inc_stats(res, dns_resstatscounter_valnegsuccess);

		/*
		 * Cache DS NXDOMAIN separately to other types.
		 */
		if (message->rcode == dns_rcode_nxdomain &&
		    fctx->type != dns_rdatatype_ds)
		{
			covers = dns_rdatatype_any;
		} else {
			covers = fctx->type;
		}

		result = dns_db_findnode(fctx->cache, vevent->name, true,
					 &node);
		if (result != ISC_R_SUCCESS) {
			goto noanswer_response;
		}

		/*
		 * If we are asking for a SOA record set the cache time
		 * to zero to facilitate locating the containing zone of
		 * an arbitrary zone.
		 */
		ttl = res->view->maxncachettl;
		if (fctx->type == dns_rdatatype_soa &&
		    covers == dns_rdatatype_any && res->zero_no_soa_ttl)
		{
			ttl = 0;
		}

		result = ncache_adderesult(message, fctx->cache, node, covers,
					   now, fctx->res->view->minncachettl,
					   ttl, vevent->optout, vevent->secure,
					   ardataset, &eresult);
		if (result != ISC_R_SUCCESS) {
			goto noanswer_response;
		}
		goto answer_response;
	} else {
		inc_stats(res, dns_resstatscounter_valsuccess);
	}

	FCTXTRACE("validation OK");

	/*
	 * Attach the noqname (and closest encloser) proof to the
	 * rdataset when the validator supplied one; otherwise, for
	 * answer-trust data, look for a noqname proof in the message.
	 */
	if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL) {
		result = dns_rdataset_addnoqname(
			vevent->rdataset,
			vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF]);
		RUNTIME_CHECK(result == ISC_R_SUCCESS);
		INSIST(vevent->sigrdataset != NULL);
		vevent->sigrdataset->ttl = vevent->rdataset->ttl;
		if (vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER] != NULL) {
			result = dns_rdataset_addclosest(
				vevent->rdataset,
				vevent->proofs[DNS_VALIDATOR_CLOSESTENCLOSER]);
			RUNTIME_CHECK(result == ISC_R_SUCCESS);
		}
	} else if (vevent->rdataset->trust == dns_trust_answer &&
		   vevent->rdataset->type != dns_rdatatype_rrsig)
	{
		isc_result_t tresult;
		dns_name_t *noqname = NULL;
		tresult = findnoqname(fctx, message, vevent->name,
				      vevent->rdataset->type, &noqname);
		if (tresult == ISC_R_SUCCESS && noqname != NULL) {
			tresult = dns_rdataset_addnoqname(vevent->rdataset,
							  noqname);
			RUNTIME_CHECK(tresult == ISC_R_SUCCESS);
		}
	}

	/*
	 * The data was already cached as pending data.
	 * Re-cache it as secure and bind the cached
	 * rdatasets to the first event on the fetch
	 * event list.
	 */
	result = dns_db_findnode(fctx->cache, vevent->name, true, &node);
	if (result != ISC_R_SUCCESS) {
		goto noanswer_response;
	}

	options = 0;
	if ((fctx->options & DNS_FETCHOPT_PREFETCH) != 0) {
		options = DNS_DBADD_PREFETCH;
	}
	result = dns_db_addrdataset(fctx->cache, node, NULL, now,
				    vevent->rdataset, options, ardataset);
	if (result != ISC_R_SUCCESS && result != DNS_R_UNCHANGED) {
		goto noanswer_response;
	}
	/*
	 * If the rdataset bound by the add is a negative cache entry,
	 * report that instead and skip adding the signatures.
	 */
	if (ardataset != NULL && NEGATIVE(ardataset)) {
		if (NXDOMAIN(ardataset)) {
			eresult = DNS_R_NCACHENXDOMAIN;
		} else {
			eresult = DNS_R_NCACHENXRRSET;
		}
	} else if (vevent->sigrdataset != NULL) {
		result = dns_db_addrdataset(fctx->cache, node, NULL, now,
					    vevent->sigrdataset, options,
					    asigrdataset);
		if (result != ISC_R_SUCCESS && result != DNS_R_UNCHANGED) {
			goto noanswer_response;
		}
	}

	if (sentresponse) {
		/*
		 * If we only deferred the destroy because we wanted to
		 * cache the data, destroy now.
		 */
		dns_db_detachnode(fctx->cache, &node);
		if (SHUTTINGDOWN(fctx)) {
			maybe_cancel_validators(fctx, true);
		}
		UNLOCK(&res->buckets[bucketnum].lock);
		fctx_detach(&fctx);
		goto cleanup_event;
	}

	if (!ISC_LIST_EMPTY(fctx->validators)) {
		INSIST(!negative);
		INSIST(fctx->type == dns_rdatatype_any ||
		       fctx->type == dns_rdatatype_rrsig ||
		       fctx->type == dns_rdatatype_sig);
		/*
		 * Don't send a response yet - we have
		 * more rdatasets that still need to
		 * be validated.
		 */
		dns_db_detachnode(fctx->cache, &node);
		UNLOCK(&res->buckets[bucketnum].lock);
		dns_validator_send(ISC_LIST_HEAD(fctx->validators));
		fctx_detach(&fctx);
		goto cleanup_event;
	}

answer_response:

	/*
	 * Cache any SOA/NS/NSEC records that happened to be validated.
	 */
	result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
	while (result == ISC_R_SUCCESS) {
		name = NULL;
		dns_message_currentname(message, DNS_SECTION_AUTHORITY, &name);
		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
		     rdataset = ISC_LIST_NEXT(rdataset, link))
		{
			if ((rdataset->type != dns_rdatatype_ns &&
			     rdataset->type != dns_rdatatype_soa &&
			     rdataset->type != dns_rdatatype_nsec) ||
			    rdataset->trust != dns_trust_secure)
			{
				continue;
			}
			/* Locate the RRSIG rdataset covering this type. */
			for (sigrdataset = ISC_LIST_HEAD(name->list);
			     sigrdataset != NULL;
			     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
			{
				if (sigrdataset->type != dns_rdatatype_rrsig ||
				    sigrdataset->covers != rdataset->type)
				{
					continue;
				}
				break;
			}
			if (sigrdataset == NULL ||
			    sigrdataset->trust != dns_trust_secure)
			{
				continue;
			}

			/*
			 * Don't cache NSEC if missing NSEC or RRSIG types.
			 */
			if (rdataset->type == dns_rdatatype_nsec &&
			    !dns_nsec_requiredtypespresent(rdataset))
			{
				continue;
			}

			/*
			 * Don't cache "white lies" but do cache
			 * "black lies".
			 */
			if (rdataset->type == dns_rdatatype_nsec &&
			    !dns_name_equal(fctx->name, name) &&
			    is_minimal_nsec(rdataset))
			{
				continue;
			}

			/*
			 * Check SOA and DNSKEY consistency.
			 */
			if (rdataset->type == dns_rdatatype_nsec &&
			    !check_soa_and_dnskey(rdataset))
			{
				continue;
			}

			/*
			 * Look for \000 label in next name.
			 */
			if (rdataset->type == dns_rdatatype_nsec &&
			    has_000_label(rdataset))
			{
				continue;
			}

			result = dns_db_findnode(fctx->cache, name, true,
						 &nsnode);
			if (result != ISC_R_SUCCESS) {
				continue;
			}

			result = dns_db_addrdataset(fctx->cache, nsnode, NULL,
						    now, rdataset, 0, NULL);
			if (result == ISC_R_SUCCESS) {
				result = dns_db_addrdataset(
					fctx->cache, nsnode, NULL, now,
					sigrdataset, 0, NULL);
			}
			dns_db_detachnode(fctx->cache, &nsnode);
			if (result != ISC_R_SUCCESS) {
				continue;
			}
		}
		result = dns_message_nextname(message, DNS_SECTION_AUTHORITY);
	}

	/*
	 * Add the wild card entry.
	 */
	if (vevent->proofs[DNS_VALIDATOR_NOQNAMEPROOF] != NULL &&
	    vevent->rdataset != NULL &&
	    dns_rdataset_isassociated(vevent->rdataset) &&
	    vevent->rdataset->trust == dns_trust_secure &&
	    vevent->sigrdataset != NULL &&
	    dns_rdataset_isassociated(vevent->sigrdataset) &&
	    vevent->sigrdataset->trust == dns_trust_secure && wild != NULL)
	{
		dns_dbnode_t *wnode = NULL;

		result = dns_db_findnode(fctx->cache, wild, true, &wnode);
		if (result == ISC_R_SUCCESS) {
			result = dns_db_addrdataset(fctx->cache, wnode, NULL,
						    now, vevent->rdataset, 0,
						    NULL);
		}
		if (result == ISC_R_SUCCESS) {
			(void)dns_db_addrdataset(fctx->cache, wnode, NULL, now,
						 vevent->sigrdataset, 0, NULL);
		}
		if (wnode != NULL) {
			dns_db_detachnode(fctx->cache, &wnode);
		}
	}

	/*
	 * Failures while caching the extra authority or wildcard data
	 * above are not reported: the fetch result is reset to success.
	 */
	result = ISC_R_SUCCESS;

	/*
	 * Respond with an answer, positive or negative,
	 * as opposed to an error.  'node' must be non-NULL.
	 */

	FCTX_ATTR_SET(fctx, FCTX_ATTR_HAVEANSWER);

	if (hevent != NULL) {
		/*
		 * Negative results must be indicated in event->result.
		 */
		INSIST(hevent->rdataset != NULL);
		if (dns_rdataset_isassociated(hevent->rdataset) &&
		    NEGATIVE(hevent->rdataset))
		{
			INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
			       eresult == DNS_R_NCACHENXRRSET);
		}

		hevent->result = eresult;
		dns_name_copy(vevent->name, hevent->foundname);
		dns_db_attach(fctx->cache, &hevent->db);
		dns_db_transfernode(fctx->cache, &node, &hevent->node);
		clone_results(fctx);
	}

noanswer_response:
	if (node != NULL) {
		dns_db_detachnode(fctx->cache, &node);
	}

	UNLOCK(&res->buckets[bucketnum].lock);
	/* Detach the extra reference that was set in valcreate() */
	fctx_unref(fctx);
	fctx_done_detach(&fctx, result); /* Locks bucket. */

cleanup_event:
	/* Every path reaching this label has already released 'node'. */
	INSIST(node == NULL);
	dns_message_detach(&message);
	isc_event_free(&event);
}
5978
5979static void
5980fctx_log(void *arg, int level, const char *fmt, ...) {
5981	char msgbuf[2048];
5982	va_list args;
5983	fetchctx_t *fctx = arg;
5984
5985	va_start(args, fmt);
5986	vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
5987	va_end(args);
5988
5989	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
5990		      DNS_LOGMODULE_RESOLVER, level, "fctx %p(%s): %s", fctx,
5991		      fctx->info, msgbuf);
5992}
5993
5994static isc_result_t
5995findnoqname(fetchctx_t *fctx, dns_message_t *message, dns_name_t *name,
5996	    dns_rdatatype_t type, dns_name_t **noqnamep) {
5997	dns_rdataset_t *nrdataset, *next, *sigrdataset;
5998	dns_rdata_rrsig_t rrsig;
5999	isc_result_t result;
6000	unsigned int labels;
6001	dns_section_t section;
6002	dns_name_t *zonename;
6003	dns_fixedname_t fzonename;
6004	dns_name_t *closest;
6005	dns_fixedname_t fclosest;
6006	dns_name_t *nearest;
6007	dns_fixedname_t fnearest;
6008	dns_rdatatype_t found = dns_rdatatype_none;
6009	dns_name_t *noqname = NULL;
6010
6011	FCTXTRACE("findnoqname");
6012
6013	REQUIRE(noqnamep != NULL && *noqnamep == NULL);
6014
6015	/*
6016	 * Find the SIG for this rdataset, if we have it.
6017	 */
6018	for (sigrdataset = ISC_LIST_HEAD(name->list); sigrdataset != NULL;
6019	     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
6020	{
6021		if (sigrdataset->type == dns_rdatatype_rrsig &&
6022		    sigrdataset->covers == type)
6023		{
6024			break;
6025		}
6026	}
6027
6028	if (sigrdataset == NULL) {
6029		return (ISC_R_NOTFOUND);
6030	}
6031
6032	labels = dns_name_countlabels(name);
6033
6034	for (result = dns_rdataset_first(sigrdataset); result == ISC_R_SUCCESS;
6035	     result = dns_rdataset_next(sigrdataset))
6036	{
6037		dns_rdata_t rdata = DNS_RDATA_INIT;
6038		dns_rdataset_current(sigrdataset, &rdata);
6039		result = dns_rdata_tostruct(&rdata, &rrsig, NULL);
6040		RUNTIME_CHECK(result == ISC_R_SUCCESS);
6041		/* Wildcard has rrsig.labels < labels - 1. */
6042		if (rrsig.labels + 1U >= labels) {
6043			continue;
6044		}
6045		break;
6046	}
6047
6048	if (result == ISC_R_NOMORE) {
6049		return (ISC_R_NOTFOUND);
6050	}
6051	if (result != ISC_R_SUCCESS) {
6052		return (result);
6053	}
6054
6055	zonename = dns_fixedname_initname(&fzonename);
6056	closest = dns_fixedname_initname(&fclosest);
6057	nearest = dns_fixedname_initname(&fnearest);
6058
6059#define NXND(x) ((x) == ISC_R_SUCCESS)
6060
6061	section = DNS_SECTION_AUTHORITY;
6062	for (result = dns_message_firstname(message, section);
6063	     result == ISC_R_SUCCESS;
6064	     result = dns_message_nextname(message, section))
6065	{
6066		dns_name_t *nsec = NULL;
6067		dns_message_currentname(message, section, &nsec);
6068		for (nrdataset = ISC_LIST_HEAD(nsec->list); nrdataset != NULL;
6069		     nrdataset = next)
6070		{
6071			bool data = false, exists = false;
6072			bool optout = false, unknown = false;
6073			bool setclosest = false;
6074			bool setnearest = false;
6075
6076			next = ISC_LIST_NEXT(nrdataset, link);
6077			if (nrdataset->type != dns_rdatatype_nsec &&
6078			    nrdataset->type != dns_rdatatype_nsec3)
6079			{
6080				continue;
6081			}
6082
6083			if (nrdataset->type == dns_rdatatype_nsec &&
6084			    NXND(dns_nsec_noexistnodata(
6085				    type, name, nsec, nrdataset, &exists, &data,
6086				    NULL, fctx_log, fctx)))
6087			{
6088				if (!exists) {
6089					noqname = nsec;
6090					found = dns_rdatatype_nsec;
6091				}
6092			}
6093
6094			if (nrdataset->type == dns_rdatatype_nsec3 &&
6095			    NXND(dns_nsec3_noexistnodata(
6096				    type, name, nsec, nrdataset, zonename,
6097				    &exists, &data, &optout, &unknown,
6098				    &setclosest, &setnearest, closest, nearest,
6099				    fctx_log, fctx)))
6100			{
6101				if (!exists && setnearest) {
6102					noqname = nsec;
6103					found = dns_rdatatype_nsec3;
6104				}
6105			}
6106		}
6107	}
6108	if (result == ISC_R_NOMORE) {
6109		result = ISC_R_SUCCESS;
6110	}
6111	if (noqname != NULL) {
6112		for (sigrdataset = ISC_LIST_HEAD(noqname->list);
6113		     sigrdataset != NULL;
6114		     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
6115		{
6116			if (sigrdataset->type == dns_rdatatype_rrsig &&
6117			    sigrdataset->covers == found)
6118			{
6119				break;
6120			}
6121		}
6122		if (sigrdataset != NULL) {
6123			*noqnamep = noqname;
6124		}
6125	}
6126	return (result);
6127}
6128
6129static isc_result_t
6130cache_name(fetchctx_t *fctx, dns_name_t *name, dns_message_t *message,
6131	   dns_adbaddrinfo_t *addrinfo, isc_stdtime_t now) {
6132	dns_rdataset_t *rdataset = NULL, *sigrdataset = NULL;
6133	dns_rdataset_t *addedrdataset = NULL;
6134	dns_rdataset_t *ardataset = NULL, *asigrdataset = NULL;
6135	dns_rdataset_t *valrdataset = NULL, *valsigrdataset = NULL;
6136	dns_dbnode_t *node = NULL, **anodep = NULL;
6137	dns_db_t **adbp = NULL;
6138	dns_resolver_t *res = fctx->res;
6139	bool need_validation = false;
6140	bool secure_domain = false;
6141	bool have_answer = false;
6142	isc_result_t result, eresult = ISC_R_SUCCESS;
6143	dns_fetchevent_t *event = NULL;
6144	unsigned int options;
6145	isc_task_t *task;
6146	bool fail;
6147	unsigned int valoptions = 0;
6148	bool checknta = true;
6149
6150	FCTXTRACE("cache_name");
6151
6152	/*
6153	 * The appropriate bucket lock must be held.
6154	 */
6155	task = res->buckets[fctx->bucketnum].task;
6156
6157	/*
6158	 * Is DNSSEC validation required for this name?
6159	 */
6160	if ((fctx->options & DNS_FETCHOPT_NONTA) != 0) {
6161		valoptions |= DNS_VALIDATOR_NONTA;
6162		checknta = false;
6163	}
6164
6165	if (res->view->enablevalidation) {
6166		result = issecuredomain(res->view, name, fctx->type, now,
6167					checknta, NULL, &secure_domain);
6168		if (result != ISC_R_SUCCESS) {
6169			return (result);
6170		}
6171	}
6172
6173	if ((fctx->options & DNS_FETCHOPT_NOCDFLAG) != 0) {
6174		valoptions |= DNS_VALIDATOR_NOCDFLAG;
6175	}
6176
6177	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
6178		need_validation = false;
6179	} else {
6180		need_validation = secure_domain;
6181	}
6182
6183	if (((name->attributes & DNS_NAMEATTR_ANSWER) != 0) &&
6184	    (!need_validation))
6185	{
6186		have_answer = true;
6187		event = ISC_LIST_HEAD(fctx->events);
6188
6189		if (event != NULL) {
6190			adbp = &event->db;
6191			dns_name_copy(name, event->foundname);
6192			anodep = &event->node;
6193
6194			/*
6195			 * If this is an ANY, SIG or RRSIG query, we're
6196			 * not going to return any rdatasets, unless we
6197			 * encountered a CNAME or DNAME as "the answer".
6198			 * In this case, we're going to return
6199			 * DNS_R_CNAME or DNS_R_DNAME and we must set up
6200			 * the rdatasets.
6201			 */
6202			if ((fctx->type != dns_rdatatype_any &&
6203			     fctx->type != dns_rdatatype_rrsig &&
6204			     fctx->type != dns_rdatatype_sig) ||
6205			    (name->attributes & DNS_NAMEATTR_CHAINING) != 0)
6206			{
6207				ardataset = event->rdataset;
6208				asigrdataset = event->sigrdataset;
6209			}
6210		}
6211	}
6212
6213	/*
6214	 * Find or create the cache node.
6215	 */
6216	node = NULL;
6217	result = dns_db_findnode(fctx->cache, name, true, &node);
6218	if (result != ISC_R_SUCCESS) {
6219		return (result);
6220	}
6221
6222	/*
6223	 * Cache or validate each cacheable rdataset.
6224	 */
6225	fail = ((fctx->res->options & DNS_RESOLVER_CHECKNAMESFAIL) != 0);
6226	for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
6227	     rdataset = ISC_LIST_NEXT(rdataset, link))
6228	{
6229		if (!CACHE(rdataset)) {
6230			continue;
6231		}
6232		if (CHECKNAMES(rdataset)) {
6233			char namebuf[DNS_NAME_FORMATSIZE];
6234			char typebuf[DNS_RDATATYPE_FORMATSIZE];
6235			char classbuf[DNS_RDATATYPE_FORMATSIZE];
6236
6237			dns_name_format(name, namebuf, sizeof(namebuf));
6238			dns_rdatatype_format(rdataset->type, typebuf,
6239					     sizeof(typebuf));
6240			dns_rdataclass_format(rdataset->rdclass, classbuf,
6241					      sizeof(classbuf));
6242			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
6243				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
6244				      "check-names %s %s/%s/%s",
6245				      fail ? "failure" : "warning", namebuf,
6246				      typebuf, classbuf);
6247			if (fail) {
6248				if (ANSWER(rdataset)) {
6249					dns_db_detachnode(fctx->cache, &node);
6250					return (DNS_R_BADNAME);
6251				}
6252				continue;
6253			}
6254		}
6255
6256		/*
6257		 * Enforce the configure maximum cache TTL.
6258		 */
6259		if (rdataset->ttl > res->view->maxcachettl) {
6260			rdataset->ttl = res->view->maxcachettl;
6261		}
6262
6263		/*
6264		 * Enforce configured minimum cache TTL.
6265		 */
6266		if (rdataset->ttl < res->view->mincachettl) {
6267			rdataset->ttl = res->view->mincachettl;
6268		}
6269
6270		/*
6271		 * Mark the rdataset as being prefetch eligible.
6272		 */
6273		if (rdataset->ttl >= fctx->res->view->prefetch_eligible) {
6274			rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
6275		}
6276
6277		/*
6278		 * Find the SIG for this rdataset, if we have it.
6279		 */
6280		for (sigrdataset = ISC_LIST_HEAD(name->list);
6281		     sigrdataset != NULL;
6282		     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
6283		{
6284			if (sigrdataset->type == dns_rdatatype_rrsig &&
6285			    sigrdataset->covers == rdataset->type)
6286			{
6287				break;
6288			}
6289		}
6290
6291		/*
6292		 * If this RRset is in a secure domain, is in bailiwick,
6293		 * and is not glue, attempt DNSSEC validation.	(We do
6294		 * not attempt to validate glue or out-of-bailiwick
6295		 * data--even though there might be some performance
6296		 * benefit to doing so--because it makes it simpler and
6297		 * safer to ensure that records from a secure domain are
6298		 * only cached if validated within the context of a
6299		 * query to the domain that owns them.)
6300		 */
6301		if (secure_domain && rdataset->trust != dns_trust_glue &&
6302		    !EXTERNAL(rdataset))
6303		{
6304			dns_trust_t trust;
6305
6306			/*
6307			 * RRSIGs are validated as part of validating
6308			 * the type they cover.
6309			 */
6310			if (rdataset->type == dns_rdatatype_rrsig) {
6311				continue;
6312			}
6313
6314			if (sigrdataset == NULL && need_validation &&
6315			    !ANSWER(rdataset))
6316			{
6317				/*
6318				 * Ignore unrelated non-answer
6319				 * rdatasets that are missing
6320				 * signatures.
6321				 */
6322				continue;
6323			}
6324
6325			/*
6326			 * Normalize the rdataset and sigrdataset TTLs.
6327			 */
6328			if (sigrdataset != NULL) {
6329				rdataset->ttl = ISC_MIN(rdataset->ttl,
6330							sigrdataset->ttl);
6331				sigrdataset->ttl = rdataset->ttl;
6332			}
6333
6334			/*
6335			 * Mark the rdataset as being prefetch eligible.
6336			 */
6337			if (rdataset->ttl >= fctx->res->view->prefetch_eligible)
6338			{
6339				rdataset->attributes |=
6340					DNS_RDATASETATTR_PREFETCH;
6341			}
6342
6343			/*
6344			 * Cache this rdataset/sigrdataset pair as
6345			 * pending data.  Track whether it was
6346			 * additional or not. If this was a priming
6347			 * query, additional should be cached as glue.
6348			 */
6349			if (rdataset->trust == dns_trust_additional) {
6350				trust = dns_trust_pending_additional;
6351			} else {
6352				trust = dns_trust_pending_answer;
6353			}
6354
6355			rdataset->trust = trust;
6356			if (sigrdataset != NULL) {
6357				sigrdataset->trust = trust;
6358			}
6359			if (!need_validation || !ANSWER(rdataset)) {
6360				options = 0;
6361				if (ANSWER(rdataset) &&
6362				    rdataset->type != dns_rdatatype_rrsig)
6363				{
6364					isc_result_t tresult;
6365					dns_name_t *noqname = NULL;
6366					tresult = findnoqname(
6367						fctx, message, name,
6368						rdataset->type, &noqname);
6369					if (tresult == ISC_R_SUCCESS &&
6370					    noqname != NULL)
6371					{
6372						(void)dns_rdataset_addnoqname(
6373							rdataset, noqname);
6374					}
6375				}
6376				if ((fctx->options & DNS_FETCHOPT_PREFETCH) !=
6377				    0)
6378				{
6379					options = DNS_DBADD_PREFETCH;
6380				}
6381				if ((fctx->options & DNS_FETCHOPT_NOCACHED) !=
6382				    0)
6383				{
6384					options |= DNS_DBADD_FORCE;
6385				}
6386				addedrdataset = ardataset;
6387				result = dns_db_addrdataset(
6388					fctx->cache, node, NULL, now, rdataset,
6389					options, addedrdataset);
6390				if (result == DNS_R_UNCHANGED) {
6391					result = ISC_R_SUCCESS;
6392					if (!need_validation &&
6393					    ardataset != NULL &&
6394					    NEGATIVE(ardataset))
6395					{
6396						/*
6397						 * The answer in the
6398						 * cache is better than
6399						 * the answer we found,
6400						 * and is a negative
6401						 * cache entry, so we
6402						 * must set eresult
6403						 * appropriately.
6404						 */
6405						if (NXDOMAIN(ardataset)) {
6406							eresult =
6407								DNS_R_NCACHENXDOMAIN;
6408						} else {
6409							eresult =
6410								DNS_R_NCACHENXRRSET;
6411						}
6412						/*
6413						 * We have a negative
6414						 * response from the
6415						 * cache so don't
6416						 * attempt to add the
6417						 * RRSIG rrset.
6418						 */
6419						continue;
6420					}
6421				}
6422				if (result != ISC_R_SUCCESS) {
6423					break;
6424				}
6425				if (sigrdataset != NULL) {
6426					addedrdataset = asigrdataset;
6427					result = dns_db_addrdataset(
6428						fctx->cache, node, NULL, now,
6429						sigrdataset, options,
6430						addedrdataset);
6431					if (result == DNS_R_UNCHANGED) {
6432						result = ISC_R_SUCCESS;
6433					}
6434					if (result != ISC_R_SUCCESS) {
6435						break;
6436					}
6437				} else if (!ANSWER(rdataset)) {
6438					continue;
6439				}
6440			}
6441
6442			if (ANSWER(rdataset) && need_validation) {
6443				if (fctx->type != dns_rdatatype_any &&
6444				    fctx->type != dns_rdatatype_rrsig &&
6445				    fctx->type != dns_rdatatype_sig)
6446				{
6447					/*
6448					 * This is The Answer.  We will
6449					 * validate it, but first we
6450					 * cache the rest of the
6451					 * response - it may contain
6452					 * useful keys.
6453					 */
6454					INSIST(valrdataset == NULL &&
6455					       valsigrdataset == NULL);
6456					valrdataset = rdataset;
6457					valsigrdataset = sigrdataset;
6458				} else {
6459					/*
6460					 * This is one of (potentially)
6461					 * multiple answers to an ANY
6462					 * or SIG query.  To keep things
6463					 * simple, we just start the
6464					 * validator right away rather
6465					 * than caching first and
6466					 * having to remember which
6467					 * rdatasets needed validation.
6468					 */
6469					result = valcreate(
6470						fctx, message, addrinfo, name,
6471						rdataset->type, rdataset,
6472						sigrdataset, valoptions, task);
6473				}
6474			} else if (CHAINING(rdataset)) {
6475				if (rdataset->type == dns_rdatatype_cname) {
6476					eresult = DNS_R_CNAME;
6477				} else {
6478					INSIST(rdataset->type ==
6479					       dns_rdatatype_dname);
6480					eresult = DNS_R_DNAME;
6481				}
6482			}
6483		} else if (!EXTERNAL(rdataset)) {
6484			/*
6485			 * It's OK to cache this rdataset now.
6486			 */
6487			if (ANSWER(rdataset)) {
6488				addedrdataset = ardataset;
6489			} else if (ANSWERSIG(rdataset)) {
6490				addedrdataset = asigrdataset;
6491			} else {
6492				addedrdataset = NULL;
6493			}
6494			if (CHAINING(rdataset)) {
6495				if (rdataset->type == dns_rdatatype_cname) {
6496					eresult = DNS_R_CNAME;
6497				} else {
6498					INSIST(rdataset->type ==
6499					       dns_rdatatype_dname);
6500					eresult = DNS_R_DNAME;
6501				}
6502			}
6503			if (rdataset->trust == dns_trust_glue &&
6504			    (rdataset->type == dns_rdatatype_ns ||
6505			     (rdataset->type == dns_rdatatype_rrsig &&
6506			      rdataset->covers == dns_rdatatype_ns)))
6507			{
6508				/*
6509				 * If the trust level is
6510				 * 'dns_trust_glue' then we are adding
6511				 * data from a referral we got while
6512				 * executing the search algorithm. New
6513				 * referral data always takes precedence
6514				 * over the existing cache contents.
6515				 */
6516				options = DNS_DBADD_FORCE;
6517			} else if ((fctx->options & DNS_FETCHOPT_PREFETCH) != 0)
6518			{
6519				options = DNS_DBADD_PREFETCH;
6520			} else {
6521				options = 0;
6522			}
6523
6524			if (ANSWER(rdataset) &&
6525			    rdataset->type != dns_rdatatype_rrsig)
6526			{
6527				isc_result_t tresult;
6528				dns_name_t *noqname = NULL;
6529				tresult = findnoqname(fctx, message, name,
6530						      rdataset->type, &noqname);
6531				if (tresult == ISC_R_SUCCESS && noqname != NULL)
6532				{
6533					(void)dns_rdataset_addnoqname(rdataset,
6534								      noqname);
6535				}
6536			}
6537
6538			/*
6539			 * Now we can add the rdataset.
6540			 */
6541			result = dns_db_addrdataset(fctx->cache, node, NULL,
6542						    now, rdataset, options,
6543						    addedrdataset);
6544
6545			if (result == DNS_R_UNCHANGED) {
6546				if (ANSWER(rdataset) && ardataset != NULL &&
6547				    NEGATIVE(ardataset))
6548				{
6549					/*
6550					 * The answer in the cache is
6551					 * better than the answer we
6552					 * found, and is a negative
6553					 * cache entry, so we must set
6554					 * eresult appropriately.
6555					 */
6556					if (NXDOMAIN(ardataset)) {
6557						eresult = DNS_R_NCACHENXDOMAIN;
6558					} else {
6559						eresult = DNS_R_NCACHENXRRSET;
6560					}
6561				}
6562				result = ISC_R_SUCCESS;
6563			} else if (result != ISC_R_SUCCESS) {
6564				break;
6565			}
6566		}
6567	}
6568
6569	if (valrdataset != NULL) {
6570		dns_rdatatype_t vtype = fctx->type;
6571		if (CHAINING(valrdataset)) {
6572			if (valrdataset->type == dns_rdatatype_cname) {
6573				vtype = dns_rdatatype_cname;
6574			} else {
6575				vtype = dns_rdatatype_dname;
6576			}
6577		}
6578
6579		result = valcreate(fctx, message, addrinfo, name, vtype,
6580				   valrdataset, valsigrdataset, valoptions,
6581				   task);
6582	}
6583
6584	if (result == ISC_R_SUCCESS && have_answer) {
6585		FCTX_ATTR_SET(fctx, FCTX_ATTR_HAVEANSWER);
6586		if (event != NULL) {
6587			/*
6588			 * Negative results must be indicated in
6589			 * event->result.
6590			 */
6591			if (dns_rdataset_isassociated(event->rdataset) &&
6592			    NEGATIVE(event->rdataset))
6593			{
6594				INSIST(eresult == DNS_R_NCACHENXDOMAIN ||
6595				       eresult == DNS_R_NCACHENXRRSET);
6596			}
6597			event->result = eresult;
6598			if (adbp != NULL && *adbp != NULL) {
6599				if (anodep != NULL && *anodep != NULL) {
6600					dns_db_detachnode(*adbp, anodep);
6601				}
6602				dns_db_detach(adbp);
6603			}
6604			dns_db_attach(fctx->cache, adbp);
6605			dns_db_transfernode(fctx->cache, &node, anodep);
6606			clone_results(fctx);
6607		}
6608	}
6609
6610	if (node != NULL) {
6611		dns_db_detachnode(fctx->cache, &node);
6612	}
6613
6614	return (result);
6615}
6616
6617static isc_result_t
6618cache_message(fetchctx_t *fctx, dns_message_t *message,
6619	      dns_adbaddrinfo_t *addrinfo, isc_stdtime_t now) {
6620	isc_result_t result;
6621	dns_section_t section;
6622	dns_name_t *name;
6623
6624	FCTXTRACE("cache_message");
6625
6626	FCTX_ATTR_CLR(fctx, FCTX_ATTR_WANTCACHE);
6627
6628	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
6629
6630	for (section = DNS_SECTION_ANSWER; section <= DNS_SECTION_ADDITIONAL;
6631	     section++)
6632	{
6633		result = dns_message_firstname(message, section);
6634		while (result == ISC_R_SUCCESS) {
6635			name = NULL;
6636			dns_message_currentname(message, section, &name);
6637			if ((name->attributes & DNS_NAMEATTR_CACHE) != 0) {
6638				result = cache_name(fctx, name, message,
6639						    addrinfo, now);
6640				if (result != ISC_R_SUCCESS) {
6641					break;
6642				}
6643			}
6644			result = dns_message_nextname(message, section);
6645		}
6646		if (result != ISC_R_NOMORE) {
6647			break;
6648		}
6649	}
6650	if (result == ISC_R_NOMORE) {
6651		result = ISC_R_SUCCESS;
6652	}
6653
6654	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
6655
6656	return (result);
6657}
6658
6659/*
6660 * Do what dns_ncache_addoptout() does, and then compute an appropriate
6661 * eresult.
6662 */
6663static isc_result_t
6664ncache_adderesult(dns_message_t *message, dns_db_t *cache, dns_dbnode_t *node,
6665		  dns_rdatatype_t covers, isc_stdtime_t now, dns_ttl_t minttl,
6666		  dns_ttl_t maxttl, bool optout, bool secure,
6667		  dns_rdataset_t *ardataset, isc_result_t *eresultp) {
6668	isc_result_t result;
6669	dns_rdataset_t rdataset;
6670
6671	if (ardataset == NULL) {
6672		dns_rdataset_init(&rdataset);
6673		ardataset = &rdataset;
6674	}
6675	if (secure) {
6676		result = dns_ncache_addoptout(message, cache, node, covers, now,
6677					      minttl, maxttl, optout,
6678					      ardataset);
6679	} else {
6680		result = dns_ncache_add(message, cache, node, covers, now,
6681					minttl, maxttl, ardataset);
6682	}
6683	if (result == DNS_R_UNCHANGED || result == ISC_R_SUCCESS) {
6684		/*
6685		 * If the cache now contains a negative entry and we
6686		 * care about whether it is DNS_R_NCACHENXDOMAIN or
6687		 * DNS_R_NCACHENXRRSET then extract it.
6688		 */
6689		if (NEGATIVE(ardataset)) {
6690			/*
6691			 * The cache data is a negative cache entry.
6692			 */
6693			if (NXDOMAIN(ardataset)) {
6694				*eresultp = DNS_R_NCACHENXDOMAIN;
6695			} else {
6696				*eresultp = DNS_R_NCACHENXRRSET;
6697			}
6698		} else {
6699			/*
6700			 * Either we don't care about the nature of the
6701			 * cache rdataset (because no fetch is
6702			 * interested in the outcome), or the cache
6703			 * rdataset is not a negative cache entry.
6704			 * Whichever case it is, we can return success.
6705			 *
6706			 * XXXRTH  There's a CNAME/DNAME problem here.
6707			 */
6708			*eresultp = ISC_R_SUCCESS;
6709		}
6710		result = ISC_R_SUCCESS;
6711	}
6712	if (ardataset == &rdataset && dns_rdataset_isassociated(ardataset)) {
6713		dns_rdataset_disassociate(ardataset);
6714	}
6715
6716	return (result);
6717}
6718
6719static isc_result_t
6720ncache_message(fetchctx_t *fctx, dns_message_t *message,
6721	       dns_adbaddrinfo_t *addrinfo, dns_rdatatype_t covers,
6722	       isc_stdtime_t now) {
6723	isc_result_t result, eresult = ISC_R_SUCCESS;
6724	dns_name_t *name = fctx->name;
6725	dns_resolver_t *res = fctx->res;
6726	dns_db_t **adbp = NULL;
6727	dns_dbnode_t *node = NULL, **anodep = NULL;
6728	dns_rdataset_t *ardataset = NULL;
6729	bool need_validation = false, secure_domain = false;
6730	dns_fetchevent_t *event = NULL;
6731	uint32_t ttl;
6732	unsigned int valoptions = 0;
6733	bool checknta = true;
6734
6735	FCTXTRACE("ncache_message");
6736
6737	FCTX_ATTR_CLR(fctx, FCTX_ATTR_WANTNCACHE);
6738
6739	POST(need_validation);
6740
6741	/*
6742	 * XXXMPA remove when we follow cnames and adjust the setting
6743	 * of FCTX_ATTR_WANTNCACHE in rctx_answer_none().
6744	 */
6745	INSIST(message->counts[DNS_SECTION_ANSWER] == 0);
6746
6747	/*
6748	 * Is DNSSEC validation required for this name?
6749	 */
6750	if ((fctx->options & DNS_FETCHOPT_NONTA) != 0) {
6751		valoptions |= DNS_VALIDATOR_NONTA;
6752		checknta = false;
6753	}
6754
6755	if (fctx->res->view->enablevalidation) {
6756		result = issecuredomain(res->view, name, fctx->type, now,
6757					checknta, NULL, &secure_domain);
6758		if (result != ISC_R_SUCCESS) {
6759			return (result);
6760		}
6761	}
6762
6763	if ((fctx->options & DNS_FETCHOPT_NOCDFLAG) != 0) {
6764		valoptions |= DNS_VALIDATOR_NOCDFLAG;
6765	}
6766
6767	if ((fctx->options & DNS_FETCHOPT_NOVALIDATE) != 0) {
6768		need_validation = false;
6769	} else {
6770		need_validation = secure_domain;
6771	}
6772
6773	if (secure_domain) {
6774		/*
6775		 * Mark all rdatasets as pending.
6776		 */
6777		result = dns_message_firstname(message, DNS_SECTION_AUTHORITY);
6778		while (result == ISC_R_SUCCESS) {
6779			dns_rdataset_t *trdataset = NULL;
6780			dns_name_t *tname = NULL;
6781
6782			dns_message_currentname(message, DNS_SECTION_AUTHORITY,
6783						&tname);
6784			for (trdataset = ISC_LIST_HEAD(tname->list);
6785			     trdataset != NULL;
6786			     trdataset = ISC_LIST_NEXT(trdataset, link))
6787			{
6788				trdataset->trust = dns_trust_pending_answer;
6789			}
6790			result = dns_message_nextname(message,
6791						      DNS_SECTION_AUTHORITY);
6792		}
6793		if (result != ISC_R_NOMORE) {
6794			return (result);
6795		}
6796	}
6797
6798	if (need_validation) {
6799		/*
6800		 * Do negative response validation.
6801		 */
6802		result = valcreate(fctx, message, addrinfo, name, fctx->type,
6803				   NULL, NULL, valoptions,
6804				   res->buckets[fctx->bucketnum].task);
6805		/*
6806		 * If validation is necessary, return now.  Otherwise
6807		 * continue to process the message, letting the
6808		 * validation complete in its own good time.
6809		 */
6810		return (result);
6811	}
6812
6813	LOCK(&res->buckets[fctx->bucketnum].lock);
6814
6815	if (!HAVE_ANSWER(fctx)) {
6816		event = ISC_LIST_HEAD(fctx->events);
6817		if (event != NULL) {
6818			adbp = &event->db;
6819			dns_name_copy(name, event->foundname);
6820			anodep = &event->node;
6821			ardataset = event->rdataset;
6822		}
6823	}
6824
6825	result = dns_db_findnode(fctx->cache, name, true, &node);
6826	if (result != ISC_R_SUCCESS) {
6827		goto unlock;
6828	}
6829
6830	/*
6831	 * If we are asking for a SOA record set the cache time
6832	 * to zero to facilitate locating the containing zone of
6833	 * a arbitrary zone.
6834	 */
6835	ttl = fctx->res->view->maxncachettl;
6836	if (fctx->type == dns_rdatatype_soa && covers == dns_rdatatype_any &&
6837	    fctx->res->zero_no_soa_ttl)
6838	{
6839		ttl = 0;
6840	}
6841
6842	result = ncache_adderesult(message, fctx->cache, node, covers, now,
6843				   fctx->res->view->minncachettl, ttl, false,
6844				   false, ardataset, &eresult);
6845	if (result != ISC_R_SUCCESS) {
6846		goto unlock;
6847	}
6848
6849	if (!HAVE_ANSWER(fctx)) {
6850		FCTX_ATTR_SET(fctx, FCTX_ATTR_HAVEANSWER);
6851		if (event != NULL) {
6852			event->result = eresult;
6853			if (adbp != NULL && *adbp != NULL) {
6854				if (anodep != NULL && *anodep != NULL) {
6855					dns_db_detachnode(*adbp, anodep);
6856				}
6857				dns_db_detach(adbp);
6858			}
6859			dns_db_attach(fctx->cache, adbp);
6860			dns_db_transfernode(fctx->cache, &node, anodep);
6861			clone_results(fctx);
6862		}
6863	}
6864
6865unlock:
6866	UNLOCK(&res->buckets[fctx->bucketnum].lock);
6867
6868	if (node != NULL) {
6869		dns_db_detachnode(fctx->cache, &node);
6870	}
6871
6872	return (result);
6873}
6874
6875static void
6876mark_related(dns_name_t *name, dns_rdataset_t *rdataset, bool external,
6877	     bool gluing) {
6878	name->attributes |= DNS_NAMEATTR_CACHE;
6879	if (gluing) {
6880		rdataset->trust = dns_trust_glue;
6881		/*
6882		 * Glue with 0 TTL causes problems.  We force the TTL to
6883		 * 1 second to prevent this.
6884		 */
6885		if (rdataset->ttl == 0) {
6886			rdataset->ttl = 1;
6887		}
6888	} else {
6889		rdataset->trust = dns_trust_additional;
6890	}
6891	/*
6892	 * Avoid infinite loops by only marking new rdatasets.
6893	 */
6894	if (!CACHE(rdataset)) {
6895		name->attributes |= DNS_NAMEATTR_CHASE;
6896		rdataset->attributes |= DNS_RDATASETATTR_CHASE;
6897	}
6898	rdataset->attributes |= DNS_RDATASETATTR_CACHE;
6899	if (external) {
6900		rdataset->attributes |= DNS_RDATASETATTR_EXTERNAL;
6901	}
6902}
6903
6904/*
6905 * Returns true if 'name' is external to the namespace for which
6906 * the server being queried can answer, either because it's not a
6907 * subdomain or because it's below a forward declaration or a
6908 * locally served zone.
6909 */
6910static inline bool
6911name_external(const dns_name_t *name, dns_rdatatype_t type, fetchctx_t *fctx) {
6912	isc_result_t result;
6913	dns_forwarders_t *forwarders = NULL;
6914	dns_fixedname_t fixed, zfixed;
6915	dns_name_t *fname = dns_fixedname_initname(&fixed);
6916	dns_name_t *zfname = dns_fixedname_initname(&zfixed);
6917	dns_name_t *apex = NULL;
6918	dns_name_t suffix;
6919	dns_zone_t *zone = NULL;
6920	unsigned int labels;
6921	dns_namereln_t rel;
6922
6923	apex = (ISDUALSTACK(fctx->addrinfo) || !ISFORWARDER(fctx->addrinfo))
6924		       ? fctx->domain
6925		       : fctx->fwdname;
6926
6927	/*
6928	 * The name is outside the queried namespace.
6929	 */
6930	rel = dns_name_fullcompare(name, apex, &(int){ 0 },
6931				   &(unsigned int){ 0U });
6932	if (rel != dns_namereln_subdomain && rel != dns_namereln_equal) {
6933		return (true);
6934	}
6935
6936	/*
6937	 * If the record lives in the parent zone, adjust the name so we
6938	 * look for the correct zone or forward clause.
6939	 */
6940	labels = dns_name_countlabels(name);
6941	if (dns_rdatatype_atparent(type) && labels > 1U) {
6942		dns_name_init(&suffix, NULL);
6943		dns_name_getlabelsequence(name, 1, labels - 1, &suffix);
6944		name = &suffix;
6945	} else if (rel == dns_namereln_equal) {
6946		/* If 'name' is 'apex', no further checking is needed. */
6947		return (false);
6948	}
6949
6950	/*
6951	 * If there is a locally served zone between 'apex' and 'name'
6952	 * then don't cache.
6953	 */
6954	LOCK(&fctx->res->view->lock);
6955	if (fctx->res->view->zonetable != NULL) {
6956		unsigned int options = DNS_ZTFIND_NOEXACT | DNS_ZTFIND_MIRROR;
6957		result = dns_zt_find(fctx->res->view->zonetable, name, options,
6958				     zfname, &zone);
6959		if (zone != NULL) {
6960			dns_zone_detach(&zone);
6961		}
6962		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
6963			if (dns_name_fullcompare(zfname, apex, &(int){ 0 },
6964						 &(unsigned int){ 0U }) ==
6965			    dns_namereln_subdomain)
6966			{
6967				UNLOCK(&fctx->res->view->lock);
6968				return (true);
6969			}
6970		}
6971	}
6972	UNLOCK(&fctx->res->view->lock);
6973
6974	/*
6975	 * Look for a forward declaration below 'name'.
6976	 */
6977	result = dns_fwdtable_find(fctx->res->view->fwdtable, name, fname,
6978				   &forwarders);
6979
6980	if (ISFORWARDER(fctx->addrinfo)) {
6981		/*
6982		 * See if the forwarder declaration is better.
6983		 */
6984		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
6985			return (!dns_name_equal(fname, fctx->fwdname));
6986		}
6987
6988		/*
6989		 * If the lookup failed, the configuration must have
6990		 * changed: play it safe and don't cache.
6991		 */
6992		return (true);
6993	} else if ((result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) &&
6994		   forwarders->fwdpolicy == dns_fwdpolicy_only &&
6995		   !ISC_LIST_EMPTY(forwarders->fwdrs))
6996	{
6997		/*
6998		 * If 'name' is covered by a 'forward only' clause then we
6999		 * can't cache this repsonse.
7000		 */
7001		return (true);
7002	}
7003
7004	return (false);
7005}
7006
7007static isc_result_t
7008check_section(void *arg, const dns_name_t *addname, dns_rdatatype_t type,
7009	      dns_rdataset_t *found, dns_section_t section) {
7010	respctx_t *rctx = arg;
7011	fetchctx_t *fctx = rctx->fctx;
7012	isc_result_t result;
7013	dns_name_t *name = NULL;
7014	dns_rdataset_t *rdataset = NULL;
7015	bool external;
7016	dns_rdatatype_t rtype;
7017	bool gluing;
7018
7019	REQUIRE(VALID_FCTX(fctx));
7020
7021#if CHECK_FOR_GLUE_IN_ANSWER
7022	if (section == DNS_SECTION_ANSWER && type != dns_rdatatype_a) {
7023		return (ISC_R_SUCCESS);
7024	}
7025#endif /* if CHECK_FOR_GLUE_IN_ANSWER */
7026
7027	gluing = (GLUING(fctx) || (fctx->type == dns_rdatatype_ns &&
7028				   dns_name_equal(fctx->name, dns_rootname)));
7029
7030	result = dns_message_findname(rctx->query->rmessage, section, addname,
7031				      dns_rdatatype_any, 0, &name, NULL);
7032	if (result == ISC_R_SUCCESS) {
7033		external = name_external(name, type, fctx);
7034		if (type == dns_rdatatype_a) {
7035			for (rdataset = ISC_LIST_HEAD(name->list);
7036			     rdataset != NULL;
7037			     rdataset = ISC_LIST_NEXT(rdataset, link))
7038			{
7039				if (rdataset->type == dns_rdatatype_rrsig) {
7040					rtype = rdataset->covers;
7041				} else {
7042					rtype = rdataset->type;
7043				}
7044				if (rtype == dns_rdatatype_a ||
7045				    rtype == dns_rdatatype_aaaa)
7046				{
7047					mark_related(name, rdataset, external,
7048						     gluing);
7049				}
7050			}
7051		} else {
7052			result = dns_message_findtype(name, type, 0, &rdataset);
7053			if (result == ISC_R_SUCCESS) {
7054				mark_related(name, rdataset, external, gluing);
7055				if (found != NULL) {
7056					dns_rdataset_clone(rdataset, found);
7057				}
7058				/*
7059				 * Do we have its SIG too?
7060				 */
7061				rdataset = NULL;
7062				result = dns_message_findtype(
7063					name, dns_rdatatype_rrsig, type,
7064					&rdataset);
7065				if (result == ISC_R_SUCCESS) {
7066					mark_related(name, rdataset, external,
7067						     gluing);
7068				}
7069			}
7070		}
7071	}
7072
7073	return (ISC_R_SUCCESS);
7074}
7075
7076static isc_result_t
7077check_related(void *arg, const dns_name_t *addname, dns_rdatatype_t type,
7078	      dns_rdataset_t *found) {
7079	return (check_section(arg, addname, type, found,
7080			      DNS_SECTION_ADDITIONAL));
7081}
7082
/*
 * Default CHECK_FOR_GLUE_IN_ANSWER to 0 unless it has already been
 * defined.  When non-zero, check_answer() is compiled in and
 * check_section() skips non-A lookups in the answer section.
 *
 * NOTE(review): check_section() tests this macro with '#if' earlier in
 * the file, before this default is established.  An undefined macro
 * evaluates to 0 in '#if', so the outcome matches this default.
 */
#ifndef CHECK_FOR_GLUE_IN_ANSWER
#define CHECK_FOR_GLUE_IN_ANSWER 0
#endif /* ifndef CHECK_FOR_GLUE_IN_ANSWER */
7086
#if CHECK_FOR_GLUE_IN_ANSWER
/*
 * Thin wrapper around check_section() that always inspects the answer
 * section.  Only compiled when CHECK_FOR_GLUE_IN_ANSWER is non-zero.
 */
static isc_result_t
check_answer(void *arg, const dns_name_t *addname, dns_rdatatype_t type,
	     dns_rdataset_t *found) {
	const dns_section_t section = DNS_SECTION_ANSWER;

	return (check_section(arg, addname, type, found, section));
}
#endif /* if CHECK_FOR_GLUE_IN_ANSWER */
7094
7095static bool
7096is_answeraddress_allowed(dns_view_t *view, dns_name_t *name,
7097			 dns_rdataset_t *rdataset) {
7098	isc_result_t result;
7099	dns_rdata_t rdata = DNS_RDATA_INIT;
7100	struct in_addr ina;
7101	struct in6_addr in6a;
7102	isc_netaddr_t netaddr;
7103	char addrbuf[ISC_NETADDR_FORMATSIZE];
7104	char namebuf[DNS_NAME_FORMATSIZE];
7105	char classbuf[64];
7106	char typebuf[64];
7107	int match;
7108
7109	/* By default, we allow any addresses. */
7110	if (view->denyansweracl == NULL) {
7111		return (true);
7112	}
7113
7114	/*
7115	 * If the owner name matches one in the exclusion list, either
7116	 * exactly or partially, allow it.
7117	 */
7118	if (view->answeracl_exclude != NULL) {
7119		dns_rbtnode_t *node = NULL;
7120
7121		result = dns_rbt_findnode(view->answeracl_exclude, name, NULL,
7122					  &node, NULL, 0, NULL, NULL);
7123
7124		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7125			return (true);
7126		}
7127	}
7128
7129	/*
7130	 * Otherwise, search the filter list for a match for each
7131	 * address record.  If a match is found, the address should be
7132	 * filtered, so should the entire answer.
7133	 */
7134	for (result = dns_rdataset_first(rdataset); result == ISC_R_SUCCESS;
7135	     result = dns_rdataset_next(rdataset))
7136	{
7137		dns_rdata_reset(&rdata);
7138		dns_rdataset_current(rdataset, &rdata);
7139		if (rdataset->type == dns_rdatatype_a) {
7140			INSIST(rdata.length == sizeof(ina.s_addr));
7141			memmove(&ina.s_addr, rdata.data, sizeof(ina.s_addr));
7142			isc_netaddr_fromin(&netaddr, &ina);
7143		} else {
7144			INSIST(rdata.length == sizeof(in6a.s6_addr));
7145			memmove(in6a.s6_addr, rdata.data, sizeof(in6a.s6_addr));
7146			isc_netaddr_fromin6(&netaddr, &in6a);
7147		}
7148
7149		result = dns_acl_match(&netaddr, NULL, view->denyansweracl,
7150				       view->aclenv, &match, NULL);
7151		if (result == ISC_R_SUCCESS && match > 0) {
7152			isc_netaddr_format(&netaddr, addrbuf, sizeof(addrbuf));
7153			dns_name_format(name, namebuf, sizeof(namebuf));
7154			dns_rdatatype_format(rdataset->type, typebuf,
7155					     sizeof(typebuf));
7156			dns_rdataclass_format(rdataset->rdclass, classbuf,
7157					      sizeof(classbuf));
7158			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7159				      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
7160				      "answer address %s denied for %s/%s/%s",
7161				      addrbuf, namebuf, typebuf, classbuf);
7162			return (false);
7163		}
7164	}
7165
7166	return (true);
7167}
7168
7169static bool
7170is_answertarget_allowed(fetchctx_t *fctx, dns_name_t *qname, dns_name_t *rname,
7171			dns_rdataset_t *rdataset, bool *chainingp) {
7172	isc_result_t result;
7173	dns_rbtnode_t *node = NULL;
7174	char qnamebuf[DNS_NAME_FORMATSIZE];
7175	char tnamebuf[DNS_NAME_FORMATSIZE];
7176	char classbuf[64];
7177	char typebuf[64];
7178	dns_name_t *tname = NULL;
7179	dns_rdata_cname_t cname;
7180	dns_rdata_dname_t dname;
7181	dns_view_t *view = fctx->res->view;
7182	dns_rdata_t rdata = DNS_RDATA_INIT;
7183	unsigned int nlabels;
7184	dns_fixedname_t fixed;
7185	dns_name_t prefix;
7186	int order;
7187
7188	REQUIRE(rdataset != NULL);
7189	REQUIRE(rdataset->type == dns_rdatatype_cname ||
7190		rdataset->type == dns_rdatatype_dname);
7191
7192	/*
7193	 * By default, we allow any target name.
7194	 * If newqname != NULL we also need to extract the newqname.
7195	 */
7196	if (chainingp == NULL && view->denyanswernames == NULL) {
7197		return (true);
7198	}
7199
7200	result = dns_rdataset_first(rdataset);
7201	RUNTIME_CHECK(result == ISC_R_SUCCESS);
7202	dns_rdataset_current(rdataset, &rdata);
7203	switch (rdataset->type) {
7204	case dns_rdatatype_cname:
7205		result = dns_rdata_tostruct(&rdata, &cname, NULL);
7206		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7207		tname = &cname.cname;
7208		break;
7209	case dns_rdatatype_dname:
7210		if (dns_name_fullcompare(qname, rname, &order, &nlabels) !=
7211		    dns_namereln_subdomain)
7212		{
7213			return (true);
7214		}
7215		result = dns_rdata_tostruct(&rdata, &dname, NULL);
7216		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7217		dns_name_init(&prefix, NULL);
7218		tname = dns_fixedname_initname(&fixed);
7219		nlabels = dns_name_countlabels(rname);
7220		dns_name_split(qname, nlabels, &prefix, NULL);
7221		result = dns_name_concatenate(&prefix, &dname.dname, tname,
7222					      NULL);
7223		if (result == DNS_R_NAMETOOLONG) {
7224			if (chainingp != NULL) {
7225				*chainingp = true;
7226			}
7227			return (true);
7228		}
7229		RUNTIME_CHECK(result == ISC_R_SUCCESS);
7230		break;
7231	default:
7232		UNREACHABLE();
7233	}
7234
7235	if (chainingp != NULL) {
7236		*chainingp = true;
7237	}
7238
7239	if (view->denyanswernames == NULL) {
7240		return (true);
7241	}
7242
7243	/*
7244	 * If the owner name matches one in the exclusion list, either
7245	 * exactly or partially, allow it.
7246	 */
7247	if (view->answernames_exclude != NULL) {
7248		result = dns_rbt_findnode(view->answernames_exclude, qname,
7249					  NULL, &node, NULL, 0, NULL, NULL);
7250		if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7251			return (true);
7252		}
7253	}
7254
7255	/*
7256	 * If the target name is a subdomain of the search domain, allow
7257	 * it.
7258	 *
7259	 * Note that if BIND is configured as a forwarding DNS server,
7260	 * the search domain will always match the root domain ("."), so
7261	 * we must also check whether forwarding is enabled so that
7262	 * filters can be applied; see GL #1574.
7263	 */
7264	if (!fctx->forwarding && dns_name_issubdomain(tname, fctx->domain)) {
7265		return (true);
7266	}
7267
7268	/*
7269	 * Otherwise, apply filters.
7270	 */
7271	result = dns_rbt_findnode(view->denyanswernames, tname, NULL, &node,
7272				  NULL, 0, NULL, NULL);
7273	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
7274		dns_name_format(qname, qnamebuf, sizeof(qnamebuf));
7275		dns_name_format(tname, tnamebuf, sizeof(tnamebuf));
7276		dns_rdatatype_format(rdataset->type, typebuf, sizeof(typebuf));
7277		dns_rdataclass_format(view->rdclass, classbuf,
7278				      sizeof(classbuf));
7279		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7280			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
7281			      "%s target %s denied for %s/%s", typebuf,
7282			      tnamebuf, qnamebuf, classbuf);
7283		return (false);
7284	}
7285
7286	return (true);
7287}
7288
7289static void
7290trim_ns_ttl(fetchctx_t *fctx, dns_name_t *name, dns_rdataset_t *rdataset) {
7291	char ns_namebuf[DNS_NAME_FORMATSIZE];
7292	char namebuf[DNS_NAME_FORMATSIZE];
7293	char tbuf[DNS_RDATATYPE_FORMATSIZE];
7294
7295	if (fctx->ns_ttl_ok && rdataset->ttl > fctx->ns_ttl) {
7296		dns_name_format(name, ns_namebuf, sizeof(ns_namebuf));
7297		dns_name_format(fctx->name, namebuf, sizeof(namebuf));
7298		dns_rdatatype_format(fctx->type, tbuf, sizeof(tbuf));
7299
7300		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7301			      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(10),
7302			      "fctx %p: trimming ttl of %s/NS for %s/%s: "
7303			      "%u -> %u",
7304			      fctx, ns_namebuf, namebuf, tbuf, rdataset->ttl,
7305			      fctx->ns_ttl);
7306		rdataset->ttl = fctx->ns_ttl;
7307	}
7308}
7309
7310static bool
7311validinanswer(dns_rdataset_t *rdataset, fetchctx_t *fctx) {
7312	if (rdataset->type == dns_rdatatype_nsec3) {
7313		/*
7314		 * NSEC3 records are not allowed to
7315		 * appear in the answer section.
7316		 */
7317		log_formerr(fctx, "NSEC3 in answer");
7318		return (false);
7319	}
7320	if (rdataset->type == dns_rdatatype_tkey) {
7321		/*
7322		 * TKEY is not a valid record in a
7323		 * response to any query we can make.
7324		 */
7325		log_formerr(fctx, "TKEY in answer");
7326		return (false);
7327	}
7328	if (rdataset->rdclass != fctx->res->rdclass) {
7329		log_formerr(fctx, "Mismatched class in answer");
7330		return (false);
7331	}
7332	return (true);
7333}
7334
7335static void
7336fctx__attach(fetchctx_t *fctx, fetchctx_t **fctxp, const char *file,
7337	     unsigned int line, const char *func) {
7338	REQUIRE(VALID_FCTX(fctx));
7339	REQUIRE(fctxp != NULL && *fctxp == NULL);
7340	uint_fast32_t refs = isc_refcount_increment(&fctx->references);
7341
7342#ifdef FCTX_TRACE
7343	fprintf(stderr, "%s:%s:%u:%s(%p, %p) -> %" PRIuFAST32 "\n", func, file,
7344		line, __func__, fctx, fctxp, refs + 1);
7345#else
7346	UNUSED(refs);
7347	UNUSED(file);
7348	UNUSED(line);
7349	UNUSED(func);
7350#endif
7351
7352	*fctxp = fctx;
7353}
7354
7355static void
7356fctx__detach(fetchctx_t **fctxp, const char *file, unsigned int line,
7357	     const char *func) {
7358	fetchctx_t *fctx = NULL;
7359	uint_fast32_t refs;
7360
7361	REQUIRE(fctxp != NULL && VALID_FCTX(*fctxp));
7362
7363	fctx = *fctxp;
7364	*fctxp = NULL;
7365
7366	refs = isc_refcount_decrement(&fctx->references);
7367
7368#ifdef FCTX_TRACE
7369	fprintf(stderr, "%s:%s:%u:%s(%p, %p) -> %" PRIuFAST32 "\n", func, file,
7370		line, __func__, fctx, fctxp, refs - 1);
7371#else
7372	UNUSED(refs);
7373	UNUSED(file);
7374	UNUSED(line);
7375	UNUSED(func);
7376#endif
7377
7378	if (refs == 1) {
7379		fctx_destroy(fctx, true);
7380	}
7381}
7382
7383static void
7384resume_dslookup(isc_task_t *task, isc_event_t *event) {
7385	isc_result_t result;
7386	dns_fetchevent_t *fevent = (dns_fetchevent_t *)event;
7387	fetchctx_t *fctx = event->ev_arg;
7388	dns_resolver_t *res = NULL;
7389	dns_rdataset_t *frdataset = NULL, *nsrdataset = NULL;
7390	dns_rdataset_t nameservers;
7391	dns_fixedname_t fixed;
7392	dns_name_t *domain = NULL;
7393	unsigned int n;
7394
7395	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
7396
7397	REQUIRE(VALID_FCTX(fctx));
7398	res = fctx->res;
7399
7400	UNUSED(task);
7401	FCTXTRACE("resume_dslookup");
7402
7403	if (fevent->node != NULL) {
7404		dns_db_detachnode(fevent->db, &fevent->node);
7405	}
7406	if (fevent->db != NULL) {
7407		dns_db_detach(&fevent->db);
7408	}
7409
7410	/* Preserve data from fevent before freeing it. */
7411	frdataset = fevent->rdataset;
7412	result = fevent->result;
7413	isc_event_free(&event);
7414
7415	LOCK(&res->buckets[fctx->bucketnum].lock);
7416	if (SHUTTINGDOWN(fctx)) {
7417		maybe_cancel_validators(fctx, true);
7418		UNLOCK(&res->buckets[fctx->bucketnum].lock);
7419
7420		if (dns_rdataset_isassociated(frdataset)) {
7421			dns_rdataset_disassociate(frdataset);
7422		}
7423
7424		dns_resolver_destroyfetch(&fctx->nsfetch);
7425		fctx_detach(&fctx);
7426		return;
7427	}
7428	UNLOCK(&res->buckets[fctx->bucketnum].lock);
7429
7430	/*
7431	 * Detach the extra reference that was set in rctx_chaseds()
7432	 * or a prior iteration of this function.
7433	 */
7434	fctx_unref(fctx);
7435
7436	switch (result) {
7437	case ISC_R_CANCELED:
7438		dns_resolver_destroyfetch(&fctx->nsfetch);
7439		if (dns_rdataset_isassociated(frdataset)) {
7440			dns_rdataset_disassociate(frdataset);
7441		}
7442		fctx_done_detach(&fctx, ISC_R_CANCELED);
7443		break;
7444
7445	case ISC_R_SUCCESS:
7446		FCTXTRACE("resuming DS lookup");
7447
7448		dns_resolver_destroyfetch(&fctx->nsfetch);
7449		if (dns_rdataset_isassociated(&fctx->nameservers)) {
7450			dns_rdataset_disassociate(&fctx->nameservers);
7451		}
7452
7453		dns_rdataset_clone(frdataset, &fctx->nameservers);
7454		if (dns_rdataset_isassociated(frdataset)) {
7455			dns_rdataset_disassociate(frdataset);
7456		}
7457		fctx->ns_ttl = fctx->nameservers.ttl;
7458		fctx->ns_ttl_ok = true;
7459		log_ns_ttl(fctx, "resume_dslookup");
7460
7461		fcount_decr(fctx);
7462		dns_name_copy(fctx->nsname, fctx->domain);
7463		result = fcount_incr(fctx, true);
7464		if (result == ISC_R_SUCCESS) {
7465			/*
7466			 * Try again.
7467			 */
7468			fctx_try(fctx, true, false);
7469		} else {
7470			fctx_done_detach(&fctx, DNS_R_SERVFAIL);
7471		}
7472		break;
7473
7474	default:
7475		if (dns_rdataset_isassociated(frdataset)) {
7476			dns_rdataset_disassociate(frdataset);
7477		}
7478
7479		/*
7480		 * Get domain from fctx->nsfetch before we destroy it.
7481		 */
7482		domain = dns_fixedname_initname(&fixed);
7483		dns_name_copy(fctx->nsfetch->private->domain, domain);
7484
7485		/*
7486		 * If the chain of resume_dslookup() invocations managed to
7487		 * chop off enough labels from the original DS owner name to
7488		 * reach the top of the namespace, no further progress can be
7489		 * made.  Interrupt the DS chasing process, returning SERVFAIL.
7490		 */
7491		if (dns_name_equal(fctx->nsname, domain)) {
7492			dns_resolver_destroyfetch(&fctx->nsfetch);
7493			fctx_done_detach(&fctx, DNS_R_SERVFAIL);
7494			return;
7495		}
7496
7497		/*
7498		 * Get nameservers from fctx->nsfetch before we destroy it.
7499		 */
7500		dns_rdataset_init(&nameservers);
7501		if (dns_rdataset_isassociated(
7502			    &fctx->nsfetch->private->nameservers))
7503		{
7504			dns_rdataset_clone(&fctx->nsfetch->private->nameservers,
7505					   &nameservers);
7506			nsrdataset = &nameservers;
7507		} else {
7508			domain = NULL;
7509		}
7510
7511		dns_resolver_destroyfetch(&fctx->nsfetch);
7512
7513		n = dns_name_countlabels(fctx->nsname);
7514		dns_name_getlabelsequence(fctx->nsname, 1, n - 1, fctx->nsname);
7515
7516		FCTXTRACE("continuing to look for parent's NS records");
7517
7518		/* Starting a new fetch, so restore the extra reference */
7519		fctx_addref(fctx);
7520		result = dns_resolver_createfetch(
7521			res, fctx->nsname, dns_rdatatype_ns, domain, nsrdataset,
7522			NULL, NULL, 0, fctx->options, 0, NULL, task,
7523			resume_dslookup, fctx, &fctx->nsrrset, NULL,
7524			&fctx->nsfetch);
7525		if (result != ISC_R_SUCCESS) {
7526			if (result == DNS_R_DUPLICATE) {
7527				result = DNS_R_SERVFAIL;
7528			}
7529			fctx_unref(fctx);
7530			fctx_done_detach(&fctx, result);
7531		}
7532
7533		if (dns_rdataset_isassociated(&nameservers)) {
7534			dns_rdataset_disassociate(&nameservers);
7535		}
7536	}
7537}
7538
7539static void
7540checknamessection(dns_message_t *message, dns_section_t section) {
7541	isc_result_t result;
7542	dns_name_t *name;
7543	dns_rdata_t rdata = DNS_RDATA_INIT;
7544	dns_rdataset_t *rdataset;
7545
7546	for (result = dns_message_firstname(message, section);
7547	     result == ISC_R_SUCCESS;
7548	     result = dns_message_nextname(message, section))
7549	{
7550		name = NULL;
7551		dns_message_currentname(message, section, &name);
7552		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
7553		     rdataset = ISC_LIST_NEXT(rdataset, link))
7554		{
7555			for (result = dns_rdataset_first(rdataset);
7556			     result == ISC_R_SUCCESS;
7557			     result = dns_rdataset_next(rdataset))
7558			{
7559				dns_rdataset_current(rdataset, &rdata);
7560				if (!dns_rdata_checkowner(name, rdata.rdclass,
7561							  rdata.type, false) ||
7562				    !dns_rdata_checknames(&rdata, name, NULL))
7563				{
7564					rdataset->attributes |=
7565						DNS_RDATASETATTR_CHECKNAMES;
7566				}
7567				dns_rdata_reset(&rdata);
7568			}
7569		}
7570	}
7571}
7572
7573static void
7574checknames(dns_message_t *message) {
7575	checknamessection(message, DNS_SECTION_ANSWER);
7576	checknamessection(message, DNS_SECTION_AUTHORITY);
7577	checknamessection(message, DNS_SECTION_ADDITIONAL);
7578}
7579
7580/*
7581 * Log server NSID at log level 'level'
7582 */
7583static void
7584log_nsid(isc_buffer_t *opt, size_t nsid_len, resquery_t *query, int level,
7585	 isc_mem_t *mctx) {
7586	static const char hex[17] = "0123456789abcdef";
7587	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
7588	size_t buflen;
7589	unsigned char *p, *nsid;
7590	unsigned char *buf = NULL, *pbuf = NULL;
7591
7592	REQUIRE(nsid_len <= UINT16_MAX);
7593
7594	/* Allocate buffer for storing hex version of the NSID */
7595	buflen = nsid_len * 2 + 1;
7596	buf = isc_mem_get(mctx, buflen);
7597	pbuf = isc_mem_get(mctx, nsid_len + 1);
7598
7599	/* Convert to hex */
7600	p = buf;
7601	nsid = isc_buffer_current(opt);
7602	for (size_t i = 0; i < nsid_len; i++) {
7603		*p++ = hex[(nsid[i] >> 4) & 0xf];
7604		*p++ = hex[nsid[i] & 0xf];
7605	}
7606	*p = '\0';
7607
7608	/* Make printable version */
7609	p = pbuf;
7610	for (size_t i = 0; i < nsid_len; i++) {
7611		*p++ = isprint(nsid[i]) ? nsid[i] : '.';
7612	}
7613	*p = '\0';
7614
7615	isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
7616			    sizeof(addrbuf));
7617	isc_log_write(dns_lctx, DNS_LOGCATEGORY_NSID, DNS_LOGMODULE_RESOLVER,
7618		      level, "received NSID %s (\"%s\") from %s", buf, pbuf,
7619		      addrbuf);
7620
7621	isc_mem_put(mctx, pbuf, nsid_len + 1);
7622	isc_mem_put(mctx, buf, buflen);
7623}
7624
7625static bool
7626iscname(dns_message_t *message, dns_name_t *name) {
7627	isc_result_t result;
7628
7629	result = dns_message_findname(message, DNS_SECTION_ANSWER, name,
7630				      dns_rdatatype_cname, 0, NULL, NULL);
7631	return (result == ISC_R_SUCCESS ? true : false);
7632}
7633
7634static bool
7635betterreferral(respctx_t *rctx) {
7636	isc_result_t result;
7637	dns_name_t *name;
7638	dns_rdataset_t *rdataset;
7639
7640	for (result = dns_message_firstname(rctx->query->rmessage,
7641					    DNS_SECTION_AUTHORITY);
7642	     result == ISC_R_SUCCESS;
7643	     result = dns_message_nextname(rctx->query->rmessage,
7644					   DNS_SECTION_AUTHORITY))
7645	{
7646		name = NULL;
7647		dns_message_currentname(rctx->query->rmessage,
7648					DNS_SECTION_AUTHORITY, &name);
7649		if (!isstrictsubdomain(name, rctx->fctx->domain)) {
7650			continue;
7651		}
7652		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
7653		     rdataset = ISC_LIST_NEXT(rdataset, link))
7654		{
7655			if (rdataset->type == dns_rdatatype_ns) {
7656				return (true);
7657			}
7658		}
7659	}
7660	return (false);
7661}
7662
7663/*
7664 * resquery_response():
7665 * Handles responses received in response to iterative queries sent by
7666 * resquery_send(). Sets up a response context (respctx_t).
7667 */
7668static void
7669resquery_response(isc_result_t eresult, isc_region_t *region, void *arg) {
7670	isc_result_t result;
7671	resquery_t *query = (resquery_t *)arg;
7672	fetchctx_t *fctx = NULL;
7673	respctx_t rctx;
7674
7675	if (eresult == ISC_R_CANCELED) {
7676		return;
7677	}
7678
7679	REQUIRE(VALID_QUERY(query));
7680	fctx = query->fctx;
7681	REQUIRE(VALID_FCTX(fctx));
7682
7683	QTRACE("response");
7684
7685	if (eresult == ISC_R_TIMEDOUT) {
7686		result = resquery_timeout(query);
7687		if (result == ISC_R_COMPLETE) {
7688			return;
7689		}
7690	}
7691
7692	if (isc_sockaddr_pf(&query->addrinfo->sockaddr) == PF_INET) {
7693		inc_stats(fctx->res, dns_resstatscounter_responsev4);
7694	} else {
7695		inc_stats(fctx->res, dns_resstatscounter_responsev6);
7696	}
7697
7698	rctx_respinit(query, fctx, eresult, region, &rctx);
7699
7700	if (eresult == ISC_R_SHUTTINGDOWN ||
7701	    atomic_load_acquire(&fctx->res->exiting))
7702	{
7703		result = ISC_R_SHUTTINGDOWN;
7704		FCTXTRACE("resolver shutting down");
7705		rctx.finish = NULL;
7706		rctx_done(&rctx, result);
7707		return;
7708	}
7709
7710	result = rctx_timedout(&rctx);
7711	if (result == ISC_R_COMPLETE) {
7712		FCTXTRACE("timed out");
7713		return;
7714	}
7715
7716	fctx->addrinfo = query->addrinfo;
7717	fctx->timeout = false;
7718	fctx->timeouts = 0;
7719
7720	/*
7721	 * Check whether the dispatcher has failed; if so we're done
7722	 */
7723	result = rctx_dispfail(&rctx);
7724	if (result == ISC_R_COMPLETE) {
7725		return;
7726	}
7727
7728	if (query->tsig != NULL) {
7729		result = dns_message_setquerytsig(query->rmessage, query->tsig);
7730		if (result != ISC_R_SUCCESS) {
7731			FCTXTRACE3("unable to set query tsig", result);
7732			rctx_done(&rctx, result);
7733			return;
7734		}
7735	}
7736
7737	if (query->tsigkey != NULL) {
7738		result = dns_message_settsigkey(query->rmessage,
7739						query->tsigkey);
7740		if (result != ISC_R_SUCCESS) {
7741			FCTXTRACE3("unable to set tsig key", result);
7742			rctx_done(&rctx, result);
7743			return;
7744		}
7745	}
7746
7747	dns_message_setclass(query->rmessage, fctx->res->rdclass);
7748
7749	if ((rctx.retryopts & DNS_FETCHOPT_TCP) == 0) {
7750		if ((rctx.retryopts & DNS_FETCHOPT_NOEDNS0) == 0) {
7751			dns_adb_setudpsize(fctx->adb, query->addrinfo,
7752					   isc_buffer_usedlength(&rctx.buffer));
7753		} else {
7754			dns_adb_plainresponse(fctx->adb, query->addrinfo);
7755		}
7756	}
7757
7758	/*
7759	 * Parse response message.
7760	 */
7761	result = rctx_parse(&rctx);
7762	if (result == ISC_R_COMPLETE) {
7763		return;
7764	}
7765
7766	/*
7767	 * Log the incoming packet.
7768	 */
7769	rctx_logpacket(&rctx);
7770
7771	if (query->rmessage->rdclass != fctx->res->rdclass) {
7772		rctx.resend = true;
7773		FCTXTRACE("bad class");
7774		rctx_done(&rctx, result);
7775		return;
7776	}
7777
7778	/*
7779	 * Process receive opt record.
7780	 */
7781	rctx.opt = dns_message_getopt(query->rmessage);
7782	if (rctx.opt != NULL) {
7783		rctx_opt(&rctx);
7784	}
7785
7786	if (query->rmessage->cc_bad && (rctx.retryopts & DNS_FETCHOPT_TCP) == 0)
7787	{
7788		/*
7789		 * If the COOKIE is bad, assume it is an attack and
7790		 * keep listening for a good answer.
7791		 */
7792		rctx.nextitem = true;
7793		if (isc_log_wouldlog(dns_lctx, ISC_LOG_INFO)) {
7794			char addrbuf[ISC_SOCKADDR_FORMATSIZE];
7795			isc_sockaddr_format(&query->addrinfo->sockaddr, addrbuf,
7796					    sizeof(addrbuf));
7797			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7798				      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
7799				      "bad cookie from %s", addrbuf);
7800		}
7801		rctx_done(&rctx, result);
7802		return;
7803	}
7804
7805	/*
7806	 * Is the question the same as the one we asked?
7807	 * NOERROR/NXDOMAIN/YXDOMAIN/REFUSED/SERVFAIL/BADCOOKIE must
7808	 * have the same question. FORMERR/NOTIMP if they have a
7809	 * question section then it must match.
7810	 */
7811	switch (query->rmessage->rcode) {
7812	case dns_rcode_notimp:
7813	case dns_rcode_formerr:
7814		if (query->rmessage->counts[DNS_SECTION_QUESTION] == 0) {
7815			break;
7816		}
7817		FALLTHROUGH;
7818	case dns_rcode_nxrrset: /* Not expected. */
7819	case dns_rcode_badcookie:
7820	case dns_rcode_noerror:
7821	case dns_rcode_nxdomain:
7822	case dns_rcode_yxdomain:
7823	case dns_rcode_refused:
7824	case dns_rcode_servfail:
7825	default:
7826		result = same_question(fctx, query->rmessage);
7827		if (result != ISC_R_SUCCESS) {
7828			FCTXTRACE3("question section invalid", result);
7829			rctx.nextitem = true;
7830			rctx_done(&rctx, result);
7831			return;
7832		}
7833		break;
7834	}
7835
7836	/*
7837	 * If the message is signed, check the signature.  If not, this
7838	 * returns success anyway.
7839	 */
7840	result = dns_message_checksig(query->rmessage, fctx->res->view);
7841	if (result != ISC_R_SUCCESS) {
7842		FCTXTRACE3("signature check failed", result);
7843		if (result == DNS_R_UNEXPECTEDTSIG ||
7844		    result == DNS_R_EXPECTEDTSIG)
7845		{
7846			rctx.nextitem = true;
7847		}
7848		rctx_done(&rctx, result);
7849		return;
7850	}
7851
7852	/*
7853	 * The dispatcher should ensure we only get responses with QR
7854	 * set.
7855	 */
7856	INSIST((query->rmessage->flags & DNS_MESSAGEFLAG_QR) != 0);
7857
7858	/*
7859	 * If we have had a server cookie and don't get one retry over
7860	 * TCP. This may be a misconfigured anycast server or an attempt
7861	 * to send a spoofed response.  Skip if we have a valid tsig.
7862	 */
7863	if (dns_message_gettsig(query->rmessage, NULL) == NULL &&
7864	    !query->rmessage->cc_ok && !query->rmessage->cc_bad &&
7865	    (rctx.retryopts & DNS_FETCHOPT_TCP) == 0)
7866	{
7867		unsigned char cookie[COOKIE_BUFFER_SIZE];
7868		if (dns_adb_getcookie(fctx->adb, query->addrinfo, cookie,
7869				      sizeof(cookie)) > CLIENT_COOKIE_SIZE)
7870		{
7871			if (isc_log_wouldlog(dns_lctx, ISC_LOG_INFO)) {
7872				char addrbuf[ISC_SOCKADDR_FORMATSIZE];
7873				isc_sockaddr_format(&query->addrinfo->sockaddr,
7874						    addrbuf, sizeof(addrbuf));
7875				isc_log_write(
7876					dns_lctx, DNS_LOGCATEGORY_RESOLVER,
7877					DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
7878					"missing expected cookie "
7879					"from %s",
7880					addrbuf);
7881			}
7882			rctx.retryopts |= DNS_FETCHOPT_TCP;
7883			rctx.resend = true;
7884			rctx_done(&rctx, result);
7885			return;
7886		}
7887	}
7888
7889	rctx_edns(&rctx);
7890
7891	/*
7892	 * Deal with truncated responses by retrying using TCP.
7893	 */
7894	if ((query->rmessage->flags & DNS_MESSAGEFLAG_TC) != 0) {
7895		rctx.truncated = true;
7896	}
7897
7898	if (rctx.truncated) {
7899		inc_stats(fctx->res, dns_resstatscounter_truncated);
7900		if ((rctx.retryopts & DNS_FETCHOPT_TCP) != 0) {
7901			rctx.broken_server = DNS_R_TRUNCATEDTCP;
7902			rctx.next_server = true;
7903		} else {
7904			rctx.retryopts |= DNS_FETCHOPT_TCP;
7905			rctx.resend = true;
7906		}
7907		FCTXTRACE3("message truncated", result);
7908		rctx_done(&rctx, result);
7909		return;
7910	}
7911
7912	/*
7913	 * Is it a query response?
7914	 */
7915	if (query->rmessage->opcode != dns_opcode_query) {
7916		rctx.broken_server = DNS_R_UNEXPECTEDOPCODE;
7917		rctx.next_server = true;
7918		FCTXTRACE("invalid message opcode");
7919		rctx_done(&rctx, result);
7920		return;
7921	}
7922
7923	/*
7924	 * Update statistics about erroneous responses.
7925	 */
7926	switch (query->rmessage->rcode) {
7927	case dns_rcode_noerror:
7928		/* no error */
7929		break;
7930	case dns_rcode_nxdomain:
7931		inc_stats(fctx->res, dns_resstatscounter_nxdomain);
7932		break;
7933	case dns_rcode_servfail:
7934		inc_stats(fctx->res, dns_resstatscounter_servfail);
7935		break;
7936	case dns_rcode_formerr:
7937		inc_stats(fctx->res, dns_resstatscounter_formerr);
7938		break;
7939	case dns_rcode_refused:
7940		inc_stats(fctx->res, dns_resstatscounter_refused);
7941		break;
7942	case dns_rcode_badvers:
7943		inc_stats(fctx->res, dns_resstatscounter_badvers);
7944		break;
7945	case dns_rcode_badcookie:
7946		inc_stats(fctx->res, dns_resstatscounter_badcookie);
7947		break;
7948	default:
7949		inc_stats(fctx->res, dns_resstatscounter_othererror);
7950		break;
7951	}
7952
7953	/*
7954	 * Bad server?
7955	 */
7956	result = rctx_badserver(&rctx, result);
7957	if (result == ISC_R_COMPLETE) {
7958		return;
7959	}
7960
7961	/*
7962	 * Lame server?
7963	 */
7964	result = rctx_lameserver(&rctx);
7965	if (result == ISC_R_COMPLETE) {
7966		return;
7967	}
7968
7969	/*
7970	 * Handle delegation-only zones like NET or COM.
7971	 */
7972	rctx_delonly_zone(&rctx);
7973
7974	/*
7975	 * Optionally call dns_rdata_checkowner() and
7976	 * dns_rdata_checknames() to validate the names in the response
7977	 * message.
7978	 */
7979	if ((fctx->res->options & DNS_RESOLVER_CHECKNAMES) != 0) {
7980		checknames(query->rmessage);
7981	}
7982
7983	/*
7984	 * Clear cache bits.
7985	 */
7986	FCTX_ATTR_CLR(fctx, (FCTX_ATTR_WANTNCACHE | FCTX_ATTR_WANTCACHE));
7987
7988	/*
7989	 * Did we get any answers?
7990	 */
7991	if (query->rmessage->counts[DNS_SECTION_ANSWER] > 0 &&
7992	    (query->rmessage->rcode == dns_rcode_noerror ||
7993	     query->rmessage->rcode == dns_rcode_yxdomain ||
7994	     query->rmessage->rcode == dns_rcode_nxdomain))
7995	{
7996		result = rctx_answer(&rctx);
7997		if (result == ISC_R_COMPLETE) {
7998			return;
7999		}
8000	} else if (query->rmessage->counts[DNS_SECTION_AUTHORITY] > 0 ||
8001		   query->rmessage->rcode == dns_rcode_noerror ||
8002		   query->rmessage->rcode == dns_rcode_nxdomain)
8003	{
8004		/*
8005		 * This might be an NXDOMAIN, NXRRSET, or referral.
8006		 * Call rctx_answer_none() to determine which it is.
8007		 */
8008		result = rctx_answer_none(&rctx);
8009		switch (result) {
8010		case ISC_R_SUCCESS:
8011		case DNS_R_CHASEDSSERVERS:
8012			break;
8013		case DNS_R_DELEGATION:
8014			/*
8015			 * With NOFOLLOW we want to pass return
8016			 * DNS_R_DELEGATION to resume_qmin.
8017			 */
8018			if ((fctx->options & DNS_FETCHOPT_NOFOLLOW) == 0) {
8019				result = ISC_R_SUCCESS;
8020			}
8021			break;
8022		default:
8023			/*
8024			 * Something has gone wrong.
8025			 */
8026			if (result == DNS_R_FORMERR) {
8027				rctx.next_server = true;
8028			}
8029			FCTXTRACE3("rctx_answer_none", result);
8030			rctx_done(&rctx, result);
8031			return;
8032		}
8033	} else {
8034		/*
8035		 * The server is insane.
8036		 */
8037		/* XXXRTH Log */
8038		rctx.broken_server = DNS_R_UNEXPECTEDRCODE;
8039		rctx.next_server = true;
8040		FCTXTRACE("broken server: unexpected rcode");
8041		rctx_done(&rctx, result);
8042		return;
8043	}
8044
8045	/*
8046	 * Follow additional section data chains.
8047	 */
8048	rctx_additional(&rctx);
8049
8050	/*
8051	 * Cache the cacheable parts of the message.  This may also
8052	 * cause work to be queued to the DNSSEC validator.
8053	 */
8054	if (WANTCACHE(fctx)) {
8055		isc_result_t tresult;
8056		tresult = cache_message(fctx, query->rmessage, query->addrinfo,
8057					rctx.now);
8058		if (tresult != ISC_R_SUCCESS) {
8059			FCTXTRACE3("cache_message complete", tresult);
8060			rctx_done(&rctx, tresult);
8061			return;
8062		}
8063	}
8064
8065	/*
8066	 * Negative caching
8067	 */
8068	rctx_ncache(&rctx);
8069
8070	FCTXTRACE("resquery_response done");
8071	rctx_done(&rctx, result);
8072}
8073
8074/*
8075 * rctx_respinit():
8076 * Initialize the response context structure 'rctx' to all zeroes, then
8077 * set the task, event, query and fctx information from
8078 * resquery_response().
8079 */
8080static void
8081rctx_respinit(resquery_t *query, fetchctx_t *fctx, isc_result_t result,
8082	      isc_region_t *region, respctx_t *rctx) {
8083	*rctx = (respctx_t){ .result = result,
8084			     .query = query,
8085			     .fctx = fctx,
8086			     .broken_type = badns_response,
8087			     .retryopts = query->options };
8088	if (result == ISC_R_SUCCESS) {
8089		REQUIRE(region != NULL);
8090		isc_buffer_init(&rctx->buffer, region->base, region->length);
8091		isc_buffer_add(&rctx->buffer, region->length);
8092	} else {
8093		isc_buffer_initnull(&rctx->buffer);
8094	}
8095	TIME_NOW(&rctx->tnow);
8096	rctx->finish = &rctx->tnow;
8097	rctx->now = (isc_stdtime_t)isc_time_seconds(&rctx->tnow);
8098}
8099
8100/*
8101 * rctx_answer_init():
8102 * Clear and reinitialize those portions of 'rctx' that will be needed
8103 * when scanning the answer section of the response message. This can be
8104 * called more than once if scanning needs to be restarted (though
8105 * currently there are no cases in which this occurs).
8106 */
8107static void
8108rctx_answer_init(respctx_t *rctx) {
8109	fetchctx_t *fctx = rctx->fctx;
8110
8111	rctx->aa = ((rctx->query->rmessage->flags & DNS_MESSAGEFLAG_AA) != 0);
8112	if (rctx->aa) {
8113		rctx->trust = dns_trust_authanswer;
8114	} else {
8115		rctx->trust = dns_trust_answer;
8116	}
8117
8118	/*
8119	 * There can be multiple RRSIG and SIG records at a name so
8120	 * we treat these types as a subset of ANY.
8121	 */
8122	rctx->type = fctx->type;
8123	if (rctx->type == dns_rdatatype_rrsig ||
8124	    rctx->type == dns_rdatatype_sig)
8125	{
8126		rctx->type = dns_rdatatype_any;
8127	}
8128
8129	/*
8130	 * Bigger than any valid DNAME label count.
8131	 */
8132	rctx->dname_labels = dns_name_countlabels(fctx->name);
8133	rctx->domain_labels = dns_name_countlabels(fctx->domain);
8134
8135	rctx->found_type = dns_rdatatype_none;
8136
8137	rctx->aname = NULL;
8138	rctx->ardataset = NULL;
8139
8140	rctx->cname = NULL;
8141	rctx->crdataset = NULL;
8142
8143	rctx->dname = NULL;
8144	rctx->drdataset = NULL;
8145
8146	rctx->ns_name = NULL;
8147	rctx->ns_rdataset = NULL;
8148
8149	rctx->soa_name = NULL;
8150	rctx->ds_name = NULL;
8151	rctx->found_name = NULL;
8152}
8153
8154/*
8155 * rctx_dispfail():
8156 * Handle the case where the dispatcher failed
8157 */
8158static isc_result_t
8159rctx_dispfail(respctx_t *rctx) {
8160	fetchctx_t *fctx = rctx->fctx;
8161
8162	if (rctx->result == ISC_R_SUCCESS) {
8163		return (ISC_R_SUCCESS);
8164	}
8165
8166	/*
8167	 * There's no hope for this response.
8168	 */
8169	rctx->next_server = true;
8170
8171	/*
8172	 * If this is a network failure, the operation is cancelled,
8173	 * or the network manager is being shut down, we mark the server
8174	 * as bad so that we won't try it for this fetch again. Also
8175	 * adjust finish and no_response so that we penalize this
8176	 * address in SRTT adjustments later.
8177	 */
8178	switch (rctx->result) {
8179	case ISC_R_EOF:
8180	case ISC_R_HOSTUNREACH:
8181	case ISC_R_NETUNREACH:
8182	case ISC_R_CONNREFUSED:
8183	case ISC_R_CONNECTIONRESET:
8184	case ISC_R_INVALIDPROTO:
8185	case ISC_R_CANCELED:
8186	case ISC_R_SHUTTINGDOWN:
8187		rctx->broken_server = rctx->result;
8188		rctx->broken_type = badns_unreachable;
8189		rctx->finish = NULL;
8190		rctx->no_response = true;
8191		break;
8192	default:
8193		break;
8194	}
8195
8196	FCTXTRACE3("dispatcher failure", rctx->result);
8197	rctx_done(rctx, ISC_R_SUCCESS);
8198	return (ISC_R_COMPLETE);
8199}
8200
8201/*
8202 * rctx_timedout():
8203 * Handle the case where a dispatch read timed out.
8204 */
8205static isc_result_t
8206rctx_timedout(respctx_t *rctx) {
8207	fetchctx_t *fctx = rctx->fctx;
8208
8209	if (rctx->result == ISC_R_TIMEDOUT) {
8210		isc_time_t now;
8211
8212		inc_stats(fctx->res, dns_resstatscounter_querytimeout);
8213		FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
8214		fctx->timeout = true;
8215		fctx->timeouts++;
8216
8217		isc_time_now(&now);
8218		/* netmgr timeouts are accurate to the millisecond */
8219		if (isc_time_microdiff(&fctx->expires, &now) < US_PER_MS) {
8220			FCTXTRACE("query timed out; stopped trying to make "
8221				  "fetch happen");
8222		} else {
8223			FCTXTRACE("query timed out; trying next server");
8224			/* try next server */
8225			rctx->no_response = true;
8226			rctx->finish = NULL;
8227			rctx->next_server = true;
8228		}
8229
8230		rctx_done(rctx, rctx->result);
8231		return (ISC_R_COMPLETE);
8232	}
8233
8234	return (ISC_R_SUCCESS);
8235}
8236
8237/*
8238 * rctx_parse():
8239 * Parse the response message.
8240 */
8241static isc_result_t
8242rctx_parse(respctx_t *rctx) {
8243	isc_result_t result;
8244	fetchctx_t *fctx = rctx->fctx;
8245	resquery_t *query = rctx->query;
8246
8247	result = dns_message_parse(query->rmessage, &rctx->buffer, 0);
8248	if (result == ISC_R_SUCCESS) {
8249		return (ISC_R_SUCCESS);
8250	}
8251
8252	FCTXTRACE3("message failed to parse", result);
8253
8254	switch (result) {
8255	case ISC_R_UNEXPECTEDEND:
8256		if (query->rmessage->question_ok &&
8257		    (query->rmessage->flags & DNS_MESSAGEFLAG_TC) != 0 &&
8258		    (rctx->retryopts & DNS_FETCHOPT_TCP) == 0)
8259		{
8260			/*
8261			 * We defer retrying via TCP for a bit so we can
8262			 * check out this message further.
8263			 */
8264			rctx->truncated = true;
8265			return (ISC_R_SUCCESS);
8266		}
8267
8268		/*
8269		 * Either the message ended prematurely,
8270		 * and/or wasn't marked as being truncated,
8271		 * and/or this is a response to a query we
8272		 * sent over TCP.  In all of these cases,
8273		 * something is wrong with the remote
8274		 * server and we don't want to retry using
8275		 * TCP.
8276		 */
8277		if ((rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0) {
8278			/*
8279			 * The problem might be that they
8280			 * don't understand EDNS0.  Turn it
8281			 * off and try again.
8282			 */
8283			rctx->retryopts |= DNS_FETCHOPT_NOEDNS0;
8284			rctx->resend = true;
8285			add_bad_edns(fctx, &query->addrinfo->sockaddr);
8286			inc_stats(fctx->res, dns_resstatscounter_edns0fail);
8287		} else {
8288			rctx->broken_server = result;
8289			rctx->next_server = true;
8290		}
8291
8292		rctx_done(rctx, result);
8293		break;
8294	case DNS_R_FORMERR:
8295		if ((rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0) {
8296			/*
8297			 * The problem might be that they
8298			 * don't understand EDNS0.  Turn it
8299			 * off and try again.
8300			 */
8301			rctx->retryopts |= DNS_FETCHOPT_NOEDNS0;
8302			rctx->resend = true;
8303			add_bad_edns(fctx, &query->addrinfo->sockaddr);
8304			inc_stats(fctx->res, dns_resstatscounter_edns0fail);
8305		} else {
8306			rctx->broken_server = DNS_R_UNEXPECTEDRCODE;
8307			rctx->next_server = true;
8308		}
8309
8310		rctx_done(rctx, result);
8311		break;
8312	default:
8313		/*
8314		 * Something bad has happened.
8315		 */
8316		rctx_done(rctx, result);
8317		break;
8318	}
8319
8320	return (ISC_R_COMPLETE);
8321}
8322
8323/*
8324 * rctx_opt():
8325 * Process the OPT record in the response.
8326 */
8327static void
8328rctx_opt(respctx_t *rctx) {
8329	resquery_t *query = rctx->query;
8330	fetchctx_t *fctx = rctx->fctx;
8331	dns_rdata_t rdata;
8332	isc_buffer_t optbuf;
8333	isc_result_t result;
8334	uint16_t optcode;
8335	uint16_t optlen;
8336	unsigned char *optvalue;
8337	dns_adbaddrinfo_t *addrinfo;
8338	unsigned char cookie[CLIENT_COOKIE_SIZE];
8339	bool seen_cookie = false;
8340	bool seen_nsid = false;
8341
8342	result = dns_rdataset_first(rctx->opt);
8343	if (result == ISC_R_SUCCESS) {
8344		dns_rdata_init(&rdata);
8345		dns_rdataset_current(rctx->opt, &rdata);
8346		isc_buffer_init(&optbuf, rdata.data, rdata.length);
8347		isc_buffer_add(&optbuf, rdata.length);
8348		while (isc_buffer_remaininglength(&optbuf) >= 4) {
8349			optcode = isc_buffer_getuint16(&optbuf);
8350			optlen = isc_buffer_getuint16(&optbuf);
8351			INSIST(optlen <= isc_buffer_remaininglength(&optbuf));
8352			switch (optcode) {
8353			case DNS_OPT_NSID:
8354				if (!seen_nsid && (query->options &
8355						   DNS_FETCHOPT_WANTNSID) != 0)
8356				{
8357					log_nsid(&optbuf, optlen, query,
8358						 ISC_LOG_INFO, fctx->res->mctx);
8359				}
8360				isc_buffer_forward(&optbuf, optlen);
8361				seen_nsid = true;
8362				break;
8363			case DNS_OPT_COOKIE:
8364				/*
8365				 * Only process the first cookie option.
8366				 */
8367				if (seen_cookie) {
8368					isc_buffer_forward(&optbuf, optlen);
8369					break;
8370				}
8371				optvalue = isc_buffer_current(&optbuf);
8372				compute_cc(query, cookie, sizeof(cookie));
8373				INSIST(query->rmessage->cc_bad == 0 &&
8374				       query->rmessage->cc_ok == 0);
8375				if (optlen >= CLIENT_COOKIE_SIZE &&
8376				    memcmp(cookie, optvalue,
8377					   CLIENT_COOKIE_SIZE) == 0)
8378				{
8379					if (optlen == CLIENT_COOKIE_SIZE) {
8380						query->rmessage->cc_echoed = 1;
8381					} else {
8382						query->rmessage->cc_ok = 1;
8383						inc_stats(
8384							fctx->res,
8385							dns_resstatscounter_cookieok);
8386						addrinfo = query->addrinfo;
8387						dns_adb_setcookie(
8388							fctx->adb, addrinfo,
8389							optvalue, optlen);
8390					}
8391				} else {
8392					query->rmessage->cc_bad = 1;
8393				}
8394				isc_buffer_forward(&optbuf, optlen);
8395				inc_stats(fctx->res,
8396					  dns_resstatscounter_cookiein);
8397				seen_cookie = true;
8398				break;
8399			default:
8400				isc_buffer_forward(&optbuf, optlen);
8401				break;
8402			}
8403		}
8404		INSIST(isc_buffer_remaininglength(&optbuf) == 0U);
8405	}
8406}
8407
8408/*
8409 * rctx_edns():
8410 * Determine whether the remote server is using EDNS correctly or
8411 * incorrectly and record that information if needed.
8412 */
8413static void
8414rctx_edns(respctx_t *rctx) {
8415	resquery_t *query = rctx->query;
8416	fetchctx_t *fctx = rctx->fctx;
8417
8418	/*
8419	 * We have an affirmative response to the query and we have
8420	 * previously got a response from this server which indicated
8421	 * EDNS may not be supported so we can now cache the lack of
8422	 * EDNS support.
8423	 */
8424	if (rctx->opt == NULL && !EDNSOK(query->addrinfo) &&
8425	    (query->rmessage->rcode == dns_rcode_noerror ||
8426	     query->rmessage->rcode == dns_rcode_nxdomain ||
8427	     query->rmessage->rcode == dns_rcode_refused ||
8428	     query->rmessage->rcode == dns_rcode_yxdomain) &&
8429	    bad_edns(fctx, &query->addrinfo->sockaddr))
8430	{
8431		dns_message_logpacket(
8432			query->rmessage, "received packet (bad edns) from",
8433			&query->addrinfo->sockaddr, DNS_LOGCATEGORY_RESOLVER,
8434			DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
8435			fctx->res->mctx);
8436		dns_adb_changeflags(fctx->adb, query->addrinfo,
8437				    FCTX_ADDRINFO_NOEDNS0,
8438				    FCTX_ADDRINFO_NOEDNS0);
8439	} else if (rctx->opt == NULL &&
8440		   (query->rmessage->flags & DNS_MESSAGEFLAG_TC) == 0 &&
8441		   !EDNSOK(query->addrinfo) &&
8442		   (query->rmessage->rcode == dns_rcode_noerror ||
8443		    query->rmessage->rcode == dns_rcode_nxdomain) &&
8444		   (rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0)
8445	{
8446		/*
8447		 * We didn't get a OPT record in response to a EDNS
8448		 * query.
8449		 *
8450		 * Old versions of named incorrectly drop the OPT record
8451		 * when there is a signed, truncated response so we
8452		 * check that TC is not set.
8453		 *
8454		 * Record that the server is not talking EDNS.  While
8455		 * this should be safe to do for any rcode we limit it
8456		 * to NOERROR and NXDOMAIN.
8457		 */
8458		dns_message_logpacket(
8459			query->rmessage, "received packet (no opt) from",
8460			&query->addrinfo->sockaddr, DNS_LOGCATEGORY_RESOLVER,
8461			DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3),
8462			fctx->res->mctx);
8463		dns_adb_changeflags(fctx->adb, query->addrinfo,
8464				    FCTX_ADDRINFO_NOEDNS0,
8465				    FCTX_ADDRINFO_NOEDNS0);
8466	}
8467
8468	/*
8469	 * If we get a non error EDNS response record the fact so we
8470	 * won't fallback to plain DNS in the future for this server.
8471	 */
8472	if (rctx->opt != NULL && !EDNSOK(query->addrinfo) &&
8473	    (rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0 &&
8474	    (query->rmessage->rcode == dns_rcode_noerror ||
8475	     query->rmessage->rcode == dns_rcode_nxdomain ||
8476	     query->rmessage->rcode == dns_rcode_refused ||
8477	     query->rmessage->rcode == dns_rcode_yxdomain))
8478	{
8479		dns_adb_changeflags(fctx->adb, query->addrinfo,
8480				    FCTX_ADDRINFO_EDNSOK, FCTX_ADDRINFO_EDNSOK);
8481	}
8482}
8483
8484/*
8485 * rctx_answer():
8486 * We might have answers, or we might have a malformed delegation with
8487 * records in the answer section. Call rctx_answer_positive() or
8488 * rctx_answer_none() as appropriate.
8489 */
8490static isc_result_t
8491rctx_answer(respctx_t *rctx) {
8492	isc_result_t result;
8493	fetchctx_t *fctx = rctx->fctx;
8494	resquery_t *query = rctx->query;
8495
8496	if ((query->rmessage->flags & DNS_MESSAGEFLAG_AA) != 0 ||
8497	    ISFORWARDER(query->addrinfo))
8498	{
8499		result = rctx_answer_positive(rctx);
8500		if (result != ISC_R_SUCCESS) {
8501			FCTXTRACE3("rctx_answer_positive (AA/fwd)", result);
8502		}
8503	} else if (iscname(query->rmessage, fctx->name) &&
8504		   fctx->type != dns_rdatatype_any &&
8505		   fctx->type != dns_rdatatype_cname)
8506	{
8507		/*
8508		 * A BIND8 server could return a non-authoritative
8509		 * answer when a CNAME is followed.  We should treat
8510		 * it as a valid answer.
8511		 */
8512		result = rctx_answer_positive(rctx);
8513		if (result != ISC_R_SUCCESS) {
8514			FCTXTRACE3("rctx_answer_positive (!ANY/!CNAME)",
8515				   result);
8516		}
8517	} else if (fctx->type != dns_rdatatype_ns && !betterreferral(rctx)) {
8518		result = rctx_answer_positive(rctx);
8519		if (result != ISC_R_SUCCESS) {
8520			FCTXTRACE3("rctx_answer_positive (!NS)", result);
8521		}
8522	} else {
8523		/*
8524		 * This may be a delegation. First let's check for
8525		 */
8526
8527		if (fctx->type == dns_rdatatype_ns) {
8528			/*
8529			 * A BIND 8 server could incorrectly return a
8530			 * non-authoritative answer to an NS query
8531			 * instead of a referral. Since this answer
8532			 * lacks the SIGs necessary to do DNSSEC
8533			 * validation, we must invoke the following
8534			 * special kludge to treat it as a referral.
8535			 */
8536			rctx->ns_in_answer = true;
8537			result = rctx_answer_none(rctx);
8538			if (result != ISC_R_SUCCESS) {
8539				FCTXTRACE3("rctx_answer_none (NS)", result);
8540			}
8541		} else {
8542			/*
8543			 * Some other servers may still somehow include
8544			 * an answer when it should return a referral
8545			 * with an empty answer.  Check to see if we can
8546			 * treat this as a referral by ignoring the
8547			 * answer.  Further more, there may be an
8548			 * implementation that moves A/AAAA glue records
8549			 * to the answer section for that type of
8550			 * delegation when the query is for that glue
8551			 * record. glue_in_answer will handle
8552			 * such a corner case.
8553			 */
8554			rctx->glue_in_answer = true;
8555			result = rctx_answer_none(rctx);
8556			if (result != ISC_R_SUCCESS) {
8557				FCTXTRACE3("rctx_answer_none", result);
8558			}
8559		}
8560
8561		if (result == DNS_R_DELEGATION) {
8562			/*
8563			 * With NOFOLLOW we want to return DNS_R_DELEGATION to
8564			 * resume_qmin.
8565			 */
8566			if ((rctx->fctx->options & DNS_FETCHOPT_NOFOLLOW) != 0)
8567			{
8568				return (result);
8569			}
8570			result = ISC_R_SUCCESS;
8571		} else {
8572			/*
8573			 * At this point, AA is not set, the response
8574			 * is not a referral, and the server is not a
8575			 * forwarder.  It is technically lame and it's
8576			 * easier to treat it as such than to figure out
8577			 * some more elaborate course of action.
8578			 */
8579			rctx->broken_server = DNS_R_LAME;
8580			rctx->next_server = true;
8581			FCTXTRACE3("rctx_answer lame", result);
8582			rctx_done(rctx, result);
8583			return (ISC_R_COMPLETE);
8584		}
8585	}
8586
8587	if (result != ISC_R_SUCCESS) {
8588		if (result == DNS_R_FORMERR) {
8589			rctx->next_server = true;
8590		}
8591		FCTXTRACE3("rctx_answer failed", result);
8592		rctx_done(rctx, result);
8593		return (ISC_R_COMPLETE);
8594	}
8595
8596	return (ISC_R_SUCCESS);
8597}
8598
8599/*
8600 * rctx_answer_positive():
8601 * Handles positive responses. Depending which type of answer this is
8602 * (matching QNAME/QTYPE, CNAME, DNAME, ANY) calls the proper routine
8603 * to handle it (rctx_answer_match(), rctx_answer_cname(),
8604 * rctx_answer_dname(), rctx_answer_any()).
8605 */
8606static isc_result_t
8607rctx_answer_positive(respctx_t *rctx) {
8608	isc_result_t result;
8609	fetchctx_t *fctx = rctx->fctx;
8610
8611	FCTXTRACE("rctx_answer_positive");
8612
8613	rctx_answer_init(rctx);
8614	rctx_answer_scan(rctx);
8615
8616	/*
8617	 * Determine which type of positive answer this is:
8618	 * type ANY, CNAME, DNAME, or an answer matching QNAME/QTYPE.
8619	 * Call the appropriate routine to handle the answer type.
8620	 */
8621	if (rctx->aname != NULL && rctx->type == dns_rdatatype_any) {
8622		result = rctx_answer_any(rctx);
8623		if (result == ISC_R_COMPLETE) {
8624			return (rctx->result);
8625		}
8626	} else if (rctx->aname != NULL) {
8627		result = rctx_answer_match(rctx);
8628		if (result == ISC_R_COMPLETE) {
8629			return (rctx->result);
8630		}
8631	} else if (rctx->cname != NULL) {
8632		result = rctx_answer_cname(rctx);
8633		if (result == ISC_R_COMPLETE) {
8634			return (rctx->result);
8635		}
8636	} else if (rctx->dname != NULL) {
8637		result = rctx_answer_dname(rctx);
8638		if (result == ISC_R_COMPLETE) {
8639			return (rctx->result);
8640		}
8641	} else {
8642		log_formerr(fctx, "reply has no answer");
8643		return (DNS_R_FORMERR);
8644	}
8645
8646	/*
8647	 * This response is now potentially cacheable.
8648	 */
8649	FCTX_ATTR_SET(fctx, FCTX_ATTR_WANTCACHE);
8650
8651	/*
8652	 * Did chaining end before we got the final answer?
8653	 */
8654	if (rctx->chaining) {
8655		return (ISC_R_SUCCESS);
8656	}
8657
8658	/*
8659	 * We didn't end with an incomplete chain, so the rcode should
8660	 * be "no error".
8661	 */
8662	if (rctx->query->rmessage->rcode != dns_rcode_noerror) {
8663		log_formerr(fctx, "CNAME/DNAME chain complete, but RCODE "
8664				  "indicates error");
8665		return (DNS_R_FORMERR);
8666	}
8667
8668	/*
8669	 * Cache records in the authority section, if
8670	 * there are any suitable for caching.
8671	 */
8672	rctx_authority_positive(rctx);
8673
8674	log_ns_ttl(fctx, "rctx_answer");
8675
8676	if (rctx->ns_rdataset != NULL &&
8677	    dns_name_equal(fctx->domain, rctx->ns_name) &&
8678	    !dns_name_equal(rctx->ns_name, dns_rootname))
8679	{
8680		trim_ns_ttl(fctx, rctx->ns_name, rctx->ns_rdataset);
8681	}
8682
8683	return (ISC_R_SUCCESS);
8684}
8685
8686/*
8687 * rctx_answer_scan():
8688 * Perform a single pass over the answer section of a response, looking
8689 * for an answer that matches QNAME/QTYPE, or a CNAME matching QNAME, or
8690 * a covering DNAME. If more than one rdataset is found matching these
8691 * criteria, then only one is kept. Order of preference is 1) the
8692 * shortest DNAME, 2) the first matching answer, or 3) the first CNAME.
8693 */
8694static void
8695rctx_answer_scan(respctx_t *rctx) {
8696	isc_result_t result;
8697	fetchctx_t *fctx = rctx->fctx;
8698	dns_rdataset_t *rdataset = NULL;
8699
8700	for (result = dns_message_firstname(rctx->query->rmessage,
8701					    DNS_SECTION_ANSWER);
8702	     result == ISC_R_SUCCESS;
8703	     result = dns_message_nextname(rctx->query->rmessage,
8704					   DNS_SECTION_ANSWER))
8705	{
8706		int order;
8707		unsigned int nlabels;
8708		dns_namereln_t namereln;
8709		dns_name_t *name = NULL;
8710
8711		dns_message_currentname(rctx->query->rmessage,
8712					DNS_SECTION_ANSWER, &name);
8713		namereln = dns_name_fullcompare(fctx->name, name, &order,
8714						&nlabels);
8715		switch (namereln) {
8716		case dns_namereln_equal:
8717			for (rdataset = ISC_LIST_HEAD(name->list);
8718			     rdataset != NULL;
8719			     rdataset = ISC_LIST_NEXT(rdataset, link))
8720			{
8721				if (rdataset->type == rctx->type ||
8722				    rctx->type == dns_rdatatype_any)
8723				{
8724					rctx->aname = name;
8725					if (rctx->type != dns_rdatatype_any) {
8726						rctx->ardataset = rdataset;
8727					}
8728					break;
8729				}
8730				if (rdataset->type == dns_rdatatype_cname) {
8731					rctx->cname = name;
8732					rctx->crdataset = rdataset;
8733					break;
8734				}
8735			}
8736			break;
8737
8738		case dns_namereln_subdomain:
8739			/*
8740			 * Don't accept DNAME from parent namespace.
8741			 */
8742			if (name_external(name, dns_rdatatype_dname, fctx)) {
8743				continue;
8744			}
8745
8746			/*
8747			 * In-scope DNAME records must have at least
8748			 * as many labels as the domain being queried.
8749			 * They also must be less that qname's labels
8750			 * and any previously found dname.
8751			 */
8752			if (nlabels >= rctx->dname_labels ||
8753			    nlabels < rctx->domain_labels)
8754			{
8755				continue;
8756			}
8757
8758			/*
8759			 * We are looking for the shortest DNAME if
8760			 * there are multiple ones (which there
8761			 * shouldn't be).
8762			 */
8763			for (rdataset = ISC_LIST_HEAD(name->list);
8764			     rdataset != NULL;
8765			     rdataset = ISC_LIST_NEXT(rdataset, link))
8766			{
8767				if (rdataset->type != dns_rdatatype_dname) {
8768					continue;
8769				}
8770				rctx->dname = name;
8771				rctx->drdataset = rdataset;
8772				rctx->dname_labels = nlabels;
8773				break;
8774			}
8775			break;
8776		default:
8777			break;
8778		}
8779	}
8780
8781	/*
8782	 * If a DNAME was found, then any CNAME or other answer matching
8783	 * QNAME that may also have been found must be ignored.
8784	 * Similarly, if a matching answer was found along with a CNAME,
8785	 * the CNAME must be ignored.
8786	 */
8787	if (rctx->dname != NULL) {
8788		rctx->aname = NULL;
8789		rctx->ardataset = NULL;
8790		rctx->cname = NULL;
8791		rctx->crdataset = NULL;
8792	} else if (rctx->aname != NULL) {
8793		rctx->cname = NULL;
8794		rctx->crdataset = NULL;
8795	}
8796}
8797
8798/*
8799 * rctx_answer_any():
8800 * Handle responses to queries of type ANY. Scan the answer section,
8801 * and as long as each RRset is of a type that is valid in the answer
8802 * section, and the rdata isn't filtered, cache it.
8803 */
8804static isc_result_t
8805rctx_answer_any(respctx_t *rctx) {
8806	dns_rdataset_t *rdataset = NULL;
8807	fetchctx_t *fctx = rctx->fctx;
8808
8809	for (rdataset = ISC_LIST_HEAD(rctx->aname->list); rdataset != NULL;
8810	     rdataset = ISC_LIST_NEXT(rdataset, link))
8811	{
8812		if (!validinanswer(rdataset, fctx)) {
8813			rctx->result = DNS_R_FORMERR;
8814			return (ISC_R_COMPLETE);
8815		}
8816
8817		if ((fctx->type == dns_rdatatype_sig ||
8818		     fctx->type == dns_rdatatype_rrsig) &&
8819		    rdataset->type != fctx->type)
8820		{
8821			continue;
8822		}
8823
8824		if ((rdataset->type == dns_rdatatype_a ||
8825		     rdataset->type == dns_rdatatype_aaaa) &&
8826		    !is_answeraddress_allowed(fctx->res->view, rctx->aname,
8827					      rdataset))
8828		{
8829			rctx->result = DNS_R_SERVFAIL;
8830			return (ISC_R_COMPLETE);
8831		}
8832
8833		if ((rdataset->type == dns_rdatatype_cname ||
8834		     rdataset->type == dns_rdatatype_dname) &&
8835		    !is_answertarget_allowed(fctx, fctx->name, rctx->aname,
8836					     rdataset, NULL))
8837		{
8838			rctx->result = DNS_R_SERVFAIL;
8839			return (ISC_R_COMPLETE);
8840		}
8841
8842		rctx->aname->attributes |= DNS_NAMEATTR_CACHE;
8843		rctx->aname->attributes |= DNS_NAMEATTR_ANSWER;
8844		rdataset->attributes |= DNS_RDATASETATTR_ANSWER;
8845		rdataset->attributes |= DNS_RDATASETATTR_CACHE;
8846		rdataset->trust = rctx->trust;
8847
8848		(void)dns_rdataset_additionaldata(rdataset, rctx->aname,
8849						  check_related, rctx);
8850	}
8851
8852	return (ISC_R_SUCCESS);
8853}
8854
8855/*
8856 * rctx_answer_match():
8857 * Handle responses that match the QNAME/QTYPE of the resolver query.
8858 * If QTYPE is valid in the answer section and the rdata isn't filtered,
8859 * the answer can be cached. If there is additional section data related
8860 * to the answer, it can be cached as well.
8861 */
8862static isc_result_t
8863rctx_answer_match(respctx_t *rctx) {
8864	dns_rdataset_t *sigrdataset = NULL;
8865	fetchctx_t *fctx = rctx->fctx;
8866
8867	if (!validinanswer(rctx->ardataset, fctx)) {
8868		rctx->result = DNS_R_FORMERR;
8869		return (ISC_R_COMPLETE);
8870	}
8871
8872	if ((rctx->ardataset->type == dns_rdatatype_a ||
8873	     rctx->ardataset->type == dns_rdatatype_aaaa) &&
8874	    !is_answeraddress_allowed(fctx->res->view, rctx->aname,
8875				      rctx->ardataset))
8876	{
8877		rctx->result = DNS_R_SERVFAIL;
8878		return (ISC_R_COMPLETE);
8879	}
8880	if ((rctx->ardataset->type == dns_rdatatype_cname ||
8881	     rctx->ardataset->type == dns_rdatatype_dname) &&
8882	    rctx->type != rctx->ardataset->type &&
8883	    rctx->type != dns_rdatatype_any &&
8884	    !is_answertarget_allowed(fctx, fctx->name, rctx->aname,
8885				     rctx->ardataset, NULL))
8886	{
8887		rctx->result = DNS_R_SERVFAIL;
8888		return (ISC_R_COMPLETE);
8889	}
8890
8891	rctx->aname->attributes |= DNS_NAMEATTR_CACHE;
8892	rctx->aname->attributes |= DNS_NAMEATTR_ANSWER;
8893	rctx->ardataset->attributes |= DNS_RDATASETATTR_ANSWER;
8894	rctx->ardataset->attributes |= DNS_RDATASETATTR_CACHE;
8895	rctx->ardataset->trust = rctx->trust;
8896	(void)dns_rdataset_additionaldata(rctx->ardataset, rctx->aname,
8897					  check_related, rctx);
8898
8899	for (sigrdataset = ISC_LIST_HEAD(rctx->aname->list);
8900	     sigrdataset != NULL;
8901	     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
8902	{
8903		if (!validinanswer(sigrdataset, fctx)) {
8904			rctx->result = DNS_R_FORMERR;
8905			return (ISC_R_COMPLETE);
8906		}
8907
8908		if (sigrdataset->type != dns_rdatatype_rrsig ||
8909		    sigrdataset->covers != rctx->type)
8910		{
8911			continue;
8912		}
8913
8914		sigrdataset->attributes |= DNS_RDATASETATTR_ANSWERSIG;
8915		sigrdataset->attributes |= DNS_RDATASETATTR_CACHE;
8916		sigrdataset->trust = rctx->trust;
8917		break;
8918	}
8919
8920	return (ISC_R_SUCCESS);
8921}
8922
8923/*
8924 * rctx_answer_cname():
8925 * Handle answers containing a CNAME. Cache the CNAME, and flag that
8926 * there may be additional chain answers to find.
8927 */
8928static isc_result_t
8929rctx_answer_cname(respctx_t *rctx) {
8930	dns_rdataset_t *sigrdataset = NULL;
8931	fetchctx_t *fctx = rctx->fctx;
8932
8933	if (!validinanswer(rctx->crdataset, fctx)) {
8934		rctx->result = DNS_R_FORMERR;
8935		return (ISC_R_COMPLETE);
8936	}
8937
8938	if (rctx->type == dns_rdatatype_rrsig ||
8939	    rctx->type == dns_rdatatype_key || rctx->type == dns_rdatatype_nsec)
8940	{
8941		char buf[DNS_RDATATYPE_FORMATSIZE];
8942		dns_rdatatype_format(rctx->type, buf, sizeof(buf));
8943		log_formerr(fctx, "CNAME response for %s RR", buf);
8944		rctx->result = DNS_R_FORMERR;
8945		return (ISC_R_COMPLETE);
8946	}
8947
8948	if (!is_answertarget_allowed(fctx, fctx->name, rctx->cname,
8949				     rctx->crdataset, NULL))
8950	{
8951		rctx->result = DNS_R_SERVFAIL;
8952		return (ISC_R_COMPLETE);
8953	}
8954
8955	rctx->cname->attributes |= DNS_NAMEATTR_CACHE;
8956	rctx->cname->attributes |= DNS_NAMEATTR_ANSWER;
8957	rctx->cname->attributes |= DNS_NAMEATTR_CHAINING;
8958	rctx->crdataset->attributes |= DNS_RDATASETATTR_ANSWER;
8959	rctx->crdataset->attributes |= DNS_RDATASETATTR_CACHE;
8960	rctx->crdataset->attributes |= DNS_RDATASETATTR_CHAINING;
8961	rctx->crdataset->trust = rctx->trust;
8962
8963	for (sigrdataset = ISC_LIST_HEAD(rctx->cname->list);
8964	     sigrdataset != NULL;
8965	     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
8966	{
8967		if (!validinanswer(sigrdataset, fctx)) {
8968			rctx->result = DNS_R_FORMERR;
8969			return (ISC_R_COMPLETE);
8970		}
8971
8972		if (sigrdataset->type != dns_rdatatype_rrsig ||
8973		    sigrdataset->covers != dns_rdatatype_cname)
8974		{
8975			continue;
8976		}
8977
8978		sigrdataset->attributes |= DNS_RDATASETATTR_ANSWERSIG;
8979		sigrdataset->attributes |= DNS_RDATASETATTR_CACHE;
8980		sigrdataset->trust = rctx->trust;
8981		break;
8982	}
8983
8984	rctx->chaining = true;
8985	return (ISC_R_SUCCESS);
8986}
8987
8988/*
8989 * rctx_answer_dname():
8990 * Handle responses with covering DNAME records.
8991 */
8992static isc_result_t
8993rctx_answer_dname(respctx_t *rctx) {
8994	dns_rdataset_t *sigrdataset = NULL;
8995	fetchctx_t *fctx = rctx->fctx;
8996
8997	if (!validinanswer(rctx->drdataset, fctx)) {
8998		rctx->result = DNS_R_FORMERR;
8999		return (ISC_R_COMPLETE);
9000	}
9001
9002	if (!is_answertarget_allowed(fctx, fctx->name, rctx->dname,
9003				     rctx->drdataset, &rctx->chaining))
9004	{
9005		rctx->result = DNS_R_SERVFAIL;
9006		return (ISC_R_COMPLETE);
9007	}
9008
9009	rctx->dname->attributes |= DNS_NAMEATTR_CACHE;
9010	rctx->dname->attributes |= DNS_NAMEATTR_ANSWER;
9011	rctx->dname->attributes |= DNS_NAMEATTR_CHAINING;
9012	rctx->drdataset->attributes |= DNS_RDATASETATTR_ANSWER;
9013	rctx->drdataset->attributes |= DNS_RDATASETATTR_CACHE;
9014	rctx->drdataset->attributes |= DNS_RDATASETATTR_CHAINING;
9015	rctx->drdataset->trust = rctx->trust;
9016
9017	for (sigrdataset = ISC_LIST_HEAD(rctx->dname->list);
9018	     sigrdataset != NULL;
9019	     sigrdataset = ISC_LIST_NEXT(sigrdataset, link))
9020	{
9021		if (!validinanswer(sigrdataset, fctx)) {
9022			rctx->result = DNS_R_FORMERR;
9023			return (ISC_R_COMPLETE);
9024		}
9025
9026		if (sigrdataset->type != dns_rdatatype_rrsig ||
9027		    sigrdataset->covers != dns_rdatatype_dname)
9028		{
9029			continue;
9030		}
9031
9032		sigrdataset->attributes |= DNS_RDATASETATTR_ANSWERSIG;
9033		sigrdataset->attributes |= DNS_RDATASETATTR_CACHE;
9034		sigrdataset->trust = rctx->trust;
9035		break;
9036	}
9037
9038	return (ISC_R_SUCCESS);
9039}
9040
9041/*
9042 * rctx_authority_positive():
9043 * Examine the records in the authority section (if there are any) for a
9044 * positive answer.  We expect the names for all rdatasets in this
9045 * section to be subdomains of the domain being queried; any that are
9046 * not are skipped.  We expect to find only *one* owner name; any names
9047 * after the first one processed are ignored. We expect to find only
9048 * rdatasets of type NS, RRSIG, or SIG; all others are ignored. Whatever
9049 * remains can be cached at trust level authauthority or additional
9050 * (depending on whether the AA bit was set on the answer).
9051 */
9052static void
9053rctx_authority_positive(respctx_t *rctx) {
9054	fetchctx_t *fctx = rctx->fctx;
9055	bool done = false;
9056	isc_result_t result;
9057
9058	result = dns_message_firstname(rctx->query->rmessage,
9059				       DNS_SECTION_AUTHORITY);
9060	while (!done && result == ISC_R_SUCCESS) {
9061		dns_name_t *name = NULL;
9062
9063		dns_message_currentname(rctx->query->rmessage,
9064					DNS_SECTION_AUTHORITY, &name);
9065
9066		if (!name_external(name, dns_rdatatype_ns, fctx)) {
9067			dns_rdataset_t *rdataset = NULL;
9068
9069			/*
9070			 * We expect to find NS or SIG NS rdatasets, and
9071			 * nothing else.
9072			 */
9073			for (rdataset = ISC_LIST_HEAD(name->list);
9074			     rdataset != NULL;
9075			     rdataset = ISC_LIST_NEXT(rdataset, link))
9076			{
9077				if (rdataset->type == dns_rdatatype_ns ||
9078				    (rdataset->type == dns_rdatatype_rrsig &&
9079				     rdataset->covers == dns_rdatatype_ns))
9080				{
9081					name->attributes |= DNS_NAMEATTR_CACHE;
9082					rdataset->attributes |=
9083						DNS_RDATASETATTR_CACHE;
9084
9085					if (rctx->aa) {
9086						rdataset->trust =
9087							dns_trust_authauthority;
9088					} else {
9089						rdataset->trust =
9090							dns_trust_additional;
9091					}
9092
9093					if (rdataset->type == dns_rdatatype_ns)
9094					{
9095						rctx->ns_name = name;
9096						rctx->ns_rdataset = rdataset;
9097					}
9098					/*
9099					 * Mark any additional data
9100					 * related to this rdataset.
9101					 */
9102					(void)dns_rdataset_additionaldata(
9103						rdataset, name, check_related,
9104						rctx);
9105					done = true;
9106				}
9107			}
9108		}
9109
9110		result = dns_message_nextname(rctx->query->rmessage,
9111					      DNS_SECTION_AUTHORITY);
9112	}
9113}
9114
9115/*
9116 * rctx_answer_none():
9117 * Handles a response without an answer: this is either a negative
9118 * response (NXDOMAIN or NXRRSET) or a referral. Determine which it is,
9119 * then either scan the authority section for negative caching and
9120 * DNSSEC proof of nonexistence, or else call rctx_referral().
9121 */
9122static isc_result_t
9123rctx_answer_none(respctx_t *rctx) {
9124	isc_result_t result;
9125	fetchctx_t *fctx = rctx->fctx;
9126
9127	FCTXTRACE("rctx_answer_none");
9128
9129	rctx_answer_init(rctx);
9130
9131	/*
9132	 * Sometimes we can tell if its a negative response by looking
9133	 * at the message header.
9134	 */
9135	if (rctx->query->rmessage->rcode == dns_rcode_nxdomain ||
9136	    (rctx->query->rmessage->counts[DNS_SECTION_ANSWER] == 0 &&
9137	     rctx->query->rmessage->counts[DNS_SECTION_AUTHORITY] == 0))
9138	{
9139		rctx->negative = true;
9140	}
9141
9142	/*
9143	 * Process the authority section
9144	 */
9145	result = rctx_authority_negative(rctx);
9146	if (result == ISC_R_COMPLETE) {
9147		return (rctx->result);
9148	}
9149
9150	log_ns_ttl(fctx, "rctx_answer_none");
9151
9152	if (rctx->ns_rdataset != NULL &&
9153	    dns_name_equal(fctx->domain, rctx->ns_name) &&
9154	    !dns_name_equal(rctx->ns_name, dns_rootname))
9155	{
9156		trim_ns_ttl(fctx, rctx->ns_name, rctx->ns_rdataset);
9157	}
9158
9159	/*
9160	 * A negative response has a SOA record (Type 2)
9161	 * and a optional NS RRset (Type 1) or it has neither
9162	 * a SOA or a NS RRset (Type 3, handled above) or
9163	 * rcode is NXDOMAIN (handled above) in which case
9164	 * the NS RRset is allowed (Type 4).
9165	 */
9166	if (rctx->soa_name != NULL) {
9167		rctx->negative = true;
9168	}
9169
9170	if (!rctx->ns_in_answer && !rctx->glue_in_answer) {
9171		/*
9172		 * Process DNSSEC records in the authority section.
9173		 */
9174		result = rctx_authority_dnssec(rctx);
9175		if (result == ISC_R_COMPLETE) {
9176			return (rctx->result);
9177		}
9178	}
9179
9180	/*
9181	 * Trigger lookups for DNS nameservers.
9182	 */
9183	if (rctx->negative &&
9184	    rctx->query->rmessage->rcode == dns_rcode_noerror &&
9185	    fctx->type == dns_rdatatype_ds && rctx->soa_name != NULL &&
9186	    dns_name_equal(rctx->soa_name, fctx->name) &&
9187	    !dns_name_equal(fctx->name, dns_rootname))
9188	{
9189		return (DNS_R_CHASEDSSERVERS);
9190	}
9191
9192	/*
9193	 * Did we find anything?
9194	 */
9195	if (!rctx->negative && rctx->ns_name == NULL) {
9196		/*
9197		 * The responder is insane.
9198		 */
9199		if (rctx->found_name == NULL) {
9200			log_formerr(fctx, "invalid response");
9201			return (DNS_R_FORMERR);
9202		}
9203		if (!dns_name_issubdomain(rctx->found_name, fctx->domain)) {
9204			char nbuf[DNS_NAME_FORMATSIZE];
9205			char dbuf[DNS_NAME_FORMATSIZE];
9206			char tbuf[DNS_RDATATYPE_FORMATSIZE];
9207
9208			dns_rdatatype_format(rctx->found_type, tbuf,
9209					     sizeof(tbuf));
9210			dns_name_format(rctx->found_name, nbuf, sizeof(nbuf));
9211			dns_name_format(fctx->domain, dbuf, sizeof(dbuf));
9212
9213			log_formerr(fctx,
9214				    "Name %s (%s) not subdomain"
9215				    " of zone %s -- invalid response",
9216				    nbuf, tbuf, dbuf);
9217		} else {
9218			log_formerr(fctx, "invalid response");
9219		}
9220		return (DNS_R_FORMERR);
9221	}
9222
9223	/*
9224	 * If we found both NS and SOA, they should be the same name.
9225	 */
9226	if (rctx->ns_name != NULL && rctx->soa_name != NULL &&
9227	    rctx->ns_name != rctx->soa_name)
9228	{
9229		log_formerr(fctx, "NS/SOA mismatch");
9230		return (DNS_R_FORMERR);
9231	}
9232
9233	/*
9234	 * Handle a referral.
9235	 */
9236	result = rctx_referral(rctx);
9237	if (result == ISC_R_COMPLETE) {
9238		return (rctx->result);
9239	}
9240
9241	/*
9242	 * Since we're not doing a referral, we don't want to cache any
9243	 * NS RRs we may have found.
9244	 */
9245	if (rctx->ns_name != NULL) {
9246		rctx->ns_name->attributes &= ~DNS_NAMEATTR_CACHE;
9247	}
9248
9249	if (rctx->negative) {
9250		FCTX_ATTR_SET(fctx, FCTX_ATTR_WANTNCACHE);
9251	}
9252
9253	return (ISC_R_SUCCESS);
9254}
9255
/*
 * rctx_authority_negative():
 * Scan the authority section of a negative answer, handling
 * NS and SOA records. (Note that this function does *not* handle
 * DNSSEC records; those are addressed separately in
 * rctx_authority_dnssec() below.)
 *
 * When rctx->ns_in_answer is set, the answer section is scanned
 * instead of the authority section.
 *
 * On return, rctx->ns_name / rctx->ns_rdataset and rctx->soa_name
 * identify the NS RRset and the SOA owner name that were found, if
 * any.
 *
 * Returns ISC_R_SUCCESS when the scan ran to the end (or the section
 * is empty), or ISC_R_COMPLETE with rctx->result set to DNS_R_FORMERR
 * when NS RRsets or SOA records are found at more than one name.
 */
static isc_result_t
rctx_authority_negative(respctx_t *rctx) {
	isc_result_t result;
	fetchctx_t *fctx = rctx->fctx;
	dns_section_t section;
	dns_rdataset_t *rdataset = NULL;
	bool finished = false;

	if (rctx->ns_in_answer) {
		INSIST(fctx->type == dns_rdatatype_ns);
		section = DNS_SECTION_ANSWER;
	} else {
		section = DNS_SECTION_AUTHORITY;
	}

	/* An empty section is not an error; there is nothing to do. */
	result = dns_message_firstname(rctx->query->rmessage, section);
	if (result != ISC_R_SUCCESS) {
		return (ISC_R_SUCCESS);
	}

	while (!finished) {
		dns_name_t *name = NULL;

		/*
		 * Fetch the current name and advance the cursor right
		 * away, so that the 'continue' below cannot skip the
		 * advance.
		 */
		dns_message_currentname(rctx->query->rmessage, section, &name);
		result = dns_message_nextname(rctx->query->rmessage, section);
		if (result != ISC_R_SUCCESS) {
			finished = true;
		}

		/* Ignore names outside the domain being queried. */
		if (!dns_name_issubdomain(name, fctx->domain)) {
			continue;
		}

		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
		     rdataset = ISC_LIST_NEXT(rdataset, link))
		{
			/* Treat an RRSIG like the type it covers. */
			dns_rdatatype_t type = rdataset->type;
			if (type == dns_rdatatype_rrsig) {
				type = rdataset->covers;
			}
			/*
			 * An NS or SOA whose owner name is not at or
			 * above the query name is unrelated: log it and
			 * skip the remaining rdatasets at this name.
			 */
			if (((type == dns_rdatatype_ns ||
			      type == dns_rdatatype_soa) &&
			     !dns_name_issubdomain(fctx->name, name)))
			{
				char qbuf[DNS_NAME_FORMATSIZE];
				char nbuf[DNS_NAME_FORMATSIZE];
				char tbuf[DNS_RDATATYPE_FORMATSIZE];
				dns_rdatatype_format(type, tbuf, sizeof(tbuf));
				dns_name_format(name, nbuf, sizeof(nbuf));
				dns_name_format(fctx->name, qbuf, sizeof(qbuf));
				log_formerr(fctx,
					    "unrelated %s %s in "
					    "%s authority section",
					    tbuf, nbuf, qbuf);
				break;
			}

			switch (type) {
			case dns_rdatatype_ns:
				/*
				 * NS or RRSIG NS.
				 *
				 * Only one set of NS RRs is allowed.
				 */
				if (rdataset->type == dns_rdatatype_ns) {
					if (rctx->ns_name != NULL &&
					    name != rctx->ns_name)
					{
						log_formerr(fctx, "multiple NS "
								  "RRsets "
								  "in "
								  "authority "
								  "section");
						rctx->result = DNS_R_FORMERR;
						return (ISC_R_COMPLETE);
					}
					rctx->ns_name = name;
					rctx->ns_rdataset = rdataset;
				}
				/* Flag for caching, at glue trust level. */
				name->attributes |= DNS_NAMEATTR_CACHE;
				rdataset->attributes |= DNS_RDATASETATTR_CACHE;
				rdataset->trust = dns_trust_glue;
				break;
			case dns_rdatatype_soa:
				/*
				 * SOA, or RRSIG SOA.
				 *
				 * Only one SOA is allowed.
				 */
				if (rdataset->type == dns_rdatatype_soa) {
					if (rctx->soa_name != NULL &&
					    name != rctx->soa_name)
					{
						log_formerr(fctx, "multiple "
								  "SOA RRs "
								  "in "
								  "authority "
								  "section");
						rctx->result = DNS_R_FORMERR;
						return (ISC_R_COMPLETE);
					}
					rctx->soa_name = name;
				}
				/*
				 * Flag for negative caching; the trust
				 * level depends on the AA bit and on
				 * whether the server is a forwarder.
				 */
				name->attributes |= DNS_NAMEATTR_NCACHE;
				rdataset->attributes |= DNS_RDATASETATTR_NCACHE;
				if (rctx->aa) {
					rdataset->trust =
						dns_trust_authauthority;
				} else if (ISFORWARDER(fctx->addrinfo)) {
					rdataset->trust = dns_trust_answer;
				} else {
					rdataset->trust = dns_trust_additional;
				}
				break;
			default:
				/* Other types are not handled here. */
				continue;
			}
		}
	}

	return (ISC_R_SUCCESS);
}
9385
9386/*
9387 * rctx_ncache():
9388 * Cache the negatively cacheable parts of the message.  This may
9389 * also cause work to be queued to the DNSSEC validator.
9390 */
9391static void
9392rctx_ncache(respctx_t *rctx) {
9393	isc_result_t result;
9394	dns_rdatatype_t covers;
9395	fetchctx_t *fctx = rctx->fctx;
9396
9397	if (!WANTNCACHE(fctx)) {
9398		return;
9399	}
9400
9401	/*
9402	 * Cache DS NXDOMAIN separately to other types.
9403	 */
9404	if (rctx->query->rmessage->rcode == dns_rcode_nxdomain &&
9405	    fctx->type != dns_rdatatype_ds)
9406	{
9407		covers = dns_rdatatype_any;
9408	} else {
9409		covers = fctx->type;
9410	}
9411
9412	/*
9413	 * Cache any negative cache entries in the message.
9414	 */
9415	result = ncache_message(fctx, rctx->query->rmessage,
9416				rctx->query->addrinfo, covers, rctx->now);
9417	if (result != ISC_R_SUCCESS) {
9418		FCTXTRACE3("ncache_message complete", result);
9419	}
9420}
9421
9422/*
9423 * rctx_authority_dnssec():
9424 *
9425 * Scan the authority section of a negative answer or referral,
9426 * handling DNSSEC records (i.e. NSEC, NSEC3, DS).
9427 */
9428static isc_result_t
9429rctx_authority_dnssec(respctx_t *rctx) {
9430	isc_result_t result;
9431	fetchctx_t *fctx = rctx->fctx;
9432	dns_rdataset_t *rdataset = NULL;
9433	bool finished = false;
9434
9435	REQUIRE(!rctx->ns_in_answer && !rctx->glue_in_answer);
9436
9437	result = dns_message_firstname(rctx->query->rmessage,
9438				       DNS_SECTION_AUTHORITY);
9439	if (result != ISC_R_SUCCESS) {
9440		return (ISC_R_SUCCESS);
9441	}
9442
9443	while (!finished) {
9444		dns_name_t *name = NULL;
9445
9446		dns_message_currentname(rctx->query->rmessage,
9447					DNS_SECTION_AUTHORITY, &name);
9448		result = dns_message_nextname(rctx->query->rmessage,
9449					      DNS_SECTION_AUTHORITY);
9450		if (result != ISC_R_SUCCESS) {
9451			finished = true;
9452		}
9453
9454		if (!dns_name_issubdomain(name, fctx->domain)) {
9455			/*
9456			 * Invalid name found; preserve it for logging
9457			 * later.
9458			 */
9459			rctx->found_name = name;
9460			rctx->found_type = ISC_LIST_HEAD(name->list)->type;
9461			continue;
9462		}
9463
9464		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
9465		     rdataset = ISC_LIST_NEXT(rdataset, link))
9466		{
9467			bool checknta = true;
9468			bool secure_domain = false;
9469			dns_rdatatype_t type = rdataset->type;
9470
9471			if (type == dns_rdatatype_rrsig) {
9472				type = rdataset->covers;
9473			}
9474
9475			switch (type) {
9476			case dns_rdatatype_nsec:
9477			case dns_rdatatype_nsec3:
9478				if (rctx->negative) {
9479					name->attributes |= DNS_NAMEATTR_NCACHE;
9480					rdataset->attributes |=
9481						DNS_RDATASETATTR_NCACHE;
9482				} else if (type == dns_rdatatype_nsec) {
9483					name->attributes |= DNS_NAMEATTR_CACHE;
9484					rdataset->attributes |=
9485						DNS_RDATASETATTR_CACHE;
9486				}
9487
9488				if (rctx->aa) {
9489					rdataset->trust =
9490						dns_trust_authauthority;
9491				} else if (ISFORWARDER(fctx->addrinfo)) {
9492					rdataset->trust = dns_trust_answer;
9493				} else {
9494					rdataset->trust = dns_trust_additional;
9495				}
9496				/*
9497				 * No additional data needs to be
9498				 * marked.
9499				 */
9500				break;
9501			case dns_rdatatype_ds:
9502				/*
9503				 * DS or SIG DS.
9504				 *
9505				 * These should only be here if this is
9506				 * a referral, and there should only be
9507				 * one DS RRset.
9508				 */
9509				if (rctx->ns_name == NULL) {
9510					log_formerr(fctx, "DS with no "
9511							  "referral");
9512					rctx->result = DNS_R_FORMERR;
9513					return (ISC_R_COMPLETE);
9514				}
9515
9516				if (rdataset->type == dns_rdatatype_ds) {
9517					if (rctx->ds_name != NULL &&
9518					    name != rctx->ds_name)
9519					{
9520						log_formerr(fctx, "DS doesn't "
9521								  "match "
9522								  "referral "
9523								  "(NS)");
9524						rctx->result = DNS_R_FORMERR;
9525						return (ISC_R_COMPLETE);
9526					}
9527					rctx->ds_name = name;
9528				}
9529
9530				name->attributes |= DNS_NAMEATTR_CACHE;
9531				rdataset->attributes |= DNS_RDATASETATTR_CACHE;
9532
9533				if ((fctx->options & DNS_FETCHOPT_NONTA) != 0) {
9534					checknta = false;
9535				}
9536				if (fctx->res->view->enablevalidation) {
9537					result = issecuredomain(
9538						fctx->res->view, name,
9539						dns_rdatatype_ds, fctx->now,
9540						checknta, NULL, &secure_domain);
9541					if (result != ISC_R_SUCCESS) {
9542						return (result);
9543					}
9544				}
9545				if (secure_domain) {
9546					rdataset->trust =
9547						dns_trust_pending_answer;
9548				} else if (rctx->aa) {
9549					rdataset->trust =
9550						dns_trust_authauthority;
9551				} else if (ISFORWARDER(fctx->addrinfo)) {
9552					rdataset->trust = dns_trust_answer;
9553				} else {
9554					rdataset->trust = dns_trust_additional;
9555				}
9556				break;
9557			default:
9558				continue;
9559			}
9560		}
9561	}
9562
9563	return (ISC_R_SUCCESS);
9564}
9565
9566/*
9567 * rctx_referral():
9568 * Handles referral responses. Check for sanity, find glue as needed,
9569 * and update the fetch context to follow the delegation.
9570 */
9571static isc_result_t
9572rctx_referral(respctx_t *rctx) {
9573	isc_result_t result;
9574	fetchctx_t *fctx = rctx->fctx;
9575
9576	if (rctx->negative || rctx->ns_name == NULL) {
9577		return (ISC_R_SUCCESS);
9578	}
9579
9580	/*
9581	 * We already know ns_name is a subdomain of fctx->domain.
9582	 * If ns_name is equal to fctx->domain, we're not making
9583	 * progress.  We return DNS_R_FORMERR so that we'll keep
9584	 * trying other servers.
9585	 */
9586	if (dns_name_equal(rctx->ns_name, fctx->domain)) {
9587		log_formerr(fctx, "non-improving referral");
9588		rctx->result = DNS_R_FORMERR;
9589		return (ISC_R_COMPLETE);
9590	}
9591
9592	/*
9593	 * If the referral name is not a parent of the query
9594	 * name, consider the responder insane.
9595	 */
9596	if (!dns_name_issubdomain(fctx->name, rctx->ns_name)) {
9597		/* Logged twice */
9598		log_formerr(fctx, "referral to non-parent");
9599		FCTXTRACE("referral to non-parent");
9600		rctx->result = DNS_R_FORMERR;
9601		return (ISC_R_COMPLETE);
9602	}
9603
9604	/*
9605	 * Mark any additional data related to this rdataset.
9606	 * It's important that we do this before we change the
9607	 * query domain.
9608	 */
9609	INSIST(rctx->ns_rdataset != NULL);
9610	FCTX_ATTR_SET(fctx, FCTX_ATTR_GLUING);
9611	(void)dns_rdataset_additionaldata(rctx->ns_rdataset, rctx->ns_name,
9612					  check_related, rctx);
9613#if CHECK_FOR_GLUE_IN_ANSWER
9614	/*
9615	 * Look in the answer section for "glue" that is incorrectly
9616	 * returned as a answer.  This is needed if the server also
9617	 * minimizes the response size by not adding records to the
9618	 * additional section that are in the answer section or if
9619	 * the record gets dropped due to message size constraints.
9620	 */
9621	if (rctx->glue_in_answer &&
9622	    (fctx->type == dns_rdatatype_aaaa || fctx->type == dns_rdatatype_a))
9623	{
9624		(void)dns_rdataset_additionaldata(
9625			rctx->ns_rdataset, rctx->ns_name, check_answer, fctx);
9626	}
9627#endif /* if CHECK_FOR_GLUE_IN_ANSWER */
9628	FCTX_ATTR_CLR(fctx, FCTX_ATTR_GLUING);
9629
9630	/*
9631	 * NS rdatasets with 0 TTL cause problems.
9632	 * dns_view_findzonecut() will not find them when we
9633	 * try to follow the referral, and we'll SERVFAIL
9634	 * because the best nameservers are now above QDOMAIN.
9635	 * We force the TTL to 1 second to prevent this.
9636	 */
9637	if (rctx->ns_rdataset->ttl == 0) {
9638		rctx->ns_rdataset->ttl = 1;
9639	}
9640
9641	/*
9642	 * Set the current query domain to the referral name.
9643	 *
9644	 * XXXRTH  We should check if we're in forward-only mode, and
9645	 *		if so we should bail out.
9646	 */
9647	INSIST(dns_name_countlabels(fctx->domain) > 0);
9648	fcount_decr(fctx);
9649
9650	if (dns_rdataset_isassociated(&fctx->nameservers)) {
9651		dns_rdataset_disassociate(&fctx->nameservers);
9652	}
9653
9654	dns_name_copy(rctx->ns_name, fctx->domain);
9655
9656	if ((fctx->options & DNS_FETCHOPT_QMINIMIZE) != 0) {
9657		dns_name_copy(rctx->ns_name, fctx->qmindcname);
9658
9659		fctx_minimize_qname(fctx);
9660	}
9661
9662	result = fcount_incr(fctx, true);
9663	if (result != ISC_R_SUCCESS) {
9664		rctx->result = result;
9665		return (ISC_R_COMPLETE);
9666	}
9667
9668	FCTX_ATTR_SET(fctx, FCTX_ATTR_WANTCACHE);
9669	fctx->ns_ttl_ok = false;
9670	log_ns_ttl(fctx, "DELEGATION");
9671	rctx->result = DNS_R_DELEGATION;
9672
9673	/*
9674	 * Reinitialize 'rctx' to prepare for following the delegation:
9675	 * set the get_nameservers and next_server flags appropriately
9676	 * and reset the fetch context counters.
9677	 *
9678	 */
9679	if ((rctx->fctx->options & DNS_FETCHOPT_NOFOLLOW) == 0) {
9680		rctx->get_nameservers = true;
9681		rctx->next_server = true;
9682		rctx->fctx->restarts = 0;
9683		rctx->fctx->referrals++;
9684		rctx->fctx->querysent = 0;
9685		rctx->fctx->lamecount = 0;
9686		rctx->fctx->quotacount = 0;
9687		rctx->fctx->neterr = 0;
9688		rctx->fctx->badresp = 0;
9689		rctx->fctx->adberr = 0;
9690	}
9691
9692	return (ISC_R_COMPLETE);
9693}
9694
9695/*
9696 * rctx_additional():
9697 * Scan the additional section of a response to find records related
9698 * to answers we were interested in.
9699 */
9700static void
9701rctx_additional(respctx_t *rctx) {
9702	bool rescan;
9703	dns_section_t section = DNS_SECTION_ADDITIONAL;
9704	isc_result_t result;
9705
9706again:
9707	rescan = false;
9708
9709	for (result = dns_message_firstname(rctx->query->rmessage, section);
9710	     result == ISC_R_SUCCESS;
9711	     result = dns_message_nextname(rctx->query->rmessage, section))
9712	{
9713		dns_name_t *name = NULL;
9714		dns_rdataset_t *rdataset;
9715		dns_message_currentname(rctx->query->rmessage,
9716					DNS_SECTION_ADDITIONAL, &name);
9717		if ((name->attributes & DNS_NAMEATTR_CHASE) == 0) {
9718			continue;
9719		}
9720		name->attributes &= ~DNS_NAMEATTR_CHASE;
9721		for (rdataset = ISC_LIST_HEAD(name->list); rdataset != NULL;
9722		     rdataset = ISC_LIST_NEXT(rdataset, link))
9723		{
9724			if (CHASE(rdataset)) {
9725				rdataset->attributes &= ~DNS_RDATASETATTR_CHASE;
9726				(void)dns_rdataset_additionaldata(
9727					rdataset, name, check_related, rctx);
9728				rescan = true;
9729			}
9730		}
9731	}
9732	if (rescan) {
9733		goto again;
9734	}
9735}
9736
9737/*
9738 * rctx_nextserver():
9739 * We found something wrong with the remote server, but it may be
9740 * useful to try another one.
9741 */
9742static void
9743rctx_nextserver(respctx_t *rctx, dns_message_t *message,
9744		dns_adbaddrinfo_t *addrinfo, isc_result_t result) {
9745	fetchctx_t *fctx = rctx->fctx;
9746	bool retrying = true;
9747
9748	if (result == DNS_R_FORMERR) {
9749		rctx->broken_server = DNS_R_FORMERR;
9750	}
9751	if (rctx->broken_server != ISC_R_SUCCESS) {
9752		/*
9753		 * Add this server to the list of bad servers for
9754		 * this fctx.
9755		 */
9756		add_bad(fctx, message, addrinfo, rctx->broken_server,
9757			rctx->broken_type);
9758	}
9759
9760	if (rctx->get_nameservers) {
9761		dns_fixedname_t foundname, founddc;
9762		dns_name_t *name, *fname, *dcname;
9763		unsigned int findoptions = 0;
9764
9765		fname = dns_fixedname_initname(&foundname);
9766		dcname = dns_fixedname_initname(&founddc);
9767
9768		if (result != ISC_R_SUCCESS) {
9769			fctx_done_detach(&rctx->fctx, DNS_R_SERVFAIL);
9770			return;
9771		}
9772		if (dns_rdatatype_atparent(fctx->type)) {
9773			findoptions |= DNS_DBFIND_NOEXACT;
9774		}
9775		if ((rctx->retryopts & DNS_FETCHOPT_UNSHARED) == 0) {
9776			name = fctx->name;
9777		} else {
9778			name = fctx->domain;
9779		}
9780		result = dns_view_findzonecut(
9781			fctx->res->view, name, fname, dcname, fctx->now,
9782			findoptions, true, true, &fctx->nameservers, NULL);
9783		if (result != ISC_R_SUCCESS) {
9784			FCTXTRACE("couldn't find a zonecut");
9785			fctx_done_detach(&rctx->fctx, DNS_R_SERVFAIL);
9786			return;
9787		}
9788		if (!dns_name_issubdomain(fname, fctx->domain)) {
9789			/*
9790			 * The best nameservers are now above our
9791			 * QDOMAIN.
9792			 */
9793			FCTXTRACE("nameservers now above QDOMAIN");
9794			fctx_done_detach(&rctx->fctx, DNS_R_SERVFAIL);
9795			return;
9796		}
9797
9798		fcount_decr(fctx);
9799
9800		dns_name_copy(fname, fctx->domain);
9801		dns_name_copy(dcname, fctx->qmindcname);
9802
9803		result = fcount_incr(fctx, true);
9804		if (result != ISC_R_SUCCESS) {
9805			fctx_done_detach(&rctx->fctx, DNS_R_SERVFAIL);
9806			return;
9807		}
9808		fctx->ns_ttl = fctx->nameservers.ttl;
9809		fctx->ns_ttl_ok = true;
9810		fctx_cancelqueries(fctx, true, false);
9811		fctx_cleanup(fctx);
9812		retrying = false;
9813	}
9814
9815	/*
9816	 * Try again.
9817	 */
9818	fctx_try(fctx, retrying, false);
9819}
9820
9821/*
9822 * rctx_resend():
9823 *
9824 * Resend the query, probably with the options changed. Calls
9825 * fctx_query(), passing rctx->retryopts (which is based on
9826 * query->options, but may have been updated since the last time
9827 * fctx_query() was called).
9828 */
9829static void
9830rctx_resend(respctx_t *rctx, dns_adbaddrinfo_t *addrinfo) {
9831	fetchctx_t *fctx = rctx->fctx;
9832	isc_result_t result;
9833
9834	FCTXTRACE("resend");
9835	inc_stats(fctx->res, dns_resstatscounter_retry);
9836	result = fctx_query(fctx, addrinfo, rctx->retryopts);
9837	if (result != ISC_R_SUCCESS) {
9838		fctx_done_detach(&rctx->fctx, result);
9839	}
9840}
9841
9842/*
9843 * rctx_next():
9844 * We got what appeared to be a response but it didn't match the
9845 * question or the cookie; it may have been meant for someone else, or
9846 * it may be a spoofing attack. Drop it and continue listening for the
9847 * response we wanted.
9848 */
9849static isc_result_t
9850rctx_next(respctx_t *rctx) {
9851	fetchctx_t *fctx = rctx->fctx;
9852	isc_result_t result;
9853
9854	FCTXTRACE("nextitem");
9855	inc_stats(rctx->fctx->res, dns_resstatscounter_nextitem);
9856	INSIST(rctx->query->dispentry != NULL);
9857	dns_message_reset(rctx->query->rmessage, DNS_MESSAGE_INTENTPARSE);
9858	result = dns_dispatch_getnext(rctx->query->dispentry);
9859	return (result);
9860}
9861
9862/*
9863 * rctx_chaseds():
9864 * Look up the parent zone's NS records so that DS records can be
9865 * fetched.
9866 */
9867static void
9868rctx_chaseds(respctx_t *rctx, dns_message_t *message,
9869	     dns_adbaddrinfo_t *addrinfo, isc_result_t result) {
9870	fetchctx_t *fctx = rctx->fctx;
9871	isc_task_t *task = NULL;
9872	unsigned int n;
9873
9874	add_bad(fctx, message, addrinfo, result, rctx->broken_type);
9875	fctx_cancelqueries(fctx, true, false);
9876	fctx_cleanup(fctx);
9877
9878	n = dns_name_countlabels(fctx->name);
9879	dns_name_getlabelsequence(fctx->name, 1, n - 1, fctx->nsname);
9880
9881	FCTXTRACE("suspending DS lookup to find parent's NS records");
9882
9883	fctx_addref(fctx);
9884	task = fctx->res->buckets[fctx->bucketnum].task;
9885	result = dns_resolver_createfetch(
9886		fctx->res, fctx->nsname, dns_rdatatype_ns, NULL, NULL, NULL,
9887		NULL, 0, fctx->options, 0, NULL, task, resume_dslookup, fctx,
9888		&fctx->nsrrset, NULL, &fctx->nsfetch);
9889	if (result != ISC_R_SUCCESS) {
9890		if (result == DNS_R_DUPLICATE) {
9891			result = DNS_R_SERVFAIL;
9892		}
9893		fctx_detach(&fctx);
9894		fctx_done_detach(&rctx->fctx, result);
9895	}
9896}
9897
9898/*
9899 * rctx_done():
9900 * This resolver query response is finished, either because we
9901 * encountered a problem or because we've gotten all the information
9902 * from it that we can.  We either wait for another response, resend the
9903 * query to the same server, resend to a new server, or clean up and
9904 * shut down the fetch.
9905 */
9906static void
9907rctx_done(respctx_t *rctx, isc_result_t result) {
9908	resquery_t *query = rctx->query;
9909	fetchctx_t *fctx = rctx->fctx;
9910	dns_adbaddrinfo_t *addrinfo = query->addrinfo;
9911	dns_message_t *message = NULL;
9912
9913	/*
9914	 * Need to attach to the message until the scope
9915	 * of this function ends, since there are many places
9916	 * where the message is used and/or may be destroyed
9917	 * before this function ends.
9918	 */
9919	dns_message_attach(query->rmessage, &message);
9920
9921	FCTXTRACE4("query canceled in rctx_done();",
9922		   rctx->no_response ? "no response" : "responding", result);
9923
9924#ifdef ENABLE_AFL
9925	if (dns_fuzzing_resolver &&
9926	    (rctx->next_server || rctx->resend || rctx->nextitem))
9927	{
9928		fctx_cancelquery(&query, rctx->finish, rctx->no_response,
9929				 false);
9930		fctx_done_detach(&rctx->fctx, DNS_R_SERVFAIL);
9931		goto detach;
9932	}
9933#endif /* ifdef ENABLE_AFL */
9934
9935	if (rctx->nextitem) {
9936		REQUIRE(!rctx->next_server);
9937		REQUIRE(!rctx->resend);
9938
9939		result = rctx_next(rctx);
9940		if (result == ISC_R_SUCCESS) {
9941			goto detach;
9942		}
9943	}
9944
9945	/* Cancel the query */
9946	fctx_cancelquery(&query, rctx->finish, rctx->no_response, false);
9947
9948	/*
9949	 * If nobody's waiting for results, don't resend or try next server.
9950	 */
9951	LOCK(&fctx->res->buckets[fctx->bucketnum].lock);
9952	if (ISC_LIST_EMPTY(fctx->events)) {
9953		rctx->next_server = false;
9954		rctx->resend = false;
9955	}
9956	UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock);
9957
9958	if (rctx->next_server) {
9959		rctx_nextserver(rctx, message, addrinfo, result);
9960	} else if (rctx->resend) {
9961		rctx_resend(rctx, addrinfo);
9962	} else if (result == DNS_R_CHASEDSSERVERS) {
9963		rctx_chaseds(rctx, message, addrinfo, result);
9964	} else if (result == ISC_R_SUCCESS && !HAVE_ANSWER(fctx)) {
9965		/*
9966		 * All has gone well so far, but we are waiting for the DNSSEC
9967		 * validator to validate the answer.
9968		 */
9969		FCTXTRACE("wait for validator");
9970		fctx_cancelqueries(fctx, true, false);
9971	} else {
9972		/*
9973		 * We're done.
9974		 */
9975		fctx_done_detach(&rctx->fctx, result);
9976	}
9977
9978detach:
9979	dns_message_detach(&message);
9980}
9981
9982/*
9983 * rctx_logpacket():
9984 * Log the incoming packet; also log to DNSTAP if configured.
9985 */
9986static void
9987rctx_logpacket(respctx_t *rctx) {
9988#ifdef HAVE_DNSTAP
9989	isc_result_t result;
9990	fetchctx_t *fctx = rctx->fctx;
9991	isc_sockaddr_t localaddr, *la = NULL;
9992	unsigned char zone[DNS_NAME_MAXWIRE];
9993	dns_dtmsgtype_t dtmsgtype;
9994	dns_compress_t cctx;
9995	isc_region_t zr;
9996	isc_buffer_t zb;
9997#endif /* HAVE_DNSTAP */
9998
9999	dns_message_logfmtpacket(
10000		rctx->query->rmessage, "received packet from",
10001		&rctx->query->addrinfo->sockaddr, DNS_LOGCATEGORY_RESOLVER,
10002		DNS_LOGMODULE_PACKETS, &dns_master_style_comment,
10003		ISC_LOG_DEBUG(10), rctx->fctx->res->mctx);
10004
10005#ifdef HAVE_DNSTAP
10006	/*
10007	 * Log the response via dnstap.
10008	 */
10009	memset(&zr, 0, sizeof(zr));
10010	result = dns_compress_init(&cctx, -1, fctx->res->mctx);
10011	if (result == ISC_R_SUCCESS) {
10012		isc_buffer_init(&zb, zone, sizeof(zone));
10013		dns_compress_setmethods(&cctx, DNS_COMPRESS_NONE);
10014		result = dns_name_towire(fctx->domain, &cctx, &zb);
10015		if (result == ISC_R_SUCCESS) {
10016			isc_buffer_usedregion(&zb, &zr);
10017		}
10018		dns_compress_invalidate(&cctx);
10019	}
10020
10021	if ((fctx->qmessage->flags & DNS_MESSAGEFLAG_RD) != 0) {
10022		dtmsgtype = DNS_DTTYPE_FR;
10023	} else {
10024		dtmsgtype = DNS_DTTYPE_RR;
10025	}
10026
10027	result = dns_dispentry_getlocaladdress(rctx->query->dispentry,
10028					       &localaddr);
10029	if (result == ISC_R_SUCCESS) {
10030		la = &localaddr;
10031	}
10032
10033	dns_dt_send(fctx->res->view, dtmsgtype, la,
10034		    &rctx->query->addrinfo->sockaddr,
10035		    ((rctx->query->options & DNS_FETCHOPT_TCP) != 0), &zr,
10036		    &rctx->query->start, NULL, &rctx->buffer);
10037#endif /* HAVE_DNSTAP */
10038}
10039
10040/*
10041 * rctx_badserver():
10042 * Is the remote server broken, or does it dislike us?
10043 */
10044static isc_result_t
10045rctx_badserver(respctx_t *rctx, isc_result_t result) {
10046	fetchctx_t *fctx = rctx->fctx;
10047	resquery_t *query = rctx->query;
10048	isc_buffer_t b;
10049	char code[64];
10050	dns_rcode_t rcode = rctx->query->rmessage->rcode;
10051
10052	if (rcode == dns_rcode_noerror || rcode == dns_rcode_yxdomain ||
10053	    rcode == dns_rcode_nxdomain)
10054	{
10055		return (ISC_R_SUCCESS);
10056	}
10057
10058	if ((rcode == dns_rcode_formerr) && rctx->opt == NULL &&
10059	    (rctx->retryopts & DNS_FETCHOPT_NOEDNS0) == 0)
10060	{
10061		/*
10062		 * It's very likely they don't like EDNS0.
10063		 */
10064		rctx->retryopts |= DNS_FETCHOPT_NOEDNS0;
10065		rctx->resend = true;
10066		/*
10067		 * Remember that they may not like EDNS0.
10068		 */
10069		add_bad_edns(fctx, &query->addrinfo->sockaddr);
10070		inc_stats(fctx->res, dns_resstatscounter_edns0fail);
10071	} else if (rcode == dns_rcode_formerr) {
10072		if (query->rmessage->cc_echoed) {
10073			/*
10074			 * Retry without DNS COOKIE.
10075			 */
10076			query->addrinfo->flags |= FCTX_ADDRINFO_NOCOOKIE;
10077			rctx->resend = true;
10078			log_formerr(fctx, "server sent FORMERR with echoed DNS "
10079					  "COOKIE");
10080		} else {
10081			/*
10082			 * The server (or forwarder) doesn't understand us,
10083			 * but others might.
10084			 */
10085			rctx->next_server = true;
10086			rctx->broken_server = DNS_R_REMOTEFORMERR;
10087			log_formerr(fctx, "server sent FORMERR");
10088		}
10089	} else if (rcode == dns_rcode_badvers) {
10090		unsigned int version;
10091#if DNS_EDNS_VERSION > 0
10092		unsigned int flags, mask;
10093#endif /* if DNS_EDNS_VERSION > 0 */
10094
10095		INSIST(rctx->opt != NULL);
10096		version = (rctx->opt->ttl >> 16) & 0xff;
10097#if DNS_EDNS_VERSION > 0
10098		flags = (version << DNS_FETCHOPT_EDNSVERSIONSHIFT) |
10099			DNS_FETCHOPT_EDNSVERSIONSET;
10100		mask = DNS_FETCHOPT_EDNSVERSIONMASK |
10101		       DNS_FETCHOPT_EDNSVERSIONSET;
10102#endif /* if DNS_EDNS_VERSION > 0 */
10103
10104		/*
10105		 * Record that we got a good EDNS response.
10106		 */
10107		if (query->ednsversion > (int)version &&
10108		    !EDNSOK(query->addrinfo))
10109		{
10110			dns_adb_changeflags(fctx->adb, query->addrinfo,
10111					    FCTX_ADDRINFO_EDNSOK,
10112					    FCTX_ADDRINFO_EDNSOK);
10113		}
10114
10115		/*
10116		 * RFC 2671 was not clear that unknown options should
10117		 * be ignored.  RFC 6891 is clear that that they
10118		 * should be ignored. If we are supporting the
10119		 * experimental EDNS > 0 then perform strict
10120		 * version checking of badvers responses.  We won't
10121		 * be sending COOKIE etc. in that case.
10122		 */
10123#if DNS_EDNS_VERSION > 0
10124		if ((int)version < query->ednsversion) {
10125			dns_adb_changeflags(fctx->adb, query->addrinfo, flags,
10126					    mask);
10127			rctx->resend = true;
10128		} else {
10129			rctx->broken_server = DNS_R_BADVERS;
10130			rctx->next_server = true;
10131		}
10132#else  /* if DNS_EDNS_VERSION > 0 */
10133		rctx->broken_server = DNS_R_BADVERS;
10134		rctx->next_server = true;
10135#endif /* if DNS_EDNS_VERSION > 0 */
10136	} else if (rcode == dns_rcode_badcookie && rctx->query->rmessage->cc_ok)
10137	{
10138		/*
10139		 * We have recorded the new cookie.
10140		 */
10141		if (BADCOOKIE(query->addrinfo)) {
10142			rctx->retryopts |= DNS_FETCHOPT_TCP;
10143		}
10144		query->addrinfo->flags |= FCTX_ADDRINFO_BADCOOKIE;
10145		rctx->resend = true;
10146	} else {
10147		rctx->broken_server = DNS_R_UNEXPECTEDRCODE;
10148		rctx->next_server = true;
10149	}
10150
10151	isc_buffer_init(&b, code, sizeof(code) - 1);
10152	dns_rcode_totext(rcode, &b);
10153	code[isc_buffer_usedlength(&b)] = '\0';
10154	FCTXTRACE2("remote server broken: returned ", code);
10155	rctx_done(rctx, result);
10156
10157	return (ISC_R_COMPLETE);
10158}
10159
10160/*
10161 * rctx_lameserver():
10162 * Is the server lame?
10163 */
10164static isc_result_t
10165rctx_lameserver(respctx_t *rctx) {
10166	isc_result_t result = ISC_R_SUCCESS;
10167	fetchctx_t *fctx = rctx->fctx;
10168	resquery_t *query = rctx->query;
10169
10170	if (ISFORWARDER(query->addrinfo) || !is_lame(fctx, query->rmessage)) {
10171		return (ISC_R_SUCCESS);
10172	}
10173
10174	inc_stats(fctx->res, dns_resstatscounter_lame);
10175	log_lame(fctx, query->addrinfo);
10176	if (fctx->res->lame_ttl != 0) {
10177		result = dns_adb_marklame(fctx->adb, query->addrinfo,
10178					  fctx->name, fctx->type,
10179					  rctx->now + fctx->res->lame_ttl);
10180		if (result != ISC_R_SUCCESS) {
10181			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
10182				      DNS_LOGMODULE_RESOLVER, ISC_LOG_ERROR,
10183				      "could not mark server as lame: %s",
10184				      isc_result_totext(result));
10185		}
10186	}
10187	rctx->broken_server = DNS_R_LAME;
10188	rctx->next_server = true;
10189	FCTXTRACE("lame server");
10190	rctx_done(rctx, result);
10191
10192	return (ISC_R_COMPLETE);
10193}
10194
10195/*
10196 * rctx_delonly_zone():
10197 * Handle delegation-only zones like NET and COM.
10198 */
10199static void
10200rctx_delonly_zone(respctx_t *rctx) {
10201	fetchctx_t *fctx = rctx->fctx;
10202	char namebuf[DNS_NAME_FORMATSIZE];
10203	char domainbuf[DNS_NAME_FORMATSIZE];
10204	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
10205	char classbuf[64];
10206	char typebuf[64];
10207
10208	if (ISFORWARDER(rctx->query->addrinfo) ||
10209	    !dns_view_isdelegationonly(fctx->res->view, fctx->domain) ||
10210	    dns_name_equal(fctx->domain, fctx->name) ||
10211	    !fix_mustbedelegationornxdomain(rctx->query->rmessage, fctx))
10212	{
10213		return;
10214	}
10215
10216	dns_name_format(fctx->name, namebuf, sizeof(namebuf));
10217	dns_name_format(fctx->domain, domainbuf, sizeof(domainbuf));
10218	dns_rdatatype_format(fctx->type, typebuf, sizeof(typebuf));
10219	dns_rdataclass_format(fctx->res->rdclass, classbuf, sizeof(classbuf));
10220	isc_sockaddr_format(&rctx->query->addrinfo->sockaddr, addrbuf,
10221			    sizeof(addrbuf));
10222
10223	isc_log_write(dns_lctx, DNS_LOGCATEGORY_DELEGATION_ONLY,
10224		      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
10225		      "enforced delegation-only for '%s' (%s/%s/%s) from %s",
10226		      domainbuf, namebuf, typebuf, classbuf, addrbuf);
10227}
10228
10229/***
10230 *** Resolver Methods
10231 ***/
10232static void
10233destroy(dns_resolver_t *res) {
10234	unsigned int i;
10235	alternate_t *a;
10236
10237	isc_refcount_destroy(&res->references);
10238	REQUIRE(!atomic_load_acquire(&res->priming));
10239	REQUIRE(res->primefetch == NULL);
10240
10241	RTRACE("destroy");
10242
10243	REQUIRE(atomic_load_acquire(&res->nfctx) == 0);
10244
10245	isc_mutex_destroy(&res->primelock);
10246	isc_mutex_destroy(&res->lock);
10247	for (i = 0; i < res->nbuckets; i++) {
10248		INSIST(ISC_LIST_EMPTY(res->buckets[i].fctxs));
10249		isc_task_shutdown(res->buckets[i].task);
10250		isc_task_detach(&res->buckets[i].task);
10251		isc_mutex_destroy(&res->buckets[i].lock);
10252	}
10253	isc_mem_put(res->mctx, res->buckets,
10254		    res->nbuckets * sizeof(fctxbucket_t));
10255	for (i = 0; i < HASHSIZE(res->dhashbits); i++) {
10256		INSIST(ISC_LIST_EMPTY(res->dbuckets[i].list));
10257		isc_mutex_destroy(&res->dbuckets[i].lock);
10258	}
10259	isc_mem_put(res->mctx, res->dbuckets,
10260		    HASHSIZE(res->dhashbits) * sizeof(zonebucket_t));
10261	if (res->dispatches4 != NULL) {
10262		dns_dispatchset_destroy(&res->dispatches4);
10263	}
10264	if (res->dispatches6 != NULL) {
10265		dns_dispatchset_destroy(&res->dispatches6);
10266	}
10267	while ((a = ISC_LIST_HEAD(res->alternates)) != NULL) {
10268		ISC_LIST_UNLINK(res->alternates, a, link);
10269		if (!a->isaddress) {
10270			dns_name_free(&a->_u._n.name, res->mctx);
10271		}
10272		isc_mem_put(res->mctx, a, sizeof(*a));
10273	}
10274	dns_resolver_reset_algorithms(res);
10275	dns_resolver_reset_ds_digests(res);
10276	dns_badcache_destroy(&res->badcache);
10277	dns_resolver_resetmustbesecure(res);
10278	isc_timer_destroy(&res->spillattimer);
10279	res->magic = 0;
10280	isc_mem_putanddetach(&res->mctx, res, sizeof(*res));
10281}
10282
10283static void
10284send_shutdown_events(dns_resolver_t *res) {
10285	isc_event_t *event, *next_event;
10286	isc_task_t *etask;
10287
10288	LOCK(&res->lock);
10289	for (event = ISC_LIST_HEAD(res->whenshutdown); event != NULL;
10290	     event = next_event)
10291	{
10292		next_event = ISC_LIST_NEXT(event, ev_link);
10293		ISC_LIST_UNLINK(res->whenshutdown, event, ev_link);
10294		etask = event->ev_sender;
10295		event->ev_sender = res;
10296		isc_task_sendanddetach(&etask, &event);
10297	}
10298	UNLOCK(&res->lock);
10299}
10300
10301static void
10302spillattimer_countdown(isc_task_t *task, isc_event_t *event) {
10303	dns_resolver_t *res = event->ev_arg;
10304	isc_result_t result;
10305	unsigned int count;
10306	bool logit = false;
10307
10308	REQUIRE(VALID_RESOLVER(res));
10309
10310	UNUSED(task);
10311
10312	LOCK(&res->lock);
10313	if (res->spillat > res->spillatmin) {
10314		res->spillat--;
10315		logit = true;
10316	}
10317	if (res->spillat <= res->spillatmin) {
10318		result = isc_timer_reset(res->spillattimer,
10319					 isc_timertype_inactive, NULL, NULL,
10320					 true);
10321		RUNTIME_CHECK(result == ISC_R_SUCCESS);
10322	}
10323	count = res->spillat;
10324	UNLOCK(&res->lock);
10325	if (logit) {
10326		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
10327			      DNS_LOGMODULE_RESOLVER, ISC_LOG_NOTICE,
10328			      "clients-per-query decreased to %u", count);
10329	}
10330
10331	isc_event_free(&event);
10332}
10333
10334isc_result_t
10335dns_resolver_create(dns_view_t *view, isc_taskmgr_t *taskmgr,
10336		    unsigned int ntasks, unsigned int ndisp, isc_nm_t *nm,
10337		    isc_timermgr_t *timermgr, unsigned int options,
10338		    dns_dispatchmgr_t *dispatchmgr, dns_dispatch_t *dispatchv4,
10339		    dns_dispatch_t *dispatchv6, dns_resolver_t **resp) {
10340	isc_result_t result = ISC_R_SUCCESS;
10341	char name[sizeof("res4294967295")];
10342	dns_resolver_t *res = NULL;
10343	isc_task_t *task = NULL;
10344
10345	/*
10346	 * Create a resolver.
10347	 */
10348
10349	REQUIRE(DNS_VIEW_VALID(view));
10350	REQUIRE(ntasks > 0);
10351	REQUIRE(ndisp > 0);
10352	REQUIRE(resp != NULL && *resp == NULL);
10353	REQUIRE(dispatchmgr != NULL);
10354	REQUIRE(dispatchv4 != NULL || dispatchv6 != NULL);
10355
10356	RTRACE("create");
10357	res = isc_mem_get(view->mctx, sizeof(*res));
10358	*res = (dns_resolver_t){ .rdclass = view->rdclass,
10359				 .nm = nm,
10360				 .timermgr = timermgr,
10361				 .taskmgr = taskmgr,
10362				 .dispatchmgr = dispatchmgr,
10363				 .view = view,
10364				 .options = options,
10365				 .udpsize = DEFAULT_EDNS_BUFSIZE,
10366				 .spillatmin = 10,
10367				 .spillat = 10,
10368				 .spillatmax = 100,
10369				 .retryinterval = 10000,
10370				 .nonbackofftries = 3,
10371				 .query_timeout = DEFAULT_QUERY_TIMEOUT,
10372				 .maxdepth = DEFAULT_RECURSION_DEPTH,
10373				 .maxqueries = DEFAULT_MAX_QUERIES,
10374				 .nbuckets = ntasks,
10375				 .dhashbits = RES_DOMAIN_HASH_BITS };
10376
10377	atomic_init(&res->activebuckets, res->nbuckets);
10378
10379	isc_mem_attach(view->mctx, &res->mctx);
10380
10381	res->quotaresp[dns_quotatype_zone] = DNS_R_DROP;
10382	res->quotaresp[dns_quotatype_server] = DNS_R_SERVFAIL;
10383	isc_refcount_init(&res->references, 1);
10384	atomic_init(&res->exiting, false);
10385	atomic_init(&res->priming, false);
10386	atomic_init(&res->zspill, 0);
10387	atomic_init(&res->nfctx, 0);
10388	ISC_LIST_INIT(res->whenshutdown);
10389	ISC_LIST_INIT(res->alternates);
10390
10391	result = dns_badcache_init(res->mctx, DNS_RESOLVER_BADCACHESIZE,
10392				   &res->badcache);
10393	if (result != ISC_R_SUCCESS) {
10394		goto cleanup_res;
10395	}
10396
10397	if (view->resstats != NULL) {
10398		isc_stats_set(view->resstats, res->nbuckets,
10399			      dns_resstatscounter_buckets);
10400	}
10401
10402	res->buckets = isc_mem_get(view->mctx,
10403				   res->nbuckets * sizeof(res->buckets[0]));
10404	for (uint32_t i = 0; i < ntasks; i++) {
10405		res->buckets[i] = (fctxbucket_t){ 0 };
10406
10407		isc_mutex_init(&res->buckets[i].lock);
10408
10409		/*
10410		 * Since we have a pool of tasks we bind them to task
10411		 * queues to spread the load evenly
10412		 */
10413		result = isc_task_create_bound(
10414			taskmgr, 0, &res->buckets[i].task, ISC_NM_TASK_SLOW(i));
10415		if (result != ISC_R_SUCCESS) {
10416			ntasks = i;
10417			isc_mutex_destroy(&res->buckets[i].lock);
10418			goto cleanup_buckets;
10419		}
10420
10421		snprintf(name, sizeof(name), "res%" PRIu32, i);
10422		isc_task_setname(res->buckets[i].task, name, res);
10423
10424		ISC_LIST_INIT(res->buckets[i].fctxs);
10425		atomic_init(&res->buckets[i].exiting, false);
10426	}
10427
10428	res->dbuckets = isc_mem_get(view->mctx,
10429				    HASHSIZE(res->dhashbits) *
10430					    sizeof(res->dbuckets[0]));
10431	for (size_t i = 0; i < HASHSIZE(res->dhashbits); i++) {
10432		res->dbuckets[i] = (zonebucket_t){ .list = { 0 } };
10433		ISC_LIST_INIT(res->dbuckets[i].list);
10434		isc_mutex_init(&res->dbuckets[i].lock);
10435	}
10436
10437	if (dispatchv4 != NULL) {
10438		dns_dispatchset_create(view->mctx, dispatchv4,
10439				       &res->dispatches4, ndisp);
10440	}
10441
10442	if (dispatchv6 != NULL) {
10443		dns_dispatchset_create(view->mctx, dispatchv6,
10444				       &res->dispatches6, ndisp);
10445	}
10446
10447	isc_mutex_init(&res->lock);
10448	isc_mutex_init(&res->primelock);
10449
10450	result = isc_task_create(taskmgr, 0, &task);
10451	if (result != ISC_R_SUCCESS) {
10452		goto cleanup_primelock;
10453	}
10454	isc_task_setname(task, "resolver_task", NULL);
10455
10456	result = isc_timer_create(timermgr, isc_timertype_inactive, NULL, NULL,
10457				  task, spillattimer_countdown, res,
10458				  &res->spillattimer);
10459	isc_task_detach(&task);
10460	if (result != ISC_R_SUCCESS) {
10461		goto cleanup_primelock;
10462	}
10463
10464	res->magic = RES_MAGIC;
10465
10466	*resp = res;
10467
10468	return (ISC_R_SUCCESS);
10469
10470cleanup_primelock:
10471	isc_mutex_destroy(&res->primelock);
10472	isc_mutex_destroy(&res->lock);
10473
10474	if (res->dispatches6 != NULL) {
10475		dns_dispatchset_destroy(&res->dispatches6);
10476	}
10477	if (res->dispatches4 != NULL) {
10478		dns_dispatchset_destroy(&res->dispatches4);
10479	}
10480
10481	for (size_t i = 0; i < HASHSIZE(res->dhashbits); i++) {
10482		isc_mutex_destroy(&res->dbuckets[i].lock);
10483	}
10484	isc_mem_put(view->mctx, res->dbuckets,
10485		    HASHSIZE(res->dhashbits) * sizeof(zonebucket_t));
10486
10487cleanup_buckets:
10488	for (size_t i = 0; i < ntasks; i++) {
10489		isc_mutex_destroy(&res->buckets[i].lock);
10490		isc_task_shutdown(res->buckets[i].task);
10491		isc_task_detach(&res->buckets[i].task);
10492	}
10493	isc_mem_put(view->mctx, res->buckets,
10494		    res->nbuckets * sizeof(fctxbucket_t));
10495
10496	dns_badcache_destroy(&res->badcache);
10497
10498cleanup_res:
10499	isc_mem_put(view->mctx, res, sizeof(*res));
10500
10501	return (result);
10502}
10503
10504static void
10505prime_done(isc_task_t *task, isc_event_t *event) {
10506	dns_resolver_t *res;
10507	dns_fetchevent_t *fevent;
10508	dns_fetch_t *fetch;
10509	dns_db_t *db = NULL;
10510
10511	REQUIRE(event->ev_type == DNS_EVENT_FETCHDONE);
10512	fevent = (dns_fetchevent_t *)event;
10513	res = event->ev_arg;
10514	REQUIRE(VALID_RESOLVER(res));
10515
10516	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
10517		      DNS_LOGMODULE_RESOLVER, ISC_LOG_INFO,
10518		      "resolver priming query complete: %s",
10519		      isc_result_totext(fevent->result));
10520
10521	UNUSED(task);
10522
10523	LOCK(&res->primelock);
10524	fetch = res->primefetch;
10525	res->primefetch = NULL;
10526	UNLOCK(&res->primelock);
10527
10528	atomic_compare_exchange_enforced(&res->priming, &(bool){ true }, false);
10529
10530	if (fevent->result == ISC_R_SUCCESS && res->view->cache != NULL &&
10531	    res->view->hints != NULL)
10532	{
10533		dns_cache_attachdb(res->view->cache, &db);
10534		dns_root_checkhints(res->view, res->view->hints, db);
10535		dns_db_detach(&db);
10536	}
10537
10538	if (fevent->node != NULL) {
10539		dns_db_detachnode(fevent->db, &fevent->node);
10540	}
10541	if (fevent->db != NULL) {
10542		dns_db_detach(&fevent->db);
10543	}
10544	if (dns_rdataset_isassociated(fevent->rdataset)) {
10545		dns_rdataset_disassociate(fevent->rdataset);
10546	}
10547	INSIST(fevent->sigrdataset == NULL);
10548
10549	isc_mem_put(res->mctx, fevent->rdataset, sizeof(*fevent->rdataset));
10550
10551	isc_event_free(&event);
10552	dns_resolver_destroyfetch(&fetch);
10553}
10554
10555void
10556dns_resolver_prime(dns_resolver_t *res) {
10557	bool want_priming = false;
10558	dns_rdataset_t *rdataset;
10559	isc_result_t result;
10560
10561	REQUIRE(VALID_RESOLVER(res));
10562	REQUIRE(res->frozen);
10563
10564	RTRACE("dns_resolver_prime");
10565
10566	if (!atomic_load_acquire(&res->exiting)) {
10567		want_priming = atomic_compare_exchange_strong_acq_rel(
10568			&res->priming, &(bool){ false }, true);
10569	}
10570
10571	if (want_priming) {
10572		/*
10573		 * To avoid any possible recursive locking problems, we
10574		 * start the priming fetch like any other fetch, and
10575		 * holding no resolver locks.  No one else will try to
10576		 * start it because we're the ones who set res->priming
10577		 * to true. Any other callers of dns_resolver_prime()
10578		 * while we're running will see that res->priming is
10579		 * already true and do nothing.
10580		 */
10581		RTRACE("priming");
10582		rdataset = isc_mem_get(res->mctx, sizeof(*rdataset));
10583		dns_rdataset_init(rdataset);
10584
10585		LOCK(&res->primelock);
10586		INSIST(res->primefetch == NULL);
10587		result = dns_resolver_createfetch(
10588			res, dns_rootname, dns_rdatatype_ns, NULL, NULL, NULL,
10589			NULL, 0, DNS_FETCHOPT_NOFORWARD, 0, NULL,
10590			res->buckets[0].task, prime_done, res, rdataset, NULL,
10591			&res->primefetch);
10592		UNLOCK(&res->primelock);
10593
10594		if (result != ISC_R_SUCCESS) {
10595			isc_mem_put(res->mctx, rdataset, sizeof(*rdataset));
10596			atomic_compare_exchange_enforced(
10597				&res->priming, &(bool){ true }, false);
10598		}
10599		inc_stats(res, dns_resstatscounter_priming);
10600	}
10601}
10602
10603void
10604dns_resolver_freeze(dns_resolver_t *res) {
10605	/*
10606	 * Freeze resolver.
10607	 */
10608
10609	REQUIRE(VALID_RESOLVER(res));
10610
10611	res->frozen = true;
10612}
10613
10614void
10615dns_resolver_attach(dns_resolver_t *source, dns_resolver_t **targetp) {
10616	REQUIRE(VALID_RESOLVER(source));
10617	REQUIRE(targetp != NULL && *targetp == NULL);
10618
10619	RRTRACE(source, "attach");
10620
10621	isc_refcount_increment(&source->references);
10622
10623	*targetp = source;
10624}
10625
10626void
10627dns_resolver_whenshutdown(dns_resolver_t *res, isc_task_t *task,
10628			  isc_event_t **eventp) {
10629	isc_event_t *event = NULL;
10630
10631	REQUIRE(VALID_RESOLVER(res));
10632	REQUIRE(eventp != NULL);
10633
10634	event = *eventp;
10635	*eventp = NULL;
10636
10637	LOCK(&res->lock);
10638
10639	if (atomic_load_acquire(&res->exiting) &&
10640	    atomic_load_acquire(&res->activebuckets) == 0)
10641	{
10642		/*
10643		 * We're already shutdown.  Send the event.
10644		 */
10645		event->ev_sender = res;
10646		isc_task_send(task, &event);
10647	} else {
10648		isc_task_attach(task, &(isc_task_t *){ NULL });
10649		event->ev_sender = task;
10650		ISC_LIST_APPEND(res->whenshutdown, event, ev_link);
10651	}
10652
10653	UNLOCK(&res->lock);
10654}
10655
10656void
10657dns_resolver_shutdown(dns_resolver_t *res) {
10658	unsigned int i;
10659	fetchctx_t *fctx;
10660	isc_result_t result;
10661	bool is_false = false;
10662	bool is_done = false;
10663
10664	REQUIRE(VALID_RESOLVER(res));
10665
10666	RTRACE("shutdown");
10667
10668	if (atomic_compare_exchange_strong(&res->exiting, &is_false, true)) {
10669		RTRACE("exiting");
10670
10671		for (i = 0; i < res->nbuckets; i++) {
10672			LOCK(&res->buckets[i].lock);
10673			for (fctx = ISC_LIST_HEAD(res->buckets[i].fctxs);
10674			     fctx != NULL; fctx = ISC_LIST_NEXT(fctx, link))
10675			{
10676				fctx_shutdown(fctx);
10677			}
10678			atomic_store(&res->buckets[i].exiting, true);
10679			if (ISC_LIST_EMPTY(res->buckets[i].fctxs)) {
10680				if (isc_refcount_decrement(
10681					    &res->activebuckets) == 1)
10682				{
10683					is_done = true;
10684				}
10685			}
10686			UNLOCK(&res->buckets[i].lock);
10687		}
10688		if (is_done) {
10689			send_shutdown_events(res);
10690		}
10691		result = isc_timer_reset(res->spillattimer,
10692					 isc_timertype_inactive, NULL, NULL,
10693					 true);
10694		RUNTIME_CHECK(result == ISC_R_SUCCESS);
10695	}
10696}
10697
10698void
10699dns_resolver_detach(dns_resolver_t **resp) {
10700	dns_resolver_t *res;
10701
10702	REQUIRE(resp != NULL);
10703	res = *resp;
10704	*resp = NULL;
10705	REQUIRE(VALID_RESOLVER(res));
10706
10707	RTRACE("detach");
10708
10709	if (isc_refcount_decrement(&res->references) == 1) {
10710		isc_refcount_destroy(&res->activebuckets);
10711		INSIST(atomic_load_acquire(&res->exiting));
10712		destroy(res);
10713	}
10714}
10715
10716static bool
10717fctx_match(fetchctx_t *fctx, const dns_name_t *name, dns_rdatatype_t type,
10718	   unsigned int options) {
10719	/*
10720	 * Don't match fetch contexts that are shutting down.
10721	 */
10722	if (fctx->cloned || fctx->state == fetchstate_done ||
10723	    ISC_LIST_EMPTY(fctx->events))
10724	{
10725		return (false);
10726	}
10727
10728	if (fctx->type != type || fctx->options != options) {
10729		return (false);
10730	}
10731	return (dns_name_equal(fctx->name, name));
10732}
10733
10734static void
10735log_fetch(const dns_name_t *name, dns_rdatatype_t type) {
10736	char namebuf[DNS_NAME_FORMATSIZE];
10737	char typebuf[DNS_RDATATYPE_FORMATSIZE];
10738	int level = ISC_LOG_DEBUG(1);
10739
10740	/*
10741	 * If there's no chance of logging it, don't render (format) the
10742	 * name and RDATA type (further below), and return early.
10743	 */
10744	if (!isc_log_wouldlog(dns_lctx, level)) {
10745		return;
10746	}
10747
10748	dns_name_format(name, namebuf, sizeof(namebuf));
10749	dns_rdatatype_format(type, typebuf, sizeof(typebuf));
10750
10751	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
10752		      DNS_LOGMODULE_RESOLVER, level, "fetch: %s/%s", namebuf,
10753		      typebuf);
10754}
10755
/*
 * Advance the QNAME minimization state of 'fctx' by one step.
 *
 * fctx->qmin_labels is raised (to one more than the label count of
 * fctx->qmindcname, or simply by one), optionally snapped to an ip6.arpa
 * boundary, and then pushed past every candidate name for which the
 * cache lookup below already yields an answer.  Finally fctx->qminname,
 * fctx->qmintype and fctx->minimized are set to describe the next query:
 * either an NS query for a suffix of fctx->name, or the original
 * name/type once all labels have been consumed.
 */
static void
fctx_minimize_qname(fetchctx_t *fctx) {
	isc_result_t result;
	unsigned int dlabels, nlabels;
	dns_name_t name;

	REQUIRE(VALID_FCTX(fctx));

	dns_name_init(&name, NULL);

	dlabels = dns_name_countlabels(fctx->qmindcname);
	nlabels = dns_name_countlabels(fctx->name);

	/* Always make progress: go at least one label beyond last time. */
	if (dlabels > fctx->qmin_labels) {
		fctx->qmin_labels = dlabels + 1;
	} else {
		fctx->qmin_labels++;
	}

	if (fctx->ip6arpaskip) {
		/*
		 * For ip6.arpa we want to skip some of the labels, with
		 * boundaries at /16, /32, /48, /56, /64 and /128
		 * In 'label count' terms that's equal to
		 *    7    11   15   17   19      35
		 * We fix fctx->qmin_labels to point to the nearest
		 * boundary
		 */
		if (fctx->qmin_labels < 7) {
			fctx->qmin_labels = 7;
		} else if (fctx->qmin_labels < 11) {
			fctx->qmin_labels = 11;
		} else if (fctx->qmin_labels < 15) {
			fctx->qmin_labels = 15;
		} else if (fctx->qmin_labels < 17) {
			fctx->qmin_labels = 17;
		} else if (fctx->qmin_labels < 19) {
			fctx->qmin_labels = 19;
		} else if (fctx->qmin_labels < 35) {
			fctx->qmin_labels = 35;
		} else {
			fctx->qmin_labels = nlabels;
		}
	} else if (fctx->qmin_labels > DNS_QMIN_MAXLABELS) {
		/*
		 * Past the minimization label limit: presumably
		 * DNS_MAX_LABELS + 1 exceeds any possible 'nlabels', so
		 * the checks below fall through to the full name.
		 */
		fctx->qmin_labels = DNS_MAX_LABELS + 1;
	}

	if (fctx->qmin_labels < nlabels) {
		dns_rdataset_t rdataset;
		dns_fixedname_t fixed;
		dns_name_t *fname = dns_fixedname_initname(&fixed);
		dns_rdataset_init(&rdataset);
		do {
			/*
			 * We want to query for qmin_labels from fctx->name.
			 */
			dns_name_split(fctx->name, fctx->qmin_labels, NULL,
				       &name);
			/*
			 * Look to see if we have anything cached about NS
			 * RRsets at this name and if so skip this name and
			 * try with an additional label prepended.
			 */
			result = dns_db_find(fctx->cache, &name, NULL,
					     dns_rdatatype_ns, 0, 0, NULL,
					     fname, &rdataset, NULL);
			/* Only the result code matters; drop the data. */
			if (dns_rdataset_isassociated(&rdataset)) {
				dns_rdataset_disassociate(&rdataset);
			}
			switch (result) {
			case ISC_R_SUCCESS:
			case DNS_R_CNAME:
			case DNS_R_DNAME:
			case DNS_R_NCACHENXDOMAIN:
			case DNS_R_NCACHENXRRSET:
				/*
				 * 'continue' re-tests the do/while
				 * condition before the next iteration.
				 */
				fctx->qmin_labels++;
				continue;
			default:
				break;
			}
			/* Any other result ends the search at this name. */
			break;
		} while (fctx->qmin_labels < nlabels);
	}

	if (fctx->qmin_labels < nlabels) {
		/* 'name' still holds the suffix produced by the last split. */
		dns_name_copy(&name, fctx->qminname);
		fctx->qmintype = dns_rdatatype_ns;
		fctx->minimized = true;
	} else {
		/* Minimization is done, we'll ask for whole qname */
		dns_name_copy(fctx->name, fctx->qminname);
		fctx->qmintype = fctx->type;
		fctx->minimized = false;
	}

	char domainbuf[DNS_NAME_FORMATSIZE];
	dns_name_format(fctx->qminname, domainbuf, sizeof(domainbuf));
	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER,
		      DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(5),
		      "QNAME minimization - %s minimized, qmintype %d "
		      "qminname %s",
		      fctx->minimized ? "" : "not", fctx->qmintype, domainbuf);
}
10859
/*
 * Create a fetch for 'name'/'type' and return it in '*fetchp'.
 *
 * The fetch joins an existing matching fetch context in the name's
 * bucket unless DNS_FETCHOPT_UNSHARED is set or none matches, in which
 * case a new fetch context is created and started on the bucket's task.
 * 'action' is later invoked on 'task' with 'arg' when the fetch
 * completes.
 *
 * Requires:
 *	'res' is a valid, frozen resolver.
 *	'nameservers' is a valid NS rdataset if 'domain' is non-NULL, and
 *	NULL otherwise.
 *	'forwarders' is NULL.
 *	'rdataset' (and 'sigrdataset', if given) are disassociated.
 *	'fetchp' is non-NULL and '*fetchp' is NULL.
 *
 * Returns:
 *	ISC_R_SUCCESS		'*fetchp' holds the new fetch
 *	ISC_R_SHUTTINGDOWN	the resolver or the bucket is exiting
 *	DNS_R_DUPLICATE		the same client/id is already waiting on
 *				the matching fetch context
 *	DNS_R_DROP		the clients-per-query limit was reached
 *	otherwise the failure from fctx_create() or fctx_join()
 */
isc_result_t
dns_resolver_createfetch(dns_resolver_t *res, const dns_name_t *name,
			 dns_rdatatype_t type, const dns_name_t *domain,
			 dns_rdataset_t *nameservers,
			 dns_forwarders_t *forwarders,
			 const isc_sockaddr_t *client, dns_messageid_t id,
			 unsigned int options, unsigned int depth,
			 isc_counter_t *qc, isc_task_t *task,
			 isc_taskaction_t action, void *arg,
			 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
			 dns_fetch_t **fetchp) {
	dns_fetch_t *fetch;
	fetchctx_t *fctx = NULL;
	isc_result_t result = ISC_R_SUCCESS;
	unsigned int bucketnum;
	bool new_fctx = false;
	isc_event_t *event;
	unsigned int count = 0;
	unsigned int spillat;
	unsigned int spillatmin;
	bool dodestroy = false;

	UNUSED(forwarders);

	REQUIRE(VALID_RESOLVER(res));
	REQUIRE(res->frozen);
	/* XXXRTH  Check for meta type */
	if (domain != NULL) {
		REQUIRE(DNS_RDATASET_VALID(nameservers));
		REQUIRE(nameservers->type == dns_rdatatype_ns);
	} else {
		REQUIRE(nameservers == NULL);
	}
	REQUIRE(forwarders == NULL);
	REQUIRE(!dns_rdataset_isassociated(rdataset));
	REQUIRE(sigrdataset == NULL || !dns_rdataset_isassociated(sigrdataset));
	REQUIRE(fetchp != NULL && *fetchp == NULL);

	if (atomic_load_acquire(&res->exiting)) {
		return (ISC_R_SHUTTINGDOWN);
	}

	log_fetch(name, type);

	/*
	 * The fetch handle holds its own references to the resolver and
	 * the memory context; both are released again on the failure
	 * path at the end of this function.
	 */
	fetch = isc_mem_get(res->mctx, sizeof(*fetch));
	*fetch = (dns_fetch_t){ 0 };

	dns_resolver_attach(res, &fetch->res);
	isc_mem_attach(res->mctx, &fetch->mctx);

	/* The bucket is selected by a hash of the query name. */
	bucketnum = dns_name_fullhash(name, false) % res->nbuckets;

	/*
	 * Snapshot the spill limits under the resolver lock, releasing
	 * it before the bucket lock is taken.
	 */
	LOCK(&res->lock);
	spillat = res->spillat;
	spillatmin = res->spillatmin;
	UNLOCK(&res->lock);
	LOCK(&res->buckets[bucketnum].lock);

	if (atomic_load(&res->buckets[bucketnum].exiting)) {
		result = ISC_R_SHUTTINGDOWN;
		goto unlock;
	}

	/*
	 * Look for a fetch context to share; 'fctx' stays NULL when
	 * sharing is disabled or the loop finds no match.
	 */
	if ((options & DNS_FETCHOPT_UNSHARED) == 0) {
		for (fctx = ISC_LIST_HEAD(res->buckets[bucketnum].fctxs);
		     fctx != NULL; fctx = ISC_LIST_NEXT(fctx, link))
		{
			if (fctx_match(fctx, name, type, options)) {
				break;
			}
		}
	}

	/*
	 * Is this a duplicate?
	 */
	if (fctx != NULL && client != NULL) {
		dns_fetchevent_t *fevent;
		for (fevent = ISC_LIST_HEAD(fctx->events); fevent != NULL;
		     fevent = ISC_LIST_NEXT(fevent, ev_link))
		{
			if (fevent->client != NULL && fevent->id == id &&
			    isc_sockaddr_equal(fevent->client, client))
			{
				result = DNS_R_DUPLICATE;
				goto unlock;
			}

			/*
			 * Only the regular fetch events should be
			 * counted for the clients-per-query limit, in
			 * case if there are multiple events registered
			 * for a single client.
			 */
			if (fevent->ev_type == DNS_EVENT_FETCHDONE) {
				count++;
			}
		}
	}
	/*
	 * 'count' is only non-zero when an existing fctx was found with
	 * a client given, which is why 'fctx' must be set here.  A fctx
	 * that has once been marked as spilled keeps dropping clients.
	 */
	if (count >= spillatmin && spillatmin != 0) {
		INSIST(fctx != NULL);
		if (count >= spillat) {
			fctx->spilled = true;
		}
		if (fctx->spilled) {
			inc_stats(res, dns_resstatscounter_clientquota);
			result = DNS_R_DROP;
			goto unlock;
		}
	}

	if (fctx == NULL) {
		result = fctx_create(res, task, name, type, domain, nameservers,
				     client, options, bucketnum, depth, qc,
				     &fctx);
		if (result != ISC_R_SUCCESS) {
			goto unlock;
		}
		new_fctx = true;
	} else if (fctx->depth > depth) {
		/* A shared fctx keeps the smallest depth of its joiners. */
		fctx->depth = depth;
	}

	result = fctx_join(fctx, task, client, id, action, arg, rdataset,
			   sigrdataset, fetch);

	/*
	 * When requested, register a second event of type
	 * DNS_EVENT_TRYSTALE for the same fetch.
	 */
	if (result == ISC_R_SUCCESS &&
	    ((options & DNS_FETCHOPT_TRYSTALE_ONTIMEOUT) != 0))
	{
		fctx_add_event(fctx, task, client, id, action, arg, NULL, NULL,
			       fetch, DNS_EVENT_TRYSTALE);
	}

	if (new_fctx) {
		if (result == ISC_R_SUCCESS) {
			/*
			 * Launch this fctx.
			 */
			event = &fctx->control_event;
			fctx_addref(fctx);
			ISC_EVENT_INIT(event, sizeof(*event), 0, NULL,
				       DNS_EVENT_FETCHCONTROL, fctx_start, fctx,
				       NULL, NULL, NULL);
			isc_task_send(res->buckets[bucketnum].task, &event);
		} else {
			/*
			 * Joining the freshly created fctx failed; it is
			 * destroyed after the bucket lock is released.
			 */
			dodestroy = true;
		}
	}

unlock:
	UNLOCK(&res->buckets[bucketnum].lock);

	if (dodestroy) {
		fctx_destroy(fctx, false);
	}

	if (result == ISC_R_SUCCESS) {
		FTRACE("created");
		*fetchp = fetch;
	} else {
		/* Undo the attachments made when the handle was set up. */
		dns_resolver_detach(&fetch->res);
		isc_mem_putanddetach(&fetch->mctx, fetch, sizeof(*fetch));
	}

	return (result);
}
11026
/*
 * Cancel 'fetch': unless its fetch context is already done, unlink the
 * events that belong to this fetch (at most one DNS_EVENT_TRYSTALE and
 * one DNS_EVENT_FETCHDONE) from the context and deliver them to their
 * tasks with result ISC_R_CANCELED.  Events of other fetches sharing the
 * same fetch context are left alone.
 */
void
dns_resolver_cancelfetch(dns_fetch_t *fetch) {
	fetchctx_t *fctx = NULL;
	dns_resolver_t *res = NULL;
	dns_fetchevent_t *event = NULL;
	dns_fetchevent_t *event_trystale = NULL;
	dns_fetchevent_t *event_fetchdone = NULL;

	REQUIRE(DNS_FETCH_VALID(fetch));
	fctx = fetch->private;
	REQUIRE(VALID_FCTX(fctx));
	res = fctx->res;

	FTRACE("cancelfetch");

	LOCK(&res->buckets[fctx->bucketnum].lock);

	/*
	 * Find the events for this fetch (as opposed
	 * to those for other fetches that have joined the same
	 * fctx) and send them with result = ISC_R_CANCELED.
	 */
	if (fctx->state != fetchstate_done) {
		dns_fetchevent_t *next_event = NULL;
		for (event = ISC_LIST_HEAD(fctx->events); event != NULL;
		     event = next_event)
		{
			/* Fetch the successor first: 'event' may be unlinked. */
			next_event = ISC_LIST_NEXT(event, ev_link);
			if (event->fetch == fetch) {
				ISC_LIST_UNLINK(fctx->events, event, ev_link);
				switch (event->ev_type) {
				case DNS_EVENT_TRYSTALE:
					INSIST(event_trystale == NULL);
					event_trystale = event;
					break;
				case DNS_EVENT_FETCHDONE:
					INSIST(event_fetchdone == NULL);
					event_fetchdone = event;
					break;
				default:
					UNREACHABLE();
				}
				/* Both kinds found: nothing more to look for. */
				if (event_trystale != NULL &&
				    event_fetchdone != NULL)
				{
					break;
				}
			}
		}
	}
	/*
	 * The "trystale" event must be sent before the "fetchdone" event,
	 * because the latter clears the "recursing" query attribute, which is
	 * required by both events (handled by the same callback function).
	 */
	if (event_trystale != NULL) {
		/*
		 * While queued, ev_sender holds the destination task;
		 * replace it with the fctx before sending.
		 */
		isc_task_t *etask = event_trystale->ev_sender;
		event_trystale->ev_sender = fctx;
		event_trystale->result = ISC_R_CANCELED;
		isc_task_sendanddetach(&etask, ISC_EVENT_PTR(&event_trystale));
	}
	if (event_fetchdone != NULL) {
		isc_task_t *etask = event_fetchdone->ev_sender;
		event_fetchdone->ev_sender = fctx;
		event_fetchdone->result = ISC_R_CANCELED;
		isc_task_sendanddetach(&etask, ISC_EVENT_PTR(&event_fetchdone));
	}

	/*
	 * The fctx continues running even if no fetches remain;
	 * the answer is still cached.
	 */
	UNLOCK(&res->buckets[fctx->bucketnum].lock);
}
11101
11102void
11103dns_resolver_destroyfetch(dns_fetch_t **fetchp) {
11104	dns_fetch_t *fetch = NULL;
11105	dns_resolver_t *res = NULL;
11106	fetchctx_t *fctx = NULL;
11107	unsigned int bucketnum;
11108
11109	REQUIRE(fetchp != NULL);
11110	fetch = *fetchp;
11111	*fetchp = NULL;
11112	REQUIRE(DNS_FETCH_VALID(fetch));
11113	fctx = fetch->private;
11114	REQUIRE(VALID_FCTX(fctx));
11115	res = fetch->res;
11116
11117	FTRACE("destroyfetch");
11118
11119	fetch->magic = 0;
11120
11121	bucketnum = fctx->bucketnum;
11122	LOCK(&res->buckets[bucketnum].lock);
11123
11124	/*
11125	 * Sanity check: the caller should have gotten its event before
11126	 * trying to destroy the fetch.
11127	 */
11128	if (fctx->state != fetchstate_done) {
11129		dns_fetchevent_t *event = NULL, *next_event = NULL;
11130		for (event = ISC_LIST_HEAD(fctx->events); event != NULL;
11131		     event = next_event)
11132		{
11133			next_event = ISC_LIST_NEXT(event, ev_link);
11134			RUNTIME_CHECK(event->fetch != fetch);
11135		}
11136	}
11137	UNLOCK(&res->buckets[bucketnum].lock);
11138
11139	isc_mem_putanddetach(&fetch->mctx, fetch, sizeof(*fetch));
11140
11141	fctx_detach(&fctx);
11142	dns_resolver_detach(&res);
11143}
11144
11145void
11146dns_resolver_logfetch(dns_fetch_t *fetch, isc_log_t *lctx,
11147		      isc_logcategory_t *category, isc_logmodule_t *module,
11148		      int level, bool duplicateok) {
11149	fetchctx_t *fctx;
11150	dns_resolver_t *res;
11151	char domainbuf[DNS_NAME_FORMATSIZE];
11152
11153	REQUIRE(DNS_FETCH_VALID(fetch));
11154	fctx = fetch->private;
11155	REQUIRE(VALID_FCTX(fctx));
11156	res = fctx->res;
11157
11158	LOCK(&res->buckets[fctx->bucketnum].lock);
11159
11160	INSIST(fctx->exitline >= 0);
11161	if (!fctx->logged || duplicateok) {
11162		dns_name_format(fctx->domain, domainbuf, sizeof(domainbuf));
11163		isc_log_write(lctx, category, module, level,
11164			      "fetch completed at %s:%d for %s in "
11165			      "%" PRIu64 "."
11166			      "%06" PRIu64 ": %s/%s "
11167			      "[domain:%s,referral:%u,restart:%u,qrysent:%u,"
11168			      "timeout:%u,lame:%u,quota:%u,neterr:%u,"
11169			      "badresp:%u,adberr:%u,findfail:%u,valfail:%u]",
11170			      __FILE__, fctx->exitline, fctx->info,
11171			      fctx->duration / US_PER_SEC,
11172			      fctx->duration % US_PER_SEC,
11173			      isc_result_totext(fctx->result),
11174			      isc_result_totext(fctx->vresult), domainbuf,
11175			      fctx->referrals, fctx->restarts, fctx->querysent,
11176			      fctx->timeouts, fctx->lamecount, fctx->quotacount,
11177			      fctx->neterr, fctx->badresp, fctx->adberr,
11178			      fctx->findfail, fctx->valfail);
11179		fctx->logged = true;
11180	}
11181
11182	UNLOCK(&res->buckets[fctx->bucketnum].lock);
11183}
11184
11185dns_dispatchmgr_t *
11186dns_resolver_dispatchmgr(dns_resolver_t *resolver) {
11187	REQUIRE(VALID_RESOLVER(resolver));
11188	return (resolver->dispatchmgr);
11189}
11190
11191dns_dispatch_t *
11192dns_resolver_dispatchv4(dns_resolver_t *resolver) {
11193	REQUIRE(VALID_RESOLVER(resolver));
11194	return (dns_dispatchset_get(resolver->dispatches4));
11195}
11196
11197dns_dispatch_t *
11198dns_resolver_dispatchv6(dns_resolver_t *resolver) {
11199	REQUIRE(VALID_RESOLVER(resolver));
11200	return (dns_dispatchset_get(resolver->dispatches6));
11201}
11202
11203isc_taskmgr_t *
11204dns_resolver_taskmgr(dns_resolver_t *resolver) {
11205	REQUIRE(VALID_RESOLVER(resolver));
11206	return (resolver->taskmgr);
11207}
11208
11209uint32_t
11210dns_resolver_getlamettl(dns_resolver_t *resolver) {
11211	REQUIRE(VALID_RESOLVER(resolver));
11212	return (resolver->lame_ttl);
11213}
11214
11215void
11216dns_resolver_setlamettl(dns_resolver_t *resolver, uint32_t lame_ttl) {
11217	REQUIRE(VALID_RESOLVER(resolver));
11218	resolver->lame_ttl = lame_ttl;
11219}
11220
11221void
11222dns_resolver_addalternate(dns_resolver_t *resolver, const isc_sockaddr_t *alt,
11223			  const dns_name_t *name, in_port_t port) {
11224	alternate_t *a;
11225
11226	REQUIRE(VALID_RESOLVER(resolver));
11227	REQUIRE(!resolver->frozen);
11228	REQUIRE((alt == NULL) ^ (name == NULL));
11229
11230	a = isc_mem_get(resolver->mctx, sizeof(*a));
11231	if (alt != NULL) {
11232		a->isaddress = true;
11233		a->_u.addr = *alt;
11234	} else {
11235		a->isaddress = false;
11236		a->_u._n.port = port;
11237		dns_name_init(&a->_u._n.name, NULL);
11238		dns_name_dup(name, resolver->mctx, &a->_u._n.name);
11239	}
11240	ISC_LINK_INIT(a, link);
11241	ISC_LIST_APPEND(resolver->alternates, a, link);
11242}
11243
11244void
11245dns_resolver_setudpsize(dns_resolver_t *resolver, uint16_t udpsize) {
11246	REQUIRE(VALID_RESOLVER(resolver));
11247	resolver->udpsize = udpsize;
11248}
11249
11250uint16_t
11251dns_resolver_getudpsize(dns_resolver_t *resolver) {
11252	REQUIRE(VALID_RESOLVER(resolver));
11253	return (resolver->udpsize);
11254}
11255
11256void
11257dns_resolver_flushbadcache(dns_resolver_t *resolver, const dns_name_t *name) {
11258	if (name != NULL) {
11259		dns_badcache_flushname(resolver->badcache, name);
11260	} else {
11261		dns_badcache_flush(resolver->badcache);
11262	}
11263}
11264
11265void
11266dns_resolver_flushbadnames(dns_resolver_t *resolver, const dns_name_t *name) {
11267	dns_badcache_flushtree(resolver->badcache, name);
11268}
11269
11270void
11271dns_resolver_addbadcache(dns_resolver_t *resolver, const dns_name_t *name,
11272			 dns_rdatatype_t type, isc_time_t *expire) {
11273#ifdef ENABLE_AFL
11274	if (!dns_fuzzing_resolver)
11275#endif /* ifdef ENABLE_AFL */
11276	{
11277		dns_badcache_add(resolver->badcache, name, type, false, 0,
11278				 expire);
11279	}
11280}
11281
11282bool
11283dns_resolver_getbadcache(dns_resolver_t *resolver, const dns_name_t *name,
11284			 dns_rdatatype_t type, isc_time_t *now) {
11285	return (dns_badcache_find(resolver->badcache, name, type, NULL, now));
11286}
11287
11288void
11289dns_resolver_printbadcache(dns_resolver_t *resolver, FILE *fp) {
11290	(void)dns_badcache_print(resolver->badcache, "Bad cache", fp);
11291}
11292
11293static void
11294free_algorithm(void *node, void *arg) {
11295	unsigned char *algorithms = node;
11296	isc_mem_t *mctx = arg;
11297
11298	isc_mem_put(mctx, algorithms, *algorithms);
11299}
11300
11301void
11302dns_resolver_reset_algorithms(dns_resolver_t *resolver) {
11303	REQUIRE(VALID_RESOLVER(resolver));
11304
11305	if (resolver->algorithms != NULL) {
11306		dns_rbt_destroy(&resolver->algorithms);
11307	}
11308}
11309
11310isc_result_t
11311dns_resolver_disable_algorithm(dns_resolver_t *resolver, const dns_name_t *name,
11312			       unsigned int alg) {
11313	unsigned int len, mask;
11314	unsigned char *tmp;
11315	unsigned char *algorithms;
11316	isc_result_t result;
11317	dns_rbtnode_t *node = NULL;
11318
11319	/*
11320	 * Whether an algorithm is disabled (or not) is stored in a
11321	 * per-name bitfield that is stored as the node data of an
11322	 * RBT.
11323	 */
11324
11325	REQUIRE(VALID_RESOLVER(resolver));
11326	if (alg > 255) {
11327		return (ISC_R_RANGE);
11328	}
11329
11330	if (resolver->algorithms == NULL) {
11331		result = dns_rbt_create(resolver->mctx, free_algorithm,
11332					resolver->mctx, &resolver->algorithms);
11333		if (result != ISC_R_SUCCESS) {
11334			goto cleanup;
11335		}
11336	}
11337
11338	len = alg / 8 + 2;
11339	mask = 1 << (alg % 8);
11340
11341	result = dns_rbt_addnode(resolver->algorithms, name, &node);
11342
11343	if (result == ISC_R_SUCCESS || result == ISC_R_EXISTS) {
11344		algorithms = node->data;
11345		/*
11346		 * If algorithms is set, algorithms[0] contains its
11347		 * length.
11348		 */
11349		if (algorithms == NULL || len > *algorithms) {
11350			/*
11351			 * If no bitfield exists in the node data, or if
11352			 * it is not long enough, allocate a new
11353			 * bitfield and copy the old (smaller) bitfield
11354			 * into it if one exists.
11355			 */
11356			tmp = isc_mem_get(resolver->mctx, len);
11357			memset(tmp, 0, len);
11358			if (algorithms != NULL) {
11359				memmove(tmp, algorithms, *algorithms);
11360			}
11361			tmp[len - 1] |= mask;
11362			/* tmp[0] should contain the length of 'tmp'. */
11363			*tmp = len;
11364			node->data = tmp;
11365			/* Free the older bitfield. */
11366			if (algorithms != NULL) {
11367				isc_mem_put(resolver->mctx, algorithms,
11368					    *algorithms);
11369			}
11370		} else {
11371			algorithms[len - 1] |= mask;
11372		}
11373	}
11374	result = ISC_R_SUCCESS;
11375cleanup:
11376	return (result);
11377}
11378
11379bool
11380dns_resolver_algorithm_supported(dns_resolver_t *resolver,
11381				 const dns_name_t *name, unsigned int alg) {
11382	unsigned int len, mask;
11383	unsigned char *algorithms;
11384	void *data = NULL;
11385	isc_result_t result;
11386	bool found = false;
11387
11388	REQUIRE(VALID_RESOLVER(resolver));
11389
11390	/*
11391	 * DH is unsupported for DNSKEYs, see RFC 4034 sec. A.1.
11392	 */
11393	if ((alg == DST_ALG_DH) || (alg == DST_ALG_INDIRECT)) {
11394		return (false);
11395	}
11396
11397	if (resolver->algorithms == NULL) {
11398		goto unlock;
11399	}
11400	result = dns_rbt_findname(resolver->algorithms, name, 0, NULL, &data);
11401	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
11402		len = alg / 8 + 2;
11403		mask = 1 << (alg % 8);
11404		algorithms = data;
11405		if (len <= *algorithms && (algorithms[len - 1] & mask) != 0) {
11406			found = true;
11407		}
11408	}
11409unlock:
11410	if (found) {
11411		return (false);
11412	}
11413
11414	return (dst_algorithm_supported(alg));
11415}
11416
11417static void
11418free_digest(void *node, void *arg) {
11419	unsigned char *digests = node;
11420	isc_mem_t *mctx = arg;
11421
11422	isc_mem_put(mctx, digests, *digests);
11423}
11424
11425void
11426dns_resolver_reset_ds_digests(dns_resolver_t *resolver) {
11427	REQUIRE(VALID_RESOLVER(resolver));
11428
11429	if (resolver->digests != NULL) {
11430		dns_rbt_destroy(&resolver->digests);
11431	}
11432}
11433
11434isc_result_t
11435dns_resolver_disable_ds_digest(dns_resolver_t *resolver, const dns_name_t *name,
11436			       unsigned int digest_type) {
11437	unsigned int len, mask;
11438	unsigned char *tmp;
11439	unsigned char *digests;
11440	isc_result_t result;
11441	dns_rbtnode_t *node = NULL;
11442
11443	/*
11444	 * Whether a digest is disabled (or not) is stored in a per-name
11445	 * bitfield that is stored as the node data of an RBT.
11446	 */
11447
11448	REQUIRE(VALID_RESOLVER(resolver));
11449	if (digest_type > 255) {
11450		return (ISC_R_RANGE);
11451	}
11452
11453	if (resolver->digests == NULL) {
11454		result = dns_rbt_create(resolver->mctx, free_digest,
11455					resolver->mctx, &resolver->digests);
11456		if (result != ISC_R_SUCCESS) {
11457			goto cleanup;
11458		}
11459	}
11460
11461	len = digest_type / 8 + 2;
11462	mask = 1 << (digest_type % 8);
11463
11464	result = dns_rbt_addnode(resolver->digests, name, &node);
11465
11466	if (result == ISC_R_SUCCESS || result == ISC_R_EXISTS) {
11467		digests = node->data;
11468		/* If digests is set, digests[0] contains its length. */
11469		if (digests == NULL || len > *digests) {
11470			/*
11471			 * If no bitfield exists in the node data, or if
11472			 * it is not long enough, allocate a new
11473			 * bitfield and copy the old (smaller) bitfield
11474			 * into it if one exists.
11475			 */
11476			tmp = isc_mem_get(resolver->mctx, len);
11477			memset(tmp, 0, len);
11478			if (digests != NULL) {
11479				memmove(tmp, digests, *digests);
11480			}
11481			tmp[len - 1] |= mask;
11482			/* tmp[0] should contain the length of 'tmp'. */
11483			*tmp = len;
11484			node->data = tmp;
11485			/* Free the older bitfield. */
11486			if (digests != NULL) {
11487				isc_mem_put(resolver->mctx, digests, *digests);
11488			}
11489		} else {
11490			digests[len - 1] |= mask;
11491		}
11492	}
11493	result = ISC_R_SUCCESS;
11494cleanup:
11495	return (result);
11496}
11497
11498bool
11499dns_resolver_ds_digest_supported(dns_resolver_t *resolver,
11500				 const dns_name_t *name,
11501				 unsigned int digest_type) {
11502	unsigned int len, mask;
11503	unsigned char *digests;
11504	void *data = NULL;
11505	isc_result_t result;
11506	bool found = false;
11507
11508	REQUIRE(VALID_RESOLVER(resolver));
11509
11510	if (resolver->digests == NULL) {
11511		goto unlock;
11512	}
11513	result = dns_rbt_findname(resolver->digests, name, 0, NULL, &data);
11514	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
11515		len = digest_type / 8 + 2;
11516		mask = 1 << (digest_type % 8);
11517		digests = data;
11518		if (len <= *digests && (digests[len - 1] & mask) != 0) {
11519			found = true;
11520		}
11521	}
11522unlock:
11523	if (found) {
11524		return (false);
11525	}
11526	return (dst_ds_digest_supported(digest_type));
11527}
11528
11529void
11530dns_resolver_resetmustbesecure(dns_resolver_t *resolver) {
11531	REQUIRE(VALID_RESOLVER(resolver));
11532
11533	if (resolver->mustbesecure != NULL) {
11534		dns_rbt_destroy(&resolver->mustbesecure);
11535	}
11536}
11537
11538static bool yes = true, no = false;
11539
11540isc_result_t
11541dns_resolver_setmustbesecure(dns_resolver_t *resolver, const dns_name_t *name,
11542			     bool value) {
11543	isc_result_t result;
11544
11545	REQUIRE(VALID_RESOLVER(resolver));
11546
11547	if (resolver->mustbesecure == NULL) {
11548		result = dns_rbt_create(resolver->mctx, NULL, NULL,
11549					&resolver->mustbesecure);
11550		if (result != ISC_R_SUCCESS) {
11551			goto cleanup;
11552		}
11553	}
11554	result = dns_rbt_addname(resolver->mustbesecure, name,
11555				 value ? &yes : &no);
11556cleanup:
11557	return (result);
11558}
11559
11560bool
11561dns_resolver_getmustbesecure(dns_resolver_t *resolver, const dns_name_t *name) {
11562	void *data = NULL;
11563	bool value = false;
11564	isc_result_t result;
11565
11566	REQUIRE(VALID_RESOLVER(resolver));
11567
11568	if (resolver->mustbesecure == NULL) {
11569		goto unlock;
11570	}
11571	result = dns_rbt_findname(resolver->mustbesecure, name, 0, NULL, &data);
11572	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
11573		value = *(bool *)data;
11574	}
11575unlock:
11576	return (value);
11577}
11578
11579void
11580dns_resolver_getclientsperquery(dns_resolver_t *resolver, uint32_t *cur,
11581				uint32_t *min, uint32_t *max) {
11582	REQUIRE(VALID_RESOLVER(resolver));
11583
11584	LOCK(&resolver->lock);
11585	if (cur != NULL) {
11586		*cur = resolver->spillat;
11587	}
11588	if (min != NULL) {
11589		*min = resolver->spillatmin;
11590	}
11591	if (max != NULL) {
11592		*max = resolver->spillatmax;
11593	}
11594	UNLOCK(&resolver->lock);
11595}
11596
11597void
11598dns_resolver_setclientsperquery(dns_resolver_t *resolver, uint32_t min,
11599				uint32_t max) {
11600	REQUIRE(VALID_RESOLVER(resolver));
11601
11602	LOCK(&resolver->lock);
11603	resolver->spillatmin = resolver->spillat = min;
11604	resolver->spillatmax = max;
11605	UNLOCK(&resolver->lock);
11606}
11607
11608void
11609dns_resolver_setfetchesperzone(dns_resolver_t *resolver, uint32_t clients) {
11610	REQUIRE(VALID_RESOLVER(resolver));
11611
11612	atomic_store_release(&resolver->zspill, clients);
11613}
11614
11615bool
11616dns_resolver_getzeronosoattl(dns_resolver_t *resolver) {
11617	REQUIRE(VALID_RESOLVER(resolver));
11618
11619	return (resolver->zero_no_soa_ttl);
11620}
11621
11622void
11623dns_resolver_setzeronosoattl(dns_resolver_t *resolver, bool state) {
11624	REQUIRE(VALID_RESOLVER(resolver));
11625
11626	resolver->zero_no_soa_ttl = state;
11627}
11628
11629unsigned int
11630dns_resolver_getoptions(dns_resolver_t *resolver) {
11631	REQUIRE(VALID_RESOLVER(resolver));
11632
11633	return (resolver->options);
11634}
11635
11636unsigned int
11637dns_resolver_gettimeout(dns_resolver_t *resolver) {
11638	REQUIRE(VALID_RESOLVER(resolver));
11639
11640	return (resolver->query_timeout);
11641}
11642
11643void
11644dns_resolver_settimeout(dns_resolver_t *resolver, unsigned int timeout) {
11645	REQUIRE(VALID_RESOLVER(resolver));
11646
11647	if (timeout <= 300) {
11648		timeout *= 1000;
11649	}
11650
11651	if (timeout == 0) {
11652		timeout = DEFAULT_QUERY_TIMEOUT;
11653	}
11654	if (timeout > MAXIMUM_QUERY_TIMEOUT) {
11655		timeout = MAXIMUM_QUERY_TIMEOUT;
11656	}
11657	if (timeout < MINIMUM_QUERY_TIMEOUT) {
11658		timeout = MINIMUM_QUERY_TIMEOUT;
11659	}
11660
11661	resolver->query_timeout = timeout;
11662}
11663
11664void
11665dns_resolver_setmaxdepth(dns_resolver_t *resolver, unsigned int maxdepth) {
11666	REQUIRE(VALID_RESOLVER(resolver));
11667	resolver->maxdepth = maxdepth;
11668}
11669
11670unsigned int
11671dns_resolver_getmaxdepth(dns_resolver_t *resolver) {
11672	REQUIRE(VALID_RESOLVER(resolver));
11673	return (resolver->maxdepth);
11674}
11675
11676void
11677dns_resolver_setmaxqueries(dns_resolver_t *resolver, unsigned int queries) {
11678	REQUIRE(VALID_RESOLVER(resolver));
11679	resolver->maxqueries = queries;
11680}
11681
11682unsigned int
11683dns_resolver_getmaxqueries(dns_resolver_t *resolver) {
11684	REQUIRE(VALID_RESOLVER(resolver));
11685	return (resolver->maxqueries);
11686}
11687
11688void
11689dns_resolver_dumpfetches(dns_resolver_t *resolver, isc_statsformat_t format,
11690			 FILE *fp) {
11691	REQUIRE(VALID_RESOLVER(resolver));
11692	REQUIRE(fp != NULL);
11693	REQUIRE(format == isc_statsformat_file);
11694
11695	for (size_t i = 0; i < HASHSIZE(resolver->dhashbits); i++) {
11696		fctxcount_t *fc;
11697		LOCK(&resolver->dbuckets[i].lock);
11698		for (fc = ISC_LIST_HEAD(resolver->dbuckets[i].list); fc != NULL;
11699		     fc = ISC_LIST_NEXT(fc, link))
11700		{
11701			dns_name_print(fc->domain, fp);
11702			fprintf(fp,
11703				": %u active (%u spilled, %u "
11704				"allowed)\n",
11705				fc->count, fc->dropped, fc->allowed);
11706		}
11707		UNLOCK(&resolver->dbuckets[i].lock);
11708	}
11709}
11710
11711void
11712dns_resolver_setquotaresponse(dns_resolver_t *resolver, dns_quotatype_t which,
11713			      isc_result_t resp) {
11714	REQUIRE(VALID_RESOLVER(resolver));
11715	REQUIRE(which == dns_quotatype_zone || which == dns_quotatype_server);
11716	REQUIRE(resp == DNS_R_DROP || resp == DNS_R_SERVFAIL);
11717
11718	resolver->quotaresp[which] = resp;
11719}
11720
11721isc_result_t
11722dns_resolver_getquotaresponse(dns_resolver_t *resolver, dns_quotatype_t which) {
11723	REQUIRE(VALID_RESOLVER(resolver));
11724	REQUIRE(which == dns_quotatype_zone || which == dns_quotatype_server);
11725
11726	return (resolver->quotaresp[which]);
11727}
11728
11729unsigned int
11730dns_resolver_getretryinterval(dns_resolver_t *resolver) {
11731	REQUIRE(VALID_RESOLVER(resolver));
11732
11733	return (resolver->retryinterval);
11734}
11735
11736void
11737dns_resolver_setretryinterval(dns_resolver_t *resolver, unsigned int interval) {
11738	REQUIRE(VALID_RESOLVER(resolver));
11739	REQUIRE(interval > 0);
11740
11741	resolver->retryinterval = ISC_MIN(interval, 2000);
11742}
11743
11744unsigned int
11745dns_resolver_getnonbackofftries(dns_resolver_t *resolver) {
11746	REQUIRE(VALID_RESOLVER(resolver));
11747
11748	return (resolver->nonbackofftries);
11749}
11750
11751void
11752dns_resolver_setnonbackofftries(dns_resolver_t *resolver, unsigned int tries) {
11753	REQUIRE(VALID_RESOLVER(resolver));
11754	REQUIRE(tries > 0);
11755
11756	resolver->nonbackofftries = tries;
11757}
11758