1/*	$NetBSD: xfrout.c,v 1.14 2024/02/21 22:52:46 christos Exp $	*/
2
3/*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * SPDX-License-Identifier: MPL-2.0
7 *
8 * This Source Code Form is subject to the terms of the Mozilla Public
9 * License, v. 2.0. If a copy of the MPL was not distributed with this
10 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11 *
12 * See the COPYRIGHT file distributed with this work for additional
13 * information regarding copyright ownership.
14 */
15
16#include <inttypes.h>
17#include <stdbool.h>
18
19#include <isc/formatcheck.h>
20#include <isc/mem.h>
21#include <isc/netmgr.h>
22#include <isc/print.h>
23#include <isc/result.h>
24#include <isc/stats.h>
25#include <isc/util.h>
26
27#include <dns/db.h>
28#include <dns/dbiterator.h>
29#include <dns/dlz.h>
30#include <dns/fixedname.h>
31#include <dns/journal.h>
32#include <dns/message.h>
33#include <dns/peer.h>
34#include <dns/rdataclass.h>
35#include <dns/rdatalist.h>
36#include <dns/rdataset.h>
37#include <dns/rdatasetiter.h>
38#include <dns/rriterator.h>
39#include <dns/soa.h>
40#include <dns/stats.h>
41#include <dns/tsig.h>
42#include <dns/view.h>
43#include <dns/zone.h>
44#include <dns/zt.h>
45
46#include <ns/client.h>
47#include <ns/log.h>
48#include <ns/server.h>
49#include <ns/stats.h>
50#include <ns/xfrout.h>
51
52#include <ns/pfilter.h>
53
54/*! \file
55 * \brief
56 * Outgoing AXFR and IXFR.
57 */
58
59/*
60 * TODO:
61 *  - IXFR over UDP
62 */
63
/*
 * Argument bundles for isc_log_write().  Each macro expands to the
 * leading arguments of a log call, so a call site only has to supply
 * the format string and its parameters.
 */

/* Log context, category and module; the caller appends the level. */
#define XFROUT_COMMON_LOGARGS \
	ns_lctx, DNS_LOGCATEGORY_XFER_OUT, NS_LOGMODULE_XFER_OUT

/* Common arguments followed by the ISC_LOG_INFO level. */
#define XFROUT_PROTOCOL_LOGARGS XFROUT_COMMON_LOGARGS, ISC_LOG_INFO

/* Common arguments followed by debug level 'n'. */
#define XFROUT_DEBUG_LOGARGS(n) XFROUT_COMMON_LOGARGS, ISC_LOG_DEBUG(n)

/* Common arguments followed by the level used for per-RR logging. */
#define XFROUT_RR_LOGARGS XFROUT_COMMON_LOGARGS, XFROUT_RR_LOGLEVEL

/* Level used by XFROUT_RR_LOGARGS (see log_rr()). */
#define XFROUT_RR_LOGLEVEL ISC_LOG_DEBUG(8)
74
/*%
 * Fail unconditionally and log as a client error.
 *
 * Stores 'code' in the local variable 'result', logs 'msg' together
 * with the textual form of the result through ns_client_log() at
 * ISC_LOG_INFO, and jumps to the enclosing function's 'failure' label.
 * The expansion relies on 'client' and 'result' being in scope.
 *
 * 'code' is evaluated exactly once: the log call reads the value back
 * from 'result' instead of expanding the argument a second time.
 *
 * The test against ISC_R_SUCCESS is there to keep the Solaris compiler
 * from complaining about "end-of-loop code not reached".
 */
#define FAILC(code, msg)                                                   \
	do {                                                               \
		result = (code);                                           \
		ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT,            \
			      NS_LOGMODULE_XFER_OUT, ISC_LOG_INFO,         \
			      "bad zone transfer request: %s (%s)", (msg), \
			      isc_result_totext(result));                  \
		if (result != ISC_R_SUCCESS)                               \
			goto failure;                                      \
	} while (0)
90
/*%
 * Like FAILC, but the log message also identifies the question.
 *
 * Stores 'code' in the local variable 'result', formats 'question'
 * (a name) and 'rdclass' into temporary buffers, logs them with 'msg'
 * and the textual form of the result through ns_client_log() at
 * ISC_LOG_INFO, and jumps to the enclosing function's 'failure' label.
 * The expansion relies on 'client' and 'result' being in scope.
 *
 * 'code' is evaluated exactly once: the log call reads the value back
 * from 'result' instead of expanding the argument a second time.
 */
#define FAILQ(code, msg, question, rdclass)                                  \
	do {                                                                 \
		char _buf1[DNS_NAME_FORMATSIZE];                             \
		char _buf2[DNS_RDATACLASS_FORMATSIZE];                       \
		result = (code);                                             \
		dns_name_format((question), _buf1, sizeof(_buf1));           \
		dns_rdataclass_format((rdclass), _buf2, sizeof(_buf2));      \
		ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT,              \
			      NS_LOGMODULE_XFER_OUT, ISC_LOG_INFO,           \
			      "bad zone transfer request: '%s/%s': %s (%s)", \
			      _buf1, _buf2, (msg),                           \
			      isc_result_totext(result));                    \
		if (result != ISC_R_SUCCESS)                                 \
			goto failure;                                        \
	} while (0)
105
/*
 * Evaluate 'op' into the local variable 'result' and jump to the
 * enclosing function's 'failure' label unless it is ISC_R_SUCCESS.
 */
#define CHECK(op)                                       \
	do {                                            \
		if ((result = (op)) != ISC_R_SUCCESS) { \
			goto failure;                   \
		}                                       \
	} while (0)
112
113/**************************************************************************/
114
115static void
116inc_stats(ns_client_t *client, dns_zone_t *zone, isc_statscounter_t counter) {
117	ns_stats_increment(client->sctx->nsstats, counter);
118	if (zone != NULL) {
119		isc_stats_t *zonestats = dns_zone_getrequeststats(zone);
120		if (zonestats != NULL) {
121			isc_stats_increment(zonestats, counter);
122		}
123	}
124}
125
126/**************************************************************************/
127
128/*% Log an RR (for debugging) */
129
130static void
131log_rr(dns_name_t *name, dns_rdata_t *rdata, uint32_t ttl) {
132	isc_result_t result;
133	isc_buffer_t buf;
134	char mem[2000];
135	dns_rdatalist_t rdl;
136	dns_rdataset_t rds;
137	dns_rdata_t rd = DNS_RDATA_INIT;
138
139	dns_rdatalist_init(&rdl);
140	rdl.type = rdata->type;
141	rdl.rdclass = rdata->rdclass;
142	rdl.ttl = ttl;
143	if (rdata->type == dns_rdatatype_sig ||
144	    rdata->type == dns_rdatatype_rrsig)
145	{
146		rdl.covers = dns_rdata_covers(rdata);
147	} else {
148		rdl.covers = dns_rdatatype_none;
149	}
150	dns_rdataset_init(&rds);
151	dns_rdata_init(&rd);
152	dns_rdata_clone(rdata, &rd);
153	ISC_LIST_APPEND(rdl.rdata, &rd, link);
154	RUNTIME_CHECK(dns_rdatalist_tordataset(&rdl, &rds) == ISC_R_SUCCESS);
155
156	isc_buffer_init(&buf, mem, sizeof(mem));
157	result = dns_rdataset_totext(&rds, name, false, false, &buf);
158
159	/*
160	 * We could use xfrout_log(), but that would produce
161	 * very long lines with a repetitive prefix.
162	 */
163	if (result == ISC_R_SUCCESS) {
164		/*
165		 * Get rid of final newline.
166		 */
167		INSIST(buf.used >= 1 &&
168		       ((char *)buf.base)[buf.used - 1] == '\n');
169		buf.used--;
170
171		isc_log_write(XFROUT_RR_LOGARGS, "%.*s",
172			      (int)isc_buffer_usedlength(&buf),
173			      (char *)isc_buffer_base(&buf));
174	} else {
175		isc_log_write(XFROUT_RR_LOGARGS, "<RR too large to print>");
176	}
177}
178
179/**************************************************************************/
180/*
 * An 'rrstream_t' is a polymorphic iterator that returns
 * a stream of resource records.  There are multiple implementations,
 * e.g. for generating AXFR and IXFR record streams.
184 */
185
typedef struct rrstream_methods rrstream_methods_t;

/*
 * Common header of every stream implementation.  Concrete stream
 * structures in this file place it first so that a pointer to them can
 * be cast to and from 'rrstream_t *'.
 */
typedef struct rrstream {
	isc_mem_t *mctx;	     /* Memory context the stream was
				      * allocated from */
	rrstream_methods_t *methods; /* Implementation dispatch table */
} rrstream_t;

/*
 * Per-implementation operations of an rrstream_t.
 */
struct rrstream_methods {
	/* Position the stream on its first record. */
	isc_result_t (*first)(rrstream_t *);
	/* Advance to the following record. */
	isc_result_t (*next)(rrstream_t *);
	/* Report owner name, TTL and rdata of the current record. */
	void (*current)(rrstream_t *, dns_name_t **, uint32_t *,
			dns_rdata_t **);
	/* Pause iteration (a no-op for some implementations). */
	void (*pause)(rrstream_t *);
	/* Release the stream and what it owns. */
	void (*destroy)(rrstream_t **);
};
201
/*
 * 'pause' method for stream implementations that have nothing to do
 * when paused; the argument is ignored.
 */
static void
rrstream_noop_pause(rrstream_t *rs) {
	UNUSED(rs);
}
206
207/**************************************************************************/
208/*
209 * An 'ixfr_rrstream_t' is an 'rrstream_t' that returns
210 * an IXFR-like RR stream from a journal file.
211 *
 * The SOAs at the beginning of each sequence of additions
 * or deletions are included in the stream, but the extra
 * SOAs at the beginning and end of the entire transfer are
 * not included.
216 */
217
typedef struct ixfr_rrstream {
	rrstream_t common;	/* Must be first: cast to/from rrstream_t */
	dns_journal_t *journal; /* Open journal, or NULL if not (yet) open */
} ixfr_rrstream_t;

/*
 * Forward declarations: the destructor is used by
 * ixfr_rrstream_create() on its failure path, and the method table is
 * referenced there before it is defined.
 */
static void
ixfr_rrstream_destroy(rrstream_t **sp);

static rrstream_methods_t ixfr_rrstream_methods;
228
229/*
230 * Returns: anything dns_journal_open() or dns_journal_iter_init()
231 * may return.
232 */
233
234static isc_result_t
235ixfr_rrstream_create(isc_mem_t *mctx, const char *journal_filename,
236		     uint32_t begin_serial, uint32_t end_serial, size_t *sizep,
237		     rrstream_t **sp) {
238	isc_result_t result;
239	ixfr_rrstream_t *s = NULL;
240
241	INSIST(sp != NULL && *sp == NULL);
242
243	s = isc_mem_get(mctx, sizeof(*s));
244	s->common.mctx = NULL;
245	isc_mem_attach(mctx, &s->common.mctx);
246	s->common.methods = &ixfr_rrstream_methods;
247	s->journal = NULL;
248
249	CHECK(dns_journal_open(mctx, journal_filename, DNS_JOURNAL_READ,
250			       &s->journal));
251	CHECK(dns_journal_iter_init(s->journal, begin_serial, end_serial,
252				    sizep));
253
254	*sp = (rrstream_t *)s;
255	return (ISC_R_SUCCESS);
256
257failure:
258	ixfr_rrstream_destroy((rrstream_t **)(void *)&s);
259	return (result);
260}
261
262static isc_result_t
263ixfr_rrstream_first(rrstream_t *rs) {
264	ixfr_rrstream_t *s = (ixfr_rrstream_t *)rs;
265	return (dns_journal_first_rr(s->journal));
266}
267
268static isc_result_t
269ixfr_rrstream_next(rrstream_t *rs) {
270	ixfr_rrstream_t *s = (ixfr_rrstream_t *)rs;
271	return (dns_journal_next_rr(s->journal));
272}
273
274static void
275ixfr_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
276		      dns_rdata_t **rdata) {
277	ixfr_rrstream_t *s = (ixfr_rrstream_t *)rs;
278	dns_journal_current_rr(s->journal, name, ttl, rdata);
279}
280
281static void
282ixfr_rrstream_destroy(rrstream_t **rsp) {
283	ixfr_rrstream_t *s = (ixfr_rrstream_t *)*rsp;
284	if (s->journal != NULL) {
285		dns_journal_destroy(&s->journal);
286	}
287	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
288}
289
290static rrstream_methods_t ixfr_rrstream_methods = {
291	ixfr_rrstream_first, ixfr_rrstream_next, ixfr_rrstream_current,
292	rrstream_noop_pause, ixfr_rrstream_destroy
293};
294
295/**************************************************************************/
296/*
297 * An 'axfr_rrstream_t' is an 'rrstream_t' that returns
298 * an AXFR-like RR stream from a database.
299 *
300 * The SOAs at the beginning and end of the transfer are
301 * not included in the stream.
302 */
303
typedef struct axfr_rrstream {
	rrstream_t common;   /* Must be first: cast to/from rrstream_t */
	dns_rriterator_t it; /* Iterator over the database records */
	bool it_valid;	     /* True once 'it' has been initialized and
			      * therefore must be destroyed */
} axfr_rrstream_t;

/*
 * Forward declarations: the destructor is used by
 * axfr_rrstream_create() on its failure path, and the method table is
 * referenced there before it is defined.
 */
static void
axfr_rrstream_destroy(rrstream_t **rsp);

static rrstream_methods_t axfr_rrstream_methods;
317
318static isc_result_t
319axfr_rrstream_create(isc_mem_t *mctx, dns_db_t *db, dns_dbversion_t *ver,
320		     rrstream_t **sp) {
321	axfr_rrstream_t *s;
322	isc_result_t result;
323
324	INSIST(sp != NULL && *sp == NULL);
325
326	s = isc_mem_get(mctx, sizeof(*s));
327	s->common.mctx = NULL;
328	isc_mem_attach(mctx, &s->common.mctx);
329	s->common.methods = &axfr_rrstream_methods;
330	s->it_valid = false;
331
332	CHECK(dns_rriterator_init(&s->it, db, ver, 0));
333	s->it_valid = true;
334
335	*sp = (rrstream_t *)s;
336	return (ISC_R_SUCCESS);
337
338failure:
339	axfr_rrstream_destroy((rrstream_t **)(void *)&s);
340	return (result);
341}
342
343static isc_result_t
344axfr_rrstream_first(rrstream_t *rs) {
345	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
346	isc_result_t result;
347	result = dns_rriterator_first(&s->it);
348	if (result != ISC_R_SUCCESS) {
349		return (result);
350	}
351	/* Skip SOA records. */
352	for (;;) {
353		dns_name_t *name_dummy = NULL;
354		uint32_t ttl_dummy;
355		dns_rdata_t *rdata = NULL;
356		dns_rriterator_current(&s->it, &name_dummy, &ttl_dummy, NULL,
357				       &rdata);
358		if (rdata->type != dns_rdatatype_soa) {
359			break;
360		}
361		result = dns_rriterator_next(&s->it);
362		if (result != ISC_R_SUCCESS) {
363			break;
364		}
365	}
366	return (result);
367}
368
369static isc_result_t
370axfr_rrstream_next(rrstream_t *rs) {
371	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
372	isc_result_t result;
373
374	/* Skip SOA records. */
375	for (;;) {
376		dns_name_t *name_dummy = NULL;
377		uint32_t ttl_dummy;
378		dns_rdata_t *rdata = NULL;
379		result = dns_rriterator_next(&s->it);
380		if (result != ISC_R_SUCCESS) {
381			break;
382		}
383		dns_rriterator_current(&s->it, &name_dummy, &ttl_dummy, NULL,
384				       &rdata);
385		if (rdata->type != dns_rdatatype_soa) {
386			break;
387		}
388	}
389	return (result);
390}
391
392static void
393axfr_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
394		      dns_rdata_t **rdata) {
395	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
396	dns_rriterator_current(&s->it, name, ttl, NULL, rdata);
397}
398
399static void
400axfr_rrstream_pause(rrstream_t *rs) {
401	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
402	dns_rriterator_pause(&s->it);
403}
404
405static void
406axfr_rrstream_destroy(rrstream_t **rsp) {
407	axfr_rrstream_t *s = (axfr_rrstream_t *)*rsp;
408	if (s->it_valid) {
409		dns_rriterator_destroy(&s->it);
410	}
411	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
412}
413
414static rrstream_methods_t axfr_rrstream_methods = {
415	axfr_rrstream_first, axfr_rrstream_next, axfr_rrstream_current,
416	axfr_rrstream_pause, axfr_rrstream_destroy
417};
418
419/**************************************************************************/
420/*
421 * An 'soa_rrstream_t' is a degenerate 'rrstream_t' that returns
422 * a single SOA record.
423 */
424
typedef struct soa_rrstream {
	rrstream_t common;	    /* Must be first: cast to/from rrstream_t */
	dns_difftuple_t *soa_tuple; /* The single SOA record returned by the
				     * stream, or NULL if not (yet) created */
} soa_rrstream_t;

/*
 * Forward declarations: the destructor is used by
 * soa_rrstream_create() on its failure path, and the method table is
 * referenced there before it is defined.
 */
static void
soa_rrstream_destroy(rrstream_t **rsp);

static rrstream_methods_t soa_rrstream_methods;
437
438static isc_result_t
439soa_rrstream_create(isc_mem_t *mctx, dns_db_t *db, dns_dbversion_t *ver,
440		    rrstream_t **sp) {
441	soa_rrstream_t *s;
442	isc_result_t result;
443
444	INSIST(sp != NULL && *sp == NULL);
445
446	s = isc_mem_get(mctx, sizeof(*s));
447	s->common.mctx = NULL;
448	isc_mem_attach(mctx, &s->common.mctx);
449	s->common.methods = &soa_rrstream_methods;
450	s->soa_tuple = NULL;
451
452	CHECK(dns_db_createsoatuple(db, ver, mctx, DNS_DIFFOP_EXISTS,
453				    &s->soa_tuple));
454
455	*sp = (rrstream_t *)s;
456	return (ISC_R_SUCCESS);
457
458failure:
459	soa_rrstream_destroy((rrstream_t **)(void *)&s);
460	return (result);
461}
462
/*
 * 'first' method: the stream's only record is always available, so
 * this unconditionally succeeds.
 */
static isc_result_t
soa_rrstream_first(rrstream_t *rs) {
	UNUSED(rs);
	return (ISC_R_SUCCESS);
}
468
/*
 * 'next' method: there is never a second record, so this
 * unconditionally reports ISC_R_NOMORE.
 */
static isc_result_t
soa_rrstream_next(rrstream_t *rs) {
	UNUSED(rs);
	return (ISC_R_NOMORE);
}
474
475static void
476soa_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
477		     dns_rdata_t **rdata) {
478	soa_rrstream_t *s = (soa_rrstream_t *)rs;
479	*name = &s->soa_tuple->name;
480	*ttl = s->soa_tuple->ttl;
481	*rdata = &s->soa_tuple->rdata;
482}
483
484static void
485soa_rrstream_destroy(rrstream_t **rsp) {
486	soa_rrstream_t *s = (soa_rrstream_t *)*rsp;
487	if (s->soa_tuple != NULL) {
488		dns_difftuple_free(&s->soa_tuple);
489	}
490	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
491}
492
493static rrstream_methods_t soa_rrstream_methods = {
494	soa_rrstream_first, soa_rrstream_next, soa_rrstream_current,
495	rrstream_noop_pause, soa_rrstream_destroy
496};
497
498/**************************************************************************/
499/*
 * A 'compound_rrstream_t' object owns a soa_rrstream
 * and another rrstream, the "data stream".  It returns
 * a concatenated stream consisting of the soa_rrstream, then
 * the data stream, then the soa_rrstream again.
504 *
505 * The component streams are owned by the compound_rrstream_t
506 * and are destroyed with it.
507 */
508
typedef struct compound_rrstream {
	rrstream_t common;	   /* Must be first: cast to/from rrstream_t */
	rrstream_t *components[3]; /* [0] SOA stream, [1] data stream,
				    * [2] the same SOA stream as [0] */
	int state;		   /* Index of the component currently being
				    * read; -1 until first() is called */
	isc_result_t result;	   /* Result of the most recent first()/next()
				    * call made on a component */
} compound_rrstream_t;

/*
 * Forward declarations.
 */
static void
compound_rrstream_destroy(rrstream_t **rsp);

static isc_result_t
compound_rrstream_next(rrstream_t *rs);

static rrstream_methods_t compound_rrstream_methods;
526
527/*
528 * Requires:
529 *	soa_stream != NULL && *soa_stream != NULL
530 *	data_stream != NULL && *data_stream != NULL
531 *	sp != NULL && *sp == NULL
532 *
533 * Ensures:
534 *	*soa_stream == NULL
535 *	*data_stream == NULL
536 *	*sp points to a valid compound_rrstream_t
537 *	The soa and data streams will be destroyed
538 *	when the compound_rrstream_t is destroyed.
539 */
540static isc_result_t
541compound_rrstream_create(isc_mem_t *mctx, rrstream_t **soa_stream,
542			 rrstream_t **data_stream, rrstream_t **sp) {
543	compound_rrstream_t *s;
544
545	INSIST(sp != NULL && *sp == NULL);
546
547	s = isc_mem_get(mctx, sizeof(*s));
548	s->common.mctx = NULL;
549	isc_mem_attach(mctx, &s->common.mctx);
550	s->common.methods = &compound_rrstream_methods;
551	s->components[0] = *soa_stream;
552	s->components[1] = *data_stream;
553	s->components[2] = *soa_stream;
554	s->state = -1;
555	s->result = ISC_R_FAILURE;
556
557	*data_stream = NULL;
558	*soa_stream = NULL;
559	*sp = (rrstream_t *)s;
560	return (ISC_R_SUCCESS);
561}
562
563static isc_result_t
564compound_rrstream_first(rrstream_t *rs) {
565	compound_rrstream_t *s = (compound_rrstream_t *)rs;
566	s->state = 0;
567	do {
568		rrstream_t *curstream = s->components[s->state];
569		s->result = curstream->methods->first(curstream);
570	} while (s->result == ISC_R_NOMORE && s->state < 2);
571	return (s->result);
572}
573
574static isc_result_t
575compound_rrstream_next(rrstream_t *rs) {
576	compound_rrstream_t *s = (compound_rrstream_t *)rs;
577	rrstream_t *curstream = s->components[s->state];
578	s->result = curstream->methods->next(curstream);
579	while (s->result == ISC_R_NOMORE) {
580		/*
581		 * Make sure locks held by the current stream
582		 * are released before we switch streams.
583		 */
584		curstream->methods->pause(curstream);
585		if (s->state == 2) {
586			return (ISC_R_NOMORE);
587		}
588		s->state++;
589		curstream = s->components[s->state];
590		s->result = curstream->methods->first(curstream);
591	}
592	return (s->result);
593}
594
595static void
596compound_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
597			  dns_rdata_t **rdata) {
598	compound_rrstream_t *s = (compound_rrstream_t *)rs;
599	rrstream_t *curstream;
600	INSIST(0 <= s->state && s->state < 3);
601	INSIST(s->result == ISC_R_SUCCESS);
602	curstream = s->components[s->state];
603	curstream->methods->current(curstream, name, ttl, rdata);
604}
605
606static void
607compound_rrstream_pause(rrstream_t *rs) {
608	compound_rrstream_t *s = (compound_rrstream_t *)rs;
609	rrstream_t *curstream;
610	INSIST(0 <= s->state && s->state < 3);
611	curstream = s->components[s->state];
612	curstream->methods->pause(curstream);
613}
614
615static void
616compound_rrstream_destroy(rrstream_t **rsp) {
617	compound_rrstream_t *s = (compound_rrstream_t *)*rsp;
618	s->components[0]->methods->destroy(&s->components[0]);
619	s->components[1]->methods->destroy(&s->components[1]);
620	s->components[2] = NULL; /* Copy of components[0]. */
621	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
622}
623
624static rrstream_methods_t compound_rrstream_methods = {
625	compound_rrstream_first, compound_rrstream_next,
626	compound_rrstream_current, compound_rrstream_pause,
627	compound_rrstream_destroy
628};
629
630/**************************************************************************/
631
/*%
 * Structure holding outgoing transfer statistics.
 *
 * One instance is embedded in each xfrout_ctx_t as its 'stats' member.
 */
struct xfr_stats {
	uint64_t nmsg;	  /*%< Number of messages sent */
	uint64_t nrecs;	  /*%< Number of records sent */
	uint64_t nbytes;  /*%< Number of bytes sent */
	isc_time_t start; /*%< Start time of the transfer */
	isc_time_t end;	  /*%< End time of the transfer */
};
642
/*%
 * An 'xfrout_ctx_t' contains the state of an outgoing AXFR or IXFR
 * in progress.
 *
 * It is filled in by xfrout_ctx_create(); ns_xfr_start() hands over
 * ownership of 'stream', 'db', 'ver' and 'quota' when it creates the
 * context.
 */
typedef struct {
	isc_mem_t *mctx;       /* Memory context */
	ns_client_t *client;   /* Client that requested the transfer */
	unsigned int id;       /* ID of request */
	dns_name_t *qname;     /* Question name of request */
	dns_rdatatype_t qtype; /* dns_rdatatype_{a,i}xfr */
	dns_rdataclass_t qclass; /* Question class of request */
	dns_zone_t *zone; /* (necessary for stats) */
	dns_db_t *db;	      /* Database being transferred */
	dns_dbversion_t *ver; /* Version of 'db' being transferred */
	isc_quota_t *quota;   /* Outgoing-transfer quota attached for
			       * this request */
	rrstream_t *stream;  /* The XFR RR stream */
	bool question_added; /* QUESTION section sent? */
	bool end_of_stream;  /* EOS has been reached */
	isc_buffer_t buf;    /* Buffer for message owner
			      * names and rdatas */
	isc_buffer_t txbuf;  /* Transmit message buffer */
	size_t cbytes;	     /* Length of current message */
	/*
	 * NOTE(review): presumably the allocation backing 'buf'/'txbuf'
	 * and its size in bytes - confirm in xfrout_ctx_create().
	 */
	void *txmem;
	unsigned int txmemlen;
	dns_tsigkey_t *tsigkey; /* Key used to create TSIG */
	isc_buffer_t *lasttsig; /* the last TSIG */
	bool verified_tsig;	/* verified request MAC */
	bool many_answers; /* True when the transfer format is
			    * dns_many_answers */
	int sends; /* Send in progress */
	/*
	 * NOTE(review): the uses of 'shuttingdown' and 'poll' are outside
	 * this view; 'poll' presumably marks a single-SOA reply (see
	 * 'is_poll' in ns_xfr_start()) - confirm.
	 */
	bool shuttingdown;
	bool poll;
	const char *mnemonic;	/* Style of transfer */
	uint32_t end_serial;	/* Serial number after XFR is done */
	struct xfr_stats stats; /*%< Transfer statistics */

	/* Timeouts */
	uint64_t maxtime; /*%< Maximum XFR timeout (in ms) */
	/* NOTE(review): presumably the timer enforcing 'maxtime' - confirm. */
	isc_nm_timer_t *maxtime_timer;

	uint64_t idletime; /*%< XFR idle timeout (in ms) */
} xfrout_ctx_t;
684
/*
 * Forward declarations of the file-local helpers that operate on an
 * xfrout_ctx_t.  Their definitions are not adjacent to these
 * prototypes.
 */

/* Build a new transfer context from the given request state. */
static void
xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id,
		  dns_name_t *qname, dns_rdatatype_t qtype,
		  dns_rdataclass_t qclass, dns_zone_t *zone, dns_db_t *db,
		  dns_dbversion_t *ver, isc_quota_t *quota, rrstream_t *stream,
		  dns_tsigkey_t *tsigkey, isc_buffer_t *lasttsig,
		  bool verified_tsig, unsigned int maxtime,
		  unsigned int idletime, bool many_answers,
		  xfrout_ctx_t **xfrp);

static void
sendstream(xfrout_ctx_t *xfr);

static void
xfrout_senddone(isc_nmhandle_t *handle, isc_result_t result, void *arg);

static void
xfrout_fail(xfrout_ctx_t *xfr, isc_result_t result, const char *msg);

static void
xfrout_maybe_destroy(xfrout_ctx_t *xfr);

static void
xfrout_ctx_destroy(xfrout_ctx_t **xfrp);

static void
xfrout_client_timeout(void *arg, isc_result_t result);

/*
 * printf-style logging helpers; the ISC_FORMAT_PRINTF annotations give
 * the positions of the format string and of the first variadic
 * argument.
 */
static void
xfrout_log1(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
	    int level, const char *fmt, ...) ISC_FORMAT_PRINTF(5, 6);

static void
xfrout_log(xfrout_ctx_t *xfr, int level, const char *fmt, ...)
	ISC_FORMAT_PRINTF(3, 4);
720
721/**************************************************************************/
722
723void
724ns_xfr_start(ns_client_t *client, dns_rdatatype_t reqtype) {
725	isc_result_t result;
726	dns_name_t *question_name;
727	dns_rdataset_t *question_rdataset;
728	dns_zone_t *zone = NULL, *raw = NULL, *mayberaw;
729	dns_db_t *db = NULL;
730	dns_dbversion_t *ver = NULL;
731	dns_rdataclass_t question_class;
732	rrstream_t *soa_stream = NULL;
733	rrstream_t *data_stream = NULL;
734	rrstream_t *stream = NULL;
735	dns_difftuple_t *current_soa_tuple = NULL;
736	dns_name_t *soa_name;
737	dns_rdataset_t *soa_rdataset;
738	dns_rdata_t soa_rdata = DNS_RDATA_INIT;
739	bool have_soa = false;
740	const char *mnemonic = NULL;
741	isc_mem_t *mctx = client->mctx;
742	dns_message_t *request = client->message;
743	xfrout_ctx_t *xfr = NULL;
744	isc_quota_t *quota = NULL;
745	dns_transfer_format_t format = client->view->transfer_format;
746	isc_netaddr_t na;
747	dns_peer_t *peer = NULL;
748	isc_buffer_t *tsigbuf = NULL;
749	char *journalfile;
750	char msg[NS_CLIENT_ACLMSGSIZE("zone transfer")];
751	char keyname[DNS_NAME_FORMATSIZE];
752	bool is_poll = false;
753	bool is_dlz = false;
754	bool is_ixfr = false;
755	bool useviewacl = false;
756	uint32_t begin_serial = 0, current_serial;
757
758	switch (reqtype) {
759	case dns_rdatatype_axfr:
760		mnemonic = "AXFR";
761		break;
762	case dns_rdatatype_ixfr:
763		mnemonic = "IXFR";
764		break;
765	default:
766		UNREACHABLE();
767	}
768
769	ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT, NS_LOGMODULE_XFER_OUT,
770		      ISC_LOG_DEBUG(6), "%s request", mnemonic);
771	/*
772	 * Apply quota.
773	 */
774	result = isc_quota_attach(&client->sctx->xfroutquota, &quota);
775	if (result != ISC_R_SUCCESS) {
776		isc_log_write(XFROUT_COMMON_LOGARGS, ISC_LOG_WARNING,
777			      "%s request denied: %s", mnemonic,
778			      isc_result_totext(result));
779		goto failure;
780	}
781
782	/*
783	 * Interpret the question section.
784	 */
785	result = dns_message_firstname(request, DNS_SECTION_QUESTION);
786	INSIST(result == ISC_R_SUCCESS);
787
788	/*
789	 * The question section must contain exactly one question, and
790	 * it must be for AXFR/IXFR as appropriate.
791	 */
792	question_name = NULL;
793	dns_message_currentname(request, DNS_SECTION_QUESTION, &question_name);
794	question_rdataset = ISC_LIST_HEAD(question_name->list);
795	question_class = question_rdataset->rdclass;
796	INSIST(question_rdataset->type == reqtype);
797	if (ISC_LIST_NEXT(question_rdataset, link) != NULL) {
798		FAILC(DNS_R_FORMERR, "multiple questions");
799	}
800	result = dns_message_nextname(request, DNS_SECTION_QUESTION);
801	if (result != ISC_R_NOMORE) {
802		FAILC(DNS_R_FORMERR, "multiple questions");
803	}
804
805	result = dns_zt_find(client->view->zonetable, question_name, 0, NULL,
806			     &zone);
807
808	if (result != ISC_R_SUCCESS || dns_zone_gettype(zone) == dns_zone_dlz) {
809		/*
810		 * The normal zone table does not have a match, or this is
811		 * marked in the zone table as a DLZ zone. Check the DLZ
812		 * databases for a match.
813		 */
814		if (!ISC_LIST_EMPTY(client->view->dlz_searched)) {
815			result = dns_dlzallowzonexfr(client->view,
816						     question_name,
817						     &client->peeraddr, &db);
818			if (result == ISC_R_DEFAULT) {
819				useviewacl = true;
820				result = ISC_R_SUCCESS;
821			}
822			if (result == ISC_R_NOPERM) {
823				char _buf1[DNS_NAME_FORMATSIZE];
824				char _buf2[DNS_RDATACLASS_FORMATSIZE];
825
826				result = DNS_R_REFUSED;
827				dns_name_format(question_name, _buf1,
828						sizeof(_buf1));
829				dns_rdataclass_format(question_class, _buf2,
830						      sizeof(_buf2));
831				ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
832					      NS_LOGMODULE_XFER_OUT,
833					      ISC_LOG_ERROR,
834					      "zone transfer '%s/%s' denied",
835					      _buf1, _buf2);
836				pfilter_notify(result, client, "zonexfr");
837				goto failure;
838			}
839			if (result != ISC_R_SUCCESS) {
840				pfilter_notify(result, client, "zonexfr");
841				FAILQ(DNS_R_NOTAUTH, "non-authoritative zone",
842				      question_name, question_class);
843			}
844			is_dlz = true;
845		} else {
846			/*
847			 * not DLZ and not in normal zone table, we are
848			 * not authoritative
849			 */
850			FAILQ(DNS_R_NOTAUTH, "non-authoritative zone",
851			      question_name, question_class);
852		}
853	} else {
854		/* zone table has a match */
855		switch (dns_zone_gettype(zone)) {
856		/*
857		 * Primary, secondary, and mirror zones are OK for transfer.
858		 */
859		case dns_zone_primary:
860		case dns_zone_secondary:
861		case dns_zone_mirror:
862		case dns_zone_dlz:
863			break;
864		default:
865			FAILQ(DNS_R_NOTAUTH, "non-authoritative zone",
866			      question_name, question_class);
867		}
868		CHECK(dns_zone_getdb(zone, &db));
869		dns_db_currentversion(db, &ver);
870	}
871
872	xfrout_log1(client, question_name, question_class, ISC_LOG_DEBUG(6),
873		    "%s question section OK", mnemonic);
874
875	/*
876	 * Check the authority section.  Look for a SOA record with
877	 * the same name and class as the question.
878	 */
879	for (result = dns_message_firstname(request, DNS_SECTION_AUTHORITY);
880	     result == ISC_R_SUCCESS;
881	     result = dns_message_nextname(request, DNS_SECTION_AUTHORITY))
882	{
883		soa_name = NULL;
884		dns_message_currentname(request, DNS_SECTION_AUTHORITY,
885					&soa_name);
886
887		/*
888		 * Ignore data whose owner name is not the zone apex.
889		 */
890		if (!dns_name_equal(soa_name, question_name)) {
891			continue;
892		}
893
894		for (soa_rdataset = ISC_LIST_HEAD(soa_name->list);
895		     soa_rdataset != NULL;
896		     soa_rdataset = ISC_LIST_NEXT(soa_rdataset, link))
897		{
898			/*
899			 * Ignore non-SOA data.
900			 */
901			if (soa_rdataset->type != dns_rdatatype_soa) {
902				continue;
903			}
904			if (soa_rdataset->rdclass != question_class) {
905				continue;
906			}
907
908			CHECK(dns_rdataset_first(soa_rdataset));
909			dns_rdataset_current(soa_rdataset, &soa_rdata);
910			result = dns_rdataset_next(soa_rdataset);
911			if (result == ISC_R_SUCCESS) {
912				FAILC(DNS_R_FORMERR, "IXFR authority section "
913						     "has multiple SOAs");
914			}
915			have_soa = true;
916			goto got_soa;
917		}
918	}
919got_soa:
920	if (result != ISC_R_NOMORE) {
921		CHECK(result);
922	}
923
924	xfrout_log1(client, question_name, question_class, ISC_LOG_DEBUG(6),
925		    "%s authority section OK", mnemonic);
926
927	/*
928	 * If not a DLZ zone or we are falling back to the view's transfer
929	 * ACL, decide whether to allow this transfer.
930	 */
931	if (!is_dlz || useviewacl) {
932		dns_acl_t *acl;
933
934		ns_client_aclmsg("zone transfer", question_name, reqtype,
935				 client->view->rdclass, msg, sizeof(msg));
936		if (useviewacl) {
937			acl = client->view->transferacl;
938		} else {
939			acl = dns_zone_getxfracl(zone);
940		}
941		CHECK(ns_client_checkacl(client, NULL, msg, acl, true,
942					 ISC_LOG_ERROR));
943	}
944
945	/*
946	 * AXFR over UDP is not possible.
947	 */
948	if (reqtype == dns_rdatatype_axfr &&
949	    (client->attributes & NS_CLIENTATTR_TCP) == 0)
950	{
951		FAILC(DNS_R_FORMERR, "attempted AXFR over UDP");
952	}
953
954	/*
955	 * Look up the requesting server in the peer table.
956	 */
957	isc_netaddr_fromsockaddr(&na, &client->peeraddr);
958	(void)dns_peerlist_peerbyaddr(client->view->peers, &na, &peer);
959
960	/*
961	 * Decide on the transfer format (one-answer or many-answers).
962	 */
963	if (peer != NULL) {
964		(void)dns_peer_gettransferformat(peer, &format);
965	}
966
967	/*
968	 * Get a dynamically allocated copy of the current SOA.
969	 */
970	if (is_dlz) {
971		dns_db_currentversion(db, &ver);
972	}
973
974	CHECK(dns_db_createsoatuple(db, ver, mctx, DNS_DIFFOP_EXISTS,
975				    &current_soa_tuple));
976
977	current_serial = dns_soa_getserial(&current_soa_tuple->rdata);
978	if (reqtype == dns_rdatatype_ixfr) {
979		size_t jsize;
980		uint64_t dbsize;
981
982		if (!have_soa) {
983			FAILC(DNS_R_FORMERR, "IXFR request missing SOA");
984		}
985
986		begin_serial = dns_soa_getserial(&soa_rdata);
987
988		/*
989		 * RFC1995 says "If an IXFR query with the same or
990		 * newer version number than that of the server
991		 * is received, it is replied to with a single SOA
992		 * record of the server's current version, just as
993		 * in AXFR".  The claim about AXFR is incorrect,
994		 * but other than that, we do as the RFC says.
995		 *
996		 * Sending a single SOA record is also how we refuse
997		 * IXFR over UDP (currently, we always do).
998		 */
999		if (DNS_SERIAL_GE(begin_serial, current_serial) ||
1000		    (client->attributes & NS_CLIENTATTR_TCP) == 0)
1001		{
1002			CHECK(soa_rrstream_create(mctx, db, ver, &stream));
1003			is_poll = true;
1004			goto have_stream;
1005		}
1006
1007		/*
1008		 * Outgoing IXFR may have been disabled for this peer
1009		 * or globally.
1010		 */
1011		if ((client->attributes & NS_CLIENTATTR_TCP) != 0) {
1012			bool provide_ixfr;
1013
1014			provide_ixfr = client->view->provideixfr;
1015			if (peer != NULL) {
1016				(void)dns_peer_getprovideixfr(peer,
1017							      &provide_ixfr);
1018			}
1019			if (!provide_ixfr) {
1020				xfrout_log1(client, question_name,
1021					    question_class, ISC_LOG_DEBUG(4),
1022					    "IXFR delta response disabled due "
1023					    "to 'provide-ixfr no;' being set");
1024				mnemonic = "AXFR-style IXFR";
1025				goto axfr_fallback;
1026			}
1027		}
1028
1029		journalfile = is_dlz ? NULL : dns_zone_getjournal(zone);
1030		if (journalfile != NULL) {
1031			result = ixfr_rrstream_create(
1032				mctx, journalfile, begin_serial, current_serial,
1033				&jsize, &data_stream);
1034		} else {
1035			result = ISC_R_NOTFOUND;
1036		}
1037		if (result == ISC_R_NOTFOUND || result == ISC_R_RANGE) {
1038			xfrout_log1(client, question_name, question_class,
1039				    ISC_LOG_INFO,
1040				    "IXFR version not in journal, "
1041				    "falling back to AXFR");
1042			mnemonic = "AXFR-style IXFR";
1043			goto axfr_fallback;
1044		}
1045		CHECK(result);
1046
1047		result = dns_db_getsize(db, ver, NULL, &dbsize);
1048		if (result == ISC_R_SUCCESS) {
1049			uint32_t ratio = dns_zone_getixfrratio(zone);
1050			if (ratio != 0 && ((100 * jsize) / dbsize) > ratio) {
1051				data_stream->methods->destroy(&data_stream);
1052				data_stream = NULL;
1053				xfrout_log1(client, question_name,
1054					    question_class, ISC_LOG_INFO,
1055					    "IXFR delta size (%zu bytes) "
1056					    "exceeds the maximum ratio to "
1057					    "database size "
1058					    "(%" PRIu64 " bytes), "
1059					    "falling back to AXFR",
1060					    jsize, dbsize);
1061				mnemonic = "AXFR-style IXFR";
1062				goto axfr_fallback;
1063			} else {
1064				xfrout_log1(client, question_name,
1065					    question_class, ISC_LOG_DEBUG(4),
1066					    "IXFR delta size (%zu bytes); "
1067					    "database size "
1068					    "(%" PRIu64 " bytes)",
1069					    jsize, dbsize);
1070			}
1071		}
1072		is_ixfr = true;
1073	} else {
1074	axfr_fallback:
1075		CHECK(axfr_rrstream_create(mctx, db, ver, &data_stream));
1076	}
1077
1078	/*
1079	 * Bracket the data stream with SOAs.
1080	 */
1081	CHECK(soa_rrstream_create(mctx, db, ver, &soa_stream));
1082	CHECK(compound_rrstream_create(mctx, &soa_stream, &data_stream,
1083				       &stream));
1084	soa_stream = NULL;
1085	data_stream = NULL;
1086
1087have_stream:
1088	CHECK(dns_message_getquerytsig(request, mctx, &tsigbuf));
1089	/*
1090	 * Create the xfrout context object.  This transfers the ownership
1091	 * of "stream", "db", "ver", and "quota" to the xfrout context object.
1092	 */
1093
1094	if (is_dlz) {
1095		xfrout_ctx_create(mctx, client, request->id, question_name,
1096				  reqtype, question_class, zone, db, ver, quota,
1097				  stream, dns_message_gettsigkey(request),
1098				  tsigbuf, request->verified_sig, 3600, 3600,
1099				  (format == dns_many_answers) ? true : false,
1100				  &xfr);
1101	} else {
1102		xfrout_ctx_create(
1103			mctx, client, request->id, question_name, reqtype,
1104			question_class, zone, db, ver, quota, stream,
1105			dns_message_gettsigkey(request), tsigbuf,
1106			request->verified_sig, dns_zone_getmaxxfrout(zone),
1107			dns_zone_getidleout(zone),
1108			(format == dns_many_answers) ? true : false, &xfr);
1109	}
1110
1111	xfr->end_serial = current_serial;
1112	xfr->mnemonic = mnemonic;
1113	stream = NULL;
1114	quota = NULL;
1115
1116	CHECK(xfr->stream->methods->first(xfr->stream));
1117
1118	if (xfr->tsigkey != NULL) {
1119		dns_name_format(&xfr->tsigkey->name, keyname, sizeof(keyname));
1120	} else {
1121		keyname[0] = '\0';
1122	}
1123	xfr->poll = is_poll;
1124	if (is_poll) {
1125		xfr->mnemonic = "IXFR poll response";
1126		xfrout_log1(client, question_name, question_class,
1127			    ISC_LOG_DEBUG(1), "IXFR poll up to date%s%s",
1128			    (xfr->tsigkey != NULL) ? ": TSIG " : "", keyname);
1129	} else if (is_ixfr) {
1130		xfrout_log1(client, question_name, question_class, ISC_LOG_INFO,
1131			    "%s started%s%s (serial %u -> %u)", mnemonic,
1132			    (xfr->tsigkey != NULL) ? ": TSIG " : "", keyname,
1133			    begin_serial, current_serial);
1134	} else {
1135		xfrout_log1(client, question_name, question_class, ISC_LOG_INFO,
1136			    "%s started%s%s (serial %u)", mnemonic,
1137			    (xfr->tsigkey != NULL) ? ": TSIG " : "", keyname,
1138			    current_serial);
1139	}
1140
1141	if (zone != NULL) {
1142		dns_zone_getraw(zone, &raw);
1143		mayberaw = (raw != NULL) ? raw : zone;
1144		if ((client->attributes & NS_CLIENTATTR_WANTEXPIRE) != 0 &&
1145		    (dns_zone_gettype(mayberaw) == dns_zone_secondary ||
1146		     dns_zone_gettype(mayberaw) == dns_zone_mirror))
1147		{
1148			isc_time_t expiretime;
1149			uint32_t secs;
1150			dns_zone_getexpiretime(zone, &expiretime);
1151			secs = isc_time_seconds(&expiretime);
1152			if (secs >= client->now && result == ISC_R_SUCCESS) {
1153				client->attributes |= NS_CLIENTATTR_HAVEEXPIRE;
1154				client->expire = secs - client->now;
1155			}
1156		}
1157		if (raw != NULL) {
1158			dns_zone_detach(&raw);
1159		}
1160	}
1161
1162	/* Start the timers */
1163	if (xfr->maxtime > 0) {
1164		xfrout_log(xfr, ISC_LOG_DEBUG(1),
1165			   "starting maxtime timer %" PRIu64 " ms",
1166			   xfr->maxtime);
1167		isc_nm_timer_start(xfr->maxtime_timer, xfr->maxtime);
1168	}
1169
1170	/*
1171	 * Hand the context over to sendstream().  Set xfr to NULL;
1172	 * sendstream() is responsible for either passing the
1173	 * context on to a later event handler or destroying it.
1174	 */
1175	sendstream(xfr);
1176	xfr = NULL;
1177
1178	result = ISC_R_SUCCESS;
1179
1180failure:
1181	if (result == DNS_R_REFUSED) {
1182		inc_stats(client, zone, ns_statscounter_xfrrej);
1183	}
1184	if (quota != NULL) {
1185		isc_quota_detach(&quota);
1186	}
1187	if (current_soa_tuple != NULL) {
1188		dns_difftuple_free(&current_soa_tuple);
1189	}
1190	if (stream != NULL) {
1191		stream->methods->destroy(&stream);
1192	}
1193	if (soa_stream != NULL) {
1194		soa_stream->methods->destroy(&soa_stream);
1195	}
1196	if (data_stream != NULL) {
1197		data_stream->methods->destroy(&data_stream);
1198	}
1199	if (ver != NULL) {
1200		dns_db_closeversion(db, &ver, false);
1201	}
1202	if (db != NULL) {
1203		dns_db_detach(&db);
1204	}
1205	if (zone != NULL) {
1206		dns_zone_detach(&zone);
1207	}
1208	/* XXX kludge */
1209	if (xfr != NULL) {
1210		xfrout_fail(xfr, result, "setting up zone transfer");
1211	} else if (result != ISC_R_SUCCESS) {
1212		ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT,
1213			      NS_LOGMODULE_XFER_OUT, ISC_LOG_DEBUG(3),
1214			      "zone transfer setup failed");
1215		ns_client_error(client, result);
1216		isc_nmhandle_detach(&client->reqhandle);
1217	}
1218}
1219
1220static void
1221xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id,
1222		  dns_name_t *qname, dns_rdatatype_t qtype,
1223		  dns_rdataclass_t qclass, dns_zone_t *zone, dns_db_t *db,
1224		  dns_dbversion_t *ver, isc_quota_t *quota, rrstream_t *stream,
1225		  dns_tsigkey_t *tsigkey, isc_buffer_t *lasttsig,
1226		  bool verified_tsig, unsigned int maxtime,
1227		  unsigned int idletime, bool many_answers,
1228		  xfrout_ctx_t **xfrp) {
1229	xfrout_ctx_t *xfr = NULL;
1230	unsigned int len = NS_CLIENT_TCP_BUFFER_SIZE;
1231	void *mem = NULL;
1232
1233	REQUIRE(xfrp != NULL && *xfrp == NULL);
1234
1235	xfr = isc_mem_get(mctx, sizeof(*xfr));
1236	*xfr = (xfrout_ctx_t){
1237		.client = client,
1238		.id = id,
1239		.qname = qname,
1240		.qtype = qtype,
1241		.qclass = qclass,
1242		.maxtime = maxtime * 1000,   /* in milliseconds */
1243		.idletime = idletime * 1000, /* In milliseconds */
1244		.tsigkey = tsigkey,
1245		.lasttsig = lasttsig,
1246		.verified_tsig = verified_tsig,
1247		.many_answers = many_answers,
1248	};
1249
1250	isc_mem_attach(mctx, &xfr->mctx);
1251
1252	if (zone != NULL) { /* zone will be NULL if it's DLZ */
1253		dns_zone_attach(zone, &xfr->zone);
1254	}
1255	dns_db_attach(db, &xfr->db);
1256	dns_db_attachversion(db, ver, &xfr->ver);
1257
1258	isc_time_now(&xfr->stats.start);
1259
1260	isc_nm_timer_create(xfr->client->handle, xfrout_client_timeout, xfr,
1261			    &xfr->maxtime_timer);
1262
1263	/*
1264	 * Allocate a temporary buffer for the uncompressed response
1265	 * message data.  The buffer size must be 65535 bytes
1266	 * (NS_CLIENT_TCP_BUFFER_SIZE): small enough that compressed
1267	 * data will fit in a single TCP message, and big enough to
1268	 * hold a maximum-sized RR.
1269	 *
1270	 * Note that although 65535-byte RRs are allowed in principle, they
1271	 * cannot be zone-transferred (at least not if uncompressible),
1272	 * because the message and RR headers would push the size of the
1273	 * TCP message over the 65535 byte limit.
1274	 */
1275	mem = isc_mem_get(mctx, len);
1276	isc_buffer_init(&xfr->buf, mem, len);
1277
1278	/*
1279	 * Allocate another temporary buffer for the compressed
1280	 * response message.
1281	 */
1282	mem = isc_mem_get(mctx, len);
1283	isc_buffer_init(&xfr->txbuf, (char *)mem, len);
1284	xfr->txmem = mem;
1285	xfr->txmemlen = len;
1286
1287	/*
1288	 * These MUST be after the last "goto failure;" / CHECK to
1289	 * prevent a double free by the caller.
1290	 */
1291	xfr->quota = quota;
1292	xfr->stream = stream;
1293
1294	*xfrp = xfr;
1295}
1296
/*
 * Arrange to send as much as we can of "stream" without blocking.
 *
 * One call builds and sends exactly one response message:
 *
 *  - TCP: a fresh message is built from RRs staged in xfr->buf, rendered
 *    into xfr->txbuf and passed to isc_nm_send() with xfrout_senddone()
 *    as the completion callback.  xfrout_senddone() calls sendstream()
 *    again until xfr->end_of_stream is set.
 *
 *  - UDP: the RRs are added to the client's own message, which is sent
 *    with ns_client_send(); the context is then destroyed here.
 *
 * On any error the transfer is aborted through xfrout_fail(), which
 * destroys the context; the caller must not use "xfr" after this
 * function returns.
 *
 * Requires:
 *	The stream iterator is initialized and points at an RR,
 *      or possibly at the end of the stream (that is, the
 *      _first method of the iterator has been called).
 */
static void
sendstream(xfrout_ctx_t *xfr) {
	dns_message_t *tcpmsg = NULL;
	dns_message_t *msg = NULL; /* Client message if UDP, tcpmsg if TCP */
	isc_result_t result;
	dns_rdataset_t *qrdataset;
	dns_name_t *msgname = NULL;
	dns_rdata_t *msgrdata = NULL;
	dns_rdatalist_t *msgrdl = NULL;
	dns_rdataset_t *msgrds = NULL;
	dns_compress_t cctx;
	bool cleanup_cctx = false;
	bool is_tcp;
	int n_rrs;

	/* Every message starts with empty staging and transmit buffers. */
	isc_buffer_clear(&xfr->buf);
	isc_buffer_clear(&xfr->txbuf);

	is_tcp = ((xfr->client->attributes & NS_CLIENTATTR_TCP) != 0);
	if (!is_tcp) {
		/*
		 * In the UDP case, we put the response data directly into
		 * the client message.
		 */
		msg = xfr->client->message;
		CHECK(dns_message_reply(msg, true));
	} else {
		/*
		 * TCP. Build a response dns_message_t, temporarily storing
		 * the raw, uncompressed owner names and RR data contiguously
		 * in xfr->buf.  We know that if the uncompressed data fits
		 * in xfr->buf, the compressed data will surely fit in a TCP
		 * message.
		 */

		dns_message_create(xfr->mctx, DNS_MESSAGE_INTENTRENDER,
				   &tcpmsg);
		msg = tcpmsg;

		msg->id = xfr->id;
		msg->rcode = dns_rcode_noerror;
		msg->flags = DNS_MESSAGEFLAG_QR | DNS_MESSAGEFLAG_AA;
		if ((xfr->client->attributes & NS_CLIENTATTR_RA) != 0) {
			msg->flags |= DNS_MESSAGEFLAG_RA;
		}
		CHECK(dns_message_settsigkey(msg, xfr->tsigkey));
		CHECK(dns_message_setquerytsig(msg, xfr->lasttsig));
		/*
		 * The previous TSIG has been passed to the message; free
		 * our buffer.  xfr->lasttsig is refilled at the end of
		 * this function.
		 */
		if (xfr->lasttsig != NULL) {
			isc_buffer_free(&xfr->lasttsig);
		}
		msg->verified_sig = xfr->verified_tsig;

		/*
		 * Add a EDNS option to the message?
		 */
		if ((xfr->client->attributes & NS_CLIENTATTR_WANTOPT) != 0) {
			dns_rdataset_t *opt = NULL;

			CHECK(ns_client_addopt(xfr->client, msg, &opt));
			CHECK(dns_message_setopt(msg, opt));
			/*
			 * Add to first message only.
			 */
			xfr->client->attributes &= ~NS_CLIENTATTR_WANTNSID;
			xfr->client->attributes &= ~NS_CLIENTATTR_HAVEEXPIRE;
		}

		/*
		 * Account for reserved space.
		 */
		if (xfr->tsigkey != NULL) {
			INSIST(msg->reserved != 0U);
		}
		isc_buffer_add(&xfr->buf, msg->reserved);

		/*
		 * Include a question section in the first message only.
		 * BIND 8.2.1 will not recognize an IXFR if it does not
		 * have a question section.
		 */
		if (!xfr->question_added) {
			dns_name_t *qname = NULL;
			isc_region_t r;

			/*
			 * Reserve space for the 12-byte message header
			 * and 4 bytes of question.
			 */
			isc_buffer_add(&xfr->buf, 12 + 4);

			qrdataset = NULL;
			result = dns_message_gettemprdataset(msg, &qrdataset);
			if (result != ISC_R_SUCCESS) {
				goto failure;
			}
			dns_rdataset_makequestion(qrdataset,
						  xfr->client->message->rdclass,
						  xfr->qtype);

			result = dns_message_gettempname(msg, &qname);
			if (result != ISC_R_SUCCESS) {
				goto failure;
			}
			/*
			 * Copy the question name into xfr->buf and make
			 * "qname" refer to that copy: "r" is captured
			 * before the copy, so it starts where the name
			 * bytes are about to be written.
			 */
			isc_buffer_availableregion(&xfr->buf, &r);
			INSIST(r.length >= xfr->qname->length);
			r.length = xfr->qname->length;
			isc_buffer_putmem(&xfr->buf, xfr->qname->ndata,
					  xfr->qname->length);
			dns_name_fromregion(qname, &r);
			ISC_LIST_INIT(qname->list);
			ISC_LIST_APPEND(qname->list, qrdataset, link);

			dns_message_addname(msg, qname, DNS_SECTION_QUESTION);
			xfr->question_added = true;
		} else {
			/*
			 * Reserve space for the 12-byte message header
			 */
			isc_buffer_add(&xfr->buf, 12);
			msg->tcp_continuation = 1;
		}
	}

	/*
	 * Try to fit in as many RRs as possible, unless "one-answer"
	 * format has been requested.
	 */
	for (n_rrs = 0;; n_rrs++) {
		dns_name_t *name = NULL;
		uint32_t ttl;
		dns_rdata_t *rdata = NULL;

		unsigned int size;
		isc_region_t r;

		/*
		 * Reset the per-RR temporaries so that the cleanup code
		 * at "failure" only sees objects obtained for this RR.
		 */
		msgname = NULL;
		msgrdata = NULL;
		msgrdl = NULL;
		msgrds = NULL;

		xfr->stream->methods->current(xfr->stream, &name, &ttl, &rdata);
		/* Owner name + 10 bytes of RR header + rdata. */
		size = name->length + 10 + rdata->length;
		isc_buffer_availableregion(&xfr->buf, &r);
		/* ">=": an RR that would exactly fill the buffer is deferred too. */
		if (size >= r.length) {
			/*
			 * RR would not fit.  If there are other RRs in the
			 * buffer, send them now and leave this RR to the
			 * next message.  If this RR overflows the buffer
			 * all by itself, fail.
			 *
			 * In theory some RRs might fit in a TCP message
			 * when compressed even if they do not fit when
			 * uncompressed, but surely we don't want
			 * to send such monstrosities to an unsuspecting
			 * secondary.
			 */
			if (n_rrs == 0) {
				/*
				 * NOTE(review): "size" is unsigned int but
				 * is printed with %d; %u would match the
				 * type.
				 */
				xfrout_log(xfr, ISC_LOG_WARNING,
					   "RR too large for zone transfer "
					   "(%d bytes)",
					   size);
				/* XXX DNS_R_RRTOOLARGE? */
				result = ISC_R_NOSPACE;
				goto failure;
			}
			break;
		}

		if (isc_log_wouldlog(ns_lctx, XFROUT_RR_LOGLEVEL)) {
			log_rr(name, rdata, ttl); /* XXX */
		}

		result = dns_message_gettempname(msg, &msgname);
		if (result != ISC_R_SUCCESS) {
			goto failure;
		}
		/* Stage the owner name in xfr->buf; msgname refers to the copy. */
		isc_buffer_availableregion(&xfr->buf, &r);
		INSIST(r.length >= name->length);
		r.length = name->length;
		isc_buffer_putmem(&xfr->buf, name->ndata, name->length);
		dns_name_fromregion(msgname, &r);

		/* Reserve space for RR header. */
		isc_buffer_add(&xfr->buf, 10);

		result = dns_message_gettemprdata(msg, &msgrdata);
		if (result != ISC_R_SUCCESS) {
			goto failure;
		}
		/* Stage the rdata in xfr->buf; msgrdata refers to the copy. */
		isc_buffer_availableregion(&xfr->buf, &r);
		r.length = rdata->length;
		isc_buffer_putmem(&xfr->buf, rdata->data, rdata->length);
		dns_rdata_init(msgrdata);
		dns_rdata_fromregion(msgrdata, rdata->rdclass, rdata->type, &r);

		/* Wrap the single rdata in an rdatalist, then an rdataset. */
		result = dns_message_gettemprdatalist(msg, &msgrdl);
		if (result != ISC_R_SUCCESS) {
			goto failure;
		}
		msgrdl->type = rdata->type;
		msgrdl->rdclass = rdata->rdclass;
		msgrdl->ttl = ttl;
		if (rdata->type == dns_rdatatype_sig ||
		    rdata->type == dns_rdatatype_rrsig)
		{
			msgrdl->covers = dns_rdata_covers(rdata);
		} else {
			msgrdl->covers = dns_rdatatype_none;
		}
		ISC_LIST_APPEND(msgrdl->rdata, msgrdata, link);

		result = dns_message_gettemprdataset(msg, &msgrds);
		if (result != ISC_R_SUCCESS) {
			goto failure;
		}
		result = dns_rdatalist_tordataset(msgrdl, msgrds);
		INSIST(result == ISC_R_SUCCESS);

		ISC_LIST_APPEND(msgname->list, msgrds, link);

		dns_message_addname(msg, msgname, DNS_SECTION_ANSWER);
		/*
		 * The name has been added to the message; clear the local
		 * pointer so the cleanup code below does not put it back.
		 */
		msgname = NULL;

		xfr->stats.nrecs++;

		result = xfr->stream->methods->next(xfr->stream);
		if (result == ISC_R_NOMORE) {
			/* Tells xfrout_senddone() this was the last message. */
			xfr->end_of_stream = true;
			break;
		}
		CHECK(result);

		if (!xfr->many_answers) {
			break;
		}
		/*
		 * At this stage, at least 1 RR has been rendered into
		 * the message. Check if we want to clamp this message
		 * here (TCP only).
		 */
		if ((isc_buffer_usedlength(&xfr->buf) >=
		     xfr->client->sctx->transfer_tcp_message_size) &&
		    is_tcp)
		{
			break;
		}
	}

	if (is_tcp) {
		isc_region_t used;
		/*
		 * Render the question and answer sections into xfr->txbuf.
		 * cleanup_cctx tracks whether the compression context
		 * still has to be invalidated on the failure path.
		 */
		CHECK(dns_compress_init(&cctx, -1, xfr->mctx));
		dns_compress_setsensitive(&cctx, true);
		cleanup_cctx = true;
		CHECK(dns_message_renderbegin(msg, &cctx, &xfr->txbuf));
		CHECK(dns_message_rendersection(msg, DNS_SECTION_QUESTION, 0));
		CHECK(dns_message_rendersection(msg, DNS_SECTION_ANSWER, 0));
		CHECK(dns_message_renderend(msg));
		dns_compress_invalidate(&cctx);
		cleanup_cctx = false;

		isc_buffer_usedregion(&xfr->txbuf, &used);

		xfrout_log(xfr, ISC_LOG_DEBUG(8),
			   "sending TCP message of %d bytes", used.length);

		/* System test helper options to simulate network issues. */
		if (ns_server_getoption(xfr->client->manager->sctx,
					NS_SERVER_TRANSFERSLOWLY))
		{
			/* Sleep for a bit over a second. */
			select(0, NULL, NULL, NULL,
			       &(struct timeval){ 1, 1000 });
		}
		if (ns_server_getoption(xfr->client->manager->sctx,
					NS_SERVER_TRANSFERSTUCK))
		{
			/* Sleep for a bit over a minute. */
			select(0, NULL, NULL, NULL,
			       &(struct timeval){ 60, 1000 });
		}

		/*
		 * Keep a reference to the client handle in sendhandle for
		 * the send; xfrout_senddone() detaches it.
		 */
		isc_nmhandle_attach(xfr->client->handle,
				    &xfr->client->sendhandle);
		if (xfr->idletime > 0) {
			isc_nmhandle_setwritetimeout(xfr->client->sendhandle,
						     xfr->idletime);
		}
		isc_nm_send(xfr->client->sendhandle, &used, xfrout_senddone,
			    xfr);
		/*
		 * One send is now outstanding; xfrout_senddone() decrements
		 * the counter and adds cbytes to the transfer statistics.
		 */
		xfr->sends++;
		xfr->cbytes = used.length;
	} else {
		xfrout_log(xfr, ISC_LOG_DEBUG(8), "sending IXFR UDP response");

		/* System test helper options to simulate network issues. */
		if (ns_server_getoption(xfr->client->manager->sctx,
					NS_SERVER_TRANSFERSLOWLY))
		{
			/* Sleep for a bit over a second. */
			select(0, NULL, NULL, NULL,
			       &(struct timeval){ 1, 1000 });
		}
		if (ns_server_getoption(xfr->client->manager->sctx,
					NS_SERVER_TRANSFERSTUCK))
		{
			/* Sleep for a bit over a minute. */
			select(0, NULL, NULL, NULL,
			       &(struct timeval){ 60, 1000 });
		}

		/*
		 * No completion callback is used in the UDP case: send,
		 * release the request handle and destroy the context
		 * right away.
		 */
		ns_client_send(xfr->client);
		xfr->stream->methods->pause(xfr->stream);
		isc_nmhandle_detach(&xfr->client->reqhandle);
		xfrout_ctx_destroy(&xfr);
		return;
	}

	/* Advance lasttsig to be the last TSIG generated */
	CHECK(dns_message_getquerytsig(msg, xfr->mctx, &xfr->lasttsig));

	/*
	 * Common exit path: reached by falling through after a successful
	 * TCP send as well as by "goto failure" / CHECK.
	 */
failure:
	/*
	 * msgname is only non-NULL here if the loop was left after the
	 * temporary name was obtained but before it was added to the
	 * message; return that name and whatever was built for it.
	 */
	if (msgname != NULL) {
		if (msgrds != NULL) {
			if (dns_rdataset_isassociated(msgrds)) {
				dns_rdataset_disassociate(msgrds);
			}
			dns_message_puttemprdataset(msg, &msgrds);
		}
		if (msgrdl != NULL) {
			ISC_LIST_UNLINK(msgrdl->rdata, msgrdata, link);
			dns_message_puttemprdatalist(msg, &msgrdl);
		}
		if (msgrdata != NULL) {
			dns_message_puttemprdata(msg, &msgrdata);
		}
		dns_message_puttempname(msg, &msgname);
	}

	/* Only the TCP path creates its own message. */
	if (tcpmsg != NULL) {
		dns_message_detach(&tcpmsg);
	}

	if (cleanup_cctx) {
		dns_compress_invalidate(&cctx);
	}
	/*
	 * Make sure to release any locks held by database
	 * iterators before returning from the event handler.
	 */
	xfr->stream->methods->pause(xfr->stream);

	if (result == ISC_R_SUCCESS) {
		return;
	}

	/* Drop the send reference if the failure happened after attaching it. */
	if (xfr->client->sendhandle != NULL) {
		isc_nmhandle_detach(&xfr->client->sendhandle);
	}

	xfrout_fail(xfr, result, "sending zone data");
}
1666
/*
 * Release everything owned by the transfer context '*xfrp', free the
 * context itself, and set '*xfrp' to NULL.
 *
 * Must not be called while a send is outstanding (xfr->sends != 0).
 */
static void
xfrout_ctx_destroy(xfrout_ctx_t **xfrp) {
	xfrout_ctx_t *xfr = *xfrp;
	*xfrp = NULL;

	/* sends is raised in sendstream() and lowered in xfrout_senddone(). */
	INSIST(xfr->sends == 0);

	isc_nm_timer_stop(xfr->maxtime_timer);
	isc_nm_timer_detach(&xfr->maxtime_timer);

	if (xfr->stream != NULL) {
		xfr->stream->methods->destroy(&xfr->stream);
	}
	/* Staging buffer for uncompressed data (see xfrout_ctx_create()). */
	if (xfr->buf.base != NULL) {
		isc_mem_put(xfr->mctx, xfr->buf.base, xfr->buf.length);
	}
	/* Memory backing the transmit buffer xfr->txbuf. */
	if (xfr->txmem != NULL) {
		isc_mem_put(xfr->mctx, xfr->txmem, xfr->txmemlen);
	}
	if (xfr->lasttsig != NULL) {
		isc_buffer_free(&xfr->lasttsig);
	}
	if (xfr->quota != NULL) {
		isc_quota_detach(&xfr->quota);
	}
	/* The version is closed through xfr->db, so do it before the detach. */
	if (xfr->ver != NULL) {
		dns_db_closeversion(xfr->db, &xfr->ver, false);
	}
	if (xfr->zone != NULL) {
		dns_zone_detach(&xfr->zone);
	}
	if (xfr->db != NULL) {
		dns_db_detach(&xfr->db);
	}

	/* Free the context and drop the mctx reference taken at creation. */
	isc_mem_putanddetach(&xfr->mctx, xfr, sizeof(*xfr));
}
1704
1705static void
1706xfrout_senddone(isc_nmhandle_t *handle, isc_result_t result, void *arg) {
1707	xfrout_ctx_t *xfr = (xfrout_ctx_t *)arg;
1708
1709	REQUIRE((xfr->client->attributes & NS_CLIENTATTR_TCP) != 0);
1710
1711	INSIST(handle == xfr->client->handle);
1712
1713	xfr->sends--;
1714	INSIST(xfr->sends == 0);
1715
1716	isc_nmhandle_detach(&xfr->client->sendhandle);
1717
1718	/*
1719	 * Update transfer statistics if sending succeeded, accounting for the
1720	 * two-byte TCP length prefix included in the number of bytes sent.
1721	 */
1722	if (result == ISC_R_SUCCESS) {
1723		xfr->stats.nmsg++;
1724		xfr->stats.nbytes += xfr->cbytes;
1725	}
1726
1727	if (xfr->shuttingdown) {
1728		xfrout_maybe_destroy(xfr);
1729	} else if (result != ISC_R_SUCCESS) {
1730		xfrout_fail(xfr, result, "send");
1731	} else if (!xfr->end_of_stream) {
1732		sendstream(xfr);
1733	} else {
1734		/* End of zone transfer stream. */
1735		uint64_t msecs, persec;
1736
1737		inc_stats(xfr->client, xfr->zone, ns_statscounter_xfrdone);
1738		isc_time_now(&xfr->stats.end);
1739		msecs = isc_time_microdiff(&xfr->stats.end, &xfr->stats.start);
1740		msecs /= 1000;
1741		if (msecs == 0) {
1742			msecs = 1;
1743		}
1744		persec = (xfr->stats.nbytes * 1000) / msecs;
1745		xfrout_log(xfr, xfr->poll ? ISC_LOG_DEBUG(1) : ISC_LOG_INFO,
1746			   "%s ended: "
1747			   "%" PRIu64 " messages, %" PRIu64 " records, "
1748			   "%" PRIu64 " bytes, "
1749			   "%u.%03u secs (%u bytes/sec) (serial %u)",
1750			   xfr->mnemonic, xfr->stats.nmsg, xfr->stats.nrecs,
1751			   xfr->stats.nbytes, (unsigned int)(msecs / 1000),
1752			   (unsigned int)(msecs % 1000), (unsigned int)persec,
1753			   xfr->end_serial);
1754
1755		/*
1756		 * We're done, unreference the handle and destroy the xfr
1757		 * context.
1758		 */
1759		isc_nmhandle_detach(&xfr->client->reqhandle);
1760		xfrout_ctx_destroy(&xfr);
1761	}
1762}
1763
1764static void
1765xfrout_fail(xfrout_ctx_t *xfr, isc_result_t result, const char *msg) {
1766	xfr->shuttingdown = true;
1767	xfrout_log(xfr, ISC_LOG_ERROR, "%s: %s", msg,
1768		   isc_result_totext(result));
1769	xfrout_maybe_destroy(xfr);
1770}
1771
1772static void
1773xfrout_maybe_destroy(xfrout_ctx_t *xfr) {
1774	REQUIRE(xfr->shuttingdown);
1775
1776	ns_client_drop(xfr->client, ISC_R_CANCELED);
1777	isc_nmhandle_detach(&xfr->client->reqhandle);
1778	xfrout_ctx_destroy(&xfr);
1779}
1780
1781static void
1782xfrout_client_timeout(void *arg, isc_result_t result) {
1783	xfrout_ctx_t *xfr = (xfrout_ctx_t *)arg;
1784
1785	xfr->shuttingdown = true;
1786	xfrout_log(xfr, ISC_LOG_ERROR, "%s: %s", "aborted",
1787		   isc_result_totext(result));
1788}
1789
1790/*
1791 * Log outgoing zone transfer messages in a format like
1792 * <client>: transfer of <zone>: <message>
1793 */
1794
1795static void
1796xfrout_logv(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
1797	    int level, const char *fmt, va_list ap) ISC_FORMAT_PRINTF(5, 0);
1798
1799static void
1800xfrout_logv(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
1801	    int level, const char *fmt, va_list ap) {
1802	char msgbuf[2048];
1803	char namebuf[DNS_NAME_FORMATSIZE];
1804	char classbuf[DNS_RDATACLASS_FORMATSIZE];
1805
1806	dns_name_format(zonename, namebuf, sizeof(namebuf));
1807	dns_rdataclass_format(rdclass, classbuf, sizeof(classbuf));
1808	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
1809	ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT, NS_LOGMODULE_XFER_OUT,
1810		      level, "transfer of '%s/%s': %s", namebuf, classbuf,
1811		      msgbuf);
1812}
1813
1814/*
1815 * Logging function for use when a xfrout_ctx_t has not yet been created.
1816 */
1817static void
1818xfrout_log1(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
1819	    int level, const char *fmt, ...) {
1820	va_list ap;
1821	va_start(ap, fmt);
1822	xfrout_logv(client, zonename, rdclass, level, fmt, ap);
1823	va_end(ap);
1824}
1825
1826/*
1827 * Logging function for use when there is a xfrout_ctx_t.
1828 */
1829static void
1830xfrout_log(xfrout_ctx_t *xfr, int level, const char *fmt, ...) {
1831	va_list ap;
1832	va_start(ap, fmt);
1833	xfrout_logv(xfr->client, xfr->qname, xfr->qclass, level, fmt, ap);
1834	va_end(ap);
1835}
1836