xfrout.c revision 1.10
1/*	$NetBSD: xfrout.c,v 1.10 2021/04/05 11:27:04 rillig Exp $	*/
2
3/*
4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5 *
6 * This Source Code Form is subject to the terms of the Mozilla Public
7 * License, v. 2.0. If a copy of the MPL was not distributed with this
8 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9 *
10 * See the COPYRIGHT file distributed with this work for additional
11 * information regarding copyright ownership.
12 */
13
14#include <inttypes.h>
15#include <stdbool.h>
16
17#include <isc/formatcheck.h>
18#include <isc/mem.h>
19#include <isc/print.h>
20#include <isc/stats.h>
21#include <isc/util.h>
22
23#include <dns/db.h>
24#include <dns/dbiterator.h>
25#include <dns/dlz.h>
26#include <dns/fixedname.h>
27#include <dns/journal.h>
28#include <dns/message.h>
29#include <dns/peer.h>
30#include <dns/rdataclass.h>
31#include <dns/rdatalist.h>
32#include <dns/rdataset.h>
33#include <dns/rdatasetiter.h>
34#include <dns/result.h>
35#include <dns/rriterator.h>
36#include <dns/soa.h>
37#include <dns/stats.h>
38#include <dns/tsig.h>
39#include <dns/view.h>
40#include <dns/zone.h>
41#include <dns/zt.h>
42
43#include <ns/client.h>
44#include <ns/log.h>
45#include <ns/server.h>
46#include <ns/stats.h>
47#include <ns/xfrout.h>
48
49#include <ns/pfilter.h>
50
51/*! \file
52 * \brief
53 * Outgoing AXFR and IXFR.
54 */
55
56/*
57 * TODO:
58 *  - IXFR over UDP
59 */
60
/*
 * Argument bundles for the logging calls in this file.  Each macro
 * expands to the leading arguments of isc_log_write(): log context,
 * category, module and (where given) the level.
 */
#define XFROUT_COMMON_LOGARGS \
	ns_lctx, DNS_LOGCATEGORY_XFER_OUT, NS_LOGMODULE_XFER_OUT

/* Common arguments plus the "info" level. */
#define XFROUT_PROTOCOL_LOGARGS XFROUT_COMMON_LOGARGS, ISC_LOG_INFO

/* Common arguments plus debug level 'n'. */
#define XFROUT_DEBUG_LOGARGS(n) XFROUT_COMMON_LOGARGS, ISC_LOG_DEBUG(n)

/* Level, and full argument bundle, used when logging individual RRs. */
#define XFROUT_RR_LOGLEVEL ISC_LOG_DEBUG(8)

#define XFROUT_RR_LOGARGS XFROUT_COMMON_LOGARGS, XFROUT_RR_LOGLEVEL
71
/*%
 * Fail unconditionally and log as a client error.
 *
 * Assigns 'code' to 'result', logs 'msg' together with the text of the
 * result at ISC_LOG_INFO and jumps to the 'failure' label.  'code' is
 * evaluated exactly once, so an argument with side effects is safe.
 * 'client' and 'result' must be in scope where the macro is used.
 *
 * The test against ISC_R_SUCCESS is there to keep the Solaris compiler
 * from complaining about "end-of-loop code not reached".
 */
#define FAILC(code, msg)                                                   \
	do {                                                               \
		result = (code);                                           \
		ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT,            \
			      NS_LOGMODULE_XFER_OUT, ISC_LOG_INFO,         \
			      "bad zone transfer request: %s (%s)", (msg), \
			      isc_result_totext(result));                  \
		if (result != ISC_R_SUCCESS) {                             \
			goto failure;                                      \
		}                                                          \
	} while (0)
87
/*%
 * Like FAILC, but the log message also names the zone being requested:
 * 'question' is formatted with dns_name_format() and 'rdclass' with
 * dns_rdataclass_format().
 *
 * Assigns 'code' to 'result' (evaluating 'code' exactly once), logs at
 * ISC_LOG_INFO and jumps to the 'failure' label.  'client' and 'result'
 * must be in scope where the macro is used.
 */
#define FAILQ(code, msg, question, rdclass)                                  \
	do {                                                                 \
		char _buf1[DNS_NAME_FORMATSIZE];                             \
		char _buf2[DNS_RDATACLASS_FORMATSIZE];                       \
		result = (code);                                             \
		dns_name_format(question, _buf1, sizeof(_buf1));             \
		dns_rdataclass_format(rdclass, _buf2, sizeof(_buf2));        \
		ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT,              \
			      NS_LOGMODULE_XFER_OUT, ISC_LOG_INFO,           \
			      "bad zone transfer request: '%s/%s': %s (%s)", \
			      _buf1, _buf2, (msg),                           \
			      isc_result_totext(result));                    \
		if (result != ISC_R_SUCCESS) {                               \
			goto failure;                                        \
		}                                                            \
	} while (0)
102
/*%
 * Evaluate 'op' once, store its value in 'result' and jump to the
 * 'failure' label unless it is ISC_R_SUCCESS.
 */
#define CHECK(op)                              \
	do {                                   \
		result = (op);                 \
		if (result != ISC_R_SUCCESS) { \
			goto failure;          \
		}                              \
	} while (0)
109
110/**************************************************************************/
111
112static inline void
113inc_stats(ns_client_t *client, dns_zone_t *zone, isc_statscounter_t counter) {
114	ns_stats_increment(client->sctx->nsstats, counter);
115	if (zone != NULL) {
116		isc_stats_t *zonestats = dns_zone_getrequeststats(zone);
117		if (zonestats != NULL) {
118			isc_stats_increment(zonestats, counter);
119		}
120	}
121}
122
123/**************************************************************************/
124
125/*% Log an RR (for debugging) */
126
127static void
128log_rr(dns_name_t *name, dns_rdata_t *rdata, uint32_t ttl) {
129	isc_result_t result;
130	isc_buffer_t buf;
131	char mem[2000];
132	dns_rdatalist_t rdl;
133	dns_rdataset_t rds;
134	dns_rdata_t rd = DNS_RDATA_INIT;
135
136	dns_rdatalist_init(&rdl);
137	rdl.type = rdata->type;
138	rdl.rdclass = rdata->rdclass;
139	rdl.ttl = ttl;
140	if (rdata->type == dns_rdatatype_sig ||
141	    rdata->type == dns_rdatatype_rrsig) {
142		rdl.covers = dns_rdata_covers(rdata);
143	} else {
144		rdl.covers = dns_rdatatype_none;
145	}
146	dns_rdataset_init(&rds);
147	dns_rdata_init(&rd);
148	dns_rdata_clone(rdata, &rd);
149	ISC_LIST_APPEND(rdl.rdata, &rd, link);
150	RUNTIME_CHECK(dns_rdatalist_tordataset(&rdl, &rds) == ISC_R_SUCCESS);
151
152	isc_buffer_init(&buf, mem, sizeof(mem));
153	result = dns_rdataset_totext(&rds, name, false, false, &buf);
154
155	/*
156	 * We could use xfrout_log(), but that would produce
157	 * very long lines with a repetitive prefix.
158	 */
159	if (result == ISC_R_SUCCESS) {
160		/*
161		 * Get rid of final newline.
162		 */
163		INSIST(buf.used >= 1 &&
164		       ((char *)buf.base)[buf.used - 1] == '\n');
165		buf.used--;
166
167		isc_log_write(XFROUT_RR_LOGARGS, "%.*s",
168			      (int)isc_buffer_usedlength(&buf),
169			      (char *)isc_buffer_base(&buf));
170	} else {
171		isc_log_write(XFROUT_RR_LOGARGS, "<RR too large to print>");
172	}
173}
174
175/**************************************************************************/
176/*
177 * An 'rrstream_t' is a polymorphic iterator that returns
178 * a stream of resource records.  There are multiple implementations,
 * e.g. for generating AXFR and IXFR record streams.
180 */
181
typedef struct rrstream_methods rrstream_methods_t;

/*
 * Common header of every stream implementation in this file; each
 * concrete stream type embeds it as its first member ('common') so that
 * a pointer to the concrete type can be cast to 'rrstream_t *'.
 */
typedef struct rrstream {
	isc_mem_t *mctx; /* Attached at creation; used to free the stream */
	rrstream_methods_t *methods; /* Method table of the implementation */
} rrstream_t;

/*
 * Method table of a stream implementation.  The tables in this file are
 * initialised positionally, so the member order matters.
 */
struct rrstream_methods {
	/* Position on the first record. */
	isc_result_t (*first)(rrstream_t *);
	/* Advance to the next record. */
	isc_result_t (*next)(rrstream_t *);
	/* Retrieve owner name, TTL and rdata of the current record. */
	void (*current)(rrstream_t *, dns_name_t **, uint32_t *,
			dns_rdata_t **);
	/* Suspend iteration; a no-op for streams with nothing to release. */
	void (*pause)(rrstream_t *);
	/* Free the stream. */
	void (*destroy)(rrstream_t **);
};
197
/*
 * 'pause' method for stream types that have nothing to do when paused;
 * used by the IXFR and SOA method tables.
 */
static void
rrstream_noop_pause(rrstream_t *rs) {
	UNUSED(rs);
}
202
203/**************************************************************************/
204/*
205 * An 'ixfr_rrstream_t' is an 'rrstream_t' that returns
206 * an IXFR-like RR stream from a journal file.
207 *
 * The SOAs at the beginning of each sequence of additions
209 * or deletions are included in the stream, but the extra
210 * SOAs at the beginning and end of the entire transfer are
211 * not included.
212 */
213
typedef struct ixfr_rrstream {
	rrstream_t common;	/* Must be first: cast to/from rrstream_t */
	dns_journal_t *journal; /* Open journal, or NULL if not (yet) open */
} ixfr_rrstream_t;

/* Forward declarations. */
static void
ixfr_rrstream_destroy(rrstream_t **sp);

/* Tentative definition; the initialised table follows the methods. */
static rrstream_methods_t ixfr_rrstream_methods;
224
225/*
226 * Returns: anything dns_journal_open() or dns_journal_iter_init()
227 * may return.
228 */
229
230static isc_result_t
231ixfr_rrstream_create(isc_mem_t *mctx, const char *journal_filename,
232		     uint32_t begin_serial, uint32_t end_serial, size_t *sizep,
233		     rrstream_t **sp) {
234	isc_result_t result;
235	ixfr_rrstream_t *s = NULL;
236
237	INSIST(sp != NULL && *sp == NULL);
238
239	s = isc_mem_get(mctx, sizeof(*s));
240	s->common.mctx = NULL;
241	isc_mem_attach(mctx, &s->common.mctx);
242	s->common.methods = &ixfr_rrstream_methods;
243	s->journal = NULL;
244
245	CHECK(dns_journal_open(mctx, journal_filename, DNS_JOURNAL_READ,
246			       &s->journal));
247	CHECK(dns_journal_iter_init(s->journal, begin_serial, end_serial,
248				    sizep));
249
250	*sp = (rrstream_t *)s;
251	return (ISC_R_SUCCESS);
252
253failure:
254	ixfr_rrstream_destroy((rrstream_t **)(void *)&s);
255	return (result);
256}
257
258static isc_result_t
259ixfr_rrstream_first(rrstream_t *rs) {
260	ixfr_rrstream_t *s = (ixfr_rrstream_t *)rs;
261	return (dns_journal_first_rr(s->journal));
262}
263
264static isc_result_t
265ixfr_rrstream_next(rrstream_t *rs) {
266	ixfr_rrstream_t *s = (ixfr_rrstream_t *)rs;
267	return (dns_journal_next_rr(s->journal));
268}
269
270static void
271ixfr_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
272		      dns_rdata_t **rdata) {
273	ixfr_rrstream_t *s = (ixfr_rrstream_t *)rs;
274	dns_journal_current_rr(s->journal, name, ttl, rdata);
275}
276
277static void
278ixfr_rrstream_destroy(rrstream_t **rsp) {
279	ixfr_rrstream_t *s = (ixfr_rrstream_t *)*rsp;
280	if (s->journal != NULL) {
281		dns_journal_destroy(&s->journal);
282	}
283	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
284}
285
286static rrstream_methods_t ixfr_rrstream_methods = {
287	ixfr_rrstream_first, ixfr_rrstream_next, ixfr_rrstream_current,
288	rrstream_noop_pause, ixfr_rrstream_destroy
289};
290
291/**************************************************************************/
292/*
293 * An 'axfr_rrstream_t' is an 'rrstream_t' that returns
294 * an AXFR-like RR stream from a database.
295 *
296 * The SOAs at the beginning and end of the transfer are
297 * not included in the stream.
298 */
299
typedef struct axfr_rrstream {
	rrstream_t common;   /* Must be first: cast to/from rrstream_t */
	dns_rriterator_t it; /* Iterator over the RRs of the database */
	bool it_valid;	     /* True once 'it' has been initialised, so
			      * that destroy knows whether to tear it down */
} axfr_rrstream_t;

/*
 * Forward declarations.
 */
static void
axfr_rrstream_destroy(rrstream_t **rsp);

/* Tentative definition; the initialised table follows the methods. */
static rrstream_methods_t axfr_rrstream_methods;
313
314static isc_result_t
315axfr_rrstream_create(isc_mem_t *mctx, dns_db_t *db, dns_dbversion_t *ver,
316		     rrstream_t **sp) {
317	axfr_rrstream_t *s;
318	isc_result_t result;
319
320	INSIST(sp != NULL && *sp == NULL);
321
322	s = isc_mem_get(mctx, sizeof(*s));
323	s->common.mctx = NULL;
324	isc_mem_attach(mctx, &s->common.mctx);
325	s->common.methods = &axfr_rrstream_methods;
326	s->it_valid = false;
327
328	CHECK(dns_rriterator_init(&s->it, db, ver, 0));
329	s->it_valid = true;
330
331	*sp = (rrstream_t *)s;
332	return (ISC_R_SUCCESS);
333
334failure:
335	axfr_rrstream_destroy((rrstream_t **)(void *)&s);
336	return (result);
337}
338
339static isc_result_t
340axfr_rrstream_first(rrstream_t *rs) {
341	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
342	isc_result_t result;
343	result = dns_rriterator_first(&s->it);
344	if (result != ISC_R_SUCCESS) {
345		return (result);
346	}
347	/* Skip SOA records. */
348	for (;;) {
349		dns_name_t *name_dummy = NULL;
350		uint32_t ttl_dummy;
351		dns_rdata_t *rdata = NULL;
352		dns_rriterator_current(&s->it, &name_dummy, &ttl_dummy, NULL,
353				       &rdata);
354		if (rdata->type != dns_rdatatype_soa) {
355			break;
356		}
357		result = dns_rriterator_next(&s->it);
358		if (result != ISC_R_SUCCESS) {
359			break;
360		}
361	}
362	return (result);
363}
364
365static isc_result_t
366axfr_rrstream_next(rrstream_t *rs) {
367	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
368	isc_result_t result;
369
370	/* Skip SOA records. */
371	for (;;) {
372		dns_name_t *name_dummy = NULL;
373		uint32_t ttl_dummy;
374		dns_rdata_t *rdata = NULL;
375		result = dns_rriterator_next(&s->it);
376		if (result != ISC_R_SUCCESS) {
377			break;
378		}
379		dns_rriterator_current(&s->it, &name_dummy, &ttl_dummy, NULL,
380				       &rdata);
381		if (rdata->type != dns_rdatatype_soa) {
382			break;
383		}
384	}
385	return (result);
386}
387
388static void
389axfr_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
390		      dns_rdata_t **rdata) {
391	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
392	dns_rriterator_current(&s->it, name, ttl, NULL, rdata);
393}
394
395static void
396axfr_rrstream_pause(rrstream_t *rs) {
397	axfr_rrstream_t *s = (axfr_rrstream_t *)rs;
398	dns_rriterator_pause(&s->it);
399}
400
401static void
402axfr_rrstream_destroy(rrstream_t **rsp) {
403	axfr_rrstream_t *s = (axfr_rrstream_t *)*rsp;
404	if (s->it_valid) {
405		dns_rriterator_destroy(&s->it);
406	}
407	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
408}
409
410static rrstream_methods_t axfr_rrstream_methods = {
411	axfr_rrstream_first, axfr_rrstream_next, axfr_rrstream_current,
412	axfr_rrstream_pause, axfr_rrstream_destroy
413};
414
415/**************************************************************************/
416/*
417 * An 'soa_rrstream_t' is a degenerate 'rrstream_t' that returns
418 * a single SOA record.
419 */
420
typedef struct soa_rrstream {
	rrstream_t common;	    /* Must be first: cast to/from rrstream_t */
	dns_difftuple_t *soa_tuple; /* The SOA record returned by the stream;
				     * NULL until created */
} soa_rrstream_t;

/*
 * Forward declarations.
 */
static void
soa_rrstream_destroy(rrstream_t **rsp);

/* Tentative definition; the initialised table follows the methods. */
static rrstream_methods_t soa_rrstream_methods;
433
434static isc_result_t
435soa_rrstream_create(isc_mem_t *mctx, dns_db_t *db, dns_dbversion_t *ver,
436		    rrstream_t **sp) {
437	soa_rrstream_t *s;
438	isc_result_t result;
439
440	INSIST(sp != NULL && *sp == NULL);
441
442	s = isc_mem_get(mctx, sizeof(*s));
443	s->common.mctx = NULL;
444	isc_mem_attach(mctx, &s->common.mctx);
445	s->common.methods = &soa_rrstream_methods;
446	s->soa_tuple = NULL;
447
448	CHECK(dns_db_createsoatuple(db, ver, mctx, DNS_DIFFOP_EXISTS,
449				    &s->soa_tuple));
450
451	*sp = (rrstream_t *)s;
452	return (ISC_R_SUCCESS);
453
454failure:
455	soa_rrstream_destroy((rrstream_t **)(void *)&s);
456	return (result);
457}
458
/*
 * 'first' method: always succeeds, since the stream's single SOA record
 * was already fetched when the stream was created.
 */
static isc_result_t
soa_rrstream_first(rrstream_t *rs) {
	UNUSED(rs);
	return (ISC_R_SUCCESS);
}
464
/*
 * 'next' method: the stream holds exactly one record, so there is never
 * a next one.
 */
static isc_result_t
soa_rrstream_next(rrstream_t *rs) {
	UNUSED(rs);
	return (ISC_R_NOMORE);
}
470
471static void
472soa_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
473		     dns_rdata_t **rdata) {
474	soa_rrstream_t *s = (soa_rrstream_t *)rs;
475	*name = &s->soa_tuple->name;
476	*ttl = s->soa_tuple->ttl;
477	*rdata = &s->soa_tuple->rdata;
478}
479
480static void
481soa_rrstream_destroy(rrstream_t **rsp) {
482	soa_rrstream_t *s = (soa_rrstream_t *)*rsp;
483	if (s->soa_tuple != NULL) {
484		dns_difftuple_free(&s->soa_tuple);
485	}
486	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
487}
488
489static rrstream_methods_t soa_rrstream_methods = {
490	soa_rrstream_first, soa_rrstream_next, soa_rrstream_current,
491	rrstream_noop_pause, soa_rrstream_destroy
492};
493
494/**************************************************************************/
495/*
 * A 'compound_rrstream_t' object owns a soa_rrstream
497 * and another rrstream, the "data stream".  It returns
498 * a concatenated stream consisting of the soa_rrstream, then
499 * the data stream, then the soa_rrstream again.
500 *
501 * The component streams are owned by the compound_rrstream_t
502 * and are destroyed with it.
503 */
504
typedef struct compound_rrstream {
	rrstream_t common;	   /* Must be first: cast to/from rrstream_t */
	rrstream_t *components[3]; /* [0] SOA stream, [1] data stream,
				    * [2] the same pointer as [0] */
	int state;		   /* Index of the component currently being
				    * read; -1 until first() has been called */
	isc_result_t result;	   /* Result of the most recent first()/next()
				    * call on a component */
} compound_rrstream_t;

/*
 * Forward declarations.
 */
static void
compound_rrstream_destroy(rrstream_t **rsp);

static isc_result_t
compound_rrstream_next(rrstream_t *rs);

/* Tentative definition; the initialised table follows the methods. */
static rrstream_methods_t compound_rrstream_methods;
522
523/*
524 * Requires:
525 *	soa_stream != NULL && *soa_stream != NULL
526 *	data_stream != NULL && *data_stream != NULL
527 *	sp != NULL && *sp == NULL
528 *
529 * Ensures:
530 *	*soa_stream == NULL
531 *	*data_stream == NULL
532 *	*sp points to a valid compound_rrstream_t
533 *	The soa and data streams will be destroyed
534 *	when the compound_rrstream_t is destroyed.
535 */
536static isc_result_t
537compound_rrstream_create(isc_mem_t *mctx, rrstream_t **soa_stream,
538			 rrstream_t **data_stream, rrstream_t **sp) {
539	compound_rrstream_t *s;
540
541	INSIST(sp != NULL && *sp == NULL);
542
543	s = isc_mem_get(mctx, sizeof(*s));
544	s->common.mctx = NULL;
545	isc_mem_attach(mctx, &s->common.mctx);
546	s->common.methods = &compound_rrstream_methods;
547	s->components[0] = *soa_stream;
548	s->components[1] = *data_stream;
549	s->components[2] = *soa_stream;
550	s->state = -1;
551	s->result = ISC_R_FAILURE;
552
553	*data_stream = NULL;
554	*soa_stream = NULL;
555	*sp = (rrstream_t *)s;
556	return (ISC_R_SUCCESS);
557}
558
559static isc_result_t
560compound_rrstream_first(rrstream_t *rs) {
561	compound_rrstream_t *s = (compound_rrstream_t *)rs;
562	s->state = 0;
563	do {
564		rrstream_t *curstream = s->components[s->state];
565		s->result = curstream->methods->first(curstream);
566	} while (s->result == ISC_R_NOMORE && s->state < 2);
567	return (s->result);
568}
569
570static isc_result_t
571compound_rrstream_next(rrstream_t *rs) {
572	compound_rrstream_t *s = (compound_rrstream_t *)rs;
573	rrstream_t *curstream = s->components[s->state];
574	s->result = curstream->methods->next(curstream);
575	while (s->result == ISC_R_NOMORE) {
576		/*
577		 * Make sure locks held by the current stream
578		 * are released before we switch streams.
579		 */
580		curstream->methods->pause(curstream);
581		if (s->state == 2) {
582			return (ISC_R_NOMORE);
583		}
584		s->state++;
585		curstream = s->components[s->state];
586		s->result = curstream->methods->first(curstream);
587	}
588	return (s->result);
589}
590
591static void
592compound_rrstream_current(rrstream_t *rs, dns_name_t **name, uint32_t *ttl,
593			  dns_rdata_t **rdata) {
594	compound_rrstream_t *s = (compound_rrstream_t *)rs;
595	rrstream_t *curstream;
596	INSIST(0 <= s->state && s->state < 3);
597	INSIST(s->result == ISC_R_SUCCESS);
598	curstream = s->components[s->state];
599	curstream->methods->current(curstream, name, ttl, rdata);
600}
601
602static void
603compound_rrstream_pause(rrstream_t *rs) {
604	compound_rrstream_t *s = (compound_rrstream_t *)rs;
605	rrstream_t *curstream;
606	INSIST(0 <= s->state && s->state < 3);
607	curstream = s->components[s->state];
608	curstream->methods->pause(curstream);
609}
610
611static void
612compound_rrstream_destroy(rrstream_t **rsp) {
613	compound_rrstream_t *s = (compound_rrstream_t *)*rsp;
614	s->components[0]->methods->destroy(&s->components[0]);
615	s->components[1]->methods->destroy(&s->components[1]);
616	s->components[2] = NULL; /* Copy of components[0]. */
617	isc_mem_putanddetach(&s->common.mctx, s, sizeof(*s));
618}
619
620static rrstream_methods_t compound_rrstream_methods = {
621	compound_rrstream_first, compound_rrstream_next,
622	compound_rrstream_current, compound_rrstream_pause,
623	compound_rrstream_destroy
624};
625
626/**************************************************************************/
627
/*%
 * Structure holding outgoing transfer statistics.  One instance is
 * embedded in each xfrout_ctx_t as its 'stats' member.
 */
struct xfr_stats {
	uint64_t nmsg;	  /*%< Number of messages sent */
	uint64_t nrecs;	  /*%< Number of records sent */
	uint64_t nbytes;  /*%< Number of bytes sent */
	isc_time_t start; /*%< Start time of the transfer */
	isc_time_t end;	  /*%< End time of the transfer */
};
638
/*%
 * An 'xfrout_ctx_t' contains the state of an outgoing AXFR or IXFR
 * in progress.
 *
 * ns_xfr_start() hands ownership of "stream", "db", "ver" and "quota"
 * to this object when it creates it.
 */
typedef struct {
	isc_mem_t *mctx;
	ns_client_t *client;
	unsigned int id;       /* ID of request */
	dns_name_t *qname;     /* Question name of request */
	dns_rdatatype_t qtype; /* dns_rdatatype_{a,i}xfr */
	dns_rdataclass_t qclass;
	dns_zone_t *zone; /* (necessary for stats) */
	dns_db_t *db;
	dns_dbversion_t *ver;
	isc_quota_t *quota;
	rrstream_t *stream;  /* The XFR RR stream */
	bool question_added; /* QUESTION section sent? */
	bool end_of_stream;  /* EOS has been reached */
	isc_buffer_t buf;    /* Buffer for message owner
			      * names and rdatas */
	isc_buffer_t txbuf;  /* Transmit message buffer */
	size_t cbytes;	     /* Length of current message */
	void *txmem; /* NOTE(review): presumably the memory backing
		      * 'buf'/'txbuf' -- confirm in xfrout_ctx_create() */
	unsigned int txmemlen; /* NOTE(review): presumably the size of
				* 'txmem' -- confirm */
	dns_tsigkey_t *tsigkey; /* Key used to create TSIG */
	isc_buffer_t *lasttsig; /* the last TSIG */
	bool verified_tsig;	/* verified request MAC */
	bool many_answers; /* NOTE(review): presumably selects the
			    * many-answers transfer format -- confirm */
	int sends; /* Send in progress */
	bool shuttingdown;
	bool poll;
	const char *mnemonic;	/* Style of transfer */
	uint32_t end_serial;	/* Serial number after XFR is done */
	struct xfr_stats stats; /*%< Transfer statistics */
} xfrout_ctx_t;
674
/*
 * Forward declarations for the transfer-context functions; none of
 * them is defined in this part of the file.
 */

/* Create a transfer context and store it in '*xfrp'. */
static void
xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id,
		  dns_name_t *qname, dns_rdatatype_t qtype,
		  dns_rdataclass_t qclass, dns_zone_t *zone, dns_db_t *db,
		  dns_dbversion_t *ver, isc_quota_t *quota, rrstream_t *stream,
		  dns_tsigkey_t *tsigkey, isc_buffer_t *lasttsig,
		  bool verified_tsig, unsigned int maxtime,
		  unsigned int idletime, bool many_answers,
		  xfrout_ctx_t **xfrp);

static void
sendstream(xfrout_ctx_t *xfr);

/* NOTE(review): signature suggests a send-completion callback -- confirm. */
static void
xfrout_senddone(isc_nmhandle_t *handle, isc_result_t result, void *arg);

static void
xfrout_fail(xfrout_ctx_t *xfr, isc_result_t result, const char *msg);

static void
xfrout_maybe_destroy(xfrout_ctx_t *xfr);

static void
xfrout_ctx_destroy(xfrout_ctx_t **xfrp);

static void
xfrout_client_shutdown(void *arg, isc_result_t result);

/*
 * printf-style logging helpers; the format string is argument 5 (resp. 3)
 * and the variable arguments start at 6 (resp. 4).
 */
static void
xfrout_log1(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
	    int level, const char *fmt, ...) ISC_FORMAT_PRINTF(5, 6);

static void
xfrout_log(xfrout_ctx_t *xfr, int level, const char *fmt, ...)
	ISC_FORMAT_PRINTF(3, 4);
710
711/**************************************************************************/
712
713void
714ns_xfr_start(ns_client_t *client, dns_rdatatype_t reqtype) {
715	isc_result_t result;
716	dns_name_t *question_name;
717	dns_rdataset_t *question_rdataset;
718	dns_zone_t *zone = NULL, *raw = NULL, *mayberaw;
719	dns_db_t *db = NULL;
720	dns_dbversion_t *ver = NULL;
721	dns_rdataclass_t question_class;
722	rrstream_t *soa_stream = NULL;
723	rrstream_t *data_stream = NULL;
724	rrstream_t *stream = NULL;
725	dns_difftuple_t *current_soa_tuple = NULL;
726	dns_name_t *soa_name;
727	dns_rdataset_t *soa_rdataset;
728	dns_rdata_t soa_rdata = DNS_RDATA_INIT;
729	bool have_soa = false;
730	const char *mnemonic = NULL;
731	isc_mem_t *mctx = client->mctx;
732	dns_message_t *request = client->message;
733	xfrout_ctx_t *xfr = NULL;
734	isc_quota_t *quota = NULL;
735	dns_transfer_format_t format = client->view->transfer_format;
736	isc_netaddr_t na;
737	dns_peer_t *peer = NULL;
738	isc_buffer_t *tsigbuf = NULL;
739	char *journalfile;
740	char msg[NS_CLIENT_ACLMSGSIZE("zone transfer")];
741	char keyname[DNS_NAME_FORMATSIZE];
742	bool is_poll = false;
743	bool is_dlz = false;
744	bool is_ixfr = false;
745	bool useviewacl = false;
746	uint32_t begin_serial = 0, current_serial;
747
748	switch (reqtype) {
749	case dns_rdatatype_axfr:
750		mnemonic = "AXFR";
751		break;
752	case dns_rdatatype_ixfr:
753		mnemonic = "IXFR";
754		break;
755	default:
756		INSIST(0);
757		ISC_UNREACHABLE();
758	}
759
760	ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT, NS_LOGMODULE_XFER_OUT,
761		      ISC_LOG_DEBUG(6), "%s request", mnemonic);
762	/*
763	 * Apply quota.
764	 */
765	result = isc_quota_attach(&client->sctx->xfroutquota, &quota);
766	if (result != ISC_R_SUCCESS) {
767		isc_log_write(XFROUT_COMMON_LOGARGS, ISC_LOG_WARNING,
768			      "%s request denied: %s", mnemonic,
769			      isc_result_totext(result));
770		goto failure;
771	}
772
773	/*
774	 * Interpret the question section.
775	 */
776	result = dns_message_firstname(request, DNS_SECTION_QUESTION);
777	INSIST(result == ISC_R_SUCCESS);
778
779	/*
780	 * The question section must contain exactly one question, and
781	 * it must be for AXFR/IXFR as appropriate.
782	 */
783	question_name = NULL;
784	dns_message_currentname(request, DNS_SECTION_QUESTION, &question_name);
785	question_rdataset = ISC_LIST_HEAD(question_name->list);
786	question_class = question_rdataset->rdclass;
787	INSIST(question_rdataset->type == reqtype);
788	if (ISC_LIST_NEXT(question_rdataset, link) != NULL) {
789		FAILC(DNS_R_FORMERR, "multiple questions");
790	}
791	result = dns_message_nextname(request, DNS_SECTION_QUESTION);
792	if (result != ISC_R_NOMORE) {
793		FAILC(DNS_R_FORMERR, "multiple questions");
794	}
795
796	result = dns_zt_find(client->view->zonetable, question_name, 0, NULL,
797			     &zone);
798
799	if (result != ISC_R_SUCCESS || dns_zone_gettype(zone) == dns_zone_dlz) {
800		/*
801		 * The normal zone table does not have a match, or this is
802		 * marked in the zone table as a DLZ zone. Check the DLZ
803		 * databases for a match.
804		 */
805		if (!ISC_LIST_EMPTY(client->view->dlz_searched)) {
806			result = dns_dlzallowzonexfr(client->view,
807						     question_name,
808						     &client->peeraddr, &db);
809			if (result == ISC_R_DEFAULT) {
810				useviewacl = true;
811				result = ISC_R_SUCCESS;
812			}
813			if (result == ISC_R_NOPERM) {
814				char _buf1[DNS_NAME_FORMATSIZE];
815				char _buf2[DNS_RDATACLASS_FORMATSIZE];
816
817				result = DNS_R_REFUSED;
818				dns_name_format(question_name, _buf1,
819						sizeof(_buf1));
820				dns_rdataclass_format(question_class, _buf2,
821						      sizeof(_buf2));
822				ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
823					      NS_LOGMODULE_XFER_OUT,
824					      ISC_LOG_ERROR,
825					      "zone transfer '%s/%s' denied",
826					      _buf1, _buf2);
827				pfilter_notify(result, client, "zonexfr");
828				goto failure;
829			}
830			if (result != ISC_R_SUCCESS) {
831				pfilter_notify(result, client, "zonexfr");
832				FAILQ(DNS_R_NOTAUTH, "non-authoritative zone",
833				      question_name, question_class);
834			}
835			is_dlz = true;
836		} else {
837			/*
838			 * not DLZ and not in normal zone table, we are
839			 * not authoritative
840			 */
841			FAILQ(DNS_R_NOTAUTH, "non-authoritative zone",
842			      question_name, question_class);
843		}
844	} else {
845		/* zone table has a match */
846		switch (dns_zone_gettype(zone)) {
847		/*
848		 * Master, slave, and mirror zones are OK for transfer.
849		 */
850		case dns_zone_master:
851		case dns_zone_slave:
852		case dns_zone_mirror:
853		case dns_zone_dlz:
854			break;
855		default:
856			FAILQ(DNS_R_NOTAUTH, "non-authoritative zone",
857			      question_name, question_class);
858		}
859		CHECK(dns_zone_getdb(zone, &db));
860		dns_db_currentversion(db, &ver);
861	}
862
863	xfrout_log1(client, question_name, question_class, ISC_LOG_DEBUG(6),
864		    "%s question section OK", mnemonic);
865
866	/*
867	 * Check the authority section.  Look for a SOA record with
868	 * the same name and class as the question.
869	 */
870	for (result = dns_message_firstname(request, DNS_SECTION_AUTHORITY);
871	     result == ISC_R_SUCCESS;
872	     result = dns_message_nextname(request, DNS_SECTION_AUTHORITY))
873	{
874		soa_name = NULL;
875		dns_message_currentname(request, DNS_SECTION_AUTHORITY,
876					&soa_name);
877
878		/*
879		 * Ignore data whose owner name is not the zone apex.
880		 */
881		if (!dns_name_equal(soa_name, question_name)) {
882			continue;
883		}
884
885		for (soa_rdataset = ISC_LIST_HEAD(soa_name->list);
886		     soa_rdataset != NULL;
887		     soa_rdataset = ISC_LIST_NEXT(soa_rdataset, link))
888		{
889			/*
890			 * Ignore non-SOA data.
891			 */
892			if (soa_rdataset->type != dns_rdatatype_soa) {
893				continue;
894			}
895			if (soa_rdataset->rdclass != question_class) {
896				continue;
897			}
898
899			CHECK(dns_rdataset_first(soa_rdataset));
900			dns_rdataset_current(soa_rdataset, &soa_rdata);
901			result = dns_rdataset_next(soa_rdataset);
902			if (result == ISC_R_SUCCESS) {
903				FAILC(DNS_R_FORMERR, "IXFR authority section "
904						     "has multiple SOAs");
905			}
906			have_soa = true;
907			goto got_soa;
908		}
909	}
910got_soa:
911	if (result != ISC_R_NOMORE) {
912		CHECK(result);
913	}
914
915	xfrout_log1(client, question_name, question_class, ISC_LOG_DEBUG(6),
916		    "%s authority section OK", mnemonic);
917
918	/*
919	 * If not a DLZ zone or we are falling back to the view's transfer
920	 * ACL, decide whether to allow this transfer.
921	 */
922	if (!is_dlz || useviewacl) {
923		dns_acl_t *acl;
924
925		ns_client_aclmsg("zone transfer", question_name, reqtype,
926				 client->view->rdclass, msg, sizeof(msg));
927		if (useviewacl) {
928			acl = client->view->transferacl;
929		} else {
930			acl = dns_zone_getxfracl(zone);
931		}
932		CHECK(ns_client_checkacl(client, NULL, msg, acl, true,
933					 ISC_LOG_ERROR));
934	}
935
936	/*
937	 * AXFR over UDP is not possible.
938	 */
939	if (reqtype == dns_rdatatype_axfr &&
940	    (client->attributes & NS_CLIENTATTR_TCP) == 0)
941	{
942		FAILC(DNS_R_FORMERR, "attempted AXFR over UDP");
943	}
944
945	/*
946	 * Look up the requesting server in the peer table.
947	 */
948	isc_netaddr_fromsockaddr(&na, &client->peeraddr);
949	(void)dns_peerlist_peerbyaddr(client->view->peers, &na, &peer);
950
951	/*
952	 * Decide on the transfer format (one-answer or many-answers).
953	 */
954	if (peer != NULL) {
955		(void)dns_peer_gettransferformat(peer, &format);
956	}
957
958	/*
959	 * Get a dynamically allocated copy of the current SOA.
960	 */
961	if (is_dlz) {
962		dns_db_currentversion(db, &ver);
963	}
964
965	CHECK(dns_db_createsoatuple(db, ver, mctx, DNS_DIFFOP_EXISTS,
966				    &current_soa_tuple));
967
968	current_serial = dns_soa_getserial(&current_soa_tuple->rdata);
969	if (reqtype == dns_rdatatype_ixfr) {
970		size_t jsize;
971		uint64_t dbsize;
972
973		/*
974		 * Outgoing IXFR may have been disabled for this peer
975		 * or globally.
976		 */
977		if ((client->attributes & NS_CLIENTATTR_TCP) != 0) {
978			bool provide_ixfr;
979
980			provide_ixfr = client->view->provideixfr;
981			if (peer != NULL) {
982				(void)dns_peer_getprovideixfr(peer,
983							      &provide_ixfr);
984			}
985			if (provide_ixfr == false) {
986				goto axfr_fallback;
987			}
988		}
989
990		if (!have_soa) {
991			FAILC(DNS_R_FORMERR, "IXFR request missing SOA");
992		}
993
994		begin_serial = dns_soa_getserial(&soa_rdata);
995
996		/*
997		 * RFC1995 says "If an IXFR query with the same or
998		 * newer version number than that of the server
999		 * is received, it is replied to with a single SOA
1000		 * record of the server's current version, just as
1001		 * in AXFR".  The claim about AXFR is incorrect,
1002		 * but other than that, we do as the RFC says.
1003		 *
1004		 * Sending a single SOA record is also how we refuse
1005		 * IXFR over UDP (currently, we always do).
1006		 */
1007		if (DNS_SERIAL_GE(begin_serial, current_serial) ||
1008		    (client->attributes & NS_CLIENTATTR_TCP) == 0)
1009		{
1010			CHECK(soa_rrstream_create(mctx, db, ver, &stream));
1011			is_poll = true;
1012			goto have_stream;
1013		}
1014
1015		/*
1016		 * Outgoing IXFR may have been disabled for this peer
1017		 * or globally.
1018		 */
1019		if ((client->attributes & NS_CLIENTATTR_TCP) != 0) {
1020			bool provide_ixfr;
1021
1022			provide_ixfr = client->view->provideixfr;
1023			if (peer != NULL) {
1024				(void)dns_peer_getprovideixfr(peer,
1025							      &provide_ixfr);
1026			}
1027			if (!provide_ixfr) {
1028				xfrout_log1(client, question_name,
1029					    question_class, ISC_LOG_DEBUG(4),
1030					    "IXFR delta response disabled due "
1031					    "to 'provide-ixfr no;' being set");
1032				mnemonic = "AXFR-style IXFR";
1033				goto axfr_fallback;
1034			}
1035		}
1036
1037		journalfile = is_dlz ? NULL : dns_zone_getjournal(zone);
1038		if (journalfile != NULL) {
1039			result = ixfr_rrstream_create(
1040				mctx, journalfile, begin_serial, current_serial,
1041				&jsize, &data_stream);
1042		} else {
1043			result = ISC_R_NOTFOUND;
1044		}
1045		if (result == ISC_R_NOTFOUND || result == ISC_R_RANGE) {
1046			xfrout_log1(client, question_name, question_class,
1047				    ISC_LOG_DEBUG(4),
1048				    "IXFR version not in journal, "
1049				    "falling back to AXFR");
1050			mnemonic = "AXFR-style IXFR";
1051			goto axfr_fallback;
1052		}
1053		CHECK(result);
1054
1055		result = dns_db_getsize(db, ver, NULL, &dbsize);
1056		if (result == ISC_R_SUCCESS) {
1057			uint32_t ratio = dns_zone_getixfrratio(zone);
1058			if (ratio != 0 && ((100 * jsize) / dbsize) > ratio) {
1059				data_stream->methods->destroy(&data_stream);
1060				data_stream = NULL;
1061				xfrout_log1(client, question_name,
1062					    question_class, ISC_LOG_DEBUG(4),
1063					    "IXFR delta size (%zu bytes) "
1064					    "exceeds the maximum ratio to "
1065					    "database size "
1066					    "(%" PRIu64 " bytes), "
1067					    "falling back to AXFR",
1068					    jsize, dbsize);
1069				mnemonic = "AXFR-style IXFR";
1070				goto axfr_fallback;
1071			} else {
1072				xfrout_log1(client, question_name,
1073					    question_class, ISC_LOG_DEBUG(4),
1074					    "IXFR delta size (%zu bytes); "
1075					    "database size "
1076					    "(%" PRIu64 " bytes)",
1077					    jsize, dbsize);
1078			}
1079		}
1080		is_ixfr = true;
1081	} else {
1082	axfr_fallback:
1083		CHECK(axfr_rrstream_create(mctx, db, ver, &data_stream));
1084	}
1085
1086	/*
1087	 * Bracket the data stream with SOAs.
1088	 */
1089	CHECK(soa_rrstream_create(mctx, db, ver, &soa_stream));
1090	CHECK(compound_rrstream_create(mctx, &soa_stream, &data_stream,
1091				       &stream));
1092	soa_stream = NULL;
1093	data_stream = NULL;
1094
1095have_stream:
1096	CHECK(dns_message_getquerytsig(request, mctx, &tsigbuf));
1097	/*
1098	 * Create the xfrout context object.  This transfers the ownership
1099	 * of "stream", "db", "ver", and "quota" to the xfrout context object.
1100	 */
1101
1102	if (is_dlz) {
1103		xfrout_ctx_create(mctx, client, request->id, question_name,
1104				  reqtype, question_class, zone, db, ver, quota,
1105				  stream, dns_message_gettsigkey(request),
1106				  tsigbuf, request->verified_sig, 3600, 3600,
1107				  (format == dns_many_answers) ? true : false,
1108				  &xfr);
1109	} else {
1110		xfrout_ctx_create(
1111			mctx, client, request->id, question_name, reqtype,
1112			question_class, zone, db, ver, quota, stream,
1113			dns_message_gettsigkey(request), tsigbuf,
1114			request->verified_sig, dns_zone_getmaxxfrout(zone),
1115			dns_zone_getidleout(zone),
1116			(format == dns_many_answers) ? true : false, &xfr);
1117	}
1118
1119	xfr->end_serial = current_serial;
1120	xfr->mnemonic = mnemonic;
1121	stream = NULL;
1122	quota = NULL;
1123
1124	CHECK(xfr->stream->methods->first(xfr->stream));
1125
1126	if (xfr->tsigkey != NULL) {
1127		dns_name_format(&xfr->tsigkey->name, keyname, sizeof(keyname));
1128	} else {
1129		keyname[0] = '\0';
1130	}
1131	xfr->poll = is_poll;
1132	if (is_poll) {
1133		xfr->mnemonic = "IXFR poll response";
1134		xfrout_log1(client, question_name, question_class,
1135			    ISC_LOG_DEBUG(1), "IXFR poll up to date%s%s",
1136			    (xfr->tsigkey != NULL) ? ": TSIG " : "", keyname);
1137	} else if (is_ixfr) {
1138		xfrout_log1(client, question_name, question_class, ISC_LOG_INFO,
1139			    "%s started%s%s (serial %u -> %u)", mnemonic,
1140			    (xfr->tsigkey != NULL) ? ": TSIG " : "", keyname,
1141			    begin_serial, current_serial);
1142	} else {
1143		xfrout_log1(client, question_name, question_class, ISC_LOG_INFO,
1144			    "%s started%s%s (serial %u)", mnemonic,
1145			    (xfr->tsigkey != NULL) ? ": TSIG " : "", keyname,
1146			    current_serial);
1147	}
1148
1149	if (zone != NULL) {
1150		dns_zone_getraw(zone, &raw);
1151		mayberaw = (raw != NULL) ? raw : zone;
1152		if ((client->attributes & NS_CLIENTATTR_WANTEXPIRE) != 0 &&
1153		    (dns_zone_gettype(mayberaw) == dns_zone_slave ||
1154		     dns_zone_gettype(mayberaw) == dns_zone_mirror))
1155		{
1156			isc_time_t expiretime;
1157			uint32_t secs;
1158			dns_zone_getexpiretime(zone, &expiretime);
1159			secs = isc_time_seconds(&expiretime);
1160			if (secs >= client->now && result == ISC_R_SUCCESS) {
1161				client->attributes |= NS_CLIENTATTR_HAVEEXPIRE;
1162				client->expire = secs - client->now;
1163			}
1164		}
1165		if (raw != NULL) {
1166			dns_zone_detach(&raw);
1167		}
1168	}
1169
1170	/*
1171	 * Hand the context over to sendstream().  Set xfr to NULL;
1172	 * sendstream() is responsible for either passing the
1173	 * context on to a later event handler or destroying it.
1174	 */
1175	sendstream(xfr);
1176	xfr = NULL;
1177
1178	result = ISC_R_SUCCESS;
1179
1180failure:
1181	if (result == DNS_R_REFUSED) {
1182		inc_stats(client, zone, ns_statscounter_xfrrej);
1183	}
1184	if (quota != NULL) {
1185		isc_quota_detach(&quota);
1186	}
1187	if (current_soa_tuple != NULL) {
1188		dns_difftuple_free(&current_soa_tuple);
1189	}
1190	if (stream != NULL) {
1191		stream->methods->destroy(&stream);
1192	}
1193	if (soa_stream != NULL) {
1194		soa_stream->methods->destroy(&soa_stream);
1195	}
1196	if (data_stream != NULL) {
1197		data_stream->methods->destroy(&data_stream);
1198	}
1199	if (ver != NULL) {
1200		dns_db_closeversion(db, &ver, false);
1201	}
1202	if (db != NULL) {
1203		dns_db_detach(&db);
1204	}
1205	if (zone != NULL) {
1206		dns_zone_detach(&zone);
1207	}
1208	/* XXX kludge */
1209	if (xfr != NULL) {
1210		xfrout_fail(xfr, result, "setting up zone transfer");
1211	} else if (result != ISC_R_SUCCESS) {
1212		ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT,
1213			      NS_LOGMODULE_XFER_OUT, ISC_LOG_DEBUG(3),
1214			      "zone transfer setup failed");
1215		ns_client_error(client, result);
1216		isc_nmhandle_detach(&client->reqhandle);
1217	}
1218}
1219
1220static void
1221xfrout_ctx_create(isc_mem_t *mctx, ns_client_t *client, unsigned int id,
1222		  dns_name_t *qname, dns_rdatatype_t qtype,
1223		  dns_rdataclass_t qclass, dns_zone_t *zone, dns_db_t *db,
1224		  dns_dbversion_t *ver, isc_quota_t *quota, rrstream_t *stream,
1225		  dns_tsigkey_t *tsigkey, isc_buffer_t *lasttsig,
1226		  bool verified_tsig, unsigned int maxtime,
1227		  unsigned int idletime, bool many_answers,
1228		  xfrout_ctx_t **xfrp) {
1229	xfrout_ctx_t *xfr;
1230	unsigned int len;
1231	void *mem;
1232
1233	REQUIRE(xfrp != NULL && *xfrp == NULL);
1234
1235	UNUSED(maxtime);
1236	UNUSED(idletime);
1237
1238	xfr = isc_mem_get(mctx, sizeof(*xfr));
1239	xfr->mctx = NULL;
1240	isc_mem_attach(mctx, &xfr->mctx);
1241	xfr->client = client;
1242	xfr->id = id;
1243	xfr->qname = qname;
1244	xfr->qtype = qtype;
1245	xfr->qclass = qclass;
1246	xfr->zone = NULL;
1247	xfr->db = NULL;
1248	xfr->ver = NULL;
1249	if (zone != NULL) { /* zone will be NULL if it's DLZ */
1250		dns_zone_attach(zone, &xfr->zone);
1251	}
1252	dns_db_attach(db, &xfr->db);
1253	dns_db_attachversion(db, ver, &xfr->ver);
1254	xfr->question_added = false;
1255	xfr->end_of_stream = false;
1256	xfr->tsigkey = tsigkey;
1257	xfr->lasttsig = lasttsig;
1258	xfr->verified_tsig = verified_tsig;
1259	xfr->many_answers = many_answers;
1260	xfr->sends = 0;
1261	xfr->shuttingdown = false;
1262	xfr->poll = false;
1263	xfr->mnemonic = NULL;
1264	xfr->buf.base = NULL;
1265	xfr->buf.length = 0;
1266	xfr->txmem = NULL;
1267	xfr->txmemlen = 0;
1268	xfr->stream = NULL;
1269	xfr->quota = NULL;
1270
1271	xfr->stats.nmsg = 0;
1272	xfr->stats.nrecs = 0;
1273	xfr->stats.nbytes = 0;
1274	isc_time_now(&xfr->stats.start);
1275
1276	/*
1277	 * Allocate a temporary buffer for the uncompressed response
1278	 * message data.  The size should be no more than 65535 bytes
1279	 * so that the compressed data will fit in a TCP message,
1280	 * and no less than 65535 bytes so that an almost maximum-sized
1281	 * RR will fit.  Note that although 65535-byte RRs are allowed
1282	 * in principle, they cannot be zone-transferred (at least not
1283	 * if uncompressible), because the message and RR headers would
1284	 * push the size of the TCP message over the 65536 byte limit.
1285	 */
1286	len = 65535;
1287	mem = isc_mem_get(mctx, len);
1288	isc_buffer_init(&xfr->buf, mem, len);
1289
1290	/*
1291	 * Allocate another temporary buffer for the compressed
1292	 * response message.
1293	 */
1294	len = NS_CLIENT_TCP_BUFFER_SIZE;
1295	mem = isc_mem_get(mctx, len);
1296	isc_buffer_init(&xfr->txbuf, (char *)mem, len);
1297	xfr->txmem = mem;
1298	xfr->txmemlen = len;
1299
1300	/*
1301	 * Register a shutdown callback with the client, so that we
1302	 * can stop the transfer immediately when the client task
1303	 * gets a shutdown event.
1304	 */
1305	xfr->client->shutdown = xfrout_client_shutdown;
1306	xfr->client->shutdown_arg = xfr;
1307	/*
1308	 * These MUST be after the last "goto failure;" / CHECK to
1309	 * prevent a double free by the caller.
1310	 */
1311	xfr->quota = quota;
1312	xfr->stream = stream;
1313
1314	*xfrp = xfr;
1315}
1316
1317/*
1318 * Arrange to send as much as we can of "stream" without blocking.
1319 *
1320 * Requires:
1321 *	The stream iterator is initialized and points at an RR,
1322 *      or possibly at the end of the stream (that is, the
1323 *      _first method of the iterator has been called).
1324 */
1325static void
1326sendstream(xfrout_ctx_t *xfr) {
1327	dns_message_t *tcpmsg = NULL;
1328	dns_message_t *msg = NULL; /* Client message if UDP, tcpmsg if TCP */
1329	isc_result_t result;
1330	dns_rdataset_t *qrdataset;
1331	dns_name_t *msgname = NULL;
1332	dns_rdata_t *msgrdata = NULL;
1333	dns_rdatalist_t *msgrdl = NULL;
1334	dns_rdataset_t *msgrds = NULL;
1335	dns_compress_t cctx;
1336	bool cleanup_cctx = false;
1337	bool is_tcp;
1338	int n_rrs;
1339
1340	isc_buffer_clear(&xfr->buf);
1341	isc_buffer_clear(&xfr->txbuf);
1342
1343	is_tcp = ((xfr->client->attributes & NS_CLIENTATTR_TCP) != 0);
1344	if (!is_tcp) {
1345		/*
1346		 * In the UDP case, we put the response data directly into
1347		 * the client message.
1348		 */
1349		msg = xfr->client->message;
1350		CHECK(dns_message_reply(msg, true));
1351	} else {
1352		/*
1353		 * TCP. Build a response dns_message_t, temporarily storing
1354		 * the raw, uncompressed owner names and RR data contiguously
1355		 * in xfr->buf.  We know that if the uncompressed data fits
1356		 * in xfr->buf, the compressed data will surely fit in a TCP
1357		 * message.
1358		 */
1359
1360		dns_message_create(xfr->mctx, DNS_MESSAGE_INTENTRENDER,
1361				   &tcpmsg);
1362		msg = tcpmsg;
1363
1364		msg->id = xfr->id;
1365		msg->rcode = dns_rcode_noerror;
1366		msg->flags = DNS_MESSAGEFLAG_QR | DNS_MESSAGEFLAG_AA;
1367		if ((xfr->client->attributes & NS_CLIENTATTR_RA) != 0) {
1368			msg->flags |= DNS_MESSAGEFLAG_RA;
1369		}
1370		CHECK(dns_message_settsigkey(msg, xfr->tsigkey));
1371		CHECK(dns_message_setquerytsig(msg, xfr->lasttsig));
1372		if (xfr->lasttsig != NULL) {
1373			isc_buffer_free(&xfr->lasttsig);
1374		}
1375		msg->verified_sig = xfr->verified_tsig;
1376
1377		/*
1378		 * Add a EDNS option to the message?
1379		 */
1380		if ((xfr->client->attributes & NS_CLIENTATTR_WANTOPT) != 0) {
1381			dns_rdataset_t *opt = NULL;
1382
1383			CHECK(ns_client_addopt(xfr->client, msg, &opt));
1384			CHECK(dns_message_setopt(msg, opt));
1385			/*
1386			 * Add to first message only.
1387			 */
1388			xfr->client->attributes &= ~NS_CLIENTATTR_WANTNSID;
1389			xfr->client->attributes &= ~NS_CLIENTATTR_HAVEEXPIRE;
1390		}
1391
1392		/*
1393		 * Account for reserved space.
1394		 */
1395		if (xfr->tsigkey != NULL) {
1396			INSIST(msg->reserved != 0U);
1397		}
1398		isc_buffer_add(&xfr->buf, msg->reserved);
1399
1400		/*
1401		 * Include a question section in the first message only.
1402		 * BIND 8.2.1 will not recognize an IXFR if it does not
1403		 * have a question section.
1404		 */
1405		if (!xfr->question_added) {
1406			dns_name_t *qname = NULL;
1407			isc_region_t r;
1408
1409			/*
1410			 * Reserve space for the 12-byte message header
1411			 * and 4 bytes of question.
1412			 */
1413			isc_buffer_add(&xfr->buf, 12 + 4);
1414
1415			qrdataset = NULL;
1416			result = dns_message_gettemprdataset(msg, &qrdataset);
1417			if (result != ISC_R_SUCCESS) {
1418				goto failure;
1419			}
1420			dns_rdataset_makequestion(qrdataset,
1421						  xfr->client->message->rdclass,
1422						  xfr->qtype);
1423
1424			result = dns_message_gettempname(msg, &qname);
1425			if (result != ISC_R_SUCCESS) {
1426				goto failure;
1427			}
1428			dns_name_init(qname, NULL);
1429			isc_buffer_availableregion(&xfr->buf, &r);
1430			INSIST(r.length >= xfr->qname->length);
1431			r.length = xfr->qname->length;
1432			isc_buffer_putmem(&xfr->buf, xfr->qname->ndata,
1433					  xfr->qname->length);
1434			dns_name_fromregion(qname, &r);
1435			ISC_LIST_INIT(qname->list);
1436			ISC_LIST_APPEND(qname->list, qrdataset, link);
1437
1438			dns_message_addname(msg, qname, DNS_SECTION_QUESTION);
1439			xfr->question_added = true;
1440		} else {
1441			/*
1442			 * Reserve space for the 12-byte message header
1443			 */
1444			isc_buffer_add(&xfr->buf, 12);
1445			msg->tcp_continuation = 1;
1446		}
1447	}
1448
1449	/*
1450	 * Try to fit in as many RRs as possible, unless "one-answer"
1451	 * format has been requested.
1452	 */
1453	for (n_rrs = 0;; n_rrs++) {
1454		dns_name_t *name = NULL;
1455		uint32_t ttl;
1456		dns_rdata_t *rdata = NULL;
1457
1458		unsigned int size;
1459		isc_region_t r;
1460
1461		msgname = NULL;
1462		msgrdata = NULL;
1463		msgrdl = NULL;
1464		msgrds = NULL;
1465
1466		xfr->stream->methods->current(xfr->stream, &name, &ttl, &rdata);
1467		size = name->length + 10 + rdata->length;
1468		isc_buffer_availableregion(&xfr->buf, &r);
1469		if (size >= r.length) {
1470			/*
1471			 * RR would not fit.  If there are other RRs in the
1472			 * buffer, send them now and leave this RR to the
1473			 * next message.  If this RR overflows the buffer
1474			 * all by itself, fail.
1475			 *
1476			 * In theory some RRs might fit in a TCP message
1477			 * when compressed even if they do not fit when
1478			 * uncompressed, but surely we don't want
1479			 * to send such monstrosities to an unsuspecting
1480			 * slave.
1481			 */
1482			if (n_rrs == 0) {
1483				xfrout_log(xfr, ISC_LOG_WARNING,
1484					   "RR too large for zone transfer "
1485					   "(%d bytes)",
1486					   size);
1487				/* XXX DNS_R_RRTOOLARGE? */
1488				result = ISC_R_NOSPACE;
1489				goto failure;
1490			}
1491			break;
1492		}
1493
1494		if (isc_log_wouldlog(ns_lctx, XFROUT_RR_LOGLEVEL)) {
1495			log_rr(name, rdata, ttl); /* XXX */
1496		}
1497
1498		result = dns_message_gettempname(msg, &msgname);
1499		if (result != ISC_R_SUCCESS) {
1500			goto failure;
1501		}
1502		dns_name_init(msgname, NULL);
1503		isc_buffer_availableregion(&xfr->buf, &r);
1504		INSIST(r.length >= name->length);
1505		r.length = name->length;
1506		isc_buffer_putmem(&xfr->buf, name->ndata, name->length);
1507		dns_name_fromregion(msgname, &r);
1508
1509		/* Reserve space for RR header. */
1510		isc_buffer_add(&xfr->buf, 10);
1511
1512		result = dns_message_gettemprdata(msg, &msgrdata);
1513		if (result != ISC_R_SUCCESS) {
1514			goto failure;
1515		}
1516		isc_buffer_availableregion(&xfr->buf, &r);
1517		r.length = rdata->length;
1518		isc_buffer_putmem(&xfr->buf, rdata->data, rdata->length);
1519		dns_rdata_init(msgrdata);
1520		dns_rdata_fromregion(msgrdata, rdata->rdclass, rdata->type, &r);
1521
1522		result = dns_message_gettemprdatalist(msg, &msgrdl);
1523		if (result != ISC_R_SUCCESS) {
1524			goto failure;
1525		}
1526		msgrdl->type = rdata->type;
1527		msgrdl->rdclass = rdata->rdclass;
1528		msgrdl->ttl = ttl;
1529		if (rdata->type == dns_rdatatype_sig ||
1530		    rdata->type == dns_rdatatype_rrsig) {
1531			msgrdl->covers = dns_rdata_covers(rdata);
1532		} else {
1533			msgrdl->covers = dns_rdatatype_none;
1534		}
1535		ISC_LIST_APPEND(msgrdl->rdata, msgrdata, link);
1536
1537		result = dns_message_gettemprdataset(msg, &msgrds);
1538		if (result != ISC_R_SUCCESS) {
1539			goto failure;
1540		}
1541		result = dns_rdatalist_tordataset(msgrdl, msgrds);
1542		INSIST(result == ISC_R_SUCCESS);
1543
1544		ISC_LIST_APPEND(msgname->list, msgrds, link);
1545
1546		dns_message_addname(msg, msgname, DNS_SECTION_ANSWER);
1547		msgname = NULL;
1548
1549		xfr->stats.nrecs++;
1550
1551		result = xfr->stream->methods->next(xfr->stream);
1552		if (result == ISC_R_NOMORE) {
1553			xfr->end_of_stream = true;
1554			break;
1555		}
1556		CHECK(result);
1557
1558		if (!xfr->many_answers) {
1559			break;
1560		}
1561		/*
1562		 * At this stage, at least 1 RR has been rendered into
1563		 * the message. Check if we want to clamp this message
1564		 * here (TCP only).
1565		 */
1566		if ((isc_buffer_usedlength(&xfr->buf) >=
1567		     xfr->client->sctx->transfer_tcp_message_size) &&
1568		    is_tcp)
1569		{
1570			break;
1571		}
1572	}
1573
1574	if (is_tcp) {
1575		isc_region_t used;
1576		CHECK(dns_compress_init(&cctx, -1, xfr->mctx));
1577		dns_compress_setsensitive(&cctx, true);
1578		cleanup_cctx = true;
1579		CHECK(dns_message_renderbegin(msg, &cctx, &xfr->txbuf));
1580		CHECK(dns_message_rendersection(msg, DNS_SECTION_QUESTION, 0));
1581		CHECK(dns_message_rendersection(msg, DNS_SECTION_ANSWER, 0));
1582		CHECK(dns_message_renderend(msg));
1583		dns_compress_invalidate(&cctx);
1584		cleanup_cctx = false;
1585
1586		isc_buffer_usedregion(&xfr->txbuf, &used);
1587
1588		xfrout_log(xfr, ISC_LOG_DEBUG(8),
1589			   "sending TCP message of %d bytes", used.length);
1590
1591		isc_nmhandle_attach(xfr->client->handle,
1592				    &xfr->client->sendhandle);
1593		isc_nm_send(xfr->client->sendhandle, &used, xfrout_senddone,
1594			    xfr);
1595		xfr->sends++;
1596		xfr->cbytes = used.length;
1597	} else {
1598		xfrout_log(xfr, ISC_LOG_DEBUG(8), "sending IXFR UDP response");
1599		ns_client_send(xfr->client);
1600		xfr->stream->methods->pause(xfr->stream);
1601		isc_nmhandle_detach(&xfr->client->reqhandle);
1602		xfrout_ctx_destroy(&xfr);
1603		return;
1604	}
1605
1606	/* Advance lasttsig to be the last TSIG generated */
1607	CHECK(dns_message_getquerytsig(msg, xfr->mctx, &xfr->lasttsig));
1608
1609failure:
1610	if (msgname != NULL) {
1611		if (msgrds != NULL) {
1612			if (dns_rdataset_isassociated(msgrds)) {
1613				dns_rdataset_disassociate(msgrds);
1614			}
1615			dns_message_puttemprdataset(msg, &msgrds);
1616		}
1617		if (msgrdl != NULL) {
1618			ISC_LIST_UNLINK(msgrdl->rdata, msgrdata, link);
1619			dns_message_puttemprdatalist(msg, &msgrdl);
1620		}
1621		if (msgrdata != NULL) {
1622			dns_message_puttemprdata(msg, &msgrdata);
1623		}
1624		dns_message_puttempname(msg, &msgname);
1625	}
1626
1627	if (tcpmsg != NULL) {
1628		dns_message_detach(&tcpmsg);
1629	}
1630
1631	if (cleanup_cctx) {
1632		dns_compress_invalidate(&cctx);
1633	}
1634	/*
1635	 * Make sure to release any locks held by database
1636	 * iterators before returning from the event handler.
1637	 */
1638	xfr->stream->methods->pause(xfr->stream);
1639
1640	if (result == ISC_R_SUCCESS) {
1641		return;
1642	}
1643
1644	if (xfr->client->sendhandle != NULL) {
1645		isc_nmhandle_detach(&xfr->client->sendhandle);
1646	}
1647
1648	xfrout_fail(xfr, result, "sending zone data");
1649}
1650
1651static void
1652xfrout_ctx_destroy(xfrout_ctx_t **xfrp) {
1653	xfrout_ctx_t *xfr = *xfrp;
1654	*xfrp = NULL;
1655
1656	INSIST(xfr->sends == 0);
1657
1658	xfr->client->shutdown = NULL;
1659	xfr->client->shutdown_arg = NULL;
1660
1661	if (xfr->stream != NULL) {
1662		xfr->stream->methods->destroy(&xfr->stream);
1663	}
1664	if (xfr->buf.base != NULL) {
1665		isc_mem_put(xfr->mctx, xfr->buf.base, xfr->buf.length);
1666	}
1667	if (xfr->txmem != NULL) {
1668		isc_mem_put(xfr->mctx, xfr->txmem, xfr->txmemlen);
1669	}
1670	if (xfr->lasttsig != NULL) {
1671		isc_buffer_free(&xfr->lasttsig);
1672	}
1673	if (xfr->quota != NULL) {
1674		isc_quota_detach(&xfr->quota);
1675	}
1676	if (xfr->ver != NULL) {
1677		dns_db_closeversion(xfr->db, &xfr->ver, false);
1678	}
1679	if (xfr->zone != NULL) {
1680		dns_zone_detach(&xfr->zone);
1681	}
1682	if (xfr->db != NULL) {
1683		dns_db_detach(&xfr->db);
1684	}
1685
1686	isc_mem_putanddetach(&xfr->mctx, xfr, sizeof(*xfr));
1687}
1688
1689static void
1690xfrout_senddone(isc_nmhandle_t *handle, isc_result_t result, void *arg) {
1691	xfrout_ctx_t *xfr = (xfrout_ctx_t *)arg;
1692
1693	REQUIRE((xfr->client->attributes & NS_CLIENTATTR_TCP) != 0);
1694
1695	INSIST(handle == xfr->client->handle);
1696
1697	xfr->sends--;
1698	INSIST(xfr->sends == 0);
1699
1700	isc_nmhandle_detach(&xfr->client->sendhandle);
1701
1702	/*
1703	 * Update transfer statistics if sending succeeded, accounting for the
1704	 * two-byte TCP length prefix included in the number of bytes sent.
1705	 */
1706	if (result == ISC_R_SUCCESS) {
1707		xfr->stats.nmsg++;
1708		xfr->stats.nbytes += xfr->cbytes;
1709	}
1710
1711	if (xfr->shuttingdown) {
1712		xfrout_maybe_destroy(xfr);
1713	} else if (result != ISC_R_SUCCESS) {
1714		xfrout_fail(xfr, result, "send");
1715	} else if (!xfr->end_of_stream) {
1716		sendstream(xfr);
1717	} else {
1718		/* End of zone transfer stream. */
1719		uint64_t msecs, persec;
1720
1721		inc_stats(xfr->client, xfr->zone, ns_statscounter_xfrdone);
1722		isc_time_now(&xfr->stats.end);
1723		msecs = isc_time_microdiff(&xfr->stats.end, &xfr->stats.start);
1724		msecs /= 1000;
1725		if (msecs == 0) {
1726			msecs = 1;
1727		}
1728		persec = (xfr->stats.nbytes * 1000) / msecs;
1729		xfrout_log(xfr, xfr->poll ? ISC_LOG_DEBUG(1) : ISC_LOG_INFO,
1730			   "%s ended: "
1731			   "%" PRIu64 " messages, %" PRIu64 " records, "
1732			   "%" PRIu64 " bytes, "
1733			   "%u.%03u secs (%u bytes/sec) (serial %u)",
1734			   xfr->mnemonic, xfr->stats.nmsg, xfr->stats.nrecs,
1735			   xfr->stats.nbytes, (unsigned int)(msecs / 1000),
1736			   (unsigned int)(msecs % 1000), (unsigned int)persec,
1737			   xfr->end_serial);
1738
1739		/*
1740		 * We're done, unreference the handle and destroy the xfr
1741		 * context.
1742		 */
1743		isc_nmhandle_detach(&xfr->client->reqhandle);
1744		xfrout_ctx_destroy(&xfr);
1745	}
1746}
1747
1748static void
1749xfrout_fail(xfrout_ctx_t *xfr, isc_result_t result, const char *msg) {
1750	xfr->shuttingdown = true;
1751	xfrout_log(xfr, ISC_LOG_ERROR, "%s: %s", msg,
1752		   isc_result_totext(result));
1753	xfrout_maybe_destroy(xfr);
1754}
1755
1756static void
1757xfrout_maybe_destroy(xfrout_ctx_t *xfr) {
1758	REQUIRE(xfr->shuttingdown);
1759
1760	ns_client_drop(xfr->client, ISC_R_CANCELED);
1761	isc_nmhandle_detach(&xfr->client->reqhandle);
1762	xfrout_ctx_destroy(&xfr);
1763}
1764
1765static void
1766xfrout_client_shutdown(void *arg, isc_result_t result) {
1767	xfrout_ctx_t *xfr = (xfrout_ctx_t *)arg;
1768	xfrout_fail(xfr, result, "aborted");
1769}
1770
1771/*
1772 * Log outgoing zone transfer messages in a format like
1773 * <client>: transfer of <zone>: <message>
1774 */
1775
1776static void
1777xfrout_logv(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
1778	    int level, const char *fmt, va_list ap) ISC_FORMAT_PRINTF(5, 0);
1779
1780static void
1781xfrout_logv(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
1782	    int level, const char *fmt, va_list ap) {
1783	char msgbuf[2048];
1784	char namebuf[DNS_NAME_FORMATSIZE];
1785	char classbuf[DNS_RDATACLASS_FORMATSIZE];
1786
1787	dns_name_format(zonename, namebuf, sizeof(namebuf));
1788	dns_rdataclass_format(rdclass, classbuf, sizeof(classbuf));
1789	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
1790	ns_client_log(client, DNS_LOGCATEGORY_XFER_OUT, NS_LOGMODULE_XFER_OUT,
1791		      level, "transfer of '%s/%s': %s", namebuf, classbuf,
1792		      msgbuf);
1793}
1794
1795/*
1796 * Logging function for use when a xfrout_ctx_t has not yet been created.
1797 */
1798static void
1799xfrout_log1(ns_client_t *client, dns_name_t *zonename, dns_rdataclass_t rdclass,
1800	    int level, const char *fmt, ...) {
1801	va_list ap;
1802	va_start(ap, fmt);
1803	xfrout_logv(client, zonename, rdclass, level, fmt, ap);
1804	va_end(ap);
1805}
1806
1807/*
1808 * Logging function for use when there is a xfrout_ctx_t.
1809 */
1810static void
1811xfrout_log(xfrout_ctx_t *xfr, int level, const char *fmt, ...) {
1812	va_list ap;
1813	va_start(ap, fmt);
1814	xfrout_logv(xfr->client, xfr->qname, xfr->qclass, level, fmt, ap);
1815	va_end(ap);
1816}
1817