client.c revision 180477
1/*
2 * Copyright (C) 2004-2007  Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003  Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18/* $Id: client.c,v 1.219.18.28.10.1 2008/05/22 21:28:04 each Exp $ */
19
20#include <config.h>
21
22#include <isc/formatcheck.h>
23#include <isc/mutex.h>
24#include <isc/once.h>
25#include <isc/platform.h>
26#include <isc/print.h>
27#include <isc/stdio.h>
28#include <isc/string.h>
29#include <isc/task.h>
30#include <isc/timer.h>
31#include <isc/util.h>
32
33#include <dns/db.h>
34#include <dns/dispatch.h>
35#include <dns/events.h>
36#include <dns/message.h>
37#include <dns/peer.h>
38#include <dns/rcode.h>
39#include <dns/rdata.h>
40#include <dns/rdataclass.h>
41#include <dns/rdatalist.h>
42#include <dns/rdataset.h>
43#include <dns/resolver.h>
44#include <dns/tsig.h>
45#include <dns/view.h>
46#include <dns/zone.h>
47
48#include <named/interfacemgr.h>
49#include <named/log.h>
50#include <named/notify.h>
51#include <named/server.h>
52#include <named/update.h>
53
54/***
55 *** Client
56 ***/
57
58/*! \file
59 * Client Routines
60 *
61 * Important note!
62 *
63 * All client state changes, other than that from idle to listening, occur
64 * as a result of events.  This guarantees serialization and avoids the
65 * need for locking.
66 *
67 * If a routine is ever created that allows someone other than the client's
68 * task to change the client, then the client will have to be locked.
69 */
70
/*
 * Tracing helpers.
 *
 * NS_CLIENT_TRACE is defined unconditionally here, so the logging
 * variants of the macros below are the ones in effect.
 *
 * CTRACE(m) logs the string 'm' at debug level 3 through ns_client_log();
 * it expands to a reference to a variable named 'client', which must be
 * in scope at the point of use.
 *
 * MTRACE(m) logs the string 'm' at debug level 3 through isc_log_write(),
 * prefixed with the address of a variable named 'manager', which must be
 * in scope at the point of use.
 *
 * When NS_CLIENT_TRACE is not defined both macros only evaluate their
 * argument and discard it.
 */
#define NS_CLIENT_TRACE
#ifdef NS_CLIENT_TRACE
#define CTRACE(m)	ns_client_log(client, \
				      NS_LOGCATEGORY_CLIENT, \
				      NS_LOGMODULE_CLIENT, \
				      ISC_LOG_DEBUG(3), \
				      "%s", (m))
#define MTRACE(m)	isc_log_write(ns_g_lctx, \
				      NS_LOGCATEGORY_GENERAL, \
				      NS_LOGMODULE_CLIENT, \
				      ISC_LOG_DEBUG(3), \
				      "clientmgr @%p: %s", manager, (m))
#else
#define CTRACE(m)	((void)(m))
#define MTRACE(m)	((void)(m))
#endif
87
/*% True if client 'c' has the NS_CLIENTATTR_TCP attribute set. */
#define TCP_CLIENT(c)	(((c)->attributes & NS_CLIENTATTR_TCP) != 0)

/*%
 * Size of the per-response TCP buffer: room for a message of up to
 * 65535 bytes plus the two-byte length field that precedes it.
 */
#define TCP_BUFFER_SIZE			(65535 + 2)
/*%
 * Size of the on-stack send buffer used by ns_client_send() and
 * ns_client_sendraw(); also the upper bound applied to the UDP
 * response size.
 */
#define SEND_BUFFER_SIZE		4096
/*% Size of the client's receive buffer (client->recvbuf). */
#define RECV_BUFFER_SIZE		4096
93
#ifdef ISC_PLATFORM_USETHREADS
#define NMCTXS				100
/*%<
 * Number of 'mctx pools' for clients. (Should this be configurable?)
 * When threads are enabled, we use a pool of memory contexts shared by
 * client objects, since concurrent access to a single shared context
 * would cause heavy contention.  The above constant is expected to be
 * enough to completely avoid contention among threads for an
 * authoritative-only server.
 */
#else
#define NMCTXS				0
/*%<
 * If named is built without threads, simply share the manager's context.
 * Using a separate context in this case would simply waste memory.
 */
#endif
111
/*%
 * Nameserver client manager structure.
 *
 * Owns the lists of client objects.  Each client is on exactly one of
 * the 'active', 'recursing' or 'inactive' lists while it is linked to
 * the manager; the lists and 'exiting' are protected by 'lock'.
 */
struct ns_clientmgr {
	/* Unlocked. */
	unsigned int			magic;		/*%< MANAGER_MAGIC when valid */
	isc_mem_t *			mctx;
	isc_taskmgr_t *			taskmgr;
	isc_timermgr_t *		timermgr;
	isc_mutex_t			lock;
	/* Locked by lock. */
	isc_boolean_t			exiting;	/*%< Manager is shutting down */
	client_list_t			active; 	/*%< Active clients */
	client_list_t			recursing; 	/*%< Recursing clients */
	client_list_t 			inactive;	/*%< To be recycled */
#if NMCTXS > 0
	/*% mctx pool for clients (only when NMCTXS > 0). */
	/* NOTE(review): nextmctx is presumably the next pool slot to use; confirm at its use site. */
	unsigned int			nextmctx;
	isc_mem_t *			mctxpool[NMCTXS];
#endif
};

/*% Magic number identifying a valid ns_clientmgr ('NSCm'). */
#define MANAGER_MAGIC			ISC_MAGIC('N', 'S', 'C', 'm')
/*% True if 'm' points to a manager carrying MANAGER_MAGIC. */
#define VALID_MANAGER(m)		ISC_MAGIC_VALID(m, MANAGER_MAGIC)
134
/*!
 * Client object states.  Ordering is significant: higher-numbered
 * states are generally "more active", meaning that the client can
 * have more dynamically allocated data, outstanding events, etc.
 * In the list below, any such properties listed for state N
 * also apply to any state > N.
 *
 * To force the client into a less active state, set client->newstate
 * to that state and call exit_check().  This will cause any
 * activities defined for higher-numbered states to be aborted.
 *
 * exit_check() compares client->state against client->newstate
 * numerically, so the relative order of these values must be preserved.
 */

#define NS_CLIENTSTATE_FREED    0
/*%<
 * The client object no longer exists.
 */

#define NS_CLIENTSTATE_INACTIVE 1
/*%<
 * The client object exists and has a task and timer.
 * Its "query" struct and sendbuf are initialized.
 * It is on the client manager's list of inactive clients.
 * It has a message and OPT, both in the reset state.
 */

#define NS_CLIENTSTATE_READY    2
/*%<
 * The client object is either a TCP or a UDP one, and
 * it is associated with a network interface.  It is on the
 * client manager's list of active clients.
 *
 * If it is a TCP client object, it has a TCP listener socket
 * and an outstanding TCP listen request.
 *
 * If it is a UDP client object, it has a UDP listener socket
 * and an outstanding UDP receive request.
 */

#define NS_CLIENTSTATE_READING  3
/*%<
 * The client object is a TCP client object that has received
 * a connection.  It has a tcpsocket, tcpmsg, TCP quota, and an
 * outstanding TCP read request.  This state is not used for
 * UDP client objects.
 */

#define NS_CLIENTSTATE_WORKING  4
/*%<
 * The client object has received a request and is working
 * on it.  It has a view, and it may have any of a non-reset OPT,
 * recursion quota, and an outstanding write request.
 */

#define NS_CLIENTSTATE_MAX      9
/*%<
 * Sentinel value used to indicate "no state".  When client->newstate
 * has this value, we are not attempting to exit the current state.
 * Must be greater than any valid state.
 */

/*
 * Enable ns_client_dropport() by default.  Define NS_CLIENT_DROPPORT
 * to 0 before this point to compile the port-based drop logic out.
 */
#ifndef NS_CLIENT_DROPPORT
#define NS_CLIENT_DROPPORT 1
#endif
201
/*
 * Global counter with external linkage.
 * NOTE(review): not referenced in this part of the file; presumably a
 * count of requests handled -- confirm against its users.
 */
unsigned int ns_client_requests;

/*
 * Forward declarations for file-local routines that are referenced
 * before their definitions.
 */
static void client_read(ns_client_t *client);
static void client_accept(ns_client_t *client);
static void client_udprecv(ns_client_t *client);
static void clientmgr_destroy(ns_clientmgr_t *manager);
static isc_boolean_t exit_check(ns_client_t *client);
static void ns_client_endrequest(ns_client_t *client);
static void ns_client_checkactive(ns_client_t *client);
static void client_start(isc_task_t *task, isc_event_t *event);
static void client_request(isc_task_t *task, isc_event_t *event);
static void ns_client_dumpmessage(ns_client_t *client, const char *reason);
214
215void
216ns_client_recursing(ns_client_t *client) {
217	REQUIRE(NS_CLIENT_VALID(client));
218
219	LOCK(&client->manager->lock);
220	ISC_LIST_UNLINK(*client->list, client, link);
221	ISC_LIST_APPEND(client->manager->recursing, client, link);
222	client->list = &client->manager->recursing;
223	UNLOCK(&client->manager->lock);
224}
225
226void
227ns_client_killoldestquery(ns_client_t *client) {
228	ns_client_t *oldest;
229	REQUIRE(NS_CLIENT_VALID(client));
230
231	LOCK(&client->manager->lock);
232	oldest = ISC_LIST_HEAD(client->manager->recursing);
233	if (oldest != NULL) {
234		ns_query_cancel(oldest);
235		ISC_LIST_UNLINK(*oldest->list, oldest, link);
236		ISC_LIST_APPEND(client->manager->active, oldest, link);
237		oldest->list = &client->manager->active;
238	}
239	UNLOCK(&client->manager->lock);
240}
241
242void
243ns_client_settimeout(ns_client_t *client, unsigned int seconds) {
244	isc_result_t result;
245	isc_interval_t interval;
246
247	isc_interval_set(&interval, seconds, 0);
248	result = isc_timer_reset(client->timer, isc_timertype_once, NULL,
249				 &interval, ISC_FALSE);
250	client->timerset = ISC_TRUE;
251	if (result != ISC_R_SUCCESS) {
252		ns_client_log(client, NS_LOGCATEGORY_CLIENT,
253			      NS_LOGMODULE_CLIENT, ISC_LOG_ERROR,
254			      "setting timeout: %s",
255			      isc_result_totext(result));
256		/* Continue anyway. */
257	}
258}
259
260/*%
261 * Check for a deactivation or shutdown request and take appropriate
262 * action.  Returns ISC_TRUE if either is in progress; in this case
263 * the caller must no longer use the client object as it may have been
264 * freed.
265 */
266static isc_boolean_t
267exit_check(ns_client_t *client) {
268	ns_clientmgr_t *locked_manager = NULL;
269	ns_clientmgr_t *destroy_manager = NULL;
270
271	REQUIRE(NS_CLIENT_VALID(client));
272
273	if (client->state <= client->newstate)
274		return (ISC_FALSE); /* Business as usual. */
275
276	INSIST(client->newstate < NS_CLIENTSTATE_WORKING);
277
278	/*
279	 * We need to detach from the view early when shutting down
280	 * the server to break the following vicious circle:
281	 *
282	 *  - The resolver will not shut down until the view refcount is zero
283	 *  - The view refcount does not go to zero until all clients detach
284	 *  - The client does not detach from the view until references is zero
285	 *  - references does not go to zero until the resolver has shut down
286	 *
287	 * Keep the view attached until any outstanding updates complete.
288	 */
289	if (client->nupdates == 0 &&
290	    client->newstate == NS_CLIENTSTATE_FREED && client->view != NULL)
291		dns_view_detach(&client->view);
292
293	if (client->state == NS_CLIENTSTATE_WORKING) {
294		INSIST(client->newstate <= NS_CLIENTSTATE_READING);
295		/*
296		 * Let the update processing complete.
297		 */
298		if (client->nupdates > 0)
299			return (ISC_TRUE);
300		/*
301		 * We are trying to abort request processing.
302		 */
303		if (client->nsends > 0) {
304			isc_socket_t *socket;
305			if (TCP_CLIENT(client))
306				socket = client->tcpsocket;
307			else
308				socket = client->udpsocket;
309			isc_socket_cancel(socket, client->task,
310					  ISC_SOCKCANCEL_SEND);
311		}
312
313		if (! (client->nsends == 0 && client->nrecvs == 0 &&
314		       client->references == 0))
315		{
316			/*
317			 * Still waiting for I/O cancel completion.
318			 * or lingering references.
319			 */
320			return (ISC_TRUE);
321		}
322		/*
323		 * I/O cancel is complete.  Burn down all state
324		 * related to the current request.  Ensure that
325		 * the client is on the active list and not the
326		 * recursing list.
327		 */
328		LOCK(&client->manager->lock);
329		if (client->list == &client->manager->recursing) {
330			ISC_LIST_UNLINK(*client->list, client, link);
331			ISC_LIST_APPEND(client->manager->active, client, link);
332			client->list = &client->manager->active;
333		}
334		UNLOCK(&client->manager->lock);
335		ns_client_endrequest(client);
336
337		client->state = NS_CLIENTSTATE_READING;
338		INSIST(client->recursionquota == NULL);
339		if (NS_CLIENTSTATE_READING == client->newstate) {
340			client_read(client);
341			client->newstate = NS_CLIENTSTATE_MAX;
342			return (ISC_TRUE); /* We're done. */
343		}
344	}
345
346	if (client->state == NS_CLIENTSTATE_READING) {
347		/*
348		 * We are trying to abort the current TCP connection,
349		 * if any.
350		 */
351		INSIST(client->recursionquota == NULL);
352		INSIST(client->newstate <= NS_CLIENTSTATE_READY);
353		if (client->nreads > 0)
354			dns_tcpmsg_cancelread(&client->tcpmsg);
355		if (! client->nreads == 0) {
356			/* Still waiting for read cancel completion. */
357			return (ISC_TRUE);
358		}
359
360		if (client->tcpmsg_valid) {
361			dns_tcpmsg_invalidate(&client->tcpmsg);
362			client->tcpmsg_valid = ISC_FALSE;
363		}
364		if (client->tcpsocket != NULL) {
365			CTRACE("closetcp");
366			isc_socket_detach(&client->tcpsocket);
367		}
368
369		if (client->tcpquota != NULL)
370			isc_quota_detach(&client->tcpquota);
371
372		if (client->timerset) {
373			(void)isc_timer_reset(client->timer,
374					      isc_timertype_inactive,
375					      NULL, NULL, ISC_TRUE);
376			client->timerset = ISC_FALSE;
377		}
378
379		client->peeraddr_valid = ISC_FALSE;
380
381		client->state = NS_CLIENTSTATE_READY;
382		INSIST(client->recursionquota == NULL);
383
384		/*
385		 * Now the client is ready to accept a new TCP connection
386		 * or UDP request, but we may have enough clients doing
387		 * that already.  Check whether this client needs to remain
388		 * active and force it to go inactive if not.
389		 */
390		ns_client_checkactive(client);
391
392		if (NS_CLIENTSTATE_READY == client->newstate) {
393			if (TCP_CLIENT(client)) {
394				client_accept(client);
395			} else
396				client_udprecv(client);
397			client->newstate = NS_CLIENTSTATE_MAX;
398			return (ISC_TRUE);
399		}
400	}
401
402	if (client->state == NS_CLIENTSTATE_READY) {
403		INSIST(client->newstate <= NS_CLIENTSTATE_INACTIVE);
404		/*
405		 * We are trying to enter the inactive state.
406		 */
407		if (client->naccepts > 0)
408			isc_socket_cancel(client->tcplistener, client->task,
409					  ISC_SOCKCANCEL_ACCEPT);
410
411		if (! (client->naccepts == 0)) {
412			/* Still waiting for accept cancel completion. */
413			return (ISC_TRUE);
414		}
415		/* Accept cancel is complete. */
416
417		if (client->nrecvs > 0)
418			isc_socket_cancel(client->udpsocket, client->task,
419					  ISC_SOCKCANCEL_RECV);
420		if (! (client->nrecvs == 0)) {
421			/* Still waiting for recv cancel completion. */
422			return (ISC_TRUE);
423		}
424		/* Recv cancel is complete. */
425
426		if (client->nctls > 0) {
427			/* Still waiting for control event to be delivered */
428			return (ISC_TRUE);
429		}
430
431		/* Deactivate the client. */
432		if (client->interface)
433			ns_interface_detach(&client->interface);
434
435		INSIST(client->naccepts == 0);
436		INSIST(client->recursionquota == NULL);
437		if (client->tcplistener != NULL)
438			isc_socket_detach(&client->tcplistener);
439
440		if (client->udpsocket != NULL)
441			isc_socket_detach(&client->udpsocket);
442
443		if (client->dispatch != NULL)
444			dns_dispatch_detach(&client->dispatch);
445
446		client->attributes = 0;
447		client->mortal = ISC_FALSE;
448
449		LOCK(&client->manager->lock);
450		/*
451		 * Put the client on the inactive list.  If we are aiming for
452		 * the "freed" state, it will be removed from the inactive
453		 * list shortly, and we need to keep the manager locked until
454		 * that has been done, lest the manager decide to reactivate
455		 * the dying client inbetween.
456		 */
457		locked_manager = client->manager;
458		ISC_LIST_UNLINK(*client->list, client, link);
459		ISC_LIST_APPEND(client->manager->inactive, client, link);
460		client->list = &client->manager->inactive;
461		client->state = NS_CLIENTSTATE_INACTIVE;
462		INSIST(client->recursionquota == NULL);
463
464		if (client->state == client->newstate) {
465			client->newstate = NS_CLIENTSTATE_MAX;
466			goto unlock;
467		}
468	}
469
470	if (client->state == NS_CLIENTSTATE_INACTIVE) {
471		INSIST(client->newstate == NS_CLIENTSTATE_FREED);
472		/*
473		 * We are trying to free the client.
474		 *
475		 * When "shuttingdown" is true, either the task has received
476		 * its shutdown event or no shutdown event has ever been
477		 * set up.  Thus, we have no outstanding shutdown
478		 * event at this point.
479		 */
480		REQUIRE(client->state == NS_CLIENTSTATE_INACTIVE);
481
482		INSIST(client->recursionquota == NULL);
483
484		ns_query_free(client);
485		isc_mem_put(client->mctx, client->recvbuf, RECV_BUFFER_SIZE);
486		isc_event_free((isc_event_t **)&client->sendevent);
487		isc_event_free((isc_event_t **)&client->recvevent);
488		isc_timer_detach(&client->timer);
489
490		if (client->tcpbuf != NULL)
491			isc_mem_put(client->mctx, client->tcpbuf, TCP_BUFFER_SIZE);
492		if (client->opt != NULL) {
493			INSIST(dns_rdataset_isassociated(client->opt));
494			dns_rdataset_disassociate(client->opt);
495			dns_message_puttemprdataset(client->message, &client->opt);
496		}
497		dns_message_destroy(&client->message);
498		if (client->manager != NULL) {
499			ns_clientmgr_t *manager = client->manager;
500			if (locked_manager == NULL) {
501				LOCK(&manager->lock);
502				locked_manager = manager;
503			}
504			ISC_LIST_UNLINK(*client->list, client, link);
505			client->list = NULL;
506			if (manager->exiting &&
507			    ISC_LIST_EMPTY(manager->active) &&
508			    ISC_LIST_EMPTY(manager->inactive) &&
509			    ISC_LIST_EMPTY(manager->recursing))
510				destroy_manager = manager;
511		}
512		/*
513		 * Detaching the task must be done after unlinking from
514		 * the manager's lists because the manager accesses
515		 * client->task.
516		 */
517		if (client->task != NULL)
518			isc_task_detach(&client->task);
519
520		CTRACE("free");
521		client->magic = 0;
522		isc_mem_putanddetach(&client->mctx, client, sizeof(*client));
523
524		goto unlock;
525	}
526
527 unlock:
528	if (locked_manager != NULL) {
529		UNLOCK(&locked_manager->lock);
530		locked_manager = NULL;
531	}
532
533	/*
534	 * Only now is it safe to destroy the client manager (if needed),
535	 * because we have accessed its lock for the last time.
536	 */
537	if (destroy_manager != NULL)
538		clientmgr_destroy(destroy_manager);
539
540	return (ISC_TRUE);
541}
542
543/*%
544 * The client's task has received the client's control event
545 * as part of the startup process.
546 */
547static void
548client_start(isc_task_t *task, isc_event_t *event) {
549	ns_client_t *client = (ns_client_t *) event->ev_arg;
550
551	INSIST(task == client->task);
552
553	UNUSED(task);
554
555	INSIST(client->nctls == 1);
556	client->nctls--;
557
558	if (exit_check(client))
559		return;
560
561	if (TCP_CLIENT(client)) {
562		client_accept(client);
563	} else {
564		client_udprecv(client);
565	}
566}
567
568
569/*%
570 * The client's task has received a shutdown event.
571 */
572static void
573client_shutdown(isc_task_t *task, isc_event_t *event) {
574	ns_client_t *client;
575
576	REQUIRE(event != NULL);
577	REQUIRE(event->ev_type == ISC_TASKEVENT_SHUTDOWN);
578	client = event->ev_arg;
579	REQUIRE(NS_CLIENT_VALID(client));
580	REQUIRE(task == client->task);
581
582	UNUSED(task);
583
584	CTRACE("shutdown");
585
586	isc_event_free(&event);
587
588	if (client->shutdown != NULL) {
589		(client->shutdown)(client->shutdown_arg, ISC_R_SHUTTINGDOWN);
590		client->shutdown = NULL;
591		client->shutdown_arg = NULL;
592	}
593
594	client->newstate = NS_CLIENTSTATE_FREED;
595	(void)exit_check(client);
596}
597
598static void
599ns_client_endrequest(ns_client_t *client) {
600	INSIST(client->naccepts == 0);
601	INSIST(client->nreads == 0);
602	INSIST(client->nsends == 0);
603	INSIST(client->nrecvs == 0);
604	INSIST(client->nupdates == 0);
605	INSIST(client->state == NS_CLIENTSTATE_WORKING);
606
607	CTRACE("endrequest");
608
609	if (client->next != NULL) {
610		(client->next)(client);
611		client->next = NULL;
612	}
613
614	if (client->view != NULL)
615		dns_view_detach(&client->view);
616	if (client->opt != NULL) {
617		INSIST(dns_rdataset_isassociated(client->opt));
618		dns_rdataset_disassociate(client->opt);
619		dns_message_puttemprdataset(client->message, &client->opt);
620	}
621
622	client->udpsize = 512;
623	client->extflags = 0;
624	client->ednsversion = -1;
625	dns_message_reset(client->message, DNS_MESSAGE_INTENTPARSE);
626
627	if (client->recursionquota != NULL)
628		isc_quota_detach(&client->recursionquota);
629
630	/*
631	 * Clear all client attributes that are specific to
632	 * the request; that's all except the TCP flag.
633	 */
634	client->attributes &= NS_CLIENTATTR_TCP;
635}
636
637static void
638ns_client_checkactive(ns_client_t *client) {
639	if (client->mortal) {
640		/*
641		 * This client object should normally go inactive
642		 * at this point, but if we have fewer active client
643		 * objects than  desired due to earlier quota exhaustion,
644		 * keep it active to make up for the shortage.
645		 */
646		isc_boolean_t need_another_client = ISC_FALSE;
647		if (TCP_CLIENT(client)) {
648			LOCK(&client->interface->lock);
649			if (client->interface->ntcpcurrent <
650			    client->interface->ntcptarget)
651				need_another_client = ISC_TRUE;
652			UNLOCK(&client->interface->lock);
653		} else {
654			/*
655			 * The UDP client quota is enforced by making
656			 * requests fail rather than by not listening
657			 * for new ones.  Therefore, there is always a
658			 * full set of UDP clients listening.
659			 */
660		}
661		if (! need_another_client) {
662			/*
663			 * We don't need this client object.  Recycle it.
664			 */
665			if (client->newstate >= NS_CLIENTSTATE_INACTIVE)
666				client->newstate = NS_CLIENTSTATE_INACTIVE;
667		}
668	}
669}
670
671void
672ns_client_next(ns_client_t *client, isc_result_t result) {
673	int newstate;
674
675	REQUIRE(NS_CLIENT_VALID(client));
676	REQUIRE(client->state == NS_CLIENTSTATE_WORKING ||
677		client->state == NS_CLIENTSTATE_READING);
678
679	CTRACE("next");
680
681	if (result != ISC_R_SUCCESS)
682		ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
683			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3),
684			      "request failed: %s", isc_result_totext(result));
685
686	/*
687	 * An error processing a TCP request may have left
688	 * the connection out of sync.  To be safe, we always
689	 * sever the connection when result != ISC_R_SUCCESS.
690	 */
691	if (result == ISC_R_SUCCESS && TCP_CLIENT(client))
692		newstate = NS_CLIENTSTATE_READING;
693	else
694		newstate = NS_CLIENTSTATE_READY;
695
696	if (client->newstate > newstate)
697		client->newstate = newstate;
698	(void)exit_check(client);
699}
700
701
702static void
703client_senddone(isc_task_t *task, isc_event_t *event) {
704	ns_client_t *client;
705	isc_socketevent_t *sevent = (isc_socketevent_t *) event;
706
707	REQUIRE(sevent != NULL);
708	REQUIRE(sevent->ev_type == ISC_SOCKEVENT_SENDDONE);
709	client = sevent->ev_arg;
710	REQUIRE(NS_CLIENT_VALID(client));
711	REQUIRE(task == client->task);
712	REQUIRE(sevent == client->sendevent);
713
714	UNUSED(task);
715
716	CTRACE("senddone");
717
718	if (sevent->result != ISC_R_SUCCESS)
719		ns_client_log(client, NS_LOGCATEGORY_CLIENT,
720			      NS_LOGMODULE_CLIENT, ISC_LOG_WARNING,
721			      "error sending response: %s",
722			      isc_result_totext(sevent->result));
723
724	INSIST(client->nsends > 0);
725	client->nsends--;
726
727	if (client->tcpbuf != NULL) {
728		INSIST(TCP_CLIENT(client));
729		isc_mem_put(client->mctx, client->tcpbuf, TCP_BUFFER_SIZE);
730		client->tcpbuf = NULL;
731	}
732
733	if (exit_check(client))
734		return;
735
736	ns_client_next(client, ISC_R_SUCCESS);
737}
738
739/*%
740 * We only want to fail with ISC_R_NOSPACE when called from
741 * ns_client_sendraw() and not when called from ns_client_send(),
742 * tcpbuffer is NULL when called from ns_client_sendraw() and
743 * length != 0.  tcpbuffer != NULL when called from ns_client_send()
744 * and length == 0.
745 */
746
747static isc_result_t
748client_allocsendbuf(ns_client_t *client, isc_buffer_t *buffer,
749		    isc_buffer_t *tcpbuffer, isc_uint32_t length,
750		    unsigned char *sendbuf, unsigned char **datap)
751{
752	unsigned char *data;
753	isc_uint32_t bufsize;
754	isc_result_t result;
755
756	INSIST(datap != NULL);
757	INSIST((tcpbuffer == NULL && length != 0) ||
758	       (tcpbuffer != NULL && length == 0));
759
760	if (TCP_CLIENT(client)) {
761		INSIST(client->tcpbuf == NULL);
762		if (length + 2 > TCP_BUFFER_SIZE) {
763			result = ISC_R_NOSPACE;
764			goto done;
765		}
766		client->tcpbuf = isc_mem_get(client->mctx, TCP_BUFFER_SIZE);
767		if (client->tcpbuf == NULL) {
768			result = ISC_R_NOMEMORY;
769			goto done;
770		}
771		data = client->tcpbuf;
772		if (tcpbuffer != NULL) {
773			isc_buffer_init(tcpbuffer, data, TCP_BUFFER_SIZE);
774			isc_buffer_init(buffer, data + 2, TCP_BUFFER_SIZE - 2);
775		} else {
776			isc_buffer_init(buffer, data, TCP_BUFFER_SIZE);
777			INSIST(length <= 0xffff);
778			isc_buffer_putuint16(buffer, (isc_uint16_t)length);
779		}
780	} else {
781		data = sendbuf;
782		if (client->udpsize < SEND_BUFFER_SIZE)
783			bufsize = client->udpsize;
784		else
785			bufsize = SEND_BUFFER_SIZE;
786		if (length > bufsize) {
787			result = ISC_R_NOSPACE;
788			goto done;
789		}
790		isc_buffer_init(buffer, data, bufsize);
791	}
792	*datap = data;
793	result = ISC_R_SUCCESS;
794
795 done:
796	return (result);
797}
798
799static isc_result_t
800client_sendpkg(ns_client_t *client, isc_buffer_t *buffer) {
801	struct in6_pktinfo *pktinfo;
802	isc_result_t result;
803	isc_region_t r;
804	isc_sockaddr_t *address;
805	isc_socket_t *socket;
806	isc_netaddr_t netaddr;
807	int match;
808	unsigned int sockflags = ISC_SOCKFLAG_IMMEDIATE;
809
810	if (TCP_CLIENT(client)) {
811		socket = client->tcpsocket;
812		address = NULL;
813	} else {
814		socket = client->udpsocket;
815		address = &client->peeraddr;
816
817		isc_netaddr_fromsockaddr(&netaddr, &client->peeraddr);
818		if (ns_g_server->blackholeacl != NULL &&
819		    dns_acl_match(&netaddr, NULL,
820			    	  ns_g_server->blackholeacl,
821				  &ns_g_server->aclenv,
822				  &match, NULL) == ISC_R_SUCCESS &&
823		    match > 0)
824			return (DNS_R_BLACKHOLED);
825		sockflags |= ISC_SOCKFLAG_NORETRY;
826	}
827
828	if ((client->attributes & NS_CLIENTATTR_PKTINFO) != 0 &&
829	    (client->attributes & NS_CLIENTATTR_MULTICAST) == 0)
830		pktinfo = &client->pktinfo;
831	else
832		pktinfo = NULL;
833
834	isc_buffer_usedregion(buffer, &r);
835
836	CTRACE("sendto");
837
838	result = isc_socket_sendto2(socket, &r, client->task,
839				    address, pktinfo,
840				    client->sendevent, sockflags);
841	if (result == ISC_R_SUCCESS || result == ISC_R_INPROGRESS) {
842		client->nsends++;
843		if (result == ISC_R_SUCCESS)
844			client_senddone(client->task,
845					(isc_event_t *)client->sendevent);
846		result = ISC_R_SUCCESS;
847	}
848	return (result);
849}
850
851void
852ns_client_sendraw(ns_client_t *client, dns_message_t *message) {
853	isc_result_t result;
854	unsigned char *data;
855	isc_buffer_t buffer;
856	isc_region_t r;
857	isc_region_t *mr;
858	unsigned char sendbuf[SEND_BUFFER_SIZE];
859
860	REQUIRE(NS_CLIENT_VALID(client));
861
862	CTRACE("sendraw");
863
864	mr = dns_message_getrawmessage(message);
865	if (mr == NULL) {
866		result = ISC_R_UNEXPECTEDEND;
867		goto done;
868	}
869
870	result = client_allocsendbuf(client, &buffer, NULL, mr->length,
871				     sendbuf, &data);
872	if (result != ISC_R_SUCCESS)
873		goto done;
874
875	/*
876	 * Copy message to buffer and fixup id.
877	 */
878	isc_buffer_availableregion(&buffer, &r);
879	result = isc_buffer_copyregion(&buffer, mr);
880	if (result != ISC_R_SUCCESS)
881		goto done;
882	r.base[0] = (client->message->id >> 8) & 0xff;
883	r.base[1] = client->message->id & 0xff;
884
885	result = client_sendpkg(client, &buffer);
886	if (result == ISC_R_SUCCESS)
887		return;
888
889 done:
890	if (client->tcpbuf != NULL) {
891		isc_mem_put(client->mctx, client->tcpbuf, TCP_BUFFER_SIZE);
892		client->tcpbuf = NULL;
893	}
894	ns_client_next(client, result);
895}
896
897void
898ns_client_send(ns_client_t *client) {
899	isc_result_t result;
900	unsigned char *data;
901	isc_buffer_t buffer;
902	isc_buffer_t tcpbuffer;
903	isc_region_t r;
904	dns_compress_t cctx;
905	isc_boolean_t cleanup_cctx = ISC_FALSE;
906	unsigned char sendbuf[SEND_BUFFER_SIZE];
907	unsigned int dnssec_opts;
908	unsigned int preferred_glue;
909
910	REQUIRE(NS_CLIENT_VALID(client));
911
912	CTRACE("send");
913
914	if ((client->attributes & NS_CLIENTATTR_RA) != 0)
915		client->message->flags |= DNS_MESSAGEFLAG_RA;
916
917	if ((client->attributes & NS_CLIENTATTR_WANTDNSSEC) != 0)
918		dnssec_opts = 0;
919	else
920		dnssec_opts = DNS_MESSAGERENDER_OMITDNSSEC;
921
922	preferred_glue = 0;
923	if (client->view != NULL) {
924		if (client->view->preferred_glue == dns_rdatatype_a)
925			preferred_glue = DNS_MESSAGERENDER_PREFER_A;
926		else if (client->view->preferred_glue == dns_rdatatype_aaaa)
927			preferred_glue = DNS_MESSAGERENDER_PREFER_AAAA;
928	}
929
930	/*
931	 * XXXRTH  The following doesn't deal with TCP buffer resizing.
932	 */
933	result = client_allocsendbuf(client, &buffer, &tcpbuffer, 0,
934				     sendbuf, &data);
935	if (result != ISC_R_SUCCESS)
936		goto done;
937
938	result = dns_compress_init(&cctx, -1, client->mctx);
939	if (result != ISC_R_SUCCESS)
940		goto done;
941	cleanup_cctx = ISC_TRUE;
942
943	result = dns_message_renderbegin(client->message, &cctx, &buffer);
944	if (result != ISC_R_SUCCESS)
945		goto done;
946	if (client->opt != NULL) {
947		result = dns_message_setopt(client->message, client->opt);
948		/*
949		 * XXXRTH dns_message_setopt() should probably do this...
950		 */
951		client->opt = NULL;
952		if (result != ISC_R_SUCCESS)
953			goto done;
954	}
955	result = dns_message_rendersection(client->message,
956					   DNS_SECTION_QUESTION, 0);
957	if (result == ISC_R_NOSPACE) {
958		client->message->flags |= DNS_MESSAGEFLAG_TC;
959		goto renderend;
960	}
961	if (result != ISC_R_SUCCESS)
962		goto done;
963	result = dns_message_rendersection(client->message,
964					   DNS_SECTION_ANSWER,
965					   DNS_MESSAGERENDER_PARTIAL |
966					   dnssec_opts);
967	if (result == ISC_R_NOSPACE) {
968		client->message->flags |= DNS_MESSAGEFLAG_TC;
969		goto renderend;
970	}
971	if (result != ISC_R_SUCCESS)
972		goto done;
973	result = dns_message_rendersection(client->message,
974					   DNS_SECTION_AUTHORITY,
975					   DNS_MESSAGERENDER_PARTIAL |
976					   dnssec_opts);
977	if (result == ISC_R_NOSPACE) {
978		client->message->flags |= DNS_MESSAGEFLAG_TC;
979		goto renderend;
980	}
981	if (result != ISC_R_SUCCESS)
982		goto done;
983	result = dns_message_rendersection(client->message,
984					   DNS_SECTION_ADDITIONAL,
985					   preferred_glue | dnssec_opts);
986	if (result != ISC_R_SUCCESS && result != ISC_R_NOSPACE)
987		goto done;
988 renderend:
989	result = dns_message_renderend(client->message);
990
991	if (result != ISC_R_SUCCESS)
992		goto done;
993
994	if (cleanup_cctx) {
995		dns_compress_invalidate(&cctx);
996		cleanup_cctx = ISC_FALSE;
997	}
998
999	if (TCP_CLIENT(client)) {
1000		isc_buffer_usedregion(&buffer, &r);
1001		isc_buffer_putuint16(&tcpbuffer, (isc_uint16_t) r.length);
1002		isc_buffer_add(&tcpbuffer, r.length);
1003		result = client_sendpkg(client, &tcpbuffer);
1004	} else
1005		result = client_sendpkg(client, &buffer);
1006	if (result == ISC_R_SUCCESS)
1007		return;
1008
1009 done:
1010	if (client->tcpbuf != NULL) {
1011		isc_mem_put(client->mctx, client->tcpbuf, TCP_BUFFER_SIZE);
1012		client->tcpbuf = NULL;
1013	}
1014
1015	if (cleanup_cctx)
1016		dns_compress_invalidate(&cctx);
1017
1018	ns_client_next(client, result);
1019}
1020
#if NS_CLIENT_DROPPORT
#define DROPPORT_NO		0
#define DROPPORT_REQUEST	1
#define DROPPORT_RESPONSE	2
/*%
 * Classify a port number to decide whether traffic involving it
 * should be dropped.
 *
 * Returns:
 * \li	DROPPORT_NO (0):	Don't drop.
 * \li	DROPPORT_REQUEST (1):	Drop request (echo, daytime, chargen,
 *				time ports).
 * \li	DROPPORT_RESPONSE (2):	Drop (error) response (kpasswd port).
 */
static int
ns_client_dropport(in_port_t port) {
	/* echo (7), daytime (13), chargen (19), time (37) */
	if (port == 7 || port == 13 || port == 19 || port == 37)
		return (DROPPORT_REQUEST);

	/* kpasswd (464) */
	if (port == 464)
		return (DROPPORT_RESPONSE);

	return (DROPPORT_NO);
}
#endif
1048
1049void
1050ns_client_error(ns_client_t *client, isc_result_t result) {
1051	dns_rcode_t rcode;
1052	dns_message_t *message;
1053
1054	REQUIRE(NS_CLIENT_VALID(client));
1055
1056	CTRACE("error");
1057
1058	message = client->message;
1059	rcode = dns_result_torcode(result);
1060
1061#if NS_CLIENT_DROPPORT
1062	/*
1063	 * Don't send FORMERR to ports on the drop port list.
1064	 */
1065	if (rcode == dns_rcode_formerr &&
1066	    ns_client_dropport(isc_sockaddr_getport(&client->peeraddr)) !=
1067	    DROPPORT_NO) {
1068		char buf[64];
1069		isc_buffer_t b;
1070
1071		isc_buffer_init(&b, buf, sizeof(buf) - 1);
1072		if (dns_rcode_totext(rcode, &b) != ISC_R_SUCCESS)
1073			isc_buffer_putstr(&b, "UNKNOWN RCODE");
1074		ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
1075			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10),
1076			      "dropped error (%.*s) response: suspicious port",
1077			      (int)isc_buffer_usedlength(&b), buf);
1078		ns_client_next(client, ISC_R_SUCCESS);
1079		return;
1080	}
1081#endif
1082
1083	/*
1084	 * Message may be an in-progress reply that we had trouble
1085	 * with, in which case QR will be set.  We need to clear QR before
1086	 * calling dns_message_reply() to avoid triggering an assertion.
1087	 */
1088	message->flags &= ~DNS_MESSAGEFLAG_QR;
1089	/*
1090	 * AA and AD shouldn't be set.
1091	 */
1092	message->flags &= ~(DNS_MESSAGEFLAG_AA | DNS_MESSAGEFLAG_AD);
1093	result = dns_message_reply(message, ISC_TRUE);
1094	if (result != ISC_R_SUCCESS) {
1095		/*
1096		 * It could be that we've got a query with a good header,
1097		 * but a bad question section, so we try again with
1098		 * want_question_section set to ISC_FALSE.
1099		 */
1100		result = dns_message_reply(message, ISC_FALSE);
1101		if (result != ISC_R_SUCCESS) {
1102			ns_client_next(client, result);
1103			return;
1104		}
1105	}
1106	message->rcode = rcode;
1107
1108	/*
1109	 * FORMERR loop avoidance:  If we sent a FORMERR message
1110	 * with the same ID to the same client less than two
1111	 * seconds ago, assume that we are in an infinite error
1112	 * packet dialog with a server for some protocol whose
1113	 * error responses look enough like DNS queries to
1114	 * elicit a FORMERR response.  Drop a packet to break
1115	 * the loop.
1116	 */
1117	if (rcode == dns_rcode_formerr) {
1118		if (isc_sockaddr_equal(&client->peeraddr,
1119				       &client->formerrcache.addr) &&
1120		    message->id == client->formerrcache.id &&
1121		    client->requesttime - client->formerrcache.time < 2) {
1122			/* Drop packet. */
1123			ns_client_log(client, NS_LOGCATEGORY_CLIENT,
1124				      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(1),
1125				      "possible error packet loop, "
1126				      "FORMERR dropped");
1127			ns_client_next(client, result);
1128			return;
1129		}
1130		client->formerrcache.addr = client->peeraddr;
1131		client->formerrcache.time = client->requesttime;
1132		client->formerrcache.id = message->id;
1133	}
1134	ns_client_send(client);
1135}
1136
1137static inline isc_result_t
1138client_addopt(ns_client_t *client) {
1139	dns_rdataset_t *rdataset;
1140	dns_rdatalist_t *rdatalist;
1141	dns_rdata_t *rdata;
1142	isc_result_t result;
1143	dns_view_t *view;
1144	dns_resolver_t *resolver;
1145	isc_uint16_t udpsize;
1146
1147	REQUIRE(client->opt == NULL);	/* XXXRTH free old. */
1148
1149	rdatalist = NULL;
1150	result = dns_message_gettemprdatalist(client->message, &rdatalist);
1151	if (result != ISC_R_SUCCESS)
1152		return (result);
1153	rdata = NULL;
1154	result = dns_message_gettemprdata(client->message, &rdata);
1155	if (result != ISC_R_SUCCESS)
1156		return (result);
1157	rdataset = NULL;
1158	result = dns_message_gettemprdataset(client->message, &rdataset);
1159	if (result != ISC_R_SUCCESS)
1160		return (result);
1161	dns_rdataset_init(rdataset);
1162
1163	rdatalist->type = dns_rdatatype_opt;
1164	rdatalist->covers = 0;
1165
1166	/*
1167	 * Set the maximum UDP buffer size.
1168	 */
1169	view = client->view;
1170	resolver = (view != NULL) ? view->resolver : NULL;
1171	if (resolver != NULL)
1172		udpsize = dns_resolver_getudpsize(resolver);
1173	else
1174		udpsize = ns_g_udpsize;
1175	rdatalist->rdclass = udpsize;
1176
1177	/*
1178	 * Set EXTENDED-RCODE, VERSION and Z to 0.
1179	 */
1180	rdatalist->ttl = (client->extflags & DNS_MESSAGEEXTFLAG_REPLYPRESERVE);
1181
1182	/*
1183	 * No EDNS options in the default case.
1184	 */
1185	rdata->data = NULL;
1186	rdata->length = 0;
1187	rdata->rdclass = rdatalist->rdclass;
1188	rdata->type = rdatalist->type;
1189	rdata->flags = 0;
1190
1191	ISC_LIST_INIT(rdatalist->rdata);
1192	ISC_LIST_APPEND(rdatalist->rdata, rdata, link);
1193	RUNTIME_CHECK(dns_rdatalist_tordataset(rdatalist, rdataset)
1194		      == ISC_R_SUCCESS);
1195
1196	client->opt = rdataset;
1197
1198	return (ISC_R_SUCCESS);
1199}
1200
1201static inline isc_boolean_t
1202allowed(isc_netaddr_t *addr, dns_name_t *signer, dns_acl_t *acl) {
1203	int match;
1204	isc_result_t result;
1205
1206	if (acl == NULL)
1207		return (ISC_TRUE);
1208	result = dns_acl_match(addr, signer, acl, &ns_g_server->aclenv,
1209			       &match, NULL);
1210	if (result == ISC_R_SUCCESS && match > 0)
1211		return (ISC_TRUE);
1212	return (ISC_FALSE);
1213}
1214
1215/*
1216 * Callback to see if a non-recursive query coming from 'srcaddr' to
1217 * 'destaddr', with optional key 'mykey' for class 'rdclass' would be
1218 * delivered to 'myview'.
1219 *
1220 * We run this unlocked as both the view list and the interface list
1221 * are updated when the approprite task has exclusivity.
1222 */
1223isc_boolean_t
1224ns_client_isself(dns_view_t *myview, dns_tsigkey_t *mykey,
1225		 isc_sockaddr_t *srcaddr, isc_sockaddr_t *dstaddr,
1226		 dns_rdataclass_t rdclass, void *arg)
1227{
1228	dns_view_t *view;
1229	dns_tsigkey_t *key = NULL;
1230	dns_name_t *tsig = NULL;
1231	isc_netaddr_t netsrc;
1232	isc_netaddr_t netdst;
1233
1234	UNUSED(arg);
1235
1236	if (!ns_interfacemgr_listeningon(ns_g_server->interfacemgr, dstaddr))
1237		return (ISC_FALSE);
1238
1239	isc_netaddr_fromsockaddr(&netsrc, srcaddr);
1240	isc_netaddr_fromsockaddr(&netdst, dstaddr);
1241
1242	for (view = ISC_LIST_HEAD(ns_g_server->viewlist);
1243	     view != NULL;
1244	     view = ISC_LIST_NEXT(view, link)) {
1245
1246		if (view->matchrecursiveonly)
1247			continue;
1248
1249		if (rdclass != view->rdclass)
1250			continue;
1251
1252		if (mykey != NULL) {
1253			isc_boolean_t match;
1254			isc_result_t result;
1255
1256			tsig = &mykey->name;
1257			result = dns_view_gettsig(view, tsig, &key);
1258			if (result != ISC_R_SUCCESS)
1259				continue;
1260			match = dst_key_compare(mykey->key, key->key);
1261			dns_tsigkey_detach(&key);
1262			if (!match)
1263				continue;
1264		}
1265
1266		if (allowed(&netsrc, tsig, view->matchclients) &&
1267		    allowed(&netdst, tsig, view->matchdestinations))
1268			break;
1269	}
1270	return (ISC_TF(view == myview));
1271}
1272
1273/*
1274 * Handle an incoming request event from the socket (UDP case)
1275 * or tcpmsg (TCP case).
1276 */
1277static void
1278client_request(isc_task_t *task, isc_event_t *event) {
1279	ns_client_t *client;
1280	isc_socketevent_t *sevent;
1281	isc_result_t result;
1282	isc_result_t sigresult = ISC_R_SUCCESS;
1283	isc_buffer_t *buffer;
1284	isc_buffer_t tbuffer;
1285	dns_view_t *view;
1286	dns_rdataset_t *opt;
1287	isc_boolean_t ra; 	/* Recursion available. */
1288	isc_netaddr_t netaddr;
1289	isc_netaddr_t destaddr;
1290	int match;
1291	dns_messageid_t id;
1292	unsigned int flags;
1293	isc_boolean_t notimp;
1294
1295	REQUIRE(event != NULL);
1296	client = event->ev_arg;
1297	REQUIRE(NS_CLIENT_VALID(client));
1298	REQUIRE(task == client->task);
1299
1300	INSIST(client->recursionquota == NULL);
1301
1302	INSIST(client->state ==
1303	       TCP_CLIENT(client) ?
1304	       NS_CLIENTSTATE_READING :
1305	       NS_CLIENTSTATE_READY);
1306
1307	ns_client_requests++;
1308
1309	if (event->ev_type == ISC_SOCKEVENT_RECVDONE) {
1310		INSIST(!TCP_CLIENT(client));
1311		sevent = (isc_socketevent_t *)event;
1312		REQUIRE(sevent == client->recvevent);
1313		isc_buffer_init(&tbuffer, sevent->region.base, sevent->n);
1314		isc_buffer_add(&tbuffer, sevent->n);
1315		buffer = &tbuffer;
1316		result = sevent->result;
1317		if (result == ISC_R_SUCCESS) {
1318			client->peeraddr = sevent->address;
1319			client->peeraddr_valid = ISC_TRUE;
1320		}
1321		if ((sevent->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0) {
1322			client->attributes |= NS_CLIENTATTR_PKTINFO;
1323			client->pktinfo = sevent->pktinfo;
1324		}
1325		if ((sevent->attributes & ISC_SOCKEVENTATTR_MULTICAST) != 0)
1326			client->attributes |= NS_CLIENTATTR_MULTICAST;
1327		client->nrecvs--;
1328	} else {
1329		INSIST(TCP_CLIENT(client));
1330		REQUIRE(event->ev_type == DNS_EVENT_TCPMSG);
1331		REQUIRE(event->ev_sender == &client->tcpmsg);
1332		buffer = &client->tcpmsg.buffer;
1333		result = client->tcpmsg.result;
1334		INSIST(client->nreads == 1);
1335		/*
1336		 * client->peeraddr was set when the connection was accepted.
1337		 */
1338		client->nreads--;
1339	}
1340
1341	if (exit_check(client))
1342		goto cleanup;
1343	client->state = client->newstate = NS_CLIENTSTATE_WORKING;
1344
1345	isc_task_getcurrenttime(task, &client->requesttime);
1346	client->now = client->requesttime;
1347
1348	if (result != ISC_R_SUCCESS) {
1349		if (TCP_CLIENT(client)) {
1350			ns_client_next(client, result);
1351		} else {
1352			if  (result != ISC_R_CANCELED)
1353				isc_log_write(ns_g_lctx, NS_LOGCATEGORY_CLIENT,
1354					      NS_LOGMODULE_CLIENT,
1355					      ISC_LOG_ERROR,
1356					      "UDP client handler shutting "
1357					      "down due to fatal receive "
1358					      "error: %s",
1359					      isc_result_totext(result));
1360			isc_task_shutdown(client->task);
1361		}
1362		goto cleanup;
1363	}
1364
1365	isc_netaddr_fromsockaddr(&netaddr, &client->peeraddr);
1366
1367#if NS_CLIENT_DROPPORT
1368	if (ns_client_dropport(isc_sockaddr_getport(&client->peeraddr)) ==
1369	    DROPPORT_REQUEST) {
1370		ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
1371			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10),
1372			      "dropped request: suspicious port");
1373		ns_client_next(client, ISC_R_SUCCESS);
1374		goto cleanup;
1375	}
1376#endif
1377
1378	ns_client_log(client, NS_LOGCATEGORY_CLIENT,
1379		      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3),
1380		      "%s request",
1381		      TCP_CLIENT(client) ? "TCP" : "UDP");
1382
1383	/*
1384	 * Check the blackhole ACL for UDP only, since TCP is done in
1385	 * client_newconn.
1386	 */
1387	if (!TCP_CLIENT(client)) {
1388
1389		if (ns_g_server->blackholeacl != NULL &&
1390		    dns_acl_match(&netaddr, NULL, ns_g_server->blackholeacl,
1391				  &ns_g_server->aclenv,
1392				  &match, NULL) == ISC_R_SUCCESS &&
1393		    match > 0)
1394		{
1395			ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
1396				      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10),
1397				      "blackholed UDP datagram");
1398			ns_client_next(client, ISC_R_SUCCESS);
1399			goto cleanup;
1400		}
1401	}
1402
1403	/*
1404	 * Silently drop multicast requests for the present.
1405	 * XXXMPA look at when/if mDNS spec stabilizes.
1406	 */
1407	if ((client->attributes & NS_CLIENTATTR_MULTICAST) != 0) {
1408		ns_client_log(client, NS_LOGCATEGORY_CLIENT,
1409			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(2),
1410			      "dropping multicast request");
1411		ns_client_next(client, DNS_R_REFUSED);
1412		goto cleanup;
1413	}
1414
1415	result = dns_message_peekheader(buffer, &id, &flags);
1416	if (result != ISC_R_SUCCESS) {
1417		/*
1418		 * There isn't enough header to determine whether
1419		 * this was a request or a response.  Drop it.
1420		 */
1421		ns_client_next(client, result);
1422		goto cleanup;
1423	}
1424
1425	/*
1426	 * The client object handles requests, not responses.
1427	 * If this is a UDP response, forward it to the dispatcher.
1428	 * If it's a TCP response, discard it here.
1429	 */
1430	if ((flags & DNS_MESSAGEFLAG_QR) != 0) {
1431		if (TCP_CLIENT(client)) {
1432			CTRACE("unexpected response");
1433			ns_client_next(client, DNS_R_FORMERR);
1434			goto cleanup;
1435		} else {
1436			dns_dispatch_importrecv(client->dispatch, event);
1437			ns_client_next(client, ISC_R_SUCCESS);
1438			goto cleanup;
1439		}
1440	}
1441
1442	/*
1443	 * It's a request.  Parse it.
1444	 */
1445	result = dns_message_parse(client->message, buffer, 0);
1446	if (result != ISC_R_SUCCESS) {
1447		/*
1448		 * Parsing the request failed.  Send a response
1449		 * (typically FORMERR or SERVFAIL).
1450		 */
1451		ns_client_error(client, result);
1452		goto cleanup;
1453	}
1454
1455	switch (client->message->opcode) {
1456	case dns_opcode_query:
1457	case dns_opcode_update:
1458	case dns_opcode_notify:
1459		notimp = ISC_FALSE;
1460		break;
1461	case dns_opcode_iquery:
1462	default:
1463		notimp = ISC_TRUE;
1464		break;
1465	}
1466
1467	client->message->rcode = dns_rcode_noerror;
1468
1469	/* RFC1123 section 6.1.3.2 */
1470	if ((client->attributes & NS_CLIENTATTR_MULTICAST) != 0)
1471		client->message->flags &= ~DNS_MESSAGEFLAG_RD;
1472
1473	/*
1474	 * Deal with EDNS.
1475	 */
1476	opt = dns_message_getopt(client->message);
1477	if (opt != NULL) {
1478		/*
1479		 * Set the client's UDP buffer size.
1480		 */
1481		client->udpsize = opt->rdclass;
1482
1483		/*
1484		 * If the requested UDP buffer size is less than 512,
1485		 * ignore it and use 512.
1486		 */
1487		if (client->udpsize < 512)
1488			client->udpsize = 512;
1489
1490		/*
1491		 * Get the flags out of the OPT record.
1492		 */
1493		client->extflags = (isc_uint16_t)(opt->ttl & 0xFFFF);
1494
1495		/*
1496		 * Do we understand this version of EDNS?
1497		 *
1498		 * XXXRTH need library support for this!
1499		 */
1500		client->ednsversion = (opt->ttl & 0x00FF0000) >> 16;
1501		if (client->ednsversion > 0) {
1502			result = client_addopt(client);
1503			if (result == ISC_R_SUCCESS)
1504				result = DNS_R_BADVERS;
1505			ns_client_error(client, result);
1506			goto cleanup;
1507		}
1508		/*
1509		 * Create an OPT for our reply.
1510		 */
1511		result = client_addopt(client);
1512		if (result != ISC_R_SUCCESS) {
1513			ns_client_error(client, result);
1514			goto cleanup;
1515		}
1516	}
1517
1518	if (client->message->rdclass == 0) {
1519		ns_client_log(client, NS_LOGCATEGORY_CLIENT,
1520			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(1),
1521			      "message class could not be determined");
1522		ns_client_dumpmessage(client,
1523				      "message class could not be determined");
1524		ns_client_error(client, notimp ? DNS_R_NOTIMP : DNS_R_FORMERR);
1525		goto cleanup;
1526	}
1527
1528	/*
1529	 * Determine the destination address.  If the receiving interface is
1530	 * bound to a specific address, we simply use it regardless of the
1531	 * address family.  All IPv4 queries should fall into this case.
1532	 * Otherwise, if this is a TCP query, get the address from the
1533	 * receiving socket (this needs a system call and can be heavy).
1534	 * For IPv6 UDP queries, we get this from the pktinfo structure (if
1535	 * supported).
1536	 * If all the attempts fail (this can happen due to memory shortage,
1537	 * etc), we regard this as an error for safety.
1538	 */
1539	if ((client->interface->flags & NS_INTERFACEFLAG_ANYADDR) == 0)
1540		isc_netaddr_fromsockaddr(&destaddr, &client->interface->addr);
1541	else {
1542		result = ISC_R_FAILURE;
1543
1544		if (TCP_CLIENT(client)) {
1545			isc_sockaddr_t destsockaddr;
1546
1547			result = isc_socket_getsockname(client->tcpsocket,
1548							&destsockaddr);
1549			if (result == ISC_R_SUCCESS)
1550				isc_netaddr_fromsockaddr(&destaddr,
1551							 &destsockaddr);
1552		}
1553		if (result != ISC_R_SUCCESS &&
1554		    client->interface->addr.type.sa.sa_family == AF_INET6 &&
1555		    (client->attributes & NS_CLIENTATTR_PKTINFO) != 0) {
1556			isc_uint32_t zone = 0;
1557
1558			/*
1559			 * XXXJT technically, we should convert the receiving
1560			 * interface ID to a proper scope zone ID.  However,
1561			 * due to the fact there is no standard API for this,
1562			 * we only handle link-local addresses and use the
1563			 * interface index as link ID.  Despite the assumption,
1564			 * it should cover most typical cases.
1565			 */
1566			if (IN6_IS_ADDR_LINKLOCAL(&client->pktinfo.ipi6_addr))
1567				zone = (isc_uint32_t)client->pktinfo.ipi6_ifindex;
1568
1569			isc_netaddr_fromin6(&destaddr,
1570					    &client->pktinfo.ipi6_addr);
1571			isc_netaddr_setzone(&destaddr, zone);
1572			result = ISC_R_SUCCESS;
1573		}
1574		if (result != ISC_R_SUCCESS) {
1575			UNEXPECTED_ERROR(__FILE__, __LINE__,
1576					 "failed to get request's "
1577					 "destination: %s",
1578					 isc_result_totext(result));
1579			ns_client_next(client, ISC_R_SUCCESS);
1580			goto cleanup;
1581		}
1582	}
1583
1584	/*
1585	 * Find a view that matches the client's source address.
1586	 */
1587	for (view = ISC_LIST_HEAD(ns_g_server->viewlist);
1588	     view != NULL;
1589	     view = ISC_LIST_NEXT(view, link)) {
1590		if (client->message->rdclass == view->rdclass ||
1591		    client->message->rdclass == dns_rdataclass_any)
1592		{
1593			dns_name_t *tsig = NULL;
1594			sigresult = dns_message_rechecksig(client->message,
1595							   view);
1596			if (sigresult == ISC_R_SUCCESS)
1597				tsig = client->message->tsigname;
1598
1599			if (allowed(&netaddr, tsig, view->matchclients) &&
1600			    allowed(&destaddr, tsig, view->matchdestinations) &&
1601			    !((client->message->flags & DNS_MESSAGEFLAG_RD)
1602			      == 0 && view->matchrecursiveonly))
1603			{
1604				dns_view_attach(view, &client->view);
1605				break;
1606			}
1607		}
1608	}
1609
1610	if (view == NULL) {
1611		char classname[DNS_RDATACLASS_FORMATSIZE];
1612
1613		/*
1614		 * Do a dummy TSIG verification attempt so that the
1615		 * response will have a TSIG if the query did, as
1616		 * required by RFC2845.
1617		 */
1618		isc_buffer_t b;
1619		isc_region_t *r;
1620
1621		dns_message_resetsig(client->message);
1622
1623		r = dns_message_getrawmessage(client->message);
1624		isc_buffer_init(&b, r->base, r->length);
1625		isc_buffer_add(&b, r->length);
1626		(void)dns_tsig_verify(&b, client->message, NULL, NULL);
1627
1628		dns_rdataclass_format(client->message->rdclass, classname,
1629				      sizeof(classname));
1630		ns_client_log(client, NS_LOGCATEGORY_CLIENT,
1631			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(1),
1632			      "no matching view in class '%s'", classname);
1633		ns_client_dumpmessage(client, "no matching view in class");
1634		ns_client_error(client, notimp ? DNS_R_NOTIMP : DNS_R_REFUSED);
1635		goto cleanup;
1636	}
1637
1638	ns_client_log(client, NS_LOGCATEGORY_CLIENT,
1639		      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(5),
1640		      "using view '%s'", view->name);
1641
1642	/*
1643	 * Check for a signature.  We log bad signatures regardless of
1644	 * whether they ultimately cause the request to be rejected or
1645	 * not.  We do not log the lack of a signature unless we are
1646	 * debugging.
1647	 */
1648	client->signer = NULL;
1649	dns_name_init(&client->signername, NULL);
1650	result = dns_message_signer(client->message, &client->signername);
1651	if (result == ISC_R_SUCCESS) {
1652		ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
1653			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3),
1654			      "request has valid signature");
1655		client->signer = &client->signername;
1656	} else if (result == ISC_R_NOTFOUND) {
1657		ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
1658			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3),
1659			      "request is not signed");
1660	} else if (result == DNS_R_NOIDENTITY) {
1661		ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
1662			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3),
1663			      "request is signed by a nonauthoritative key");
1664	} else {
1665		char tsigrcode[64];
1666		isc_buffer_t b;
1667		dns_name_t *name = NULL;
1668		dns_rcode_t status;
1669		isc_result_t tresult;
1670
1671		/* There is a signature, but it is bad. */
1672		if (dns_message_gettsig(client->message, &name) != NULL) {
1673			char namebuf[DNS_NAME_FORMATSIZE];
1674			dns_name_format(name, namebuf, sizeof(namebuf));
1675			status = client->message->tsigstatus;
1676			isc_buffer_init(&b, tsigrcode, sizeof(tsigrcode) - 1);
1677			tresult = dns_tsigrcode_totext(status, &b);
1678			INSIST(tresult == ISC_R_SUCCESS);
1679			tsigrcode[isc_buffer_usedlength(&b)] = '\0';
1680			ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
1681				      NS_LOGMODULE_CLIENT, ISC_LOG_ERROR,
1682				      "request has invalid signature: "
1683				      "TSIG %s: %s (%s)", namebuf,
1684				      isc_result_totext(result), tsigrcode);
1685		} else {
1686			status = client->message->sig0status;
1687			isc_buffer_init(&b, tsigrcode, sizeof(tsigrcode) - 1);
1688			tresult = dns_tsigrcode_totext(status, &b);
1689			INSIST(tresult == ISC_R_SUCCESS);
1690			tsigrcode[isc_buffer_usedlength(&b)] = '\0';
1691			ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
1692				      NS_LOGMODULE_CLIENT, ISC_LOG_ERROR,
1693				      "request has invalid signature: %s (%s)",
1694				      isc_result_totext(result), tsigrcode);
1695		}
1696		/*
1697		 * Accept update messages signed by unknown keys so that
1698		 * update forwarding works transparently through slaves
1699		 * that don't have all the same keys as the master.
1700		 */
1701		if (!(client->message->tsigstatus == dns_tsigerror_badkey &&
1702		      client->message->opcode == dns_opcode_update)) {
1703			ns_client_error(client, sigresult);
1704			goto cleanup;
1705		}
1706	}
1707
1708	/*
1709	 * Decide whether recursive service is available to this client.
1710	 * We do this here rather than in the query code so that we can
1711	 * set the RA bit correctly on all kinds of responses, not just
1712	 * responses to ordinary queries.  Note if you can't query the
1713	 * cache there is no point in setting RA.
1714	 */
1715	ra = ISC_FALSE;
1716	if (client->view->resolver != NULL &&
1717	    client->view->recursion == ISC_TRUE &&
1718	    ns_client_checkaclsilent(client, client->view->recursionacl,
1719				     ISC_TRUE) == ISC_R_SUCCESS &&
1720	    ns_client_checkaclsilent(client, client->view->queryacl,
1721				     ISC_TRUE) == ISC_R_SUCCESS)
1722		ra = ISC_TRUE;
1723
1724	if (ra == ISC_TRUE)
1725		client->attributes |= NS_CLIENTATTR_RA;
1726
1727	ns_client_log(client, DNS_LOGCATEGORY_SECURITY, NS_LOGMODULE_CLIENT,
1728		      ISC_LOG_DEBUG(3), ra ? "recursion available" :
1729		      			     "recursion not available");
1730
1731	/*
1732	 * Adjust maximum UDP response size for this client.
1733	 */
1734	if (client->udpsize > 512) {
1735		dns_peer_t *peer = NULL;
1736		isc_uint16_t udpsize = view->maxudp;
1737		(void) dns_peerlist_peerbyaddr(view->peers, &netaddr, &peer);
1738		if (peer != NULL)
1739			dns_peer_getmaxudp(peer, &udpsize);
1740		if (client->udpsize > udpsize)
1741			client->udpsize = udpsize;
1742	}
1743
1744	/*
1745	 * Dispatch the request.
1746	 */
1747	switch (client->message->opcode) {
1748	case dns_opcode_query:
1749		CTRACE("query");
1750		ns_query_start(client);
1751		break;
1752	case dns_opcode_update:
1753		CTRACE("update");
1754		ns_client_settimeout(client, 60);
1755		ns_update_start(client, sigresult);
1756		break;
1757	case dns_opcode_notify:
1758		CTRACE("notify");
1759		ns_client_settimeout(client, 60);
1760		ns_notify_start(client);
1761		break;
1762	case dns_opcode_iquery:
1763		CTRACE("iquery");
1764		ns_client_error(client, DNS_R_NOTIMP);
1765		break;
1766	default:
1767		CTRACE("unknown opcode");
1768		ns_client_error(client, DNS_R_NOTIMP);
1769	}
1770
1771 cleanup:
1772	return;
1773}
1774
1775static void
1776client_timeout(isc_task_t *task, isc_event_t *event) {
1777	ns_client_t *client;
1778
1779	REQUIRE(event != NULL);
1780	REQUIRE(event->ev_type == ISC_TIMEREVENT_LIFE ||
1781		event->ev_type == ISC_TIMEREVENT_IDLE);
1782	client = event->ev_arg;
1783	REQUIRE(NS_CLIENT_VALID(client));
1784	REQUIRE(task == client->task);
1785	REQUIRE(client->timer != NULL);
1786
1787	UNUSED(task);
1788
1789	CTRACE("timeout");
1790
1791	isc_event_free(&event);
1792
1793	if (client->shutdown != NULL) {
1794		(client->shutdown)(client->shutdown_arg, ISC_R_TIMEDOUT);
1795		client->shutdown = NULL;
1796		client->shutdown_arg = NULL;
1797	}
1798
1799	if (client->newstate > NS_CLIENTSTATE_READY)
1800		client->newstate = NS_CLIENTSTATE_READY;
1801	(void)exit_check(client);
1802}
1803
1804static isc_result_t
1805get_clientmctx(ns_clientmgr_t *manager, isc_mem_t **mctxp) {
1806	isc_mem_t *clientmctx;
1807#if NMCTXS > 0
1808	isc_result_t result;
1809#endif
1810
1811	/*
1812	 * Caller must be holding the manager lock.
1813	 */
1814#if NMCTXS > 0
1815	INSIST(manager->nextmctx < NMCTXS);
1816	clientmctx = manager->mctxpool[manager->nextmctx];
1817	if (clientmctx == NULL) {
1818		result = isc_mem_create(0, 0, &clientmctx);
1819		if (result != ISC_R_SUCCESS)
1820			return (result);
1821
1822		manager->mctxpool[manager->nextmctx] = clientmctx;
1823		manager->nextmctx++;
1824		if (manager->nextmctx == NMCTXS)
1825			manager->nextmctx = 0;
1826	}
1827#else
1828	clientmctx = manager->mctx;
1829#endif
1830
1831	isc_mem_attach(clientmctx, mctxp);
1832
1833	return (ISC_R_SUCCESS);
1834}
1835
1836static isc_result_t
1837client_create(ns_clientmgr_t *manager, ns_client_t **clientp) {
1838	ns_client_t *client;
1839	isc_result_t result;
1840	isc_mem_t *mctx = NULL;
1841
1842	/*
1843	 * Caller must be holding the manager lock.
1844	 *
1845	 * Note: creating a client does not add the client to the
1846	 * manager's client list or set the client's manager pointer.
1847	 * The caller is responsible for that.
1848	 */
1849
1850	REQUIRE(clientp != NULL && *clientp == NULL);
1851
1852	result = get_clientmctx(manager, &mctx);
1853	if (result != ISC_R_SUCCESS)
1854		return (result);
1855
1856	client = isc_mem_get(mctx, sizeof(*client));
1857	if (client == NULL) {
1858		isc_mem_detach(&mctx);
1859		return (ISC_R_NOMEMORY);
1860	}
1861	client->mctx = mctx;
1862
1863	client->task = NULL;
1864	result = isc_task_create(manager->taskmgr, 0, &client->task);
1865	if (result != ISC_R_SUCCESS)
1866		goto cleanup_client;
1867	isc_task_setname(client->task, "client", client);
1868
1869	client->timer = NULL;
1870	result = isc_timer_create(manager->timermgr, isc_timertype_inactive,
1871				  NULL, NULL, client->task, client_timeout,
1872				  client, &client->timer);
1873	if (result != ISC_R_SUCCESS)
1874		goto cleanup_task;
1875	client->timerset = ISC_FALSE;
1876
1877	client->message = NULL;
1878	result = dns_message_create(client->mctx, DNS_MESSAGE_INTENTPARSE,
1879				    &client->message);
1880	if (result != ISC_R_SUCCESS)
1881		goto cleanup_timer;
1882
1883	/* XXXRTH  Hardwired constants */
1884
1885	client->sendevent = (isc_socketevent_t *)
1886			    isc_event_allocate(client->mctx, client,
1887					       ISC_SOCKEVENT_SENDDONE,
1888					       client_senddone, client,
1889					       sizeof(isc_socketevent_t));
1890	if (client->sendevent == NULL) {
1891		result = ISC_R_NOMEMORY;
1892		goto cleanup_message;
1893	}
1894
1895	client->recvbuf = isc_mem_get(client->mctx, RECV_BUFFER_SIZE);
1896	if  (client->recvbuf == NULL) {
1897		result = ISC_R_NOMEMORY;
1898		goto cleanup_sendevent;
1899	}
1900
1901	client->recvevent = (isc_socketevent_t *)
1902			    isc_event_allocate(client->mctx, client,
1903					       ISC_SOCKEVENT_RECVDONE,
1904					       client_request, client,
1905					       sizeof(isc_socketevent_t));
1906	if (client->recvevent == NULL) {
1907		result = ISC_R_NOMEMORY;
1908		goto cleanup_recvbuf;
1909	}
1910
1911	client->magic = NS_CLIENT_MAGIC;
1912	client->manager = NULL;
1913	client->state = NS_CLIENTSTATE_INACTIVE;
1914	client->newstate = NS_CLIENTSTATE_MAX;
1915	client->naccepts = 0;
1916	client->nreads = 0;
1917	client->nsends = 0;
1918	client->nrecvs = 0;
1919	client->nupdates = 0;
1920	client->nctls = 0;
1921	client->references = 0;
1922	client->attributes = 0;
1923	client->view = NULL;
1924	client->dispatch = NULL;
1925	client->udpsocket = NULL;
1926	client->tcplistener = NULL;
1927	client->tcpsocket = NULL;
1928	client->tcpmsg_valid = ISC_FALSE;
1929	client->tcpbuf = NULL;
1930	client->opt = NULL;
1931	client->udpsize = 512;
1932	client->extflags = 0;
1933	client->ednsversion = -1;
1934	client->next = NULL;
1935	client->shutdown = NULL;
1936	client->shutdown_arg = NULL;
1937	dns_name_init(&client->signername, NULL);
1938	client->mortal = ISC_FALSE;
1939	client->tcpquota = NULL;
1940	client->recursionquota = NULL;
1941	client->interface = NULL;
1942	client->peeraddr_valid = ISC_FALSE;
1943	ISC_EVENT_INIT(&client->ctlevent, sizeof(client->ctlevent), 0, NULL,
1944		       NS_EVENT_CLIENTCONTROL, client_start, client, client,
1945		       NULL, NULL);
1946	/*
1947	 * Initialize FORMERR cache to sentinel value that will not match
1948	 * any actual FORMERR response.
1949	 */
1950	isc_sockaddr_any(&client->formerrcache.addr);
1951	client->formerrcache.time = 0;
1952	client->formerrcache.id = 0;
1953	ISC_LINK_INIT(client, link);
1954	client->list = NULL;
1955
1956	/*
1957	 * We call the init routines for the various kinds of client here,
1958	 * after we have created an otherwise valid client, because some
1959	 * of them call routines that REQUIRE(NS_CLIENT_VALID(client)).
1960	 */
1961	result = ns_query_init(client);
1962	if (result != ISC_R_SUCCESS)
1963		goto cleanup_recvevent;
1964
1965	result = isc_task_onshutdown(client->task, client_shutdown, client);
1966	if (result != ISC_R_SUCCESS)
1967		goto cleanup_query;
1968
1969	CTRACE("create");
1970
1971	*clientp = client;
1972
1973	return (ISC_R_SUCCESS);
1974
1975 cleanup_query:
1976	ns_query_free(client);
1977
1978 cleanup_recvevent:
1979	isc_event_free((isc_event_t **)&client->recvevent);
1980
1981 cleanup_recvbuf:
1982	isc_mem_put(client->mctx, client->recvbuf, RECV_BUFFER_SIZE);
1983
1984 cleanup_sendevent:
1985	isc_event_free((isc_event_t **)&client->sendevent);
1986
1987	client->magic = 0;
1988
1989 cleanup_message:
1990	dns_message_destroy(&client->message);
1991
1992 cleanup_timer:
1993	isc_timer_detach(&client->timer);
1994
1995 cleanup_task:
1996	isc_task_detach(&client->task);
1997
1998 cleanup_client:
1999	isc_mem_putanddetach(&client->mctx, client, sizeof(*client));
2000
2001	return (result);
2002}
2003
2004static void
2005client_read(ns_client_t *client) {
2006	isc_result_t result;
2007
2008	CTRACE("read");
2009
2010	result = dns_tcpmsg_readmessage(&client->tcpmsg, client->task,
2011					client_request, client);
2012	if (result != ISC_R_SUCCESS)
2013		goto fail;
2014
2015	/*
2016	 * Set a timeout to limit the amount of time we will wait
2017	 * for a request on this TCP connection.
2018	 */
2019	ns_client_settimeout(client, 30);
2020
2021	client->state = client->newstate = NS_CLIENTSTATE_READING;
2022	INSIST(client->nreads == 0);
2023	INSIST(client->recursionquota == NULL);
2024	client->nreads++;
2025
2026	return;
2027 fail:
2028	ns_client_next(client, result);
2029}
2030
2031static void
2032client_newconn(isc_task_t *task, isc_event_t *event) {
2033	ns_client_t *client = event->ev_arg;
2034	isc_socket_newconnev_t *nevent = (isc_socket_newconnev_t *)event;
2035	isc_result_t result;
2036
2037	REQUIRE(event->ev_type == ISC_SOCKEVENT_NEWCONN);
2038	REQUIRE(NS_CLIENT_VALID(client));
2039	REQUIRE(client->task == task);
2040
2041	UNUSED(task);
2042
2043	INSIST(client->state == NS_CLIENTSTATE_READY);
2044
2045	INSIST(client->naccepts == 1);
2046	client->naccepts--;
2047
2048	LOCK(&client->interface->lock);
2049	INSIST(client->interface->ntcpcurrent > 0);
2050	client->interface->ntcpcurrent--;
2051	UNLOCK(&client->interface->lock);
2052
2053	/*
2054	 * We must take ownership of the new socket before the exit
2055	 * check to make sure it gets destroyed if we decide to exit.
2056	 */
2057	if (nevent->result == ISC_R_SUCCESS) {
2058		client->tcpsocket = nevent->newsocket;
2059		client->state = NS_CLIENTSTATE_READING;
2060		INSIST(client->recursionquota == NULL);
2061
2062		(void)isc_socket_getpeername(client->tcpsocket,
2063					     &client->peeraddr);
2064		client->peeraddr_valid = ISC_TRUE;
2065		ns_client_log(client, NS_LOGCATEGORY_CLIENT,
2066			   NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3),
2067			   "new TCP connection");
2068	} else {
2069		/*
2070		 * XXXRTH  What should we do?  We're trying to accept but
2071		 *         it didn't work.  If we just give up, then TCP
2072		 *	   service may eventually stop.
2073		 *
2074		 *	   For now, we just go idle.
2075		 *
2076		 *	   Going idle is probably the right thing if the
2077		 *	   I/O was canceled.
2078		 */
2079		ns_client_log(client, NS_LOGCATEGORY_CLIENT,
2080			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3),
2081			      "accept failed: %s",
2082			      isc_result_totext(nevent->result));
2083	}
2084
2085	if (exit_check(client))
2086		goto freeevent;
2087
2088	if (nevent->result == ISC_R_SUCCESS) {
2089		int match;
2090		isc_netaddr_t netaddr;
2091
2092		isc_netaddr_fromsockaddr(&netaddr, &client->peeraddr);
2093
2094		if (ns_g_server->blackholeacl != NULL &&
2095		    dns_acl_match(&netaddr, NULL,
2096			    	  ns_g_server->blackholeacl,
2097				  &ns_g_server->aclenv,
2098				  &match, NULL) == ISC_R_SUCCESS &&
2099		    match > 0)
2100		{
2101			ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
2102				      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10),
2103				      "blackholed connection attempt");
2104			client->newstate = NS_CLIENTSTATE_READY;
2105			(void)exit_check(client);
2106			goto freeevent;
2107		}
2108
2109		INSIST(client->tcpmsg_valid == ISC_FALSE);
2110		dns_tcpmsg_init(client->mctx, client->tcpsocket,
2111				&client->tcpmsg);
2112		client->tcpmsg_valid = ISC_TRUE;
2113
2114		/*
2115		 * Let a new client take our place immediately, before
2116		 * we wait for a request packet.  If we don't,
2117		 * telnetting to port 53 (once per CPU) will
2118		 * deny service to legititmate TCP clients.
2119		 */
2120		result = isc_quota_attach(&ns_g_server->tcpquota,
2121					  &client->tcpquota);
2122		if (result == ISC_R_SUCCESS)
2123			result = ns_client_replace(client);
2124		if (result != ISC_R_SUCCESS) {
2125			ns_client_log(client, NS_LOGCATEGORY_CLIENT,
2126				      NS_LOGMODULE_CLIENT, ISC_LOG_WARNING,
2127				      "no more TCP clients: %s",
2128				      isc_result_totext(result));
2129		}
2130
2131		client_read(client);
2132	}
2133
2134 freeevent:
2135	isc_event_free(&event);
2136}
2137
2138static void
2139client_accept(ns_client_t *client) {
2140	isc_result_t result;
2141
2142	CTRACE("accept");
2143
2144	result = isc_socket_accept(client->tcplistener, client->task,
2145				   client_newconn, client);
2146	if (result != ISC_R_SUCCESS) {
2147		UNEXPECTED_ERROR(__FILE__, __LINE__,
2148				 "isc_socket_accept() failed: %s",
2149				 isc_result_totext(result));
2150		/*
2151		 * XXXRTH  What should we do?  We're trying to accept but
2152		 *         it didn't work.  If we just give up, then TCP
2153		 *	   service may eventually stop.
2154		 *
2155		 *	   For now, we just go idle.
2156		 */
2157		return;
2158	}
2159	INSIST(client->naccepts == 0);
2160	client->naccepts++;
2161	LOCK(&client->interface->lock);
2162	client->interface->ntcpcurrent++;
2163	UNLOCK(&client->interface->lock);
2164}
2165
2166static void
2167client_udprecv(ns_client_t *client) {
2168	isc_result_t result;
2169	isc_region_t r;
2170
2171	CTRACE("udprecv");
2172
2173	r.base = client->recvbuf;
2174	r.length = RECV_BUFFER_SIZE;
2175	result = isc_socket_recv2(client->udpsocket, &r, 1,
2176				  client->task, client->recvevent, 0);
2177	if (result != ISC_R_SUCCESS) {
2178		UNEXPECTED_ERROR(__FILE__, __LINE__,
2179				 "isc_socket_recv2() failed: %s",
2180				 isc_result_totext(result));
2181		/*
2182		 * This cannot happen in the current implementation, since
2183		 * isc_socket_recv2() cannot fail if flags == 0.
2184		 *
2185		 * If this does fail, we just go idle.
2186		 */
2187		return;
2188	}
2189	INSIST(client->nrecvs == 0);
2190	client->nrecvs++;
2191}
2192
2193void
2194ns_client_attach(ns_client_t *source, ns_client_t **targetp) {
2195	REQUIRE(NS_CLIENT_VALID(source));
2196	REQUIRE(targetp != NULL && *targetp == NULL);
2197
2198	source->references++;
2199	ns_client_log(source, NS_LOGCATEGORY_CLIENT,
2200		      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10),
2201		      "ns_client_attach: ref = %d", source->references);
2202	*targetp = source;
2203}
2204
2205void
2206ns_client_detach(ns_client_t **clientp) {
2207	ns_client_t *client = *clientp;
2208
2209	client->references--;
2210	INSIST(client->references >= 0);
2211	*clientp = NULL;
2212	ns_client_log(client, NS_LOGCATEGORY_CLIENT,
2213		      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(10),
2214		      "ns_client_detach: ref = %d", client->references);
2215	(void)exit_check(client);
2216}
2217
2218isc_boolean_t
2219ns_client_shuttingdown(ns_client_t *client) {
2220	return (ISC_TF(client->newstate == NS_CLIENTSTATE_FREED));
2221}
2222
2223isc_result_t
2224ns_client_replace(ns_client_t *client) {
2225	isc_result_t result;
2226
2227	CTRACE("replace");
2228
2229	result = ns_clientmgr_createclients(client->manager,
2230					    1, client->interface,
2231					    (TCP_CLIENT(client) ?
2232					     ISC_TRUE : ISC_FALSE));
2233	if (result != ISC_R_SUCCESS)
2234		return (result);
2235
2236	/*
2237	 * The responsibility for listening for new requests is hereby
2238	 * transferred to the new client.  Therefore, the old client
2239	 * should refrain from listening for any more requests.
2240	 */
2241	client->mortal = ISC_TRUE;
2242
2243	return (ISC_R_SUCCESS);
2244}
2245
2246/***
2247 *** Client Manager
2248 ***/
2249
2250static void
2251clientmgr_destroy(ns_clientmgr_t *manager) {
2252#if NMCTXS > 0
2253	int i;
2254#endif
2255
2256	REQUIRE(ISC_LIST_EMPTY(manager->active));
2257	REQUIRE(ISC_LIST_EMPTY(manager->inactive));
2258	REQUIRE(ISC_LIST_EMPTY(manager->recursing));
2259
2260	MTRACE("clientmgr_destroy");
2261
2262#if NMCTXS > 0
2263	for (i = 0; i < NMCTXS; i++) {
2264		if (manager->mctxpool[i] != NULL)
2265			isc_mem_detach(&manager->mctxpool[i]);
2266	}
2267#endif
2268
2269	DESTROYLOCK(&manager->lock);
2270	manager->magic = 0;
2271	isc_mem_put(manager->mctx, manager, sizeof(*manager));
2272}
2273
2274isc_result_t
2275ns_clientmgr_create(isc_mem_t *mctx, isc_taskmgr_t *taskmgr,
2276		    isc_timermgr_t *timermgr, ns_clientmgr_t **managerp)
2277{
2278	ns_clientmgr_t *manager;
2279	isc_result_t result;
2280#if NMCTXS > 0
2281	int i;
2282#endif
2283
2284	manager = isc_mem_get(mctx, sizeof(*manager));
2285	if (manager == NULL)
2286		return (ISC_R_NOMEMORY);
2287
2288	result = isc_mutex_init(&manager->lock);
2289	if (result != ISC_R_SUCCESS)
2290		goto cleanup_manager;
2291
2292	manager->mctx = mctx;
2293	manager->taskmgr = taskmgr;
2294	manager->timermgr = timermgr;
2295	manager->exiting = ISC_FALSE;
2296	ISC_LIST_INIT(manager->active);
2297	ISC_LIST_INIT(manager->inactive);
2298	ISC_LIST_INIT(manager->recursing);
2299#if NMCTXS > 0
2300	manager->nextmctx = 0;
2301	for (i = 0; i < NMCTXS; i++)
2302		manager->mctxpool[i] = NULL; /* will be created on-demand */
2303#endif
2304	manager->magic = MANAGER_MAGIC;
2305
2306	MTRACE("create");
2307
2308	*managerp = manager;
2309
2310	return (ISC_R_SUCCESS);
2311
2312 cleanup_manager:
2313	isc_mem_put(manager->mctx, manager, sizeof(*manager));
2314
2315	return (result);
2316}
2317
2318void
2319ns_clientmgr_destroy(ns_clientmgr_t **managerp) {
2320	ns_clientmgr_t *manager;
2321	ns_client_t *client;
2322	isc_boolean_t need_destroy = ISC_FALSE;
2323
2324	REQUIRE(managerp != NULL);
2325	manager = *managerp;
2326	REQUIRE(VALID_MANAGER(manager));
2327
2328	MTRACE("destroy");
2329
2330	LOCK(&manager->lock);
2331
2332	manager->exiting = ISC_TRUE;
2333
2334	for (client = ISC_LIST_HEAD(manager->recursing);
2335	     client != NULL;
2336	     client = ISC_LIST_NEXT(client, link))
2337		isc_task_shutdown(client->task);
2338
2339	for (client = ISC_LIST_HEAD(manager->active);
2340	     client != NULL;
2341	     client = ISC_LIST_NEXT(client, link))
2342		isc_task_shutdown(client->task);
2343
2344	for (client = ISC_LIST_HEAD(manager->inactive);
2345	     client != NULL;
2346	     client = ISC_LIST_NEXT(client, link))
2347		isc_task_shutdown(client->task);
2348
2349	if (ISC_LIST_EMPTY(manager->active) &&
2350	    ISC_LIST_EMPTY(manager->inactive) &&
2351	    ISC_LIST_EMPTY(manager->recursing))
2352		need_destroy = ISC_TRUE;
2353
2354	UNLOCK(&manager->lock);
2355
2356	if (need_destroy)
2357		clientmgr_destroy(manager);
2358
2359	*managerp = NULL;
2360}
2361
2362isc_result_t
2363ns_clientmgr_createclients(ns_clientmgr_t *manager, unsigned int n,
2364			   ns_interface_t *ifp, isc_boolean_t tcp)
2365{
2366	isc_result_t result = ISC_R_SUCCESS;
2367	unsigned int i;
2368	ns_client_t *client;
2369
2370	REQUIRE(VALID_MANAGER(manager));
2371	REQUIRE(n > 0);
2372
2373	MTRACE("createclients");
2374
2375	/*
2376	 * We MUST lock the manager lock for the entire client creation
2377	 * process.  If we didn't do this, then a client could get a
2378	 * shutdown event and disappear out from under us.
2379	 */
2380
2381	LOCK(&manager->lock);
2382
2383	for (i = 0; i < n; i++) {
2384		isc_event_t *ev;
2385		/*
2386		 * Allocate a client.  First try to get a recycled one;
2387		 * if that fails, make a new one.
2388		 */
2389		client = ISC_LIST_HEAD(manager->inactive);
2390		if (client != NULL) {
2391			MTRACE("recycle");
2392			ISC_LIST_UNLINK(manager->inactive, client, link);
2393			client->list = NULL;
2394		} else {
2395			MTRACE("create new");
2396			result = client_create(manager, &client);
2397			if (result != ISC_R_SUCCESS)
2398				break;
2399		}
2400
2401		ns_interface_attach(ifp, &client->interface);
2402		client->state = NS_CLIENTSTATE_READY;
2403		INSIST(client->recursionquota == NULL);
2404
2405		if (tcp) {
2406			client->attributes |= NS_CLIENTATTR_TCP;
2407			isc_socket_attach(ifp->tcpsocket,
2408					  &client->tcplistener);
2409		} else {
2410			isc_socket_t *sock;
2411
2412			dns_dispatch_attach(ifp->udpdispatch,
2413					    &client->dispatch);
2414			sock = dns_dispatch_getsocket(client->dispatch);
2415			isc_socket_attach(sock, &client->udpsocket);
2416		}
2417		client->manager = manager;
2418		ISC_LIST_APPEND(manager->active, client, link);
2419		client->list = &manager->active;
2420
2421		INSIST(client->nctls == 0);
2422		client->nctls++;
2423		ev = &client->ctlevent;
2424		isc_task_send(client->task, &ev);
2425	}
2426	if (i != 0) {
2427		/*
2428		 * We managed to create at least one client, so we
2429		 * declare victory.
2430		 */
2431		result = ISC_R_SUCCESS;
2432	}
2433
2434	UNLOCK(&manager->lock);
2435
2436	return (result);
2437}
2438
2439isc_sockaddr_t *
2440ns_client_getsockaddr(ns_client_t *client) {
2441	return (&client->peeraddr);
2442}
2443
2444isc_result_t
2445ns_client_checkaclsilent(ns_client_t *client, dns_acl_t *acl,
2446			 isc_boolean_t default_allow)
2447{
2448	isc_result_t result;
2449	int match;
2450	isc_netaddr_t netaddr;
2451
2452	if (acl == NULL) {
2453		if (default_allow)
2454			goto allow;
2455		else
2456			goto deny;
2457	}
2458
2459	isc_netaddr_fromsockaddr(&netaddr, &client->peeraddr);
2460
2461	result = dns_acl_match(&netaddr, client->signer, acl,
2462			       &ns_g_server->aclenv,
2463			       &match, NULL);
2464	if (result != ISC_R_SUCCESS)
2465		goto deny; /* Internal error, already logged. */
2466	if (match > 0)
2467		goto allow;
2468	goto deny; /* Negative match or no match. */
2469
2470 allow:
2471	return (ISC_R_SUCCESS);
2472
2473 deny:
2474	return (DNS_R_REFUSED);
2475}
2476
2477isc_result_t
2478ns_client_checkacl(ns_client_t *client,
2479		   const char *opname, dns_acl_t *acl,
2480		   isc_boolean_t default_allow, int log_level)
2481{
2482	isc_result_t result =
2483		ns_client_checkaclsilent(client, acl, default_allow);
2484
2485	if (result == ISC_R_SUCCESS)
2486		ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
2487			      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3),
2488			      "%s approved", opname);
2489	else
2490		ns_client_log(client, DNS_LOGCATEGORY_SECURITY,
2491			      NS_LOGMODULE_CLIENT,
2492			      log_level, "%s denied", opname);
2493	return (result);
2494}
2495
2496static void
2497ns_client_name(ns_client_t *client, char *peerbuf, size_t len) {
2498	if (client->peeraddr_valid)
2499		isc_sockaddr_format(&client->peeraddr, peerbuf, len);
2500	else
2501		snprintf(peerbuf, len, "@%p", client);
2502}
2503
2504void
2505ns_client_logv(ns_client_t *client, isc_logcategory_t *category,
2506	   isc_logmodule_t *module, int level, const char *fmt, va_list ap)
2507{
2508	char msgbuf[2048];
2509	char peerbuf[ISC_SOCKADDR_FORMATSIZE];
2510	const char *name = "";
2511	const char *sep = "";
2512
2513	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
2514	ns_client_name(client, peerbuf, sizeof(peerbuf));
2515	if (client->view != NULL && strcmp(client->view->name, "_bind") != 0 &&
2516	    strcmp(client->view->name, "_default") != 0) {
2517		name = client->view->name;
2518		sep = ": view ";
2519	}
2520
2521	isc_log_write(ns_g_lctx, category, module, level,
2522		      "client %s%s%s: %s", peerbuf, sep, name, msgbuf);
2523}
2524
2525void
2526ns_client_log(ns_client_t *client, isc_logcategory_t *category,
2527	   isc_logmodule_t *module, int level, const char *fmt, ...)
2528{
2529	va_list ap;
2530
2531	if (! isc_log_wouldlog(ns_g_lctx, level))
2532		return;
2533
2534	va_start(ap, fmt);
2535	ns_client_logv(client, category, module, level, fmt, ap);
2536	va_end(ap);
2537}
2538
2539void
2540ns_client_aclmsg(const char *msg, dns_name_t *name, dns_rdatatype_t type,
2541		 dns_rdataclass_t rdclass, char *buf, size_t len)
2542{
2543        char namebuf[DNS_NAME_FORMATSIZE];
2544        char typebuf[DNS_RDATATYPE_FORMATSIZE];
2545        char classbuf[DNS_RDATACLASS_FORMATSIZE];
2546
2547        dns_name_format(name, namebuf, sizeof(namebuf));
2548        dns_rdatatype_format(type, typebuf, sizeof(typebuf));
2549        dns_rdataclass_format(rdclass, classbuf, sizeof(classbuf));
2550        (void)snprintf(buf, len, "%s '%s/%s/%s'", msg, namebuf, typebuf,
2551		       classbuf);
2552}
2553
2554static void
2555ns_client_dumpmessage(ns_client_t *client, const char *reason) {
2556	isc_buffer_t buffer;
2557	char *buf = NULL;
2558	int len = 1024;
2559	isc_result_t result;
2560
2561	/*
2562	 * Note that these are multiline debug messages.  We want a newline
2563	 * to appear in the log after each message.
2564	 */
2565
2566	do {
2567		buf = isc_mem_get(client->mctx, len);
2568		if (buf == NULL)
2569			break;
2570		isc_buffer_init(&buffer, buf, len);
2571		result = dns_message_totext(client->message,
2572					    &dns_master_style_debug,
2573					    0, &buffer);
2574		if (result == ISC_R_NOSPACE) {
2575			isc_mem_put(client->mctx, buf, len);
2576			len += 1024;
2577		} else if (result == ISC_R_SUCCESS)
2578		        ns_client_log(client, NS_LOGCATEGORY_UNMATCHED,
2579				      NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(1),
2580				      "%s\n%.*s", reason,
2581				       (int)isc_buffer_usedlength(&buffer),
2582				       buf);
2583	} while (result == ISC_R_NOSPACE);
2584
2585	if (buf != NULL)
2586		isc_mem_put(client->mctx, buf, len);
2587}
2588
2589void
2590ns_client_dumprecursing(FILE *f, ns_clientmgr_t *manager) {
2591	ns_client_t *client;
2592	char namebuf[DNS_NAME_FORMATSIZE];
2593	char peerbuf[ISC_SOCKADDR_FORMATSIZE];
2594	const char *name;
2595	const char *sep;
2596
2597	REQUIRE(VALID_MANAGER(manager));
2598
2599	LOCK(&manager->lock);
2600	client = ISC_LIST_HEAD(manager->recursing);
2601	while (client != NULL) {
2602		ns_client_name(client, peerbuf, sizeof(peerbuf));
2603		if (client->view != NULL &&
2604		    strcmp(client->view->name, "_bind") != 0 &&
2605		    strcmp(client->view->name, "_default") != 0) {
2606			name = client->view->name;
2607			sep = ": view ";
2608		} else {
2609			name = "";
2610			sep = "";
2611		}
2612		dns_name_format(client->query.qname, namebuf, sizeof(namebuf));
2613		fprintf(f, "; client %s%s%s: '%s' requesttime %d\n",
2614			peerbuf, sep, name, namebuf, client->requesttime);
2615		client = ISC_LIST_NEXT(client, link);
2616	}
2617	UNLOCK(&manager->lock);
2618}
2619
2620void
2621ns_client_qnamereplace(ns_client_t *client, dns_name_t *name) {
2622
2623	if (client->manager != NULL)
2624		LOCK(&client->manager->lock);
2625	if (client->query.restarts > 0) {
2626		/*
2627		 * client->query.qname was dynamically allocated.
2628		 */
2629		dns_message_puttempname(client->message,
2630					&client->query.qname);
2631	}
2632	client->query.qname = name;
2633	if (client->manager != NULL)
2634		UNLOCK(&client->manager->lock);
2635}
2636