ntp_intres.c revision 275970
1/*
2 * ntp_intres.c - Implements a generic blocking worker child or thread,
3 *		  initially to provide a nonblocking solution for DNS
4 *		  name to address lookups available with getaddrinfo().
5 *
6 * This is a new implementation as of 2009 sharing the filename and
7 * very little else with the prior implementation, which used a
8 * temporary file to receive a single set of requests from the parent,
9 * and a NTP mode 7 authenticated request to push back responses.
10 *
11 * A primary goal in rewriting this code was the need to support the
12 * pool configuration directive's requirement to retrieve multiple
13 * addresses resolving a single name, which has previously been
14 * satisfied with blocking resolver calls from the ntpd mainline code.
15 *
16 * A secondary goal is to provide a generic mechanism for other
17 * blocking operations to be delegated to a worker using a common
18 * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
19 * and work_thread.c implement the generic mechanism.  This file
20 * implements the two current consumers, getaddrinfo_sometime() and the
21 * presently unused getnameinfo_sometime().
22 *
23 * Both routines deliver results to a callback and manage memory
24 * allocation, meaning there is no freeaddrinfo_sometime().
25 *
26 * The initial implementation for Unix uses a pair of unidirectional
27 * pipes, one each for requests and responses, connecting the forked
28 * blocking child worker with the ntpd mainline.  The threaded code
29 * uses arrays of pointers to queue requests and responses.
30 *
31 * The parent drives the process, including scheduling sleeps between
32 * retries.
33 *
34 * Memory is managed differently for a child process, which mallocs
35 * request buffers to read from the pipe into, whereas the threaded
36 * code mallocs a copy of the request to hand off to the worker via
37 * the queueing array.  The resulting request buffer is free()d by
38 * platform-independent code.  A wrinkle is the request needs to be
39 * available to the requestor during response processing.
40 *
41 * Response memory allocation is also platform-dependent.  With a
42 * separate process and pipes, the response is free()d after being
43 * written to the pipe.  With threads, the same memory is handed
44 * over and the requestor frees it after processing is completed.
45 *
46 * The code should be generalized to support threads on Unix using
47 * much of the same code used for Windows initially.
48 *
49 */
50#ifdef HAVE_CONFIG_H
51# include <config.h>
52#endif
53
54#include "ntp_workimpl.h"
55
56#ifdef WORKER
57
58#include <stdio.h>
59#include <ctype.h>
60#include <signal.h>
61
62/**/
63#ifdef HAVE_SYS_TYPES_H
64# include <sys/types.h>
65#endif
66#ifdef HAVE_NETINET_IN_H
67#include <netinet/in.h>
68#endif
69#include <arpa/inet.h>
70/**/
71#ifdef HAVE_SYS_PARAM_H
72# include <sys/param.h>
73#endif
74
75#if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
76# define HAVE_RES_INIT
77#endif
78
79#if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
80# ifdef HAVE_ARPA_NAMESER_H
81#  include <arpa/nameser.h> /* DNS HEADER struct */
82# endif
83# ifdef HAVE_NETDB_H
84#  include <netdb.h>
85# endif
86# include <resolv.h>
87# ifdef HAVE_INT32_ONLY_WITH_DNS
88#  define HAVE_INT32
89# endif
90# ifdef HAVE_U_INT32_ONLY_WITH_DNS
91#  define HAVE_U_INT32
92# endif
93#endif
94
95#include "ntp.h"
96#include "ntp_debug.h"
97#include "ntp_malloc.h"
98#include "ntp_syslog.h"
99#include "ntp_unixtime.h"
100#include "ntp_intres.h"
101#include "intreswork.h"
102
103
104/*
105 * Following are implementations of getaddrinfo_sometime() and
106 * getnameinfo_sometime().  Each is implemented in three routines:
107 *
108 * getaddrinfo_sometime()		getnameinfo_sometime()
109 * blocking_getaddrinfo()		blocking_getnameinfo()
110 * getaddrinfo_sometime_complete()	getnameinfo_sometime_complete()
111 *
112 * The first runs in the parent and marshalls (or serializes) request
113 * parameters into a request blob which is processed in the child by
114 * the second routine, blocking_*(), which serializes the results into
115 * a response blob unpacked by the third routine, *_complete(), which
116 * calls the callback routine provided with the request and frees
117 * _request_ memory allocated by the first routine.  Response memory
118 * is managed by the code which calls the *_complete routines.
119 */
120
121/* === typedefs === */
122typedef struct blocking_gai_req_tag {	/* marshalled args */
123	size_t			octets;
124	u_int			dns_idx;
125	time_t			scheduled;
126	time_t			earliest;
127	struct addrinfo		hints;
128	int			retry;
129	gai_sometime_callback	callback;
130	void *			context;
131	size_t			nodesize;
132	size_t			servsize;
133} blocking_gai_req;
134
/*
 * blocking_gai_resp - fixed header of a serialized getaddrinfo()
 * result.  It is followed by ai_count struct addrinfo, then ai_count
 * sockaddr_u, and finally the canonical name strings.
 */
typedef struct blocking_gai_resp_tag {
	size_t			octets;		/* header plus trailing data */
	int			retcode;	/* getaddrinfo() return value */
	int			retry;		/* retry value copied from request */
	int			gai_errno;	/* errno, for EAI_SYSTEM case */
	int			ai_count;	/* number of addrinfo entries */
} blocking_gai_resp;
146
147typedef struct blocking_gni_req_tag {
148	size_t			octets;
149	u_int			dns_idx;
150	time_t			scheduled;
151	time_t			earliest;
152	int			retry;
153	size_t			hostoctets;
154	size_t			servoctets;
155	int			flags;
156	gni_sometime_callback	callback;
157	void *			context;
158	sockaddr_u		socku;
159} blocking_gni_req;
160
/*
 * blocking_gni_resp - fixed header of a serialized getnameinfo()
 * result.  It is followed by hostoctets bytes of null-terminated
 * host, then servoctets bytes of null-terminated service.
 */
typedef struct blocking_gni_resp_tag {
	size_t			octets;		/* header plus both strings */
	int			retcode;	/* getnameinfo() return value */
	int			gni_errno;	/* errno, for EAI_SYSTEM case */
	int			retry;		/* retry value copied from request */
	size_t			hostoctets;	/* octets of host incl. NUL, or 0 */
	size_t			servoctets;	/* octets of service incl. NUL, or 0 */
} blocking_gni_resp;
173
174/* per-DNS-worker state in parent */
175typedef struct dnschild_ctx_tag {
176	u_int	index;
177	time_t	next_dns_timeslot;
178} dnschild_ctx;
179
180/* per-DNS-worker state in worker */
181typedef struct dnsworker_ctx_tag {
182	blocking_child *	c;
183	time_t			ignore_scheduled_before;
184#ifdef HAVE_RES_INIT
185	time_t	next_res_init;
186#endif
187} dnsworker_ctx;
188
189
190/* === variables === */
191dnschild_ctx **		dnschild_contexts;		/* parent */
192u_int			dnschild_contexts_alloc;
193dnsworker_ctx **	dnsworker_contexts;		/* child */
194u_int			dnsworker_contexts_alloc;
195
196#ifdef HAVE_RES_INIT
197static	time_t		next_res_init;
198#endif
199
200
201/* === forward declarations === */
/* parent-side context management */
static	u_int		reserve_dnschild_ctx(void);
static	u_int		get_dnschild_ctx(void);

/* worker-side context management */
static	void		alloc_dnsworker_context(u_int);
/* static	void		free_dnsworker_context(u_int); */
static	dnsworker_ctx *	get_worker_context(blocking_child *, u_int);

/* scheduling and retry policy */
static	void		scheduled_sleep(time_t, time_t,
					dnsworker_ctx *);
static	void		manage_dns_retry_interval(time_t *, time_t *,
						  int *,
						  time_t *);
static	int		should_retry_dns(int, int);

/* resolver configuration reload, a no-op without res_init() */
#ifdef HAVE_RES_INIT
static	void		reload_resolv_conf(dnsworker_ctx *);
#else
# define		reload_resolv_conf(wc)		\
	do {						\
		(void)(wc);				\
	} while (FALSE)
#endif

/* response handlers run in the parent */
static	void		getaddrinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
static	void		getnameinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
227
228
229/* === functions === */
230/*
231 * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
232 *			  invokes provided callback completion function.
233 */
234int
235getaddrinfo_sometime(
236	const char *		node,
237	const char *		service,
238	const struct addrinfo *	hints,
239	int			retry,
240	gai_sometime_callback	callback,
241	void *			context
242	)
243{
244	blocking_gai_req *	gai_req;
245	u_int			idx;
246	dnschild_ctx *		child_ctx;
247	size_t			req_size;
248	size_t			nodesize;
249	size_t			servsize;
250	time_t			now;
251
252	NTP_REQUIRE(NULL != node);
253	if (NULL != hints) {
254		NTP_REQUIRE(0 == hints->ai_addrlen);
255		NTP_REQUIRE(NULL == hints->ai_addr);
256		NTP_REQUIRE(NULL == hints->ai_canonname);
257		NTP_REQUIRE(NULL == hints->ai_next);
258	}
259
260	idx = get_dnschild_ctx();
261	child_ctx = dnschild_contexts[idx];
262
263	nodesize = strlen(node) + 1;
264	servsize = strlen(service) + 1;
265	req_size = sizeof(*gai_req) + nodesize + servsize;
266
267	gai_req = emalloc_zero(req_size);
268
269	gai_req->octets = req_size;
270	gai_req->dns_idx = idx;
271	now = time(NULL);
272	gai_req->scheduled = now;
273	gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
274	child_ctx->next_dns_timeslot = gai_req->earliest;
275	if (hints != NULL)
276		gai_req->hints = *hints;
277	gai_req->retry = retry;
278	gai_req->callback = callback;
279	gai_req->context = context;
280	gai_req->nodesize = nodesize;
281	gai_req->servsize = servsize;
282
283	memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
284	memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
285	       servsize);
286
287	if (queue_blocking_request(
288		BLOCKING_GETADDRINFO,
289		gai_req,
290		req_size,
291		&getaddrinfo_sometime_complete,
292		gai_req)) {
293
294		msyslog(LOG_ERR, "unable to queue getaddrinfo request");
295		errno = EFAULT;
296		return -1;
297	}
298
299	return 0;
300}
301
302int
303blocking_getaddrinfo(
304	blocking_child *	c,
305	blocking_pipe_header *	req
306	)
307{
308	blocking_gai_req *	gai_req;
309	dnsworker_ctx *		worker_ctx;
310	blocking_pipe_header *	resp;
311	blocking_gai_resp *	gai_resp;
312	char *			node;
313	char *			service;
314	struct addrinfo *	ai_res;
315	struct addrinfo *	ai;
316	struct addrinfo *	serialized_ai;
317	size_t			canons_octets;
318	size_t			this_octets;
319	size_t			resp_octets;
320	char *			cp;
321	time_t			time_now;
322
323	gai_req = (void *)((char *)req + sizeof(*req));
324	node = (char *)gai_req + sizeof(*gai_req);
325	service = node + gai_req->nodesize;
326
327	worker_ctx = get_worker_context(c, gai_req->dns_idx);
328	scheduled_sleep(gai_req->scheduled, gai_req->earliest,
329			worker_ctx);
330	reload_resolv_conf(worker_ctx);
331
332	/*
333	 * Take a shot at the final size, better to overestimate
334	 * at first and then realloc to a smaller size.
335	 */
336
337	resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
338		      16 * (sizeof(struct addrinfo) +
339			    sizeof(sockaddr_u)) +
340		      256;
341	resp = emalloc_zero(resp_octets);
342	gai_resp = (void *)(resp + 1);
343
344	TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n",
345		  node, service, gai_req->hints.ai_family,
346		  gai_req->hints.ai_flags));
347#ifdef DEBUG
348	if (debug >= 2)
349		fflush(stdout);
350#endif
351	ai_res = NULL;
352	gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
353					&ai_res);
354	gai_resp->retry = gai_req->retry;
355#ifdef EAI_SYSTEM
356	if (EAI_SYSTEM == gai_resp->retcode)
357		gai_resp->gai_errno = errno;
358#endif
359	canons_octets = 0;
360
361	if (0 == gai_resp->retcode) {
362		ai = ai_res;
363		while (NULL != ai) {
364			gai_resp->ai_count++;
365			if (ai->ai_canonname)
366				canons_octets += strlen(ai->ai_canonname) + 1;
367			ai = ai->ai_next;
368		}
369		/*
370		 * If this query succeeded only after retrying, DNS may have
371		 * just become responsive.  Ignore previously-scheduled
372		 * retry sleeps once for each pending request, similar to
373		 * the way scheduled_sleep() does when its worker_sleep()
374		 * is interrupted.
375		 */
376		if (gai_resp->retry > INITIAL_DNS_RETRY) {
377			time_now = time(NULL);
378			worker_ctx->ignore_scheduled_before = time_now;
379			TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
380				  humantime(time_now)));
381		}
382	}
383
384	/*
385	 * Our response consists of a header, followed by ai_count
386	 * addrinfo structs followed by ai_count sockaddr_storage
387	 * structs followed by the canonical names.
388	 */
389	gai_resp->octets = sizeof(*gai_resp)
390			    + gai_resp->ai_count
391				* (sizeof(gai_req->hints)
392				   + sizeof(sockaddr_u))
393			    + canons_octets;
394
395	resp_octets = sizeof(*resp) + gai_resp->octets;
396	resp = erealloc(resp, resp_octets);
397	gai_resp = (void *)(resp + 1);
398
399	/* cp serves as our current pointer while serializing */
400	cp = (void *)(gai_resp + 1);
401	canons_octets = 0;
402
403	if (0 == gai_resp->retcode) {
404		ai = ai_res;
405		while (NULL != ai) {
406			memcpy(cp, ai, sizeof(*ai));
407			serialized_ai = (void *)cp;
408			cp += sizeof(*ai);
409
410			/* transform ai_canonname into offset */
411			if (NULL != serialized_ai->ai_canonname) {
412				serialized_ai->ai_canonname = (char *)canons_octets;
413				canons_octets += strlen(ai->ai_canonname) + 1;
414			}
415
416			/* leave fixup of ai_addr pointer for receiver */
417
418			ai = ai->ai_next;
419		}
420
421		ai = ai_res;
422		while (NULL != ai) {
423			NTP_INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
424			memcpy(cp, ai->ai_addr, ai->ai_addrlen);
425			cp += sizeof(sockaddr_u);
426
427			ai = ai->ai_next;
428		}
429
430		ai = ai_res;
431		while (NULL != ai) {
432			if (NULL != ai->ai_canonname) {
433				this_octets = strlen(ai->ai_canonname) + 1;
434				memcpy(cp, ai->ai_canonname, this_octets);
435				cp += this_octets;
436			}
437
438			ai = ai->ai_next;
439		}
440		freeaddrinfo(ai_res);
441	}
442
443	/*
444	 * make sure our walk and earlier calc match
445	 */
446	DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
447
448	if (queue_blocking_response(c, resp, resp_octets, req)) {
449		msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
450		return -1;
451	}
452
453	return 0;
454}
455
456
457static void
458getaddrinfo_sometime_complete(
459	blocking_work_req	rtype,
460	void *			context,
461	size_t			respsize,
462	void *			resp
463	)
464{
465	blocking_gai_req *	gai_req;
466	blocking_gai_resp *	gai_resp;
467	dnschild_ctx *		child_ctx;
468	struct addrinfo *	ai;
469	struct addrinfo *	next_ai;
470	sockaddr_u *		psau;
471	char *			node;
472	char *			service;
473	char *			canon_start;
474	time_t			time_now;
475	int			again;
476	int			af;
477	const char *		fam_spec;
478	int			i;
479
480	gai_req = context;
481	gai_resp = resp;
482
483	DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
484	DEBUG_REQUIRE(respsize == gai_resp->octets);
485
486	node = (char *)gai_req + sizeof(*gai_req);
487	service = node + gai_req->nodesize;
488
489	child_ctx = dnschild_contexts[gai_req->dns_idx];
490
491	if (0 == gai_resp->retcode) {
492		/*
493		 * If this query succeeded only after retrying, DNS may have
494		 * just become responsive.
495		 */
496		if (gai_resp->retry > INITIAL_DNS_RETRY) {
497			time_now = time(NULL);
498			child_ctx->next_dns_timeslot = time_now;
499			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
500				  gai_req->dns_idx, humantime(time_now)));
501		}
502	} else {
503		again = should_retry_dns(gai_resp->retcode,
504					 gai_resp->gai_errno);
505		/*
506		 * exponential backoff of DNS retries to 64s
507		 */
508		if (gai_req->retry > 0 && again) {
509			/* log the first retry only */
510			if (INITIAL_DNS_RETRY == gai_req->retry)
511				NLOG(NLOG_SYSINFO) {
512					af = gai_req->hints.ai_family;
513					fam_spec = (AF_INET6 == af)
514						       ? " (AAAA)"
515						       : (AF_INET == af)
516							     ? " (A)"
517							     : "";
518#ifdef EAI_SYSTEM
519					if (EAI_SYSTEM == gai_resp->retcode) {
520						errno = gai_resp->gai_errno;
521						msyslog(LOG_INFO,
522							"retrying DNS %s%s: EAI_SYSTEM %d: %m",
523							node, fam_spec,
524							gai_resp->gai_errno);
525					} else
526#endif
527						msyslog(LOG_INFO,
528							"retrying DNS %s%s: %s (%d)",
529							node, fam_spec,
530							gai_strerror(gai_resp->retcode),
531							gai_resp->retcode);
532				}
533			manage_dns_retry_interval(&gai_req->scheduled,
534			    &gai_req->earliest, &gai_req->retry,
535			    &child_ctx->next_dns_timeslot);
536			if (!queue_blocking_request(
537					BLOCKING_GETADDRINFO,
538					gai_req,
539					gai_req->octets,
540					&getaddrinfo_sometime_complete,
541					gai_req))
542				return;
543			else
544				msyslog(LOG_ERR,
545					"unable to retry hostname %s",
546					node);
547		}
548	}
549
550	/*
551	 * fixup pointers in returned addrinfo array
552	 */
553	ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
554	next_ai = NULL;
555	for (i = gai_resp->ai_count - 1; i >= 0; i--) {
556		ai[i].ai_next = next_ai;
557		next_ai = &ai[i];
558	}
559
560	psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
561	canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
562
563	for (i = 0; i < gai_resp->ai_count; i++) {
564		if (NULL != ai[i].ai_addr)
565			ai[i].ai_addr = &psau->sa;
566		psau++;
567		if (NULL != ai[i].ai_canonname)
568			ai[i].ai_canonname += (size_t)canon_start;
569	}
570
571	NTP_ENSURE((char *)psau == canon_start);
572
573	if (!gai_resp->ai_count)
574		ai = NULL;
575
576	(*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
577			     gai_req->context, node, service,
578			     &gai_req->hints, ai);
579
580	free(gai_req);
581	/* gai_resp is part of block freed by process_blocking_resp() */
582}
583
584
585#ifdef TEST_BLOCKING_WORKER
586void gai_test_callback(int rescode, int gai_errno, void *context, const char *name, const char *service, const struct addrinfo *hints, const struct addrinfo *ai_res)
587{
588	sockaddr_u addr;
589
590	if (rescode) {
591		TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
592			  context, rescode, name, service));
593		return;
594	}
595	while (!rescode && NULL != ai_res) {
596		ZERO_SOCK(&addr);
597		memcpy(&addr, ai_res->ai_addr, ai_res->ai_addrlen);
598		TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n",
599			  context,
600			  AF(&addr),
601			  stoa(&addr),
602			  (ai_res->ai_canonname)
603			      ? ai_res->ai_canonname
604			      : "",
605			  (SOCK_DGRAM == ai_res->ai_socktype)
606			      ? "DGRAM"
607			      : (SOCK_STREAM == ai_res->ai_socktype)
608				    ? "STREAM"
609				    : "(other)",
610			  ai_res,
611			  ai_res->ai_addr,
612			  ai_res->ai_next));
613
614		getnameinfo_sometime((sockaddr_u *)ai_res->ai_addr, 128, 32, 0, gni_test_callback, context);
615
616		ai_res = ai_res->ai_next;
617	}
618}
619#endif	/* TEST_BLOCKING_WORKER */
620
621
622int
623getnameinfo_sometime(
624	sockaddr_u *		psau,
625	size_t			hostoctets,
626	size_t			servoctets,
627	int			flags,
628	gni_sometime_callback	callback,
629	void *			context
630	)
631{
632	blocking_gni_req *	gni_req;
633	u_int			idx;
634	dnschild_ctx *		child_ctx;
635	time_t			time_now;
636
637	NTP_REQUIRE(hostoctets);
638	NTP_REQUIRE(hostoctets + servoctets < 1024);
639
640	idx = get_dnschild_ctx();
641	child_ctx = dnschild_contexts[idx];
642
643	gni_req = emalloc_zero(sizeof(*gni_req));
644
645	gni_req->octets = sizeof(*gni_req);
646	gni_req->dns_idx = idx;
647	time_now = time(NULL);
648	gni_req->scheduled = time_now;
649	gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
650	child_ctx->next_dns_timeslot = gni_req->earliest;
651	memcpy(&gni_req->socku, psau, SOCKLEN(psau));
652	gni_req->hostoctets = hostoctets;
653	gni_req->servoctets = servoctets;
654	gni_req->flags = flags;
655	gni_req->retry = INITIAL_DNS_RETRY;
656	gni_req->callback = callback;
657	gni_req->context = context;
658
659	if (queue_blocking_request(
660		BLOCKING_GETNAMEINFO,
661		gni_req,
662		sizeof(*gni_req),
663		&getnameinfo_sometime_complete,
664		gni_req)) {
665
666		msyslog(LOG_ERR, "unable to queue getnameinfo request");
667		errno = EFAULT;
668		return -1;
669	}
670
671	return 0;
672}
673
674
675int
676blocking_getnameinfo(
677	blocking_child *	c,
678	blocking_pipe_header *	req
679	)
680{
681	blocking_gni_req *	gni_req;
682	dnsworker_ctx *		worker_ctx;
683	blocking_pipe_header *	resp;
684	blocking_gni_resp *	gni_resp;
685	size_t			octets;
686	size_t			resp_octets;
687	char *			host;
688	char *			service;
689	char *			cp;
690	int			rc;
691	time_t			time_now;
692
693	gni_req = (void *)((char *)req + sizeof(*req));
694
695	octets = gni_req->hostoctets + gni_req->servoctets;
696
697	/*
698	 * Some alloca() implementations are fragile regarding
699	 * large allocations.  We only need room for the host
700	 * and service names.
701	 */
702	NTP_REQUIRE(octets < 1024);
703
704#ifndef HAVE_ALLOCA
705	host = emalloc(octets);
706#else
707	host = alloca(octets);
708	if (NULL == host) {
709		msyslog(LOG_ERR,
710			"blocking_getnameinfo unable to allocate %lu octets on stack",
711			(u_long)octets);
712		exit(1);
713	}
714#endif
715	service = host + gni_req->hostoctets;
716
717	worker_ctx = get_worker_context(c, gni_req->dns_idx);
718	scheduled_sleep(gni_req->scheduled, gni_req->earliest,
719			worker_ctx);
720	reload_resolv_conf(worker_ctx);
721
722	/*
723	 * Take a shot at the final size, better to overestimate
724	 * then realloc to a smaller size.
725	 */
726
727	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
728	resp = emalloc_zero(resp_octets);
729	gni_resp = (void *)((char *)resp + sizeof(*resp));
730
731	TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
732		  stoa(&gni_req->socku), gni_req->flags,
733		  (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
734
735	gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
736					SOCKLEN(&gni_req->socku),
737					host,
738					gni_req->hostoctets,
739					service,
740					gni_req->servoctets,
741					gni_req->flags);
742	gni_resp->retry = gni_req->retry;
743#ifdef EAI_SYSTEM
744	if (EAI_SYSTEM == gni_resp->retcode)
745		gni_resp->gni_errno = errno;
746#endif
747
748	if (0 != gni_resp->retcode) {
749		gni_resp->hostoctets = 0;
750		gni_resp->servoctets = 0;
751	} else {
752		gni_resp->hostoctets = strlen(host) + 1;
753		gni_resp->servoctets = strlen(service) + 1;
754		/*
755		 * If this query succeeded only after retrying, DNS may have
756		 * just become responsive.  Ignore previously-scheduled
757		 * retry sleeps once for each pending request, similar to
758		 * the way scheduled_sleep() does when its worker_sleep()
759		 * is interrupted.
760		 */
761		if (gni_req->retry > INITIAL_DNS_RETRY) {
762			time_now = time(NULL);
763			worker_ctx->ignore_scheduled_before = time_now;
764			TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
765				humantime(time_now)));
766		}
767	}
768	octets = gni_resp->hostoctets + gni_resp->servoctets;
769	/*
770	 * Our response consists of a header, followed by the host and
771	 * service strings, each null-terminated.
772	 */
773	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
774
775	resp = erealloc(resp, resp_octets);
776	gni_resp = (void *)(resp + 1);
777
778	gni_resp->octets = sizeof(*gni_resp) + octets;
779
780	/* cp serves as our current pointer while serializing */
781	cp = (void *)(gni_resp + 1);
782
783	if (0 == gni_resp->retcode) {
784		memcpy(cp, host, gni_resp->hostoctets);
785		cp += gni_resp->hostoctets;
786		memcpy(cp, service, gni_resp->servoctets);
787		cp += gni_resp->servoctets;
788	}
789
790	NTP_INSIST((size_t)(cp - (char *)resp) == resp_octets);
791	NTP_INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
792
793	rc = queue_blocking_response(c, resp, resp_octets, req);
794	if (rc)
795		msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
796#ifndef HAVE_ALLOCA
797	free(host);
798#endif
799	return rc;
800}
801
802
803static void
804getnameinfo_sometime_complete(
805	blocking_work_req	rtype,
806	void *			context,
807	size_t			respsize,
808	void *			resp
809	)
810{
811	blocking_gni_req *	gni_req;
812	blocking_gni_resp *	gni_resp;
813	dnschild_ctx *		child_ctx;
814	char *			host;
815	char *			service;
816	time_t			time_now;
817	int			again;
818
819	gni_req = context;
820	gni_resp = resp;
821
822	DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
823	DEBUG_REQUIRE(respsize == gni_resp->octets);
824
825	child_ctx = dnschild_contexts[gni_req->dns_idx];
826
827	if (0 == gni_resp->retcode) {
828		/*
829		 * If this query succeeded only after retrying, DNS may have
830		 * just become responsive.
831		 */
832		if (gni_resp->retry > INITIAL_DNS_RETRY) {
833			time_now = time(NULL);
834			child_ctx->next_dns_timeslot = time_now;
835			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
836				  gni_req->dns_idx, humantime(time_now)));
837		}
838	} else {
839		again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
840		/*
841		 * exponential backoff of DNS retries to 64s
842		 */
843		if (gni_req->retry > 0)
844			manage_dns_retry_interval(&gni_req->scheduled,
845			    &gni_req->earliest, &gni_req->retry,
846			    &child_ctx->next_dns_timeslot);
847
848		if (gni_req->retry > 0 && again) {
849			if (!queue_blocking_request(
850				BLOCKING_GETNAMEINFO,
851				gni_req,
852				gni_req->octets,
853				&getnameinfo_sometime_complete,
854				gni_req))
855				return;
856
857			msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
858		}
859	}
860
861	if (!gni_resp->hostoctets) {
862		host = NULL;
863		service = NULL;
864	} else {
865		host = (char *)gni_resp + sizeof(*gni_resp);
866		service = (gni_resp->servoctets)
867			      ? host + gni_resp->hostoctets
868			      : NULL;
869	}
870
871	(*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
872			     &gni_req->socku, gni_req->flags, host,
873			     service, gni_req->context);
874
875	free(gni_req);
876	/* gni_resp is part of block freed by process_blocking_resp() */
877}
878
879
880#ifdef TEST_BLOCKING_WORKER
881void gni_test_callback(int rescode, int gni_errno, sockaddr_u *psau, int flags, const char *host, const char *service, void *context)
882{
883	if (!rescode)
884		TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n",
885			  host, service, stoa(psau), context));
886	else
887		TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
888			  context, rescode, gni_errno, flags, stoa(psau)));
889}
890#endif	/* TEST_BLOCKING_WORKER */
891
892
893#ifdef HAVE_RES_INIT
894static void
895reload_resolv_conf(
896	dnsworker_ctx *	worker_ctx
897	)
898{
899	time_t	time_now;
900
901	/*
902	 * This is ad-hoc.  Reload /etc/resolv.conf once per minute
903	 * to pick up on changes from the DHCP client.  [Bug 1226]
904	 * When using threads for the workers, this needs to happen
905	 * only once per minute process-wide.
906	 */
907	time_now = time(NULL);
908# ifdef WORK_THREAD
909	worker_ctx->next_res_init = next_res_init;
910# endif
911	if (worker_ctx->next_res_init <= time_now) {
912		if (worker_ctx->next_res_init != 0)
913			res_init();
914		worker_ctx->next_res_init = time_now + 60;
915# ifdef WORK_THREAD
916		next_res_init = worker_ctx->next_res_init;
917# endif
918	}
919}
920#endif	/* HAVE_RES_INIT */
921
922
923static u_int
924reserve_dnschild_ctx(void)
925{
926	const size_t	ps = sizeof(dnschild_contexts[0]);
927	const size_t	cs = sizeof(*dnschild_contexts[0]);
928	u_int		c;
929	u_int		new_alloc;
930	size_t		octets;
931	size_t		new_octets;
932
933	c = 0;
934	while (TRUE) {
935		for ( ; c < dnschild_contexts_alloc; c++) {
936			if (NULL == dnschild_contexts[c]) {
937				dnschild_contexts[c] = emalloc_zero(cs);
938
939				return c;
940			}
941		}
942		new_alloc = dnschild_contexts_alloc + 20;
943		new_octets = new_alloc * ps;
944		octets = dnschild_contexts_alloc * ps;
945		dnschild_contexts = erealloc_zero(dnschild_contexts,
946						  new_octets, octets);
947		dnschild_contexts_alloc = new_alloc;
948	}
949}
950
951
952static u_int
953get_dnschild_ctx(void)
954{
955	static u_int	shared_ctx = UINT_MAX;
956
957	if (worker_per_query)
958		return reserve_dnschild_ctx();
959
960	if (UINT_MAX == shared_ctx)
961		shared_ctx = reserve_dnschild_ctx();
962
963	return shared_ctx;
964}
965
966
967static void
968alloc_dnsworker_context(
969	u_int idx
970	)
971{
972	const size_t worker_context_sz = sizeof(*dnsworker_contexts[0]);
973
974	REQUIRE(NULL == dnsworker_contexts[idx]);
975	dnsworker_contexts[idx] = emalloc_zero(worker_context_sz);
976}
977
978
979static dnsworker_ctx *
980get_worker_context(
981	blocking_child *	c,
982	u_int			idx
983	)
984{
985	static size_t	ps = sizeof(dnsworker_contexts[0]);
986	u_int	min_new_alloc;
987	u_int	new_alloc;
988	size_t	octets;
989	size_t	new_octets;
990
991	if (dnsworker_contexts_alloc <= idx) {
992		min_new_alloc = 1 + idx;
993		/* round new_alloc up to nearest multiple of 4 */
994		new_alloc = (min_new_alloc + 4) & ~(4 - 1);
995		new_octets = new_alloc * ps;
996		octets = dnsworker_contexts_alloc * ps;
997		dnsworker_contexts = erealloc_zero(dnsworker_contexts,
998						   new_octets, octets);
999		dnsworker_contexts_alloc = new_alloc;
1000	}
1001
1002	if (NULL == dnsworker_contexts[idx])
1003		alloc_dnsworker_context(idx);
1004	ZERO(*dnsworker_contexts[idx]);
1005	dnsworker_contexts[idx]->c = c;
1006
1007	return dnsworker_contexts[idx];
1008}
1009
1010
1011static void
1012scheduled_sleep(
1013	time_t		scheduled,
1014	time_t		earliest,
1015	dnsworker_ctx *	worker_ctx
1016	)
1017{
1018	time_t now;
1019
1020	if (scheduled < worker_ctx->ignore_scheduled_before) {
1021		TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
1022			  humantime(earliest), humantime(scheduled),
1023			  humantime(worker_ctx->ignore_scheduled_before)));
1024		return;
1025	}
1026
1027	now = time(NULL);
1028
1029	if (now < earliest) {
1030		TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
1031			  humantime(earliest), humantime(scheduled),
1032			  humantime(worker_ctx->ignore_scheduled_before)));
1033		if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
1034			/* our sleep was interrupted */
1035			now = time(NULL);
1036			worker_ctx->ignore_scheduled_before = now;
1037#ifdef HAVE_RES_INIT
1038			worker_ctx->next_res_init = now + 60;
1039			next_res_init = worker_ctx->next_res_init;
1040			res_init();
1041#endif
1042			TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
1043				  humantime(worker_ctx->ignore_scheduled_before)));
1044		}
1045	}
1046}
1047
1048
/*
 * manage_dns_retry_interval is a helper used by
 * getaddrinfo_sometime_complete and getnameinfo_sometime_complete
 * to calculate the new retry interval and schedule the next query.
 *
 * The next attempt is placed at the later of now + *pretry and
 * *pnext_timeslot.  That time is stored in both *pnext_timeslot and
 * *pwhen, *pscheduled is set to now, and *pretry is doubled with a
 * ceiling of 64.
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot
	)
{
	const time_t	time_now = time(NULL);
	const int	interval = *pretry;
	const time_t	next_try = max(time_now + interval,
				       *pnext_timeslot);

	*pnext_timeslot = next_try;
	*pscheduled = time_now;
	*pwhen = next_try;
	*pretry = min(64, interval << 1);
}
1076
/*
 * should_retry_dns is a helper used by getaddrinfo_sometime_complete
 * and getnameinfo_sometime_complete which implements ntpd's DNS retry
 * policy.
 *
 * rescode is the resolver's return code and res_errno the errno
 * saved for the EAI_SYSTEM case.  Returns nonzero when the failure
 * is considered temporary and the query should be retried, zero
 * otherwise.  Once EAI_AGAIN has been seen, "no such name" results
 * are no longer retried.  [Bug 1178]
 */
static int
should_retry_dns(
	int	rescode,
	int	res_errno
	)
{
	/* sticky: set by the first EAI_AGAIN, never cleared */
	static int	eai_again_seen;
	int		retry_it;
#if defined (EAI_SYSTEM) && defined(DEBUG)
	char		msg[256];
#endif

	switch (rescode) {

	case EAI_AGAIN:
		eai_again_seen = 1;		/* [Bug 1178] */
		retry_it = 1;
		break;

	case EAI_FAIL:
		retry_it = 1;
		break;

#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
	case EAI_NODATA:
#endif
	case EAI_NONAME:
		retry_it = !eai_again_seen;	/* [Bug 1178] */
		break;

#ifdef EAI_SYSTEM
	case EAI_SYSTEM:
		/*
		 * EAI_SYSTEM means the real error is in errno.  We should be more
		 * discriminating about which errno values require retrying, but
		 * this matches existing behavior.
		 */
		retry_it = 1;
# ifdef DEBUG
		errno_to_str(res_errno, msg, sizeof(msg));
		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
			  res_errno, msg));
# endif
		break;
#endif

	default:
		/* any other result is treated as permanent */
		retry_it = 0;
		break;
	}

	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
		  gai_strerror(rescode), rescode, retry_it ? "" : "not "));

	return retry_it;
}
1140
1141#else	/* !WORKER follows */
/* without WORKER, keep this translation unit from being empty */
int ntp_intres_nonempty_compilation_unit;
1143#endif
1144