ntp_intres.c revision 302408
1153838Sdfr/*
2153838Sdfr * ntp_intres.c - Implements a generic blocking worker child or thread,
3153838Sdfr *		  initially to provide a nonblocking solution for DNS
4153838Sdfr *		  name to address lookups available with getaddrinfo().
5153838Sdfr *
6153838Sdfr * This is a new implementation as of 2009 sharing the filename and
7153838Sdfr * very little else with the prior implementation, which used a
8153838Sdfr * temporary file to receive a single set of requests from the parent,
9153838Sdfr * and a NTP mode 7 authenticated request to push back responses.
10153838Sdfr *
11153838Sdfr * A primary goal in rewriting this code was the need to support the
12153838Sdfr * pool configuration directive's requirement to retrieve multiple
13153838Sdfr * addresses resolving a single name, which has previously been
14153838Sdfr * satisfied with blocking resolver calls from the ntpd mainline code.
15153838Sdfr *
16153838Sdfr * A secondary goal is to provide a generic mechanism for other
17153838Sdfr * blocking operations to be delegated to a worker using a common
18153838Sdfr * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
19153838Sdfr * and work_thread.c implement the generic mechanism.  This file
20153838Sdfr * implements the two current consumers, getaddrinfo_sometime() and the
21153838Sdfr * presently unused getnameinfo_sometime().
22153838Sdfr *
23153838Sdfr * Both routines deliver results to a callback and manage memory
24153838Sdfr * allocation, meaning there is no freeaddrinfo_sometime().
25153838Sdfr *
26153838Sdfr * The initial implementation for Unix uses a pair of unidirectional
27153838Sdfr * pipes, one each for requests and responses, connecting the forked
28153838Sdfr * blocking child worker with the ntpd mainline.  The threaded code
29153838Sdfr * uses arrays of pointers to queue requests and responses.
30203027Sgavin *
31206622Suqs * The parent drives the process, including scheduling sleeps between
32153838Sdfr * retries.
33153838Sdfr *
34153838Sdfr * Memory is managed differently for a child process, which mallocs
35153838Sdfr * request buffers to read from the pipe into, whereas the threaded
36153838Sdfr * code mallocs a copy of the request to hand off to the worker via
37153838Sdfr * the queueing array.  The resulting request buffer is free()d by
38153838Sdfr * platform-independent code.  A wrinkle is the request needs to be
39153838Sdfr * available to the requestor during response processing.
40153838Sdfr *
41153838Sdfr * Response memory allocation is also platform-dependent.  With a
42153838Sdfr * separate process and pipes, the response is free()d after being
43153838Sdfr * written to the pipe.  With threads, the same memory is handed
44153838Sdfr * over and the requestor frees it after processing is completed.
45153838Sdfr *
46153838Sdfr * The code should be generalized to support threads on Unix using
47153838Sdfr * much of the same code used for Windows initially.
48153838Sdfr *
49153838Sdfr */
50153838Sdfr#ifdef HAVE_CONFIG_H
51153838Sdfr# include <config.h>
52153838Sdfr#endif
53153838Sdfr
54153838Sdfr#include "ntp_workimpl.h"
55153838Sdfr
56153838Sdfr#ifdef WORKER
57153838Sdfr
58153838Sdfr#include <stdio.h>
59153838Sdfr#include <ctype.h>
60153838Sdfr#include <signal.h>
61153838Sdfr
62153838Sdfr/**/
63153838Sdfr#ifdef HAVE_SYS_TYPES_H
64153838Sdfr# include <sys/types.h>
65153838Sdfr#endif
66153838Sdfr#ifdef HAVE_NETINET_IN_H
67153838Sdfr#include <netinet/in.h>
68153838Sdfr#endif
69153838Sdfr#include <arpa/inet.h>
70153838Sdfr/**/
71153838Sdfr#ifdef HAVE_SYS_PARAM_H
72153838Sdfr# include <sys/param.h>
73153838Sdfr#endif
74153838Sdfr
75153838Sdfr#if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
76153838Sdfr# define HAVE_RES_INIT
77153838Sdfr#endif
78153838Sdfr
79153838Sdfr#if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
80153838Sdfr# ifdef HAVE_ARPA_NAMESER_H
81153838Sdfr#  include <arpa/nameser.h> /* DNS HEADER struct */
82153838Sdfr# endif
83153838Sdfr# ifdef HAVE_NETDB_H
84153838Sdfr#  include <netdb.h>
85153838Sdfr# endif
86153838Sdfr# include <resolv.h>
87153838Sdfr# ifdef HAVE_INT32_ONLY_WITH_DNS
88153838Sdfr#  define HAVE_INT32
89153838Sdfr# endif
90153838Sdfr# ifdef HAVE_U_INT32_ONLY_WITH_DNS
91153838Sdfr#  define HAVE_U_INT32
92153838Sdfr# endif
93153838Sdfr#endif
94153838Sdfr
95153838Sdfr#include "ntp.h"
96153838Sdfr#include "ntp_debug.h"
97153838Sdfr#include "ntp_malloc.h"
98153838Sdfr#include "ntp_syslog.h"
99153838Sdfr#include "ntp_unixtime.h"
100153838Sdfr#include "ntp_intres.h"
101153838Sdfr#include "intreswork.h"
102153838Sdfr
103153838Sdfr
104153838Sdfr/*
105153838Sdfr * Following are implementations of getaddrinfo_sometime() and
106153838Sdfr * getnameinfo_sometime().  Each is implemented in three routines:
107153838Sdfr *
108153838Sdfr * getaddrinfo_sometime()		getnameinfo_sometime()
109153838Sdfr * blocking_getaddrinfo()		blocking_getnameinfo()
110153838Sdfr * getaddrinfo_sometime_complete()	getnameinfo_sometime_complete()
111153838Sdfr *
112153838Sdfr * The first runs in the parent and marshalls (or serializes) request
113153838Sdfr * parameters into a request blob which is processed in the child by
114153838Sdfr * the second routine, blocking_*(), which serializes the results into
115153838Sdfr * a response blob unpacked by the third routine, *_complete(), which
116153838Sdfr * calls the callback routine provided with the request and frees
117153838Sdfr * _request_ memory allocated by the first routine.  Response memory
118153838Sdfr * is managed by the code which calls the *_complete routines.
119153838Sdfr */
120153838Sdfr
121153838Sdfr/* === typedefs === */
122153838Sdfrtypedef struct blocking_gai_req_tag {	/* marshalled args */
123153838Sdfr	size_t			octets;
124153838Sdfr	u_int			dns_idx;
125153838Sdfr	time_t			scheduled;
126153838Sdfr	time_t			earliest;
127153838Sdfr	struct addrinfo		hints;
128153838Sdfr	int			retry;
129153838Sdfr	gai_sometime_callback	callback;
130153838Sdfr	void *			context;
131153838Sdfr	size_t			nodesize;
132153838Sdfr	size_t			servsize;
133153838Sdfr} blocking_gai_req;
134153838Sdfr
/*
 * blocking_gai_resp - fixed-size head of a getaddrinfo() response.
 *
 * In the response blob this header is followed by ai_count struct
 * addrinfo, then ai_count sockaddr_u, and finally the canonical name
 * strings.
 */
struct blocking_gai_resp_tag {
	size_t	octets;		/* this header plus everything following it */
	int	retcode;	/* value returned by getaddrinfo() */
	int	retry;		/* copied from the request's retry field */
	int	gai_errno;	/* errno, captured for the EAI_SYSTEM case */
	int	ai_count;	/* number of addrinfo entries serialized */
};
typedef struct blocking_gai_resp_tag blocking_gai_resp;
146153838Sdfr
147153838Sdfrtypedef struct blocking_gni_req_tag {
148153838Sdfr	size_t			octets;
149153838Sdfr	u_int			dns_idx;
150153838Sdfr	time_t			scheduled;
151153838Sdfr	time_t			earliest;
152153838Sdfr	int			retry;
153153838Sdfr	size_t			hostoctets;
154153838Sdfr	size_t			servoctets;
155153838Sdfr	int			flags;
156205076Suqs	gni_sometime_callback	callback;
157153838Sdfr	void *			context;
158153838Sdfr	sockaddr_u		socku;
159153838Sdfr} blocking_gni_req;
160153838Sdfr
/*
 * blocking_gni_resp - fixed-size head of a getnameinfo() response.
 *
 * In the response blob this header is followed by hostoctets bytes
 * of NUL-terminated host name, then servoctets bytes of
 * NUL-terminated service name.
 */
struct blocking_gni_resp_tag {
	size_t	octets;		/* this header plus the strings following it */
	int	retcode;	/* value returned by getnameinfo() */
	int	gni_errno;	/* errno, captured for the EAI_SYSTEM case */
	int	retry;		/* copied from the request's retry field */
	size_t	hostoctets;	/* strlen(host) + 1, or 0 on failure */
	size_t	servoctets;	/* strlen(service) + 1, or 0 on failure */
};
typedef struct blocking_gni_resp_tag blocking_gni_resp;
173153838Sdfr
174153838Sdfr/* per-DNS-worker state in parent */
175153838Sdfrtypedef struct dnschild_ctx_tag {
176153838Sdfr	u_int	index;
177153838Sdfr	time_t	next_dns_timeslot;
178153838Sdfr} dnschild_ctx;
179153838Sdfr
180153838Sdfr/* per-DNS-worker state in worker */
181153838Sdfrtypedef struct dnsworker_ctx_tag {
182153838Sdfr	blocking_child *	c;
183153838Sdfr	time_t			ignore_scheduled_before;
184153838Sdfr#ifdef HAVE_RES_INIT
185153838Sdfr	time_t	next_res_init;
186153838Sdfr#endif
187153838Sdfr} dnsworker_ctx;
188153838Sdfr
189153838Sdfr
190153838Sdfr/* === variables === */
191153838Sdfrdnschild_ctx **		dnschild_contexts;		/* parent */
192153838Sdfru_int			dnschild_contexts_alloc;
193153838Sdfrdnsworker_ctx **	dnsworker_contexts;		/* child */
194153838Sdfru_int			dnsworker_contexts_alloc;
195153838Sdfr
196153838Sdfr#ifdef HAVE_RES_INIT
197153838Sdfrstatic	time_t		next_res_init;
198153838Sdfr#endif
199153838Sdfr
200153838Sdfr
201153838Sdfr/* === forward declarations === */
static	u_int		reserve_dnschild_ctx(void);
static	u_int		get_dnschild_ctx(void);
static	dnsworker_ctx *	get_worker_context(blocking_child *c, u_int idx);
static	void		scheduled_sleep(time_t scheduled,
					time_t earliest,
					dnsworker_ctx *worker_ctx);
static	void		manage_dns_retry_interval(time_t *pscheduled,
						  time_t *pearliest,
						  int *pretry,
						  time_t *pnext_timeslot);
static	int		should_retry_dns(int rescode, int res_errno);
#ifdef HAVE_RES_INIT
static	void		reload_resolv_conf(dnsworker_ctx *worker_ctx);
#else
	/* without res_init() there is nothing to reload */
# define		reload_resolv_conf(wc)		\
	do {						\
		(void)(wc);				\
	} while (FALSE)
#endif
static	void		getaddrinfo_sometime_complete(blocking_work_req rtype,
						      void *context,
						      size_t respsize,
						      void *resp);
static	void		getnameinfo_sometime_complete(blocking_work_req rtype,
						      void *context,
						      size_t respsize,
						      void *resp);
225153838Sdfr
226153838Sdfr
227153838Sdfr/* === functions === */
228153838Sdfr/*
229153838Sdfr * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
230153838Sdfr *			  invokes provided callback completion function.
231153838Sdfr */
232153838Sdfrint
233153838Sdfrgetaddrinfo_sometime(
234153838Sdfr	const char *		node,
235153838Sdfr	const char *		service,
236236746Sjoel	const struct addrinfo *	hints,
237153838Sdfr	int			retry,
238153838Sdfr	gai_sometime_callback	callback,
239153838Sdfr	void *			context
240153838Sdfr	)
241153838Sdfr{
242153838Sdfr	blocking_gai_req *	gai_req;
243153838Sdfr	u_int			idx;
244153838Sdfr	dnschild_ctx *		child_ctx;
245153838Sdfr	size_t			req_size;
246153838Sdfr	size_t			nodesize;
247153838Sdfr	size_t			servsize;
248153838Sdfr	time_t			now;
249153838Sdfr
250153838Sdfr	REQUIRE(NULL != node);
251153838Sdfr	if (NULL != hints) {
252153838Sdfr		REQUIRE(0 == hints->ai_addrlen);
253153838Sdfr		REQUIRE(NULL == hints->ai_addr);
254153838Sdfr		REQUIRE(NULL == hints->ai_canonname);
255153838Sdfr		REQUIRE(NULL == hints->ai_next);
256153838Sdfr	}
257153838Sdfr
258153838Sdfr	idx = get_dnschild_ctx();
259153838Sdfr	child_ctx = dnschild_contexts[idx];
260153838Sdfr
261153838Sdfr	nodesize = strlen(node) + 1;
262153838Sdfr	servsize = strlen(service) + 1;
263153838Sdfr	req_size = sizeof(*gai_req) + nodesize + servsize;
264153838Sdfr
265153838Sdfr	gai_req = emalloc_zero(req_size);
266153838Sdfr
267153838Sdfr	gai_req->octets = req_size;
268153838Sdfr	gai_req->dns_idx = idx;
269153838Sdfr	now = time(NULL);
270153838Sdfr	gai_req->scheduled = now;
271153838Sdfr	gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
272153838Sdfr	child_ctx->next_dns_timeslot = gai_req->earliest;
273153838Sdfr	if (hints != NULL)
274153838Sdfr		gai_req->hints = *hints;
275153838Sdfr	gai_req->retry = retry;
276153838Sdfr	gai_req->callback = callback;
277153838Sdfr	gai_req->context = context;
278153838Sdfr	gai_req->nodesize = nodesize;
279153838Sdfr	gai_req->servsize = servsize;
280153838Sdfr
281153838Sdfr	memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
282153838Sdfr	memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
283153838Sdfr	       servsize);
284153838Sdfr
285153838Sdfr	if (queue_blocking_request(
286153838Sdfr		BLOCKING_GETADDRINFO,
287153838Sdfr		gai_req,
288153838Sdfr		req_size,
289153838Sdfr		&getaddrinfo_sometime_complete,
290153838Sdfr		gai_req)) {
291153838Sdfr
292153838Sdfr		msyslog(LOG_ERR, "unable to queue getaddrinfo request");
293153838Sdfr		errno = EFAULT;
294153838Sdfr		return -1;
295153838Sdfr	}
296153838Sdfr
297153838Sdfr	return 0;
298153838Sdfr}
299153838Sdfr
300153838Sdfrint
301153838Sdfrblocking_getaddrinfo(
302153838Sdfr	blocking_child *	c,
303153838Sdfr	blocking_pipe_header *	req
304153838Sdfr	)
305153838Sdfr{
306153838Sdfr	blocking_gai_req *	gai_req;
307153838Sdfr	dnsworker_ctx *		worker_ctx;
308153838Sdfr	blocking_pipe_header *	resp;
309153838Sdfr	blocking_gai_resp *	gai_resp;
310153838Sdfr	char *			node;
311153838Sdfr	char *			service;
312153838Sdfr	struct addrinfo *	ai_res;
313153838Sdfr	struct addrinfo *	ai;
314153838Sdfr	struct addrinfo *	serialized_ai;
315153838Sdfr	size_t			canons_octets;
316153838Sdfr	size_t			this_octets;
317153838Sdfr	size_t			resp_octets;
318153838Sdfr	char *			cp;
319153838Sdfr	time_t			time_now;
320153838Sdfr
321153838Sdfr	gai_req = (void *)((char *)req + sizeof(*req));
322153838Sdfr	node = (char *)gai_req + sizeof(*gai_req);
323153838Sdfr	service = node + gai_req->nodesize;
324153838Sdfr
325153838Sdfr	worker_ctx = get_worker_context(c, gai_req->dns_idx);
326153838Sdfr	scheduled_sleep(gai_req->scheduled, gai_req->earliest,
327153838Sdfr			worker_ctx);
328153838Sdfr	reload_resolv_conf(worker_ctx);
329153838Sdfr
330153838Sdfr	/*
331153838Sdfr	 * Take a shot at the final size, better to overestimate
332153838Sdfr	 * at first and then realloc to a smaller size.
333153838Sdfr	 */
334153838Sdfr
335153838Sdfr	resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
336153838Sdfr		      16 * (sizeof(struct addrinfo) +
337153838Sdfr			    sizeof(sockaddr_u)) +
338153838Sdfr		      256;
339153838Sdfr	resp = emalloc_zero(resp_octets);
340153838Sdfr	gai_resp = (void *)(resp + 1);
341153838Sdfr
342153838Sdfr	TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n",
343153838Sdfr		  node, service, gai_req->hints.ai_family,
344153838Sdfr		  gai_req->hints.ai_flags));
345153838Sdfr#ifdef DEBUG
346153838Sdfr	if (debug >= 2)
347153838Sdfr		fflush(stdout);
348153838Sdfr#endif
349153838Sdfr	ai_res = NULL;
350153838Sdfr	gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
351153838Sdfr					&ai_res);
352153838Sdfr	gai_resp->retry = gai_req->retry;
353153838Sdfr#ifdef EAI_SYSTEM
354153838Sdfr	if (EAI_SYSTEM == gai_resp->retcode)
355153838Sdfr		gai_resp->gai_errno = errno;
356153838Sdfr#endif
357153838Sdfr	canons_octets = 0;
358153838Sdfr
359153838Sdfr	if (0 == gai_resp->retcode) {
360153838Sdfr		ai = ai_res;
361153838Sdfr		while (NULL != ai) {
362153838Sdfr			gai_resp->ai_count++;
363153838Sdfr			if (ai->ai_canonname)
364153838Sdfr				canons_octets += strlen(ai->ai_canonname) + 1;
365153838Sdfr			ai = ai->ai_next;
366153838Sdfr		}
367153838Sdfr		/*
368153838Sdfr		 * If this query succeeded only after retrying, DNS may have
369153838Sdfr		 * just become responsive.  Ignore previously-scheduled
370153838Sdfr		 * retry sleeps once for each pending request, similar to
371153838Sdfr		 * the way scheduled_sleep() does when its worker_sleep()
372153838Sdfr		 * is interrupted.
373153838Sdfr		 */
374153838Sdfr		if (gai_resp->retry > INITIAL_DNS_RETRY) {
375153838Sdfr			time_now = time(NULL);
376153838Sdfr			worker_ctx->ignore_scheduled_before = time_now;
377153838Sdfr			TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
378153838Sdfr				  humantime(time_now)));
379153838Sdfr		}
380153838Sdfr	}
381153838Sdfr
382153838Sdfr	/*
383153838Sdfr	 * Our response consists of a header, followed by ai_count
384153838Sdfr	 * addrinfo structs followed by ai_count sockaddr_storage
385153838Sdfr	 * structs followed by the canonical names.
386153838Sdfr	 */
387153838Sdfr	gai_resp->octets = sizeof(*gai_resp)
388153838Sdfr			    + gai_resp->ai_count
389153838Sdfr				* (sizeof(gai_req->hints)
390153838Sdfr				   + sizeof(sockaddr_u))
391153838Sdfr			    + canons_octets;
392153838Sdfr
393153838Sdfr	resp_octets = sizeof(*resp) + gai_resp->octets;
394153838Sdfr	resp = erealloc(resp, resp_octets);
395153838Sdfr	gai_resp = (void *)(resp + 1);
396153838Sdfr
397153838Sdfr	/* cp serves as our current pointer while serializing */
398153838Sdfr	cp = (void *)(gai_resp + 1);
399153838Sdfr	canons_octets = 0;
400153838Sdfr
401153838Sdfr	if (0 == gai_resp->retcode) {
402153838Sdfr		ai = ai_res;
403153838Sdfr		while (NULL != ai) {
404153838Sdfr			memcpy(cp, ai, sizeof(*ai));
405153838Sdfr			serialized_ai = (void *)cp;
406153838Sdfr			cp += sizeof(*ai);
407153838Sdfr
408153838Sdfr			/* transform ai_canonname into offset */
409153838Sdfr			if (NULL != serialized_ai->ai_canonname) {
410153838Sdfr				serialized_ai->ai_canonname = (char *)canons_octets;
411153838Sdfr				canons_octets += strlen(ai->ai_canonname) + 1;
412153838Sdfr			}
413153838Sdfr
414153838Sdfr			/* leave fixup of ai_addr pointer for receiver */
415153838Sdfr
416153838Sdfr			ai = ai->ai_next;
417153838Sdfr		}
418153838Sdfr
419153838Sdfr		ai = ai_res;
420153838Sdfr		while (NULL != ai) {
421153838Sdfr			INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
422153838Sdfr			memcpy(cp, ai->ai_addr, ai->ai_addrlen);
423153838Sdfr			cp += sizeof(sockaddr_u);
424153838Sdfr
425153838Sdfr			ai = ai->ai_next;
426153838Sdfr		}
427153838Sdfr
428153838Sdfr		ai = ai_res;
429153838Sdfr		while (NULL != ai) {
430153838Sdfr			if (NULL != ai->ai_canonname) {
431153838Sdfr				this_octets = strlen(ai->ai_canonname) + 1;
432153838Sdfr				memcpy(cp, ai->ai_canonname, this_octets);
433153838Sdfr				cp += this_octets;
434153838Sdfr			}
435153838Sdfr
436153838Sdfr			ai = ai->ai_next;
437153838Sdfr		}
438153838Sdfr		freeaddrinfo(ai_res);
439153838Sdfr	}
440153838Sdfr
441153838Sdfr	/*
442153838Sdfr	 * make sure our walk and earlier calc match
443153838Sdfr	 */
444153838Sdfr	DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
445153838Sdfr
446153838Sdfr	if (queue_blocking_response(c, resp, resp_octets, req)) {
447153838Sdfr		msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
448153838Sdfr		return -1;
449153838Sdfr	}
450153838Sdfr
451153838Sdfr	return 0;
452153838Sdfr}
453153838Sdfr
454153838Sdfr
455153838Sdfrstatic void
456153838Sdfrgetaddrinfo_sometime_complete(
457153838Sdfr	blocking_work_req	rtype,
458153838Sdfr	void *			context,
459153838Sdfr	size_t			respsize,
460153838Sdfr	void *			resp
461153838Sdfr	)
462153838Sdfr{
463153838Sdfr	blocking_gai_req *	gai_req;
464153838Sdfr	blocking_gai_resp *	gai_resp;
465153838Sdfr	dnschild_ctx *		child_ctx;
466153838Sdfr	struct addrinfo *	ai;
467153838Sdfr	struct addrinfo *	next_ai;
468153838Sdfr	sockaddr_u *		psau;
469153838Sdfr	char *			node;
470153838Sdfr	char *			service;
471153838Sdfr	char *			canon_start;
472153838Sdfr	time_t			time_now;
473153838Sdfr	int			again;
474153838Sdfr	int			af;
475153838Sdfr	const char *		fam_spec;
476153838Sdfr	int			i;
477153838Sdfr
478236746Sjoel	gai_req = context;
479153838Sdfr	gai_resp = resp;
480153838Sdfr
481153838Sdfr	DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
482153838Sdfr	DEBUG_REQUIRE(respsize == gai_resp->octets);
483153838Sdfr
484153838Sdfr	node = (char *)gai_req + sizeof(*gai_req);
485153838Sdfr	service = node + gai_req->nodesize;
486153838Sdfr
487153838Sdfr	child_ctx = dnschild_contexts[gai_req->dns_idx];
488153838Sdfr
489153838Sdfr	if (0 == gai_resp->retcode) {
490153838Sdfr		/*
491153838Sdfr		 * If this query succeeded only after retrying, DNS may have
492153838Sdfr		 * just become responsive.
493153838Sdfr		 */
494153838Sdfr		if (gai_resp->retry > INITIAL_DNS_RETRY) {
495153838Sdfr			time_now = time(NULL);
496153838Sdfr			child_ctx->next_dns_timeslot = time_now;
497153838Sdfr			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
498153838Sdfr				  gai_req->dns_idx, humantime(time_now)));
499153838Sdfr		}
500153838Sdfr	} else {
501153838Sdfr		again = should_retry_dns(gai_resp->retcode,
502153838Sdfr					 gai_resp->gai_errno);
503153838Sdfr		/*
504153838Sdfr		 * exponential backoff of DNS retries to 64s
505153838Sdfr		 */
506153838Sdfr		if (gai_req->retry > 0 && again) {
507153838Sdfr			/* log the first retry only */
508153838Sdfr			if (INITIAL_DNS_RETRY == gai_req->retry)
509153838Sdfr				NLOG(NLOG_SYSINFO) {
510153838Sdfr					af = gai_req->hints.ai_family;
511153838Sdfr					fam_spec = (AF_INET6 == af)
512153838Sdfr						       ? " (AAAA)"
513153838Sdfr						       : (AF_INET == af)
514153838Sdfr							     ? " (A)"
515153838Sdfr							     : "";
516153838Sdfr#ifdef EAI_SYSTEM
517153838Sdfr					if (EAI_SYSTEM == gai_resp->retcode) {
518153838Sdfr						errno = gai_resp->gai_errno;
519153838Sdfr						msyslog(LOG_INFO,
520153838Sdfr							"retrying DNS %s%s: EAI_SYSTEM %d: %m",
521153838Sdfr							node, fam_spec,
522153838Sdfr							gai_resp->gai_errno);
523153838Sdfr					} else
524153838Sdfr#endif
525153838Sdfr						msyslog(LOG_INFO,
526153838Sdfr							"retrying DNS %s%s: %s (%d)",
527153838Sdfr							node, fam_spec,
528153838Sdfr							gai_strerror(gai_resp->retcode),
529153838Sdfr							gai_resp->retcode);
530153838Sdfr				}
531153838Sdfr			manage_dns_retry_interval(&gai_req->scheduled,
532236746Sjoel			    &gai_req->earliest, &gai_req->retry,
533153838Sdfr			    &child_ctx->next_dns_timeslot);
534153838Sdfr			if (!queue_blocking_request(
535153838Sdfr					BLOCKING_GETADDRINFO,
536153838Sdfr					gai_req,
537153838Sdfr					gai_req->octets,
538153838Sdfr					&getaddrinfo_sometime_complete,
539153838Sdfr					gai_req))
540153838Sdfr				return;
541203027Sgavin			else
542153838Sdfr				msyslog(LOG_ERR,
543153838Sdfr					"unable to retry hostname %s",
544153838Sdfr					node);
545154811Sdfr		}
546154811Sdfr	}
547154811Sdfr
548154811Sdfr	/*
549154811Sdfr	 * fixup pointers in returned addrinfo array
550154811Sdfr	 */
551154811Sdfr	ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
552154811Sdfr	next_ai = NULL;
553154811Sdfr	for (i = gai_resp->ai_count - 1; i >= 0; i--) {
554154811Sdfr		ai[i].ai_next = next_ai;
555154811Sdfr		next_ai = &ai[i];
556154811Sdfr	}
557154811Sdfr
558154811Sdfr	psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
559154811Sdfr	canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
560154811Sdfr
561154811Sdfr	for (i = 0; i < gai_resp->ai_count; i++) {
562154811Sdfr		if (NULL != ai[i].ai_addr)
563154811Sdfr			ai[i].ai_addr = &psau->sa;
564154811Sdfr		psau++;
565154811Sdfr		if (NULL != ai[i].ai_canonname)
566154811Sdfr			ai[i].ai_canonname += (size_t)canon_start;
567154811Sdfr	}
568154811Sdfr
569154811Sdfr	ENSURE((char *)psau == canon_start);
570154811Sdfr
571	if (!gai_resp->ai_count)
572		ai = NULL;
573
574	(*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
575			     gai_req->context, node, service,
576			     &gai_req->hints, ai);
577
578	free(gai_req);
579	/* gai_resp is part of block freed by process_blocking_resp() */
580}
581
582
#ifdef TEST_BLOCKING_WORKER
/*
 * gai_test_callback - test-only completion callback for
 * getaddrinfo_sometime().  Traces an error, or traces every returned
 * address and starts a reverse lookup for each via
 * getnameinfo_sometime().
 */
void
gai_test_callback(
	int			rescode,
	int			gai_errno,
	void *			context,
	const char *		name,
	const char *		service,
	const struct addrinfo *	hints,
	const struct addrinfo *	ai_res
	)
{
	const struct addrinfo *	cur;
	const char *		canon;
	const char *		socktype;
	sockaddr_u		addr;

	if (rescode) {
		TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
			  context, rescode, name, service));
		return;
	}

	for (cur = ai_res; NULL != cur; cur = cur->ai_next) {
		ZERO_SOCK(&addr);
		memcpy(&addr, cur->ai_addr, cur->ai_addrlen);

		canon = (cur->ai_canonname)
			    ? cur->ai_canonname
			    : "";
		if (SOCK_DGRAM == cur->ai_socktype)
			socktype = "DGRAM";
		else if (SOCK_STREAM == cur->ai_socktype)
			socktype = "STREAM";
		else
			socktype = "(other)";

		TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n",
			  context,
			  AF(&addr),
			  stoa(&addr),
			  canon,
			  socktype,
			  cur,
			  cur->ai_addr,
			  cur->ai_next));

		getnameinfo_sometime((sockaddr_u *)cur->ai_addr, 128, 32, 0, gni_test_callback, context);
	}
}
#endif	/* TEST_BLOCKING_WORKER */
618
619
620int
621getnameinfo_sometime(
622	sockaddr_u *		psau,
623	size_t			hostoctets,
624	size_t			servoctets,
625	int			flags,
626	gni_sometime_callback	callback,
627	void *			context
628	)
629{
630	blocking_gni_req *	gni_req;
631	u_int			idx;
632	dnschild_ctx *		child_ctx;
633	time_t			time_now;
634
635	REQUIRE(hostoctets);
636	REQUIRE(hostoctets + servoctets < 1024);
637
638	idx = get_dnschild_ctx();
639	child_ctx = dnschild_contexts[idx];
640
641	gni_req = emalloc_zero(sizeof(*gni_req));
642
643	gni_req->octets = sizeof(*gni_req);
644	gni_req->dns_idx = idx;
645	time_now = time(NULL);
646	gni_req->scheduled = time_now;
647	gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
648	child_ctx->next_dns_timeslot = gni_req->earliest;
649	memcpy(&gni_req->socku, psau, SOCKLEN(psau));
650	gni_req->hostoctets = hostoctets;
651	gni_req->servoctets = servoctets;
652	gni_req->flags = flags;
653	gni_req->retry = INITIAL_DNS_RETRY;
654	gni_req->callback = callback;
655	gni_req->context = context;
656
657	if (queue_blocking_request(
658		BLOCKING_GETNAMEINFO,
659		gni_req,
660		sizeof(*gni_req),
661		&getnameinfo_sometime_complete,
662		gni_req)) {
663
664		msyslog(LOG_ERR, "unable to queue getnameinfo request");
665		errno = EFAULT;
666		return -1;
667	}
668
669	return 0;
670}
671
672
673int
674blocking_getnameinfo(
675	blocking_child *	c,
676	blocking_pipe_header *	req
677	)
678{
679	blocking_gni_req *	gni_req;
680	dnsworker_ctx *		worker_ctx;
681	blocking_pipe_header *	resp;
682	blocking_gni_resp *	gni_resp;
683	size_t			octets;
684	size_t			resp_octets;
685	char *			service;
686	char *			cp;
687	int			rc;
688	time_t			time_now;
689	char			host[1024];
690
691	gni_req = (void *)((char *)req + sizeof(*req));
692
693	octets = gni_req->hostoctets + gni_req->servoctets;
694
695	/*
696	 * Some alloca() implementations are fragile regarding
697	 * large allocations.  We only need room for the host
698	 * and service names.
699	 */
700	REQUIRE(octets < sizeof(host));
701	service = host + gni_req->hostoctets;
702
703	worker_ctx = get_worker_context(c, gni_req->dns_idx);
704	scheduled_sleep(gni_req->scheduled, gni_req->earliest,
705			worker_ctx);
706	reload_resolv_conf(worker_ctx);
707
708	/*
709	 * Take a shot at the final size, better to overestimate
710	 * then realloc to a smaller size.
711	 */
712
713	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
714	resp = emalloc_zero(resp_octets);
715	gni_resp = (void *)((char *)resp + sizeof(*resp));
716
717	TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
718		  stoa(&gni_req->socku), gni_req->flags,
719		  (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
720
721	gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
722					SOCKLEN(&gni_req->socku),
723					host,
724					gni_req->hostoctets,
725					service,
726					gni_req->servoctets,
727					gni_req->flags);
728	gni_resp->retry = gni_req->retry;
729#ifdef EAI_SYSTEM
730	if (EAI_SYSTEM == gni_resp->retcode)
731		gni_resp->gni_errno = errno;
732#endif
733
734	if (0 != gni_resp->retcode) {
735		gni_resp->hostoctets = 0;
736		gni_resp->servoctets = 0;
737	} else {
738		gni_resp->hostoctets = strlen(host) + 1;
739		gni_resp->servoctets = strlen(service) + 1;
740		/*
741		 * If this query succeeded only after retrying, DNS may have
742		 * just become responsive.  Ignore previously-scheduled
743		 * retry sleeps once for each pending request, similar to
744		 * the way scheduled_sleep() does when its worker_sleep()
745		 * is interrupted.
746		 */
747		if (gni_req->retry > INITIAL_DNS_RETRY) {
748			time_now = time(NULL);
749			worker_ctx->ignore_scheduled_before = time_now;
750			TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
751				humantime(time_now)));
752		}
753	}
754	octets = gni_resp->hostoctets + gni_resp->servoctets;
755	/*
756	 * Our response consists of a header, followed by the host and
757	 * service strings, each null-terminated.
758	 */
759	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
760
761	resp = erealloc(resp, resp_octets);
762	gni_resp = (void *)(resp + 1);
763
764	gni_resp->octets = sizeof(*gni_resp) + octets;
765
766	/* cp serves as our current pointer while serializing */
767	cp = (void *)(gni_resp + 1);
768
769	if (0 == gni_resp->retcode) {
770		memcpy(cp, host, gni_resp->hostoctets);
771		cp += gni_resp->hostoctets;
772		memcpy(cp, service, gni_resp->servoctets);
773		cp += gni_resp->servoctets;
774	}
775
776	INSIST((size_t)(cp - (char *)resp) == resp_octets);
777	INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
778
779	rc = queue_blocking_response(c, resp, resp_octets, req);
780	if (rc)
781		msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
782	return rc;
783}
784
785
786static void
787getnameinfo_sometime_complete(
788	blocking_work_req	rtype,
789	void *			context,
790	size_t			respsize,
791	void *			resp
792	)
793{
794	blocking_gni_req *	gni_req;
795	blocking_gni_resp *	gni_resp;
796	dnschild_ctx *		child_ctx;
797	char *			host;
798	char *			service;
799	time_t			time_now;
800	int			again;
801
802	gni_req = context;
803	gni_resp = resp;
804
805	DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
806	DEBUG_REQUIRE(respsize == gni_resp->octets);
807
808	child_ctx = dnschild_contexts[gni_req->dns_idx];
809
810	if (0 == gni_resp->retcode) {
811		/*
812		 * If this query succeeded only after retrying, DNS may have
813		 * just become responsive.
814		 */
815		if (gni_resp->retry > INITIAL_DNS_RETRY) {
816			time_now = time(NULL);
817			child_ctx->next_dns_timeslot = time_now;
818			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
819				  gni_req->dns_idx, humantime(time_now)));
820		}
821	} else {
822		again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
823		/*
824		 * exponential backoff of DNS retries to 64s
825		 */
826		if (gni_req->retry > 0)
827			manage_dns_retry_interval(&gni_req->scheduled,
828			    &gni_req->earliest, &gni_req->retry,
829			    &child_ctx->next_dns_timeslot);
830
831		if (gni_req->retry > 0 && again) {
832			if (!queue_blocking_request(
833				BLOCKING_GETNAMEINFO,
834				gni_req,
835				gni_req->octets,
836				&getnameinfo_sometime_complete,
837				gni_req))
838				return;
839
840			msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
841		}
842	}
843
844	if (!gni_resp->hostoctets) {
845		host = NULL;
846		service = NULL;
847	} else {
848		host = (char *)gni_resp + sizeof(*gni_resp);
849		service = (gni_resp->servoctets)
850			      ? host + gni_resp->hostoctets
851			      : NULL;
852	}
853
854	(*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
855			     &gni_req->socku, gni_req->flags, host,
856			     service, gni_req->context);
857
858	free(gni_req);
859	/* gni_resp is part of block freed by process_blocking_resp() */
860}
861
862
#ifdef TEST_BLOCKING_WORKER
/*
 * gni_test_callback - test-only completion callback for
 * getnameinfo_sometime(); traces either the failure or the names
 * found.
 */
void
gni_test_callback(
	int		rescode,
	int		gni_errno,
	sockaddr_u *	psau,
	int		flags,
	const char *	host,
	const char *	service,
	void *		context
	)
{
	if (rescode) {
		TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
			  context, rescode, gni_errno, flags, stoa(psau)));
		return;
	}

	TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n",
		  host, service, stoa(psau), context));
}
#endif	/* TEST_BLOCKING_WORKER */
874
875
#ifdef HAVE_RES_INIT
/*
 * reload_resolv_conf - call res_init() at most once per minute.
 *
 * This is ad-hoc.  /etc/resolv.conf is reloaded once per minute to
 * pick up on changes from the DHCP client.  [Bug 1226]
 * With WORK_THREAD the deadline is kept in the file-scope
 * next_res_init so the reload happens once per minute process-wide;
 * otherwise it lives only in the worker context.  The first call for
 * a fresh (zero) deadline just arms the timer without calling
 * res_init().
 */
static void
reload_resolv_conf(
	dnsworker_ctx *	worker_ctx
	)
{
	time_t	now;

	now = time(NULL);
# ifdef WORK_THREAD
	worker_ctx->next_res_init = next_res_init;
# endif
	/* deadline not reached yet, nothing to do */
	if (now < worker_ctx->next_res_init)
		return;

	if (0 != worker_ctx->next_res_init)
		res_init();
	worker_ctx->next_res_init = now + 60;
# ifdef WORK_THREAD
	next_res_init = worker_ctx->next_res_init;
# endif
}
#endif	/* HAVE_RES_INIT */
904
905
906static u_int
907reserve_dnschild_ctx(void)
908{
909	const size_t	ps = sizeof(dnschild_contexts[0]);
910	const size_t	cs = sizeof(*dnschild_contexts[0]);
911	u_int		c;
912	u_int		new_alloc;
913	size_t		octets;
914	size_t		new_octets;
915
916	c = 0;
917	while (TRUE) {
918		for ( ; c < dnschild_contexts_alloc; c++) {
919			if (NULL == dnschild_contexts[c]) {
920				dnschild_contexts[c] = emalloc_zero(cs);
921
922				return c;
923			}
924		}
925		new_alloc = dnschild_contexts_alloc + 20;
926		new_octets = new_alloc * ps;
927		octets = dnschild_contexts_alloc * ps;
928		dnschild_contexts = erealloc_zero(dnschild_contexts,
929						  new_octets, octets);
930		dnschild_contexts_alloc = new_alloc;
931	}
932}
933
934
935static u_int
936get_dnschild_ctx(void)
937{
938	static u_int	shared_ctx = UINT_MAX;
939
940	if (worker_per_query)
941		return reserve_dnschild_ctx();
942
943	if (UINT_MAX == shared_ctx)
944		shared_ctx = reserve_dnschild_ctx();
945
946	return shared_ctx;
947}
948
949
950static dnsworker_ctx *
951get_worker_context(
952	blocking_child *	c,
953	u_int			idx
954	)
955{
956	u_int		min_new_alloc;
957	u_int		new_alloc;
958	size_t		octets;
959	size_t		new_octets;
960	dnsworker_ctx *	retv;
961
962	worker_global_lock(TRUE);
963
964	if (dnsworker_contexts_alloc <= idx) {
965		min_new_alloc = 1 + idx;
966		/* round new_alloc up to nearest multiple of 4 */
967		new_alloc = (min_new_alloc + 4) & ~(4 - 1);
968		new_octets = new_alloc * sizeof(dnsworker_ctx*);
969		octets = dnsworker_contexts_alloc * sizeof(dnsworker_ctx*);
970		dnsworker_contexts = erealloc_zero(dnsworker_contexts,
971						   new_octets, octets);
972		dnsworker_contexts_alloc = new_alloc;
973		retv = emalloc_zero(sizeof(dnsworker_ctx));
974		dnsworker_contexts[idx] = retv;
975	} else if (NULL == (retv = dnsworker_contexts[idx])) {
976		retv = emalloc_zero(sizeof(dnsworker_ctx));
977		dnsworker_contexts[idx] = retv;
978	}
979
980	worker_global_lock(FALSE);
981
982	ZERO(*retv);
983	retv->c = c;
984	return retv;
985}
986
987
988static void
989scheduled_sleep(
990	time_t		scheduled,
991	time_t		earliest,
992	dnsworker_ctx *	worker_ctx
993	)
994{
995	time_t now;
996
997	if (scheduled < worker_ctx->ignore_scheduled_before) {
998		TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
999			  humantime(earliest), humantime(scheduled),
1000			  humantime(worker_ctx->ignore_scheduled_before)));
1001		return;
1002	}
1003
1004	now = time(NULL);
1005
1006	if (now < earliest) {
1007		TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
1008			  humantime(earliest), humantime(scheduled),
1009			  humantime(worker_ctx->ignore_scheduled_before)));
1010		if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
1011			/* our sleep was interrupted */
1012			now = time(NULL);
1013			worker_ctx->ignore_scheduled_before = now;
1014#ifdef HAVE_RES_INIT
1015			worker_ctx->next_res_init = now + 60;
1016			next_res_init = worker_ctx->next_res_init;
1017			res_init();
1018#endif
1019			TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
1020				  humantime(worker_ctx->ignore_scheduled_before)));
1021		}
1022	}
1023}
1024
1025
/*
 * manage_dns_retry_interval is a helper used by
 * getaddrinfo_sometime_complete and getnameinfo_sometime_complete
 * to calculate the new retry interval and schedule the next query.
 *
 * pscheduled		out: set to the current time
 * pwhen		out: time of the next attempt, the later of
 *			(now + *pretry) and *pnext_timeslot
 * pretry		in/out: retry interval; doubled on each call
 *			and capped at 64
 * pnext_timeslot	in/out: updated to the value stored in *pwhen
 *
 * Both inputs are read before any output is written.
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot
	)
{
	const time_t	current = time(NULL);
	const int	interval = *pretry;
	const time_t	slot = max(current + interval, *pnext_timeslot);

	*pnext_timeslot = slot;
	*pscheduled = current;
	*pwhen = slot;
	*pretry = min(64, interval << 1);
}
1053
/*
 * should_retry_dns is a helper used by getaddrinfo_sometime_complete
 * and getnameinfo_sometime_complete which implements ntpd's DNS retry
 * policy.
 *
 * rescode	resolver result code, compared against the EAI_* values
 * res_errno	accompanying errno; only used for the EAI_SYSTEM
 *		debug trace
 *
 * Returns nonzero when the lookup should be tried again:
 *	EAI_FAIL, EAI_AGAIN, EAI_SYSTEM	always retry
 *	EAI_NONAME, EAI_NODATA		retry only as long as no
 *					EAI_AGAIN has been seen by this
 *					function  [Bug 1178]
 *	anything else			do not retry
 */
static int
should_retry_dns(
	int	rescode,
	int	res_errno
	)
{
	static int	saw_eai_again;	/* sticky once EAI_AGAIN occurs */
	int		retry;
#if defined (EAI_SYSTEM) && defined(DEBUG)
	char		errtext[256];
#endif

	switch (rescode) {

	case EAI_AGAIN:
		saw_eai_again = 1;		/* [Bug 1178] */
		/* FALLTHROUGH */
	case EAI_FAIL:
		retry = 1;
		break;

	case EAI_NONAME:
#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
	case EAI_NODATA:
#endif
		retry = !saw_eai_again;		/* [Bug 1178] */
		break;

#ifdef EAI_SYSTEM
	case EAI_SYSTEM:
		/*
		 * EAI_SYSTEM means the real error is in errno.  We should be more
		 * discriminating about which errno values require retrying, but
		 * this matches existing behavior.
		 */
		retry = 1;
# ifdef DEBUG
		errno_to_str(res_errno, errtext, sizeof(errtext));
		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
			  res_errno, errtext));
# endif
		break;
#endif

	default:
		/* success or a failure that is not considered temporary */
		retry = 0;
		break;
	}

	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
		  gai_strerror(rescode), rescode, retry ? "" : "not "));

	return retry;
}
1117
1118#else	/* !WORKER follows */
1119int ntp_intres_nonempty_compilation_unit;
1120#endif
1121