work_fork.c revision 316068
/*
 * work_fork.c - fork implementation for blocking worker child.
 */
#include <config.h>
#include "ntp_workimpl.h"

#ifdef WORK_FORK
#include <stdio.h>
#include <ctype.h>
#include <signal.h>
#include <sys/wait.h>

#include "iosignal.h"
#include "ntp_stdlib.h"
#include "ntp_malloc.h"
#include "ntp_syslog.h"
#include "ntpd.h"
#include "ntp_io.h"
#include "ntp_assert.h"
#include "ntp_unixtime.h"
#include "ntp_worker.h"

/* === variables === */
	int			worker_process;
	addremove_io_fd_func	addremove_io_fd;
static	volatile int		worker_sighup_received;
int	saved_argc = 0;
char	**saved_argv;

/* === function prototypes === */
static	void		fork_blocking_child(blocking_child *);
static	RETSIGTYPE	worker_sighup(int);
static	void		send_worker_home_atexit(void);
static	void		cleanup_after_child(blocking_child *);

/* === I/O helpers === */
/* Since we have signals enabled, there's a good chance that blocking IO
 * via pipe suffers from EINTR -- and this goes for both directions.
 * The next two wrappers will loop until either all the data is written
 * or read, plus handling the EOF condition on read. They may return
 * zero if no data was transferred at all, and effectively every return
 * value that differs from the given transfer length signifies an error
 * condition.
 */

static size_t
netread(
	int		fd,
	void *		vb,
	size_t		l
	)
{
	char *		b = vb;
	ssize_t		r;

	while (l) {
		r = read(fd, b, l);
		if (r > 0) {
			l -= r;
			b += r;
		} else if (r == 0 || errno != EINTR) {
			l = 0;
		}
	}
	return (size_t)(b - (char *)vb);
}


static size_t
netwrite(
	int		fd,
	const void *	vb,
	size_t		l
	)
{
	const char *	b = vb;
	ssize_t		w;

	while (l) {
		w = write(fd, b, l);
		if (w > 0) {
			l -= w;
			b += w;
		} else if (errno != EINTR) {
			l = 0;
		}
	}
	return (size_t)(b - (const char *)vb);
}
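
/*
 * Illustrative usage sketch (comment only, not compiled): callers of
 * these wrappers treat any return value other than the requested
 * length as a failed transfer, e.g.
 *
 *	if (netread(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
 *		... EOF, a signal problem, or a real read error ...
 *
 * A return of zero from netread() can also simply mean the peer closed
 * its end of the pipe.
 */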


/* === functions === */
/*
 * exit_worker()
 *
 * On some systems _exit() is preferred to exit() for forked children.
 * For example, http://netbsd.gw.com/cgi-bin/man-cgi?fork++NetBSD-5.0
 * recommends _exit() to avoid double-flushing C runtime stream buffers
 * and also to avoid calling the parent's atexit() routines in the
 * child.  On those systems WORKER_CHILD_EXIT is _exit.  Since _exit
 * bypasses CRT cleanup, fflush() files we know might have output
 * buffered.
 */
void
exit_worker(
	int	exitcode
	)
{
	if (syslog_file != NULL)
		fflush(syslog_file);
	fflush(stdout);
	fflush(stderr);
	WORKER_CHILD_EXIT (exitcode);	/* space before ( required */
}
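
/*
 * Illustrative sketch (not part of this file): on systems where the
 * NetBSD-style advice above applies, the build configuration might map
 * the macro roughly like
 *
 *	#define WORKER_CHILD_EXIT _exit
 *
 * and to plain exit elsewhere.  The actual definition lives in the ntp
 * headers; this snippet only shows why the fflush() calls above must
 * run before it.
 */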


static RETSIGTYPE
worker_sighup(
	int sig
	)
{
	if (SIGHUP == sig)
		worker_sighup_received = 1;
}


int
worker_sleep(
	blocking_child *	c,
	time_t			seconds
	)
{
	u_int sleep_remain;

	sleep_remain = (u_int)seconds;
	do {
		if (!worker_sighup_received)
			sleep_remain = sleep(sleep_remain);
		if (worker_sighup_received) {
			TRACE(1, ("worker SIGHUP with %us left to sleep",
				  sleep_remain));
			worker_sighup_received = 0;
			return -1;
		}
	} while (sleep_remain);

	return 0;
}


void
interrupt_worker_sleep(void)
{
	u_int			idx;
	blocking_child *	c;
	int			rc;

	for (idx = 0; idx < blocking_children_alloc; idx++) {
		c = blocking_children[idx];

		if (NULL == c || c->reusable == TRUE)
			continue;

		rc = kill(c->pid, SIGHUP);
		if (rc < 0)
			msyslog(LOG_ERR,
				"Unable to signal HUP to wake child pid %d: %m",
				c->pid);
	}
}


/*
 * harvest_child_status() runs in the parent.
 *
 * Note the error handling -- this is an interaction with SIGCHLD.
 * SIG_IGN on SIGCHLD on some OSes means do not wait but reap
 * automatically. Since we're not really interested in the result code,
 * we simply ignore the error.
 */
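/*
 * (Added note: with SIGCHLD set to SIG_IGN, POSIX allows the kernel to
 * auto-reap children, in which case the waitpid() below fails with
 * ECHILD -- that is why ECHILD is the one errno value silently ignored
 * here.)
 */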
static void
harvest_child_status(
	blocking_child *	c
	)
{
	if (c->pid) {
		/* Wait on the child so it can finish terminating */
		if (waitpid(c->pid, NULL, 0) == c->pid)
			TRACE(4, ("harvested child %d\n", c->pid));
		else if (errno != ECHILD)
			msyslog(LOG_ERR, "error waiting on child %d: %m", c->pid);
		c->pid = 0;
	}
}

/*
 * req_child_exit() runs in the parent.
 */
int
req_child_exit(
	blocking_child *	c
	)
{
	if (-1 != c->req_write_pipe) {
		close(c->req_write_pipe);
		c->req_write_pipe = -1;

		/* Closing the pipe forces the child to exit */
		harvest_child_status(c);
		return 0;
	}
	return -1;
}


/*
 * cleanup_after_child() runs in parent.
 */
static void
cleanup_after_child(
	blocking_child *	c
	)
{
	harvest_child_status(c);
	if (-1 != c->resp_read_pipe) {
		(*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE);
		close(c->resp_read_pipe);
		c->resp_read_pipe = -1;
	}
	c->resp_read_ctx = NULL;
	DEBUG_INSIST(-1 == c->req_read_pipe);
	DEBUG_INSIST(-1 == c->resp_write_pipe);
	c->reusable = TRUE;
}


static void
send_worker_home_atexit(void)
{
	u_int			idx;
	blocking_child *	c;

	if (worker_process)
		return;

	for (idx = 0; idx < blocking_children_alloc; idx++) {
		c = blocking_children[idx];
		if (NULL == c)
			continue;
		req_child_exit(c);
	}
}


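/*
 * send_blocking_req_internal() - send one request to the worker child.
 *
 * (Descriptive comment added; the framing is inferred from the code
 * below.)  A request is written as a fixed-size blocking_pipe_header
 * followed by (hdr->octets - sizeof(*hdr)) octets of payload, so the
 * reader picks up the header first and then knows exactly how much
 * payload follows.
 */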
int
send_blocking_req_internal(
	blocking_child *	c,
	blocking_pipe_header *	hdr,
	void *			data
	)
{
	size_t	octets;
	size_t	rc;

	DEBUG_REQUIRE(hdr != NULL);
	DEBUG_REQUIRE(data != NULL);
	DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig);

	if (-1 == c->req_write_pipe) {
		fork_blocking_child(c);
		DEBUG_INSIST(-1 != c->req_write_pipe);
	}

	octets = sizeof(*hdr);
	rc = netwrite(c->req_write_pipe, hdr, octets);

	if (rc == octets) {
		octets = hdr->octets - sizeof(*hdr);
		rc = netwrite(c->req_write_pipe, data, octets);
		if (rc == octets)
			return 0;
	}

	msyslog(LOG_ERR,
		"send_blocking_req_internal: short write (%zu of %zu), %m",
		rc, octets);

	/* Fatal error.  Clean up the child process.  */
	req_child_exit(c);
	exit(1);	/* otherwise would be return -1 */
}


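/*
 * receive_blocking_req_internal() - read one request from the request
 * pipe.  (Descriptive comment added: judging from the use of
 * c->req_read_pipe and the TRACE message below, this runs in the worker
 * child.)  A zero-length read means the parent closed the pipe and the
 * child should terminate.
 */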
blocking_pipe_header *
receive_blocking_req_internal(
	blocking_child *	c
	)
{
	blocking_pipe_header	hdr;
	blocking_pipe_header *	req;
	size_t			rc;
	size_t			octets;

	DEBUG_REQUIRE(-1 != c->req_read_pipe);

	req = NULL;
	rc = netread(c->req_read_pipe, &hdr, sizeof(hdr));

	if (0 == rc) {
		TRACE(4, ("parent closed request pipe, child %d terminating\n",
			  c->pid));
	} else if (rc != sizeof(hdr)) {
		msyslog(LOG_ERR,
			"receive_blocking_req_internal: short header read (%zu of %zu), %m",
			rc, sizeof(hdr));
	} else {
		INSIST(sizeof(hdr) < hdr.octets && hdr.octets < 4 * 1024);
		req = emalloc(hdr.octets);
		memcpy(req, &hdr, sizeof(*req));
		octets = hdr.octets - sizeof(hdr);
		rc = netread(c->req_read_pipe, (char *)(req + 1),
			     octets);

		if (rc != octets)
			msyslog(LOG_ERR,
				"receive_blocking_req_internal: short read (%zu of %zu), %m",
				rc, octets);
		else if (BLOCKING_REQ_MAGIC != req->magic_sig)
			msyslog(LOG_ERR,
				"receive_blocking_req_internal: packet header mismatch (0x%x)",
				req->magic_sig);
		else
			return req;
	}

	if (req != NULL)
		free(req);

	return NULL;
}


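/*
 * send_blocking_resp_internal() - write one complete response.
 * (Descriptive comment added: resp->octets already counts the header,
 * so the whole buffer is written in a single netwrite() call, and the
 * buffer is freed here regardless of the outcome.)
 */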
int
send_blocking_resp_internal(
	blocking_child *	c,
	blocking_pipe_header *	resp
	)
{
	size_t	octets;
	size_t	rc;

	DEBUG_REQUIRE(-1 != c->resp_write_pipe);

	octets = resp->octets;
	rc = netwrite(c->resp_write_pipe, resp, octets);
	free(resp);

	if (octets == rc)
		return 0;

	TRACE(1, ("send_blocking_resp_internal: short write (%zu of %zu), %m\n",
		  rc, octets));
	return -1;
}


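/*
 * receive_blocking_resp_internal() - read one response from a worker
 * child.  (Descriptive comment added: this reads c->resp_read_pipe,
 * which only the parent keeps open, and a zero-length read is treated
 * as the normal "child exited" indication.)
 */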
blocking_pipe_header *
receive_blocking_resp_internal(
	blocking_child *	c
	)
{
	blocking_pipe_header	hdr;
	blocking_pipe_header *	resp;
	size_t			rc;
	size_t			octets;

	DEBUG_REQUIRE(c->resp_read_pipe != -1);

	resp = NULL;
	rc = netread(c->resp_read_pipe, &hdr, sizeof(hdr));

	if (0 == rc) {
		/* this is the normal child exited indication */
	} else if (rc != sizeof(hdr)) {
		TRACE(1, ("receive_blocking_resp_internal: short header read (%zu of %zu), %m\n",
			  rc, sizeof(hdr)));
	} else if (BLOCKING_RESP_MAGIC != hdr.magic_sig) {
		TRACE(1, ("receive_blocking_resp_internal: header mismatch (0x%x)\n",
			  hdr.magic_sig));
	} else {
		INSIST(sizeof(hdr) < hdr.octets &&
		       hdr.octets < 16 * 1024);
		resp = emalloc(hdr.octets);
		memcpy(resp, &hdr, sizeof(*resp));
		octets = hdr.octets - sizeof(hdr);
		rc = netread(c->resp_read_pipe, (char *)(resp + 1),
			     octets);

		if (rc != octets)
			TRACE(1, ("receive_blocking_resp_internal: short read (%zu of %zu), %m\n",
				  rc, octets));
		else
			return resp;
	}

	cleanup_after_child(c);

	if (resp != NULL)
		free(resp);

	return NULL;
}


#if defined(HAVE_DROPROOT) && defined(WORK_FORK)
void
fork_deferred_worker(void)
{
	u_int			idx;
	blocking_child *	c;

	REQUIRE(droproot && root_dropped);

	for (idx = 0; idx < blocking_children_alloc; idx++) {
		c = blocking_children[idx];
		if (NULL == c)
			continue;
		if (-1 != c->req_write_pipe && 0 == c->pid)
			fork_blocking_child(c);
	}
}
#endif


static void
fork_blocking_child(
	blocking_child *	c
	)
{
	static int	atexit_installed;
	static int	blocking_pipes[4] = { -1, -1, -1, -1 };
	int		rc;
	int		was_pipe;
	int		is_pipe;
	int		saved_errno = 0;
	int		childpid;
	int		keep_fd;
	int		fd;

	/*
	 * parent and child communicate via a pair of pipes.
	 *
	 * 0 child read request
	 * 1 parent write request
	 * 2 parent read response
	 * 3 child write response
	 */
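	/*
	 * (Added summary of descriptor ownership, derived from the code
	 * below: after the fork the parent keeps [1] and [2] as
	 * req_write_pipe and resp_read_pipe, while the child keeps [0]
	 * and [3] as req_read_pipe and resp_write_pipe; each side closes
	 * the other pair.)
	 */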
	if (-1 == c->req_write_pipe) {
		rc = pipe_socketpair(&blocking_pipes[0], &was_pipe);
		if (0 != rc) {
			saved_errno = errno;
		} else {
			rc = pipe_socketpair(&blocking_pipes[2], &is_pipe);
			if (0 != rc) {
				saved_errno = errno;
				close(blocking_pipes[0]);
				close(blocking_pipes[1]);
			} else {
				INSIST(was_pipe == is_pipe);
			}
		}
		if (0 != rc) {
			errno = saved_errno;
			msyslog(LOG_ERR, "unable to create worker pipes: %m");
			exit(1);
		}

		/*
		 * Move the descriptors the parent will keep open out of the
		 * low descriptors preferred by C runtime buffered FILE *.
		 */
		c->req_write_pipe = move_fd(blocking_pipes[1]);
		c->resp_read_pipe = move_fd(blocking_pipes[2]);
		/*
		 * wake any worker child on orderly shutdown of the
		 * daemon so that it can notice the broken pipes and
		 * go away promptly.
		 */
		if (!atexit_installed) {
			atexit(&send_worker_home_atexit);
			atexit_installed = TRUE;
		}
	}

#if defined(HAVE_DROPROOT) && !defined(NEED_EARLY_FORK)
	/* defer the fork until after root is dropped */
	if (droproot && !root_dropped)
		return;
#endif
	if (syslog_file != NULL)
		fflush(syslog_file);
	fflush(stdout);
	fflush(stderr);

	/* [BUG 3050] setting SIGCHLD to SIG_IGN likely causes unwanted
	 * or undefined effects. We don't do it and leave SIGCHLD alone.
	 */
	/* signal_no_reset(SIGCHLD, SIG_IGN); */

	childpid = fork();
	if (-1 == childpid) {
		msyslog(LOG_ERR, "unable to fork worker: %m");
		exit(1);
	}

	if (childpid) {
		/* this is the parent */
		TRACE(1, ("forked worker child (pid %d)\n", childpid));
		c->pid = childpid;
		c->ispipe = is_pipe;

		/* close the child's pipe descriptors. */
		close(blocking_pipes[0]);
		close(blocking_pipes[3]);

		memset(blocking_pipes, -1, sizeof(blocking_pipes));

		/* wire into I/O loop */
		(*addremove_io_fd)(c->resp_read_pipe, is_pipe, FALSE);

		return;		/* parent returns */
	}

	/*
	 * The parent gets the child pid as the return value of fork().
	 * The child must work for it.
	 */
	c->pid = getpid();
	worker_process = TRUE;

	/*
	 * Change the process name of the child to avoid confusion
	 * about ntpd running twice.
	 */
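	/*
	 * (Added note: the strlcpy() below writes up to argvlen bytes
	 * starting at saved_argv[0], which assumes the original argv
	 * strings were laid out contiguously -- the usual layout, but an
	 * assumption worth stating.)
	 */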
	if (saved_argc != 0) {
		int argcc;
		int argvlen = 0;
		/* Clear argv */
		for (argcc = 0; argcc < saved_argc; argcc++) {
			int l = strlen(saved_argv[argcc]);
			argvlen += l + 1;
			memset(saved_argv[argcc], 0, l);
		}
		strlcpy(saved_argv[0], "ntpd: asynchronous dns resolver", argvlen);
	}

	/*
	 * In the child, close all files except stdin, stdout, stderr,
	 * and the two child ends of the pipes.
	 */
	DEBUG_INSIST(-1 == c->req_read_pipe);
	DEBUG_INSIST(-1 == c->resp_write_pipe);
	c->req_read_pipe = blocking_pipes[0];
	c->resp_write_pipe = blocking_pipes[3];

	kill_asyncio(0);
	closelog();
	if (syslog_file != NULL) {
		fclose(syslog_file);
		syslog_file = NULL;
		syslogit = TRUE;
	}
	keep_fd = max(c->req_read_pipe, c->resp_write_pipe);
	for (fd = 3; fd < keep_fd; fd++)
		if (fd != c->req_read_pipe &&
		    fd != c->resp_write_pipe)
			close(fd);
	close_all_beyond(keep_fd);
	/*
	 * We get signals from refclock serial I/O on NetBSD in the
	 * worker if we do not reset SIGIO's handler to the default.
	 * It is not conditionalized for NetBSD alone because on
	 * systems where it is not needed, it is harmless, and that
	 * allows us to handle unknown others with NetBSD behavior.
	 * [Bug 1386]
	 */
#if defined(USE_SIGIO)
	signal_no_reset(SIGIO, SIG_DFL);
#elif defined(USE_SIGPOLL)
	signal_no_reset(SIGPOLL, SIG_DFL);
#endif
	signal_no_reset(SIGHUP, worker_sighup);
	init_logging("ntp_intres", 0, FALSE);
	setup_logfile(NULL);

	/*
	 * And now back to the portable code
	 */
	exit_worker(blocking_child_common(c));
}


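/*
 * worker_global_lock() - no-op in the fork implementation.
 * (Descriptive comment added: a forked worker has its own address
 * space, so unlike the threaded worker implementation there is nothing
 * to lock here.)
 */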
void worker_global_lock(int inOrOut)
{
	(void)inOrOut;
}

#else	/* !WORK_FORK follows */
char work_fork_nonempty_compilation_unit;
#endif
610