/* work_fork.c revision 310419 */
1/*
2 * work_fork.c - fork implementation for blocking worker child.
3 */
4#include <config.h>
5#include "ntp_workimpl.h"
6
7#ifdef WORK_FORK
8#include <stdio.h>
9#include <ctype.h>
10#include <signal.h>
11#include <sys/wait.h>
12
13#include "iosignal.h"
14#include "ntp_stdlib.h"
15#include "ntp_malloc.h"
16#include "ntp_syslog.h"
17#include "ntpd.h"
18#include "ntp_io.h"
19#include "ntp_assert.h"
20#include "ntp_unixtime.h"
21#include "ntp_worker.h"
22
23/* === variables === */
24	int			worker_process;
25	addremove_io_fd_func	addremove_io_fd;
26static	volatile int		worker_sighup_received;
27
28/* === function prototypes === */
29static	void		fork_blocking_child(blocking_child *);
30static	RETSIGTYPE	worker_sighup(int);
31static	void		send_worker_home_atexit(void);
32static	void		cleanup_after_child(blocking_child *);
33
34/* === functions === */
35/*
36 * exit_worker()
37 *
38 * On some systems _exit() is preferred to exit() for forked children.
39 * For example, http://netbsd.gw.com/cgi-bin/man-cgi?fork++NetBSD-5.0
40 * recommends _exit() to avoid double-flushing C runtime stream buffers
41 * and also to avoid calling the parent's atexit() routines in the
42 * child.  On those systems WORKER_CHILD_EXIT is _exit.  Since _exit
43 * bypasses CRT cleanup, fflush() files we know might have output
44 * buffered.
45 */
46void
47exit_worker(
48	int	exitcode
49	)
50{
51	if (syslog_file != NULL)
52		fflush(syslog_file);
53	fflush(stdout);
54	fflush(stderr);
55	WORKER_CHILD_EXIT (exitcode);	/* space before ( required */
56}
57
58
59static RETSIGTYPE
60worker_sighup(
61	int sig
62	)
63{
64	if (SIGHUP == sig)
65		worker_sighup_received = 1;
66}
67
68
69int
70worker_sleep(
71	blocking_child *	c,
72	time_t			seconds
73	)
74{
75	u_int sleep_remain;
76
77	sleep_remain = (u_int)seconds;
78	do {
79		if (!worker_sighup_received)
80			sleep_remain = sleep(sleep_remain);
81		if (worker_sighup_received) {
82			TRACE(1, ("worker SIGHUP with %us left to sleep",
83				  sleep_remain));
84			worker_sighup_received = 0;
85			return -1;
86		}
87	} while (sleep_remain);
88
89	return 0;
90}
91
92
93void
94interrupt_worker_sleep(void)
95{
96	u_int			idx;
97	blocking_child *	c;
98	int			rc;
99
100	for (idx = 0; idx < blocking_children_alloc; idx++) {
101		c = blocking_children[idx];
102
103		if (NULL == c || c->reusable == TRUE)
104			continue;
105
106		rc = kill(c->pid, SIGHUP);
107		if (rc < 0)
108			msyslog(LOG_ERR,
109				"Unable to signal HUP to wake child pid %d: %m",
110				c->pid);
111	}
112}
113
114
115/*
116 * harvest_child_status() runs in the parent.
117 *
118 * Note the error handling -- this is an interaction with SIGCHLD.
119 * SIG_IGN on SIGCHLD on some OSes means do not wait but reap
120 * automatically. Since we're not really interested in the result code,
121 * we simply ignore the error.
122 */
123static void
124harvest_child_status(
125	blocking_child *	c
126	)
127{
128	if (c->pid) {
129		/* Wait on the child so it can finish terminating */
130		if (waitpid(c->pid, NULL, 0) == c->pid)
131			TRACE(4, ("harvested child %d\n", c->pid));
132		else if (errno != ECHILD)
133			msyslog(LOG_ERR, "error waiting on child %d: %m", c->pid);
134		c->pid = 0;
135	}
136}
137
138/*
139 * req_child_exit() runs in the parent.
140 */
141int
142req_child_exit(
143	blocking_child *	c
144	)
145{
146	if (-1 != c->req_write_pipe) {
147		close(c->req_write_pipe);
148		c->req_write_pipe = -1;
149		return 0;
150	}
151	/* Closing the pipe forces the child to exit */
152	harvest_child_status(c);
153	return -1;
154}
155
156
157/*
158 * cleanup_after_child() runs in parent.
159 */
160static void
161cleanup_after_child(
162	blocking_child *	c
163	)
164{
165	harvest_child_status(c);
166	if (-1 != c->resp_read_pipe) {
167		(*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE);
168		close(c->resp_read_pipe);
169		c->resp_read_pipe = -1;
170	}
171	c->resp_read_ctx = NULL;
172	DEBUG_INSIST(-1 == c->req_read_pipe);
173	DEBUG_INSIST(-1 == c->resp_write_pipe);
174	c->reusable = TRUE;
175}
176
177
178static void
179send_worker_home_atexit(void)
180{
181	u_int			idx;
182	blocking_child *	c;
183
184	if (worker_process)
185		return;
186
187	for (idx = 0; idx < blocking_children_alloc; idx++) {
188		c = blocking_children[idx];
189		if (NULL == c)
190			continue;
191		req_child_exit(c);
192	}
193}
194
195
/*
 * send_blocking_req_internal() runs in the parent.
 *
 * Writes the fixed-size request header, then the remaining
 * hdr->octets - sizeof(*hdr) bytes of payload, to the worker's
 * request pipe, forking the worker first if it is not yet running.
 *
 * Returns 0 on success.  On any write failure the child is shut
 * down and the process exits -- the -1 path is unreachable.
 */
int
send_blocking_req_internal(
	blocking_child *	c,
	blocking_pipe_header *	hdr,
	void *			data
	)
{
	int octets;
	int rc;

	DEBUG_REQUIRE(hdr != NULL);
	DEBUG_REQUIRE(data != NULL);
	DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig);

	/* start the worker on first use (or after it was stopped) */
	if (-1 == c->req_write_pipe) {
		fork_blocking_child(c);
		DEBUG_INSIST(-1 != c->req_write_pipe);
	}

	/* header first, then the variable-length payload */
	octets = sizeof(*hdr);
	rc = write(c->req_write_pipe, hdr, octets);

	if (rc == octets) {
		octets = hdr->octets - sizeof(*hdr);
		rc = write(c->req_write_pipe, data, octets);

		if (rc == octets)
			return 0;
	}

	/* rc/octets still describe whichever write failed */
	if (rc < 0)
		msyslog(LOG_ERR,
			"send_blocking_req_internal: pipe write: %m");
	else
		msyslog(LOG_ERR,
			"send_blocking_req_internal: short write %d of %d",
			rc, octets);

	/* Fatal error.  Clean up the child process.  */
	req_child_exit(c);
	exit(1);	/* otherwise would be return -1 */
}
238
239
/*
 * receive_blocking_req_internal() runs in the child.
 *
 * Reads a request header from the parent, then allocates a buffer of
 * hdr.octets bytes and reads the remainder of the request into it.
 *
 * Returns the heap-allocated request (caller frees), or NULL on EOF
 * (parent closed the pipe -- normal shutdown) or on any read or
 * validation error.
 */
blocking_pipe_header *
receive_blocking_req_internal(
	blocking_child *	c
	)
{
	blocking_pipe_header	hdr;
	blocking_pipe_header *	req;
	int			rc;
	long			octets;

	DEBUG_REQUIRE(-1 != c->req_read_pipe);

	req = NULL;

	/* retry the header read if interrupted by a signal */
	do {
		rc = read(c->req_read_pipe, &hdr, sizeof(hdr));
	} while (rc < 0 && EINTR == errno);

	if (rc < 0) {
		msyslog(LOG_ERR,
			"receive_blocking_req_internal: pipe read %m");
	} else if (0 == rc) {
		/* EOF: parent closed its end; child should terminate */
		TRACE(4, ("parent closed request pipe, child %d terminating\n",
			  c->pid));
	} else if (rc != sizeof(hdr)) {
		msyslog(LOG_ERR,
			"receive_blocking_req_internal: short header read %d of %lu",
			rc, (u_long)sizeof(hdr));
	} else {
		/* sanity-bound the claimed size before allocating */
		INSIST(sizeof(hdr) < hdr.octets && hdr.octets < 4 * 1024);
		req = emalloc(hdr.octets);
		memcpy(req, &hdr, sizeof(*req));
		octets = hdr.octets - sizeof(hdr);
		rc = read(c->req_read_pipe, (char *)req + sizeof(*req),
			  octets);

		if (rc < 0)
			msyslog(LOG_ERR,
				"receive_blocking_req_internal: pipe data read %m");
		else if (rc != octets)
			msyslog(LOG_ERR,
				"receive_blocking_req_internal: short read %d of %ld",
				rc, octets);
		else if (BLOCKING_REQ_MAGIC != req->magic_sig)
			msyslog(LOG_ERR,
				"receive_blocking_req_internal: packet header mismatch (0x%x)",
				req->magic_sig);
		else
			return req;	/* success */
	}

	/* failure or EOF: release any partially-read request */
	if (req != NULL)
		free(req);

	return NULL;
}
296
297
298int
299send_blocking_resp_internal(
300	blocking_child *	c,
301	blocking_pipe_header *	resp
302	)
303{
304	long	octets;
305	int	rc;
306
307	DEBUG_REQUIRE(-1 != c->resp_write_pipe);
308
309	octets = resp->octets;
310	rc = write(c->resp_write_pipe, resp, octets);
311	free(resp);
312
313	if (octets == rc)
314		return 0;
315
316	if (rc < 0)
317		TRACE(1, ("send_blocking_resp_internal: pipe write %m\n"));
318	else
319		TRACE(1, ("send_blocking_resp_internal: short write %d of %ld\n",
320			  rc, octets));
321
322	return -1;
323}
324
325
/*
 * receive_blocking_resp_internal() runs in the parent.
 *
 * Reads and validates a response header from the worker, then reads
 * the remaining payload into a heap-allocated buffer.
 *
 * Returns the allocated response (caller frees), or NULL on EOF or
 * error, in which case the child is reaped and its slot reclaimed
 * via cleanup_after_child().
 */
blocking_pipe_header *
receive_blocking_resp_internal(
	blocking_child *	c
	)
{
	blocking_pipe_header	hdr;
	blocking_pipe_header *	resp;
	int			rc;
	long			octets;

	DEBUG_REQUIRE(c->resp_read_pipe != -1);

	resp = NULL;
	rc = read(c->resp_read_pipe, &hdr, sizeof(hdr));

	if (rc < 0) {
		TRACE(1, ("receive_blocking_resp_internal: pipe read %m\n"));
	} else if (0 == rc) {
		/* this is the normal child exited indication */
	} else if (rc != sizeof(hdr)) {
		TRACE(1, ("receive_blocking_resp_internal: short header read %d of %lu\n",
			  rc, (u_long)sizeof(hdr)));
	} else if (BLOCKING_RESP_MAGIC != hdr.magic_sig) {
		TRACE(1, ("receive_blocking_resp_internal: header mismatch (0x%x)\n",
			  hdr.magic_sig));
	} else {
		/* sanity-bound the claimed size before allocating */
		INSIST(sizeof(hdr) < hdr.octets &&
		       hdr.octets < 16 * 1024);
		resp = emalloc(hdr.octets);
		memcpy(resp, &hdr, sizeof(*resp));
		octets = hdr.octets - sizeof(hdr);
		rc = read(c->resp_read_pipe,
			  (char *)resp + sizeof(*resp),
			  octets);

		if (rc < 0)
			TRACE(1, ("receive_blocking_resp_internal: pipe data read %m\n"));
		else if (rc < octets)
			TRACE(1, ("receive_blocking_resp_internal: short read %d of %ld\n",
				  rc, octets));
		else
			return resp;	/* success */
	}

	/* EOF or any failure: reap the child and reclaim the slot */
	cleanup_after_child(c);

	if (resp != NULL)
		free(resp);

	return NULL;
}
377
378
#if defined(HAVE_DROPROOT) && defined(WORK_FORK)
/*
 * fork_deferred_worker() runs in the parent after root privileges
 * have been dropped, forking any worker whose pipes were set up
 * earlier but whose fork was deferred (pid still zero).
 */
void
fork_deferred_worker(void)
{
	u_int			i;
	blocking_child *	child;

	REQUIRE(droproot && root_dropped);

	for (i = 0; i < blocking_children_alloc; i++) {
		child = blocking_children[i];
		if (child != NULL && child->req_write_pipe != -1
		    && 0 == child->pid)
			fork_blocking_child(child);
	}
}
#endif
397
398
399static void
400fork_blocking_child(
401	blocking_child *	c
402	)
403{
404	static int	atexit_installed;
405	static int	blocking_pipes[4] = { -1, -1, -1, -1 };
406	int		rc;
407	int		was_pipe;
408	int		is_pipe;
409	int		saved_errno = 0;
410	int		childpid;
411	int		keep_fd;
412	int		fd;
413
414	/*
415	 * parent and child communicate via a pair of pipes.
416	 *
417	 * 0 child read request
418	 * 1 parent write request
419	 * 2 parent read response
420	 * 3 child write response
421	 */
422	if (-1 == c->req_write_pipe) {
423		rc = pipe_socketpair(&blocking_pipes[0], &was_pipe);
424		if (0 != rc) {
425			saved_errno = errno;
426		} else {
427			rc = pipe_socketpair(&blocking_pipes[2], &is_pipe);
428			if (0 != rc) {
429				saved_errno = errno;
430				close(blocking_pipes[0]);
431				close(blocking_pipes[1]);
432			} else {
433				INSIST(was_pipe == is_pipe);
434			}
435		}
436		if (0 != rc) {
437			errno = saved_errno;
438			msyslog(LOG_ERR, "unable to create worker pipes: %m");
439			exit(1);
440		}
441
442		/*
443		 * Move the descriptors the parent will keep open out of the
444		 * low descriptors preferred by C runtime buffered FILE *.
445		 */
446		c->req_write_pipe = move_fd(blocking_pipes[1]);
447		c->resp_read_pipe = move_fd(blocking_pipes[2]);
448		/*
449		 * wake any worker child on orderly shutdown of the
450		 * daemon so that it can notice the broken pipes and
451		 * go away promptly.
452		 */
453		if (!atexit_installed) {
454			atexit(&send_worker_home_atexit);
455			atexit_installed = TRUE;
456		}
457	}
458
459#if defined(HAVE_DROPROOT) && !defined(NEED_EARLY_FORK)
460	/* defer the fork until after root is dropped */
461	if (droproot && !root_dropped)
462		return;
463#endif
464	if (syslog_file != NULL)
465		fflush(syslog_file);
466	fflush(stdout);
467	fflush(stderr);
468
469	/* [BUG 3050] setting SIGCHLD to SIG_IGN likely causes unwanted
470	 * or undefined effects. We don't do it and leave SIGCHLD alone.
471	 */
472	/* signal_no_reset(SIGCHLD, SIG_IGN); */
473
474	childpid = fork();
475	if (-1 == childpid) {
476		msyslog(LOG_ERR, "unable to fork worker: %m");
477		exit(1);
478	}
479
480	if (childpid) {
481		/* this is the parent */
482		TRACE(1, ("forked worker child (pid %d)\n", childpid));
483		c->pid = childpid;
484		c->ispipe = is_pipe;
485
486		/* close the child's pipe descriptors. */
487		close(blocking_pipes[0]);
488		close(blocking_pipes[3]);
489
490		memset(blocking_pipes, -1, sizeof(blocking_pipes));
491
492		/* wire into I/O loop */
493		(*addremove_io_fd)(c->resp_read_pipe, is_pipe, FALSE);
494
495		return;		/* parent returns */
496	}
497
498	/*
499	 * The parent gets the child pid as the return value of fork().
500	 * The child must work for it.
501	 */
502	c->pid = getpid();
503	worker_process = TRUE;
504
505	/*
506	 * In the child, close all files except stdin, stdout, stderr,
507	 * and the two child ends of the pipes.
508	 */
509	DEBUG_INSIST(-1 == c->req_read_pipe);
510	DEBUG_INSIST(-1 == c->resp_write_pipe);
511	c->req_read_pipe = blocking_pipes[0];
512	c->resp_write_pipe = blocking_pipes[3];
513
514	kill_asyncio(0);
515	closelog();
516	if (syslog_file != NULL) {
517		fclose(syslog_file);
518		syslog_file = NULL;
519		syslogit = TRUE;
520	}
521	keep_fd = max(c->req_read_pipe, c->resp_write_pipe);
522	for (fd = 3; fd < keep_fd; fd++)
523		if (fd != c->req_read_pipe &&
524		    fd != c->resp_write_pipe)
525			close(fd);
526	close_all_beyond(keep_fd);
527	/*
528	 * We get signals from refclock serial I/O on NetBSD in the
529	 * worker if we do not reset SIGIO's handler to the default.
530	 * It is not conditionalized for NetBSD alone because on
531	 * systems where it is not needed, it is harmless, and that
532	 * allows us to handle unknown others with NetBSD behavior.
533	 * [Bug 1386]
534	 */
535#if defined(USE_SIGIO)
536	signal_no_reset(SIGIO, SIG_DFL);
537#elif defined(USE_SIGPOLL)
538	signal_no_reset(SIGPOLL, SIG_DFL);
539#endif
540	signal_no_reset(SIGHUP, worker_sighup);
541	init_logging("ntp_intres", 0, FALSE);
542	setup_logfile(NULL);
543
544	/*
545	 * And now back to the portable code
546	 */
547	exit_worker(blocking_child_common(c));
548}
549
550
/*
 * worker_global_lock() - deliberate no-op in the fork implementation;
 * other work_* backends use this hook for locking.
 */
void worker_global_lock(int inOrOut)
{
	(void)inOrOut;	/* parameter intentionally unused */
}
555
556#else	/* !WORK_FORK follows */
557char work_fork_nonempty_compilation_unit;
558#endif
559