1/*	$NetBSD: work_fork.c,v 1.15 2020/10/10 13:41:14 christos Exp $	*/
2
3/*
4 * work_fork.c - fork implementation for blocking worker child.
5 */
6#include <config.h>
7#include "ntp_workimpl.h"
8
9#ifdef WORK_FORK
10#include <stdio.h>
11#include <ctype.h>
12#include <signal.h>
13#include <sys/wait.h>
14
15#include "iosignal.h"
16#include "ntp_stdlib.h"
17#include "ntp_malloc.h"
18#include "ntp_syslog.h"
19#include "ntpd.h"
20#include "ntp_io.h"
21#include "ntp_assert.h"
22#include "ntp_unixtime.h"
23#include "ntp_worker.h"
24
25/* === variables === */
26	int			worker_process;
27	addremove_io_fd_func	addremove_io_fd;
28static	volatile int		worker_sighup_received;
29int	saved_argc = 0;
30char	**saved_argv;
31
32/* === function prototypes === */
33static	void		fork_blocking_child(blocking_child *);
34static	RETSIGTYPE	worker_sighup(int);
35static	void		send_worker_home_atexit(void);
36static	void		cleanup_after_child(blocking_child *);
37
38/* === I/O helpers === */
39/* Since we have signals enabled, there's a good chance that blocking IO
40 * via pipe suffers from EINTR -- and this goes for both directions.
41 * The next two wrappers will loop until either all the data is written
42 * or read, plus handling the EOF condition on read. They may return
43 * zero if no data was transferred at all, and effectively every return
44 * value that differs from the given transfer length signifies an error
45 * condition.
46 */
47
static size_t
netread(
	int		fd,
	void *		vb,
	size_t		l
	)
{
	char *		p = vb;
	ssize_t		n;

	/* Read until the full request length is satisfied.  EINTR
	 * restarts the read; EOF or any other error ends the loop,
	 * so the caller sees a short count on failure.
	 */
	while (l > 0) {
		n = read(fd, p, l);
		if (n <= 0) {
			if (n < 0 && EINTR == errno)
				continue;
			break;
		}
		p += n;
		l -= n;
	}
	return (size_t)(p - (char *)vb);
}
69
70
static size_t
netwrite(
	int		fd,
	const void *	vb,
	size_t		l
	)
{
	const char *	p = vb;
	ssize_t		n;

	/* Write until the whole buffer is transferred.  EINTR restarts
	 * the write; any other error ends the loop, so the caller sees
	 * a short count on failure.
	 */
	while (l > 0) {
		n = write(fd, p, l);
		if (n <= 0) {
			if (EINTR == errno)
				continue;
			break;
		}
		p += n;
		l -= n;
	}
	return (size_t)(p - (const char *)vb);
}
92
93
94#if defined(HAVE_DROPROOT)
95extern int set_user_group_ids(void);
96#endif
97
98/* === functions === */
99/*
100 * exit_worker()
101 *
102 * On some systems _exit() is preferred to exit() for forked children.
103 * For example, http://netbsd.gw.com/cgi-bin/man-cgi?fork++NetBSD-5.0
104 * recommends _exit() to avoid double-flushing C runtime stream buffers
105 * and also to avoid calling the parent's atexit() routines in the
106 * child.  On those systems WORKER_CHILD_EXIT is _exit.  Since _exit
107 * bypasses CRT cleanup, fflush() files we know might have output
108 * buffered.
109 */
110void
111exit_worker(
112	int	exitcode
113	)
114{
115	if (syslog_file != NULL)
116		fflush(syslog_file);
117	fflush(stdout);
118	fflush(stderr);
119	WORKER_CHILD_EXIT (exitcode);	/* space before ( required */
120}
121
122
123static RETSIGTYPE
124worker_sighup(
125	int sig
126	)
127{
128	if (SIGHUP == sig)
129		worker_sighup_received = 1;
130}
131
132
133int
134worker_sleep(
135	blocking_child *	c,
136	time_t			seconds
137	)
138{
139	u_int sleep_remain;
140
141	sleep_remain = (u_int)seconds;
142	do {
143		if (!worker_sighup_received)
144			sleep_remain = sleep(sleep_remain);
145		if (worker_sighup_received) {
146			TRACE(1, ("worker SIGHUP with %us left to sleep",
147				  sleep_remain));
148			worker_sighup_received = 0;
149			return -1;
150		}
151	} while (sleep_remain);
152
153	return 0;
154}
155
156
157void
158interrupt_worker_sleep(void)
159{
160	u_int			idx;
161	blocking_child *	c;
162	int			rc;
163
164	for (idx = 0; idx < blocking_children_alloc; idx++) {
165		c = blocking_children[idx];
166
167		if (NULL == c || c->reusable == TRUE)
168			continue;
169
170		rc = kill(c->pid, SIGHUP);
171		if (rc < 0)
172			msyslog(LOG_ERR,
173				"Unable to signal HUP to wake child pid %d: %m",
174				c->pid);
175	}
176}
177
178
179/*
180 * harvest_child_status() runs in the parent.
181 *
182 * Note the error handling -- this is an interaction with SIGCHLD.
183 * SIG_IGN on SIGCHLD on some OSes means do not wait but reap
184 * automatically. Since we're not really interested in the result code,
185 * we simply ignore the error.
186 */
187static void
188harvest_child_status(
189	blocking_child *	c
190	)
191{
192	if (c->pid) {
193		/* Wait on the child so it can finish terminating */
194		if (waitpid(c->pid, NULL, 0) == c->pid)
195			TRACE(4, ("harvested child %d\n", c->pid));
196		else if (errno != ECHILD)
197			msyslog(LOG_ERR, "error waiting on child %d: %m", c->pid);
198		c->pid = 0;
199	}
200}
201
202/*
203 * req_child_exit() runs in the parent.
204 */
205int
206req_child_exit(
207	blocking_child *	c
208	)
209{
210	if (-1 != c->req_write_pipe) {
211		close(c->req_write_pipe);
212		c->req_write_pipe = -1;
213		return 0;
214	}
215	/* Closing the pipe forces the child to exit */
216	harvest_child_status(c);
217	return -1;
218}
219
220
221/*
222 * cleanup_after_child() runs in parent.
223 */
224static void
225cleanup_after_child(
226	blocking_child *	c
227	)
228{
229	harvest_child_status(c);
230	if (-1 != c->resp_read_pipe) {
231		(*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE);
232		close(c->resp_read_pipe);
233		c->resp_read_pipe = -1;
234	}
235	c->resp_read_ctx = NULL;
236	DEBUG_INSIST(-1 == c->req_read_pipe);
237	DEBUG_INSIST(-1 == c->resp_write_pipe);
238	c->reusable = TRUE;
239}
240
241
242static void
243send_worker_home_atexit(void)
244{
245	u_int			idx;
246	blocking_child *	c;
247
248	if (worker_process)
249		return;
250
251	for (idx = 0; idx < blocking_children_alloc; idx++) {
252		c = blocking_children[idx];
253		if (NULL == c)
254			continue;
255		req_child_exit(c);
256	}
257}
258
259
/*
 * send_blocking_req_internal() runs in the parent.
 *
 * Push one request (fixed-size header, then payload) down the request
 * pipe to the worker child, lazily forking a worker first if needed.
 * Returns 0 on success; a short write is treated as fatal and
 * terminates the daemon.
 */
int
send_blocking_req_internal(
	blocking_child *	c,
	blocking_pipe_header *	hdr,
	void *			data
	)
{
	size_t	octets;
	size_t	rc;

	DEBUG_REQUIRE(hdr != NULL);
	DEBUG_REQUIRE(data != NULL);
	DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig);

	/* fork a worker on first use, or after a previous one exited */
	if (-1 == c->req_write_pipe) {
		fork_blocking_child(c);
		DEBUG_INSIST(-1 != c->req_write_pipe);
	}

	octets = sizeof(*hdr);
	rc = netwrite(c->req_write_pipe, hdr, octets);

	if (rc == octets) {
		/* hdr->octets counts header plus payload bytes */
		octets = hdr->octets - sizeof(*hdr);
		rc = netwrite(c->req_write_pipe, data, octets);
		if (rc == octets)
			return 0;
	}

	msyslog(LOG_ERR,
		"send_blocking_req_internal: short write (%zu of %zu), %m",
		rc, octets);

	/* Fatal error.  Clean up the child process.  */
	req_child_exit(c);
	exit(1);	/* otherwise would be return -1 */
}
297
298
/*
 * receive_blocking_req_internal() runs in the worker child.
 *
 * Read the next request from the parent: a fixed-size header followed
 * by hdr.octets - sizeof(hdr) payload bytes.  Returns a heap-allocated
 * request (caller frees) on success, or NULL on EOF (parent closed the
 * pipe, the normal shutdown indication) or on a short/malformed
 * transfer.
 */
blocking_pipe_header *
receive_blocking_req_internal(
	blocking_child *	c
	)
{
	blocking_pipe_header	hdr;
	blocking_pipe_header *	req;
	size_t			rc;
	size_t			octets;

	DEBUG_REQUIRE(-1 != c->req_read_pipe);

	req = NULL;
	rc = netread(c->req_read_pipe, &hdr, sizeof(hdr));

	if (0 == rc) {
		/* EOF: parent closed the request pipe */
		TRACE(4, ("parent closed request pipe, child %d terminating\n",
			  c->pid));
	} else if (rc != sizeof(hdr)) {
		msyslog(LOG_ERR,
			"receive_blocking_req_internal: short header read (%zu of %zu), %m",
			rc, sizeof(hdr));
	} else {
		/* sanity-bound the claimed size before allocating */
		INSIST(sizeof(hdr) < hdr.octets && hdr.octets < 4 * 1024);
		req = emalloc(hdr.octets);
		memcpy(req, &hdr, sizeof(*req));
		octets = hdr.octets - sizeof(hdr);
		/* payload immediately follows the header in the buffer */
		rc = netread(c->req_read_pipe, (char *)(req + 1),
			     octets);

		if (rc != octets)
			msyslog(LOG_ERR,
				"receive_blocking_req_internal: short read (%zu of %zu), %m",
				rc, octets);
		else if (BLOCKING_REQ_MAGIC != req->magic_sig)
			msyslog(LOG_ERR,
				"receive_blocking_req_internal: packet header mismatch (0x%x)",
				req->magic_sig);
		else
			return req;
	}

	/* error path: release any partially-read request */
	if (req != NULL)
		free(req);

	return NULL;
}
346
347
348int
349send_blocking_resp_internal(
350	blocking_child *	c,
351	blocking_pipe_header *	resp
352	)
353{
354	size_t	octets;
355	size_t	rc;
356
357	DEBUG_REQUIRE(-1 != c->resp_write_pipe);
358
359	octets = resp->octets;
360	rc = netwrite(c->resp_write_pipe, resp, octets);
361	free(resp);
362
363	if (octets == rc)
364		return 0;
365
366	TRACE(1, ("send_blocking_resp_internal: short write (%zu of %zu), %m\n",
367		  rc, octets));
368	return -1;
369}
370
371
/*
 * receive_blocking_resp_internal() runs in the parent.
 *
 * Read one response from a worker child: a fixed-size header followed
 * by hdr.octets - sizeof(hdr) payload bytes.  Returns a heap-allocated
 * response (caller frees) on success.  On EOF or any error the worker
 * is considered gone: its slot is cleaned up for reuse and NULL is
 * returned.
 */
blocking_pipe_header *
receive_blocking_resp_internal(
	blocking_child *	c
	)
{
	blocking_pipe_header	hdr;
	blocking_pipe_header *	resp;
	size_t			rc;
	size_t			octets;

	DEBUG_REQUIRE(c->resp_read_pipe != -1);

	resp = NULL;
	rc = netread(c->resp_read_pipe, &hdr, sizeof(hdr));

	if (0 == rc) {
		/* this is the normal child exited indication */
	} else if (rc != sizeof(hdr)) {
		TRACE(1, ("receive_blocking_resp_internal: short header read (%zu of %zu), %m\n",
			  rc, sizeof(hdr)));
	} else if (BLOCKING_RESP_MAGIC != hdr.magic_sig) {
		TRACE(1, ("receive_blocking_resp_internal: header mismatch (0x%x)\n",
			  hdr.magic_sig));
	} else {
		/* sanity-bound the claimed size before allocating */
		INSIST(sizeof(hdr) < hdr.octets &&
		       hdr.octets < 16 * 1024);
		resp = emalloc(hdr.octets);
		memcpy(resp, &hdr, sizeof(*resp));
		octets = hdr.octets - sizeof(hdr);
		/* payload immediately follows the header in the buffer */
		rc = netread(c->resp_read_pipe, (char *)(resp + 1),
			     octets);

		if (rc != octets)
			TRACE(1, ("receive_blocking_resp_internal: short read (%zu of %zu), %m\n",
				  rc, octets));
		else
			return resp;
	}

	/* EOF or error: reap the worker and reclaim its slot */
	cleanup_after_child(c);

	if (resp != NULL)
		free(resp);

	return NULL;
}
418
419
#if defined(HAVE_DROPROOT) && defined(WORK_FORK)
/*
 * fork_deferred_worker() runs in the parent after root privileges
 * have been dropped.  It forks any worker whose pipes were prepared
 * earlier but whose fork was deferred until after droproot.
 */
void
fork_deferred_worker(void)
{
	u_int			i;
	blocking_child *	child;

	REQUIRE(droproot && root_dropped);

	for (i = 0; i < blocking_children_alloc; i++) {
		child = blocking_children[i];
		if (child != NULL && -1 != child->req_write_pipe &&
		    0 == child->pid)
			fork_blocking_child(child);
	}
}
#endif
438
439#if HAVE_SETPROCTITLE == 0
440static void
441setproctitle(const char *fmt, ...)
442{
443	va_list ap;
444	char b1[128];
445	int argcc, argvlen, l;
446
447	if (saved_argc == 0)
448		return;
449
450	va_start(ap, fmt);
451	vsnprintf(b1, sizeof(b1), fmt, ap);
452	va_end(ap);
453
454	/* Clear argv */
455	for (argvlen = 0, argcc = 0; argcc < saved_argc; argcc++) {
456		l = strlen(saved_argv[argcc]);
457		argvlen += l + 1;
458		memset(saved_argv[argcc], 0, l);
459	}
460	l = snprintf(saved_argv[0], argvlen, "ntpd: %s", b1);
461	for (argcc = 1; argcc < saved_argc; argcc++)
462		saved_argv[argcc] = &saved_argv[0][l];
463}
464#endif
465
466static void
467fork_blocking_child(
468	blocking_child *	c
469	)
470{
471	static int	atexit_installed;
472	static int	blocking_pipes[4] = { -1, -1, -1, -1 };
473	int		rc;
474	int		was_pipe;
475	int		is_pipe;
476	int		saved_errno = 0;
477	int		childpid;
478	int		keep_fd;
479	int		fd;
480
481	/*
482	 * parent and child communicate via a pair of pipes.
483	 *
484	 * 0 child read request
485	 * 1 parent write request
486	 * 2 parent read response
487	 * 3 child write response
488	 */
489	if (-1 == c->req_write_pipe) {
490		rc = pipe_socketpair(&blocking_pipes[0], &was_pipe);
491		if (0 != rc) {
492			saved_errno = errno;
493		} else {
494			rc = pipe_socketpair(&blocking_pipes[2], &is_pipe);
495			if (0 != rc) {
496				saved_errno = errno;
497				close(blocking_pipes[0]);
498				close(blocking_pipes[1]);
499			} else {
500				INSIST(was_pipe == is_pipe);
501			}
502		}
503		if (0 != rc) {
504			errno = saved_errno;
505			msyslog(LOG_ERR, "unable to create worker pipes: %m");
506			exit(1);
507		}
508
509		/*
510		 * Move the descriptors the parent will keep open out of the
511		 * low descriptors preferred by C runtime buffered FILE *.
512		 */
513		c->req_write_pipe = move_fd(blocking_pipes[1]);
514		c->resp_read_pipe = move_fd(blocking_pipes[2]);
515		/*
516		 * wake any worker child on orderly shutdown of the
517		 * daemon so that it can notice the broken pipes and
518		 * go away promptly.
519		 */
520		if (!atexit_installed) {
521			atexit(&send_worker_home_atexit);
522			atexit_installed = TRUE;
523		}
524	}
525
526#if defined(HAVE_DROPROOT) && !defined(NEED_EARLY_FORK)
527	/* defer the fork until after root is dropped */
528	if (droproot && !root_dropped)
529		return;
530#endif
531	if (syslog_file != NULL)
532		fflush(syslog_file);
533	fflush(stdout);
534	fflush(stderr);
535
536	/* [BUG 3050] setting SIGCHLD to SIG_IGN likely causes unwanted
537	 * or undefined effects. We don't do it and leave SIGCHLD alone.
538	 */
539	/* signal_no_reset(SIGCHLD, SIG_IGN); */
540
541	childpid = fork();
542	if (-1 == childpid) {
543		msyslog(LOG_ERR, "unable to fork worker: %m");
544		exit(1);
545	}
546
547	if (childpid) {
548		/* this is the parent */
549		TRACE(1, ("forked worker child (pid %d)\n", childpid));
550		c->pid = childpid;
551		c->ispipe = is_pipe;
552
553		/* close the child's pipe descriptors. */
554		close(blocking_pipes[0]);
555		close(blocking_pipes[3]);
556
557		memset(blocking_pipes, -1, sizeof(blocking_pipes));
558
559		/* wire into I/O loop */
560		(*addremove_io_fd)(c->resp_read_pipe, is_pipe, FALSE);
561
562		/* wait until child is done */
563		rc = netread(c->resp_read_pipe, &rc, sizeof(rc));
564
565		return;		/* parent returns */
566	}
567
568	/*
569	 * The parent gets the child pid as the return value of fork().
570	 * The child must work for it.
571	 */
572	c->pid = getpid();
573	worker_process = TRUE;
574
575	/*
576	 * Change the process name of the child to avoid confusion
577	 * about ntpd trunning twice.
578	 */
579	setproctitle("asynchronous dns resolver");
580
581	/*
582	 * In the child, close all files except stdin, stdout, stderr,
583	 * and the two child ends of the pipes.
584	 */
585	DEBUG_INSIST(-1 == c->req_read_pipe);
586	DEBUG_INSIST(-1 == c->resp_write_pipe);
587	c->req_read_pipe = blocking_pipes[0];
588	c->resp_write_pipe = blocking_pipes[3];
589
590	kill_asyncio(0);
591
592	/* Tell parent we are ready */
593	rc = netwrite(c->resp_write_pipe, &rc, sizeof(rc));
594
595	closelog();
596	if (syslog_file != NULL) {
597		fclose(syslog_file);
598		syslog_file = NULL;
599		syslogit = TRUE;
600	}
601	keep_fd = max(c->req_read_pipe, c->resp_write_pipe);
602	for (fd = 3; fd < keep_fd; fd++)
603		if (fd != c->req_read_pipe &&
604		    fd != c->resp_write_pipe)
605			close(fd);
606	close_all_beyond(keep_fd);
607	/*
608	 * We get signals from refclock serial I/O on NetBSD in the
609	 * worker if we do not reset SIGIO's handler to the default.
610	 * It is not conditionalized for NetBSD alone because on
611	 * systems where it is not needed, it is harmless, and that
612	 * allows us to handle unknown others with NetBSD behavior.
613	 * [Bug 1386]
614	 */
615#if defined(USE_SIGIO)
616	signal_no_reset(SIGIO, SIG_DFL);
617#elif defined(USE_SIGPOLL)
618	signal_no_reset(SIGPOLL, SIG_DFL);
619#endif
620	signal_no_reset(SIGHUP, worker_sighup);
621	init_logging("ntp_intres", 0, FALSE);
622	setup_logfile(NULL);
623
624#ifdef HAVE_DROPROOT
625	(void) set_user_group_ids();
626#endif
627
628	/*
629	 * And now back to the portable code
630	 */
631	exit_worker(blocking_child_common(c));
632}
633
634
void worker_global_lock(int inOrOut)
{
	/*
	 * The fork-based worker implementation needs no cross-thread
	 * locking; this stub only satisfies the common worker API.
	 */
	(void)inOrOut;
}
639
640#else	/* !WORK_FORK follows */
641char work_fork_nonempty_compilation_unit;
642#endif
643