work_fork.c revision 316068
1119418Sobrien/* 215813Sse * work_fork.c - fork implementation for blocking worker child. 315813Sse */ 415813Sse#include <config.h> 515813Sse#include "ntp_workimpl.h" 615813Sse 715813Sse#ifdef WORK_FORK 815813Sse#include <stdio.h> 915813Sse#include <ctype.h> 1015813Sse#include <signal.h> 1115813Sse#include <sys/wait.h> 1215813Sse 1315813Sse#include "iosignal.h" 1415813Sse#include "ntp_stdlib.h" 1515813Sse#include "ntp_malloc.h" 1615813Sse#include "ntp_syslog.h" 1715813Sse#include "ntpd.h" 1815813Sse#include "ntp_io.h" 1915813Sse#include "ntp_assert.h" 20119418Sobrien#include "ntp_unixtime.h" 21119418Sobrien#include "ntp_worker.h" 22119418Sobrien 2315813Sse/* === variables === */ 2473374Simp int worker_process; 2550852Speter addremove_io_fd_func addremove_io_fd; 2615813Ssestatic volatile int worker_sighup_received; 2752247Smdoddint saved_argc = 0; 2850852Speterchar **saved_argv; 2950852Speter 3052247Smdodd/* === function prototypes === */ 3150852Speterstatic void fork_blocking_child(blocking_child *); 3252247Smdoddstatic RETSIGTYPE worker_sighup(int); 3352247Smdoddstatic void send_worker_home_atexit(void); 3450852Speterstatic void cleanup_after_child(blocking_child *); 3550852Speter 3650852Speter/* === I/O helpers === */ 37149558Simp/* Since we have signals enabled, there's a good chance that blocking IO 3850852Speter * via pipe suffers from EINTR -- and this goes for both directions. 3915813Sse * The next two wrappers will loop until either all the data is written 40119277Simp * or read, plus handling the EOF condition on read. They may return 41119277Simp * zero if no data was transferred at all, and effectively every return 4252247Smdodd * value that differs from the given transfer length signifies an error 4351442Speter * condition. 44150957Simp */ 4550852Speter 4624995Sdavidnstatic size_t 4724995Sdavidnnetread( 48140468Simp int fd, 4950852Speter void * vb, 5024995Sdavidn size_t l 5124995Sdavidn ) 52175007Simp{ 53175007Simp char * b = vb; 54150102Simp ssize_t r; 55175007Simp 56175007Simp while (l) { 57150102Simp r = read(fd, b, l); 58150102Simp if (r > 0) { 59175007Simp l -= r; 60175007Simp b += r; 61175007Simp } else if (r == 0 || errno != EINTR) { 62175007Simp l = 0; 63175007Simp } 64175007Simp } 65175007Simp return (size_t)(b - (char *)vb); 66150102Simp} 6724995Sdavidn 6815813Sse 69130657Simpstatic size_t 70130657Simpnetwrite( 7116289Salex int fd, 7250852Speter const void * vb, 73130659Simp size_t l 7415813Sse ) 75140468Simp{ 7624995Sdavidn const char * b = vb; 7724995Sdavidn ssize_t w; 7824995Sdavidn 7924995Sdavidn while (l) { 80143161Simp w = write(fd, b, l); 81143161Simp if (w > 0) { 82143161Simp l -= w; 83143161Simp b += w; 8415813Sse } else if (errno != EINTR) { 8515813Sse l = 0; 8650852Speter } 8750852Speter } 8815813Sse return (size_t)(b - (const char *)vb); 89147184Simp} 90147184Simp 91151300Simp 9252247Smdodd/* === functions === */ 93151300Simp/* 94190794Simp * exit_worker() 95190794Simp * 96190794Simp * On some systems _exit() is preferred to exit() for forked children. 97151300Simp * For example, http://netbsd.gw.com/cgi-bin/man-cgi?fork++NetBSD-5.0 98150957Simp * recommends _exit() to avoid double-flushing C runtime stream buffers 99150957Simp * and also to avoid calling the parent's atexit() routines in the 100151300Simp * child. On those systems WORKER_CHILD_EXIT is _exit. Since _exit 101150957Simp * bypasses CRT cleanup, fflush() files we know might have output 102147184Simp * buffered. 103147184Simp */ 104147184Simpvoid 105141577Simpexit_worker( 106147149Simp int exitcode 10752247Smdodd ) 108147184Simp{ 109147184Simp if (syslog_file != NULL) 110147184Simp fflush(syslog_file); 111147184Simp fflush(stdout); 112147184Simp fflush(stderr); 113191234Simp WORKER_CHILD_EXIT (exitcode); /* space before ( required */ 114191234Simp} 115191234Simp 116147184Simp 117147184Simpstatic RETSIGTYPE 118147184Simpworker_sighup( 119147184Simp int sig 120191234Simp ) 121191234Simp{ 122147184Simp if (SIGHUP == sig) 123147184Simp worker_sighup_received = 1; 12452247Smdodd} 12550852Speter 12615813Sse 12750852Speterint 12850852Speterworker_sleep( 12950852Speter blocking_child * c, 13050852Speter time_t seconds 131150136Sru ) 13215813Sse{ 13350852Speter u_int sleep_remain; 13450852Speter 13515813Sse sleep_remain = (u_int)seconds; 13650852Speter do { 13750852Speter if (!worker_sighup_received) 13850852Speter sleep_remain = sleep(sleep_remain); 13950852Speter if (worker_sighup_received) { 14050852Speter TRACE(1, ("worker SIGHUP with %us left to sleep", 14115813Sse sleep_remain)); 142113506Smdodd worker_sighup_received = 0; 143113506Smdodd return -1; 144113506Smdodd } 145 } while (sleep_remain); 146 147 return 0; 148} 149 150 151void 152interrupt_worker_sleep(void) 153{ 154 u_int idx; 155 blocking_child * c; 156 int rc; 157 158 for (idx = 0; idx < blocking_children_alloc; idx++) { 159 c = blocking_children[idx]; 160 161 if (NULL == c || c->reusable == TRUE) 162 continue; 163 164 rc = kill(c->pid, SIGHUP); 165 if (rc < 0) 166 msyslog(LOG_ERR, 167 "Unable to signal HUP to wake child pid %d: %m", 168 c->pid); 169 } 170} 171 172 173/* 174 * harvest_child_status() runs in the parent. 175 * 176 * Note the error handling -- this is an interaction with SIGCHLD. 177 * SIG_IGN on SIGCHLD on some OSes means do not wait but reap 178 * automatically. Since we're not really interested in the result code, 179 * we simply ignore the error. 180 */ 181static void 182harvest_child_status( 183 blocking_child * c 184 ) 185{ 186 if (c->pid) { 187 /* Wait on the child so it can finish terminating */ 188 if (waitpid(c->pid, NULL, 0) == c->pid) 189 TRACE(4, ("harvested child %d\n", c->pid)); 190 else if (errno != ECHILD) 191 msyslog(LOG_ERR, "error waiting on child %d: %m", c->pid); 192 c->pid = 0; 193 } 194} 195 196/* 197 * req_child_exit() runs in the parent. 198 */ 199int 200req_child_exit( 201 blocking_child * c 202 ) 203{ 204 if (-1 != c->req_write_pipe) { 205 close(c->req_write_pipe); 206 c->req_write_pipe = -1; 207 return 0; 208 } 209 /* Closing the pipe forces the child to exit */ 210 harvest_child_status(c); 211 return -1; 212} 213 214 215/* 216 * cleanup_after_child() runs in parent. 217 */ 218static void 219cleanup_after_child( 220 blocking_child * c 221 ) 222{ 223 harvest_child_status(c); 224 if (-1 != c->resp_read_pipe) { 225 (*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE); 226 close(c->resp_read_pipe); 227 c->resp_read_pipe = -1; 228 } 229 c->resp_read_ctx = NULL; 230 DEBUG_INSIST(-1 == c->req_read_pipe); 231 DEBUG_INSIST(-1 == c->resp_write_pipe); 232 c->reusable = TRUE; 233} 234 235 236static void 237send_worker_home_atexit(void) 238{ 239 u_int idx; 240 blocking_child * c; 241 242 if (worker_process) 243 return; 244 245 for (idx = 0; idx < blocking_children_alloc; idx++) { 246 c = blocking_children[idx]; 247 if (NULL == c) 248 continue; 249 req_child_exit(c); 250 } 251} 252 253 254int 255send_blocking_req_internal( 256 blocking_child * c, 257 blocking_pipe_header * hdr, 258 void * data 259 ) 260{ 261 size_t octets; 262 size_t rc; 263 264 DEBUG_REQUIRE(hdr != NULL); 265 DEBUG_REQUIRE(data != NULL); 266 DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig); 267 268 if (-1 == c->req_write_pipe) { 269 fork_blocking_child(c); 270 DEBUG_INSIST(-1 != c->req_write_pipe); 271 } 272 273 octets = sizeof(*hdr); 274 rc = netwrite(c->req_write_pipe, hdr, octets); 275 276 if (rc == octets) { 277 octets = hdr->octets - sizeof(*hdr); 278 rc = netwrite(c->req_write_pipe, data, octets); 279 if (rc == octets) 280 return 0; 281 } 282 283 msyslog(LOG_ERR, 284 "send_blocking_req_internal: short write (%zu of %zu), %m", 285 rc, octets); 286 287 /* Fatal error. Clean up the child process. */ 288 req_child_exit(c); 289 exit(1); /* otherwise would be return -1 */ 290} 291 292 293blocking_pipe_header * 294receive_blocking_req_internal( 295 blocking_child * c 296 ) 297{ 298 blocking_pipe_header hdr; 299 blocking_pipe_header * req; 300 size_t rc; 301 size_t octets; 302 303 DEBUG_REQUIRE(-1 != c->req_read_pipe); 304 305 req = NULL; 306 rc = netread(c->req_read_pipe, &hdr, sizeof(hdr)); 307 308 if (0 == rc) { 309 TRACE(4, ("parent closed request pipe, child %d terminating\n", 310 c->pid)); 311 } else if (rc != sizeof(hdr)) { 312 msyslog(LOG_ERR, 313 "receive_blocking_req_internal: short header read (%zu of %zu), %m", 314 rc, sizeof(hdr)); 315 } else { 316 INSIST(sizeof(hdr) < hdr.octets && hdr.octets < 4 * 1024); 317 req = emalloc(hdr.octets); 318 memcpy(req, &hdr, sizeof(*req)); 319 octets = hdr.octets - sizeof(hdr); 320 rc = netread(c->req_read_pipe, (char *)(req + 1), 321 octets); 322 323 if (rc != octets) 324 msyslog(LOG_ERR, 325 "receive_blocking_req_internal: short read (%zu of %zu), %m", 326 rc, octets); 327 else if (BLOCKING_REQ_MAGIC != req->magic_sig) 328 msyslog(LOG_ERR, 329 "receive_blocking_req_internal: packet header mismatch (0x%x)", 330 req->magic_sig); 331 else 332 return req; 333 } 334 335 if (req != NULL) 336 free(req); 337 338 return NULL; 339} 340 341 342int 343send_blocking_resp_internal( 344 blocking_child * c, 345 blocking_pipe_header * resp 346 ) 347{ 348 size_t octets; 349 size_t rc; 350 351 DEBUG_REQUIRE(-1 != c->resp_write_pipe); 352 353 octets = resp->octets; 354 rc = netwrite(c->resp_write_pipe, resp, octets); 355 free(resp); 356 357 if (octets == rc) 358 return 0; 359 360 TRACE(1, ("send_blocking_resp_internal: short write (%zu of %zu), %m\n", 361 rc, octets)); 362 return -1; 363} 364 365 366blocking_pipe_header * 367receive_blocking_resp_internal( 368 blocking_child * c 369 ) 370{ 371 blocking_pipe_header hdr; 372 blocking_pipe_header * resp; 373 size_t rc; 374 size_t octets; 375 376 DEBUG_REQUIRE(c->resp_read_pipe != -1); 377 378 resp = NULL; 379 rc = netread(c->resp_read_pipe, &hdr, sizeof(hdr)); 380 381 if (0 == rc) { 382 /* this is the normal child exited indication */ 383 } else if (rc != sizeof(hdr)) { 384 TRACE(1, ("receive_blocking_resp_internal: short header read (%zu of %zu), %m\n", 385 rc, sizeof(hdr))); 386 } else if (BLOCKING_RESP_MAGIC != hdr.magic_sig) { 387 TRACE(1, ("receive_blocking_resp_internal: header mismatch (0x%x)\n", 388 hdr.magic_sig)); 389 } else { 390 INSIST(sizeof(hdr) < hdr.octets && 391 hdr.octets < 16 * 1024); 392 resp = emalloc(hdr.octets); 393 memcpy(resp, &hdr, sizeof(*resp)); 394 octets = hdr.octets - sizeof(hdr); 395 rc = netread(c->resp_read_pipe, (char *)(resp + 1), 396 octets); 397 398 if (rc != octets) 399 TRACE(1, ("receive_blocking_resp_internal: short read (%zu of %zu), %m\n", 400 rc, octets)); 401 else 402 return resp; 403 } 404 405 cleanup_after_child(c); 406 407 if (resp != NULL) 408 free(resp); 409 410 return NULL; 411} 412 413 414#if defined(HAVE_DROPROOT) && defined(WORK_FORK) 415void 416fork_deferred_worker(void) 417{ 418 u_int idx; 419 blocking_child * c; 420 421 REQUIRE(droproot && root_dropped); 422 423 for (idx = 0; idx < blocking_children_alloc; idx++) { 424 c = blocking_children[idx]; 425 if (NULL == c) 426 continue; 427 if (-1 != c->req_write_pipe && 0 == c->pid) 428 fork_blocking_child(c); 429 } 430} 431#endif 432 433 434static void 435fork_blocking_child( 436 blocking_child * c 437 ) 438{ 439 static int atexit_installed; 440 static int blocking_pipes[4] = { -1, -1, -1, -1 }; 441 int rc; 442 int was_pipe; 443 int is_pipe; 444 int saved_errno = 0; 445 int childpid; 446 int keep_fd; 447 int fd; 448 449 /* 450 * parent and child communicate via a pair of pipes. 451 * 452 * 0 child read request 453 * 1 parent write request 454 * 2 parent read response 455 * 3 child write response 456 */ 457 if (-1 == c->req_write_pipe) { 458 rc = pipe_socketpair(&blocking_pipes[0], &was_pipe); 459 if (0 != rc) { 460 saved_errno = errno; 461 } else { 462 rc = pipe_socketpair(&blocking_pipes[2], &is_pipe); 463 if (0 != rc) { 464 saved_errno = errno; 465 close(blocking_pipes[0]); 466 close(blocking_pipes[1]); 467 } else { 468 INSIST(was_pipe == is_pipe); 469 } 470 } 471 if (0 != rc) { 472 errno = saved_errno; 473 msyslog(LOG_ERR, "unable to create worker pipes: %m"); 474 exit(1); 475 } 476 477 /* 478 * Move the descriptors the parent will keep open out of the 479 * low descriptors preferred by C runtime buffered FILE *. 480 */ 481 c->req_write_pipe = move_fd(blocking_pipes[1]); 482 c->resp_read_pipe = move_fd(blocking_pipes[2]); 483 /* 484 * wake any worker child on orderly shutdown of the 485 * daemon so that it can notice the broken pipes and 486 * go away promptly. 487 */ 488 if (!atexit_installed) { 489 atexit(&send_worker_home_atexit); 490 atexit_installed = TRUE; 491 } 492 } 493 494#if defined(HAVE_DROPROOT) && !defined(NEED_EARLY_FORK) 495 /* defer the fork until after root is dropped */ 496 if (droproot && !root_dropped) 497 return; 498#endif 499 if (syslog_file != NULL) 500 fflush(syslog_file); 501 fflush(stdout); 502 fflush(stderr); 503 504 /* [BUG 3050] setting SIGCHLD to SIG_IGN likely causes unwanted 505 * or undefined effects. We don't do it and leave SIGCHLD alone. 506 */ 507 /* signal_no_reset(SIGCHLD, SIG_IGN); */ 508 509 childpid = fork(); 510 if (-1 == childpid) { 511 msyslog(LOG_ERR, "unable to fork worker: %m"); 512 exit(1); 513 } 514 515 if (childpid) { 516 /* this is the parent */ 517 TRACE(1, ("forked worker child (pid %d)\n", childpid)); 518 c->pid = childpid; 519 c->ispipe = is_pipe; 520 521 /* close the child's pipe descriptors. */ 522 close(blocking_pipes[0]); 523 close(blocking_pipes[3]); 524 525 memset(blocking_pipes, -1, sizeof(blocking_pipes)); 526 527 /* wire into I/O loop */ 528 (*addremove_io_fd)(c->resp_read_pipe, is_pipe, FALSE); 529 530 return; /* parent returns */ 531 } 532 533 /* 534 * The parent gets the child pid as the return value of fork(). 535 * The child must work for it. 536 */ 537 c->pid = getpid(); 538 worker_process = TRUE; 539 540 /* 541 * Change the process name of the child to avoid confusion 542 * about ntpd trunning twice. 543 */ 544 if (saved_argc != 0) { 545 int argcc; 546 int argvlen = 0; 547 /* Clear argv */ 548 for (argcc = 0; argcc < saved_argc; argcc++) { 549 int l = strlen(saved_argv[argcc]); 550 argvlen += l + 1; 551 memset(saved_argv[argcc], 0, l); 552 } 553 strlcpy(saved_argv[0], "ntpd: asynchronous dns resolver", argvlen); 554 } 555 556 /* 557 * In the child, close all files except stdin, stdout, stderr, 558 * and the two child ends of the pipes. 559 */ 560 DEBUG_INSIST(-1 == c->req_read_pipe); 561 DEBUG_INSIST(-1 == c->resp_write_pipe); 562 c->req_read_pipe = blocking_pipes[0]; 563 c->resp_write_pipe = blocking_pipes[3]; 564 565 kill_asyncio(0); 566 closelog(); 567 if (syslog_file != NULL) { 568 fclose(syslog_file); 569 syslog_file = NULL; 570 syslogit = TRUE; 571 } 572 keep_fd = max(c->req_read_pipe, c->resp_write_pipe); 573 for (fd = 3; fd < keep_fd; fd++) 574 if (fd != c->req_read_pipe && 575 fd != c->resp_write_pipe) 576 close(fd); 577 close_all_beyond(keep_fd); 578 /* 579 * We get signals from refclock serial I/O on NetBSD in the 580 * worker if we do not reset SIGIO's handler to the default. 581 * It is not conditionalized for NetBSD alone because on 582 * systems where it is not needed, it is harmless, and that 583 * allows us to handle unknown others with NetBSD behavior. 584 * [Bug 1386] 585 */ 586#if defined(USE_SIGIO) 587 signal_no_reset(SIGIO, SIG_DFL); 588#elif defined(USE_SIGPOLL) 589 signal_no_reset(SIGPOLL, SIG_DFL); 590#endif 591 signal_no_reset(SIGHUP, worker_sighup); 592 init_logging("ntp_intres", 0, FALSE); 593 setup_logfile(NULL); 594 595 /* 596 * And now back to the portable code 597 */ 598 exit_worker(blocking_child_common(c)); 599} 600 601 602void worker_global_lock(int inOrOut) 603{ 604 (void)inOrOut; 605} 606 607#else /* !WORK_FORK follows */ 608char work_fork_nonempty_compilation_unit; 609#endif 610