/* control.c revision 217969 */
1/*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Pawel Jakub Dawidek under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sbin/hastd/control.c 217969 2011-01-27 19:33:57Z pjd $"); 32 33#include <sys/types.h> 34#include <sys/wait.h> 35 36#include <assert.h> 37#include <errno.h> 38#include <pthread.h> 39#include <signal.h> 40#include <stdio.h> 41#include <string.h> 42#include <unistd.h> 43 44#include "hast.h" 45#include "hastd.h" 46#include "hast_proto.h" 47#include "hooks.h" 48#include "nv.h" 49#include "pjdlog.h" 50#include "proto.h" 51#include "subr.h" 52 53#include "control.h" 54 55void 56child_cleanup(struct hast_resource *res) 57{ 58 59 proto_close(res->hr_ctrl); 60 res->hr_ctrl = NULL; 61 if (res->hr_event != NULL) { 62 proto_close(res->hr_event); 63 res->hr_event = NULL; 64 } 65 res->hr_workerpid = 0; 66} 67 68static void 69control_set_role_common(struct hastd_config *cfg, struct nv *nvout, 70 uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 71{ 72 int oldrole; 73 74 /* Name is always needed. */ 75 if (name != NULL) 76 nv_add_string(nvout, name, "resource%u", no); 77 78 if (res == NULL) { 79 assert(cfg != NULL); 80 assert(name != NULL); 81 82 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 83 if (strcmp(res->hr_name, name) == 0) 84 break; 85 } 86 if (res == NULL) { 87 nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 88 return; 89 } 90 } 91 assert(res != NULL); 92 93 /* Send previous role back. */ 94 nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 95 96 /* Nothing changed, return here. */ 97 if (role == res->hr_role) 98 return; 99 100 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 101 pjdlog_info("Role changed to %s.", role2str(role)); 102 103 /* Change role to the new one. */ 104 oldrole = res->hr_role; 105 res->hr_role = role; 106 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 107 108 /* 109 * If previous role was primary or secondary we have to kill process 110 * doing that work. 
111 */ 112 if (res->hr_workerpid != 0) { 113 if (kill(res->hr_workerpid, SIGTERM) < 0) { 114 pjdlog_errno(LOG_WARNING, 115 "Unable to kill worker process %u", 116 (unsigned int)res->hr_workerpid); 117 } else if (waitpid(res->hr_workerpid, NULL, 0) != 118 res->hr_workerpid) { 119 pjdlog_errno(LOG_WARNING, 120 "Error while waiting for worker process %u", 121 (unsigned int)res->hr_workerpid); 122 } else { 123 pjdlog_debug(1, "Worker process %u stopped.", 124 (unsigned int)res->hr_workerpid); 125 } 126 child_cleanup(res); 127 } 128 129 /* Start worker process if we are changing to primary. */ 130 if (role == HAST_ROLE_PRIMARY) 131 hastd_primary(res); 132 pjdlog_prefix_set("%s", ""); 133 hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole), 134 role2str(res->hr_role), NULL); 135} 136 137void 138control_set_role(struct hast_resource *res, uint8_t role) 139{ 140 141 control_set_role_common(NULL, NULL, role, res, NULL, 0); 142} 143 144static void 145control_status_worker(struct hast_resource *res, struct nv *nvout, 146 unsigned int no) 147{ 148 struct nv *cnvin, *cnvout; 149 const char *str; 150 int error; 151 152 cnvin = cnvout = NULL; 153 error = 0; 154 155 /* 156 * Prepare and send command to worker process. 157 */ 158 cnvout = nv_alloc(); 159 nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd"); 160 error = nv_error(cnvout); 161 if (error != 0) { 162 pjdlog_common(LOG_ERR, 0, error, 163 "Unable to prepare control header"); 164 goto end; 165 } 166 if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 167 error = errno; 168 pjdlog_errno(LOG_ERR, "Unable to send control header"); 169 goto end; 170 } 171 172 /* 173 * Receive response. 
174 */ 175 if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 176 error = errno; 177 pjdlog_errno(LOG_ERR, "Unable to receive control header"); 178 goto end; 179 } 180 181 error = nv_get_int16(cnvin, "error"); 182 if (error != 0) 183 goto end; 184 185 if ((str = nv_get_string(cnvin, "status")) == NULL) { 186 error = ENOENT; 187 pjdlog_errno(LOG_ERR, "Field 'status' is missing."); 188 goto end; 189 } 190 nv_add_string(nvout, str, "status%u", no); 191 nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 192 nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 193 "extentsize%u", no); 194 nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 195 "keepdirty%u", no); 196end: 197 if (cnvin != NULL) 198 nv_free(cnvin); 199 if (cnvout != NULL) 200 nv_free(cnvout); 201 if (error != 0) 202 nv_add_int16(nvout, error, "error"); 203} 204 205static void 206control_status(struct hastd_config *cfg, struct nv *nvout, 207 struct hast_resource *res, const char *name, unsigned int no) 208{ 209 210 assert(cfg != NULL); 211 assert(nvout != NULL); 212 assert(name != NULL); 213 214 /* Name is always needed. 
*/ 215 nv_add_string(nvout, name, "resource%u", no); 216 217 if (res == NULL) { 218 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 219 if (strcmp(res->hr_name, name) == 0) 220 break; 221 } 222 if (res == NULL) { 223 nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 224 return; 225 } 226 } 227 assert(res != NULL); 228 nv_add_string(nvout, res->hr_provname, "provname%u", no); 229 nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 230 nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 231 switch (res->hr_replication) { 232 case HAST_REPLICATION_FULLSYNC: 233 nv_add_string(nvout, "fullsync", "replication%u", no); 234 break; 235 case HAST_REPLICATION_MEMSYNC: 236 nv_add_string(nvout, "memsync", "replication%u", no); 237 break; 238 case HAST_REPLICATION_ASYNC: 239 nv_add_string(nvout, "async", "replication%u", no); 240 break; 241 default: 242 nv_add_string(nvout, "unknown", "replication%u", no); 243 break; 244 } 245 nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 246 247 switch (res->hr_role) { 248 case HAST_ROLE_PRIMARY: 249 assert(res->hr_workerpid != 0); 250 /* FALLTHROUGH */ 251 case HAST_ROLE_SECONDARY: 252 if (res->hr_workerpid != 0) 253 break; 254 /* FALLTHROUGH */ 255 default: 256 return; 257 } 258 259 /* 260 * If we are here, it means that we have a worker process, which we 261 * want to ask some questions. 
262 */ 263 control_status_worker(res, nvout, no); 264} 265 266void 267control_handle(struct hastd_config *cfg) 268{ 269 struct proto_conn *conn; 270 struct nv *nvin, *nvout; 271 unsigned int ii; 272 const char *str; 273 uint8_t cmd, role; 274 int error; 275 276 if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 277 pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 278 return; 279 } 280 281 cfg->hc_controlin = conn; 282 nvin = nvout = NULL; 283 role = HAST_ROLE_UNDEF; 284 285 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 286 pjdlog_errno(LOG_ERR, "Unable to receive control header"); 287 nvin = NULL; 288 goto close; 289 } 290 291 /* Obtain command code. 0 means that nv_get_uint8() failed. */ 292 cmd = nv_get_uint8(nvin, "cmd"); 293 if (cmd == 0) { 294 pjdlog_error("Control header is missing 'cmd' field."); 295 error = EHAST_INVALID; 296 goto close; 297 } 298 299 /* Allocate outgoing nv structure. */ 300 nvout = nv_alloc(); 301 if (nvout == NULL) { 302 pjdlog_error("Unable to allocate header for control response."); 303 error = EHAST_NOMEMORY; 304 goto close; 305 } 306 307 error = 0; 308 309 str = nv_get_string(nvin, "resource0"); 310 if (str == NULL) { 311 pjdlog_error("Control header is missing 'resource0' field."); 312 error = EHAST_INVALID; 313 goto fail; 314 } 315 if (cmd == HASTCTL_SET_ROLE) { 316 role = nv_get_uint8(nvin, "role"); 317 switch (role) { 318 case HAST_ROLE_INIT: /* Is that valid to set, hmm? */ 319 case HAST_ROLE_PRIMARY: 320 case HAST_ROLE_SECONDARY: 321 break; 322 default: 323 pjdlog_error("Invalid role received (%hhu).", role); 324 error = EHAST_INVALID; 325 goto fail; 326 } 327 } 328 if (strcmp(str, "all") == 0) { 329 struct hast_resource *res; 330 331 /* All configured resources. 
*/ 332 333 ii = 0; 334 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 335 switch (cmd) { 336 case HASTCTL_SET_ROLE: 337 control_set_role_common(cfg, nvout, role, res, 338 res->hr_name, ii++); 339 break; 340 case HASTCTL_STATUS: 341 control_status(cfg, nvout, res, res->hr_name, 342 ii++); 343 break; 344 default: 345 pjdlog_error("Invalid command received (%hhu).", 346 cmd); 347 error = EHAST_UNIMPLEMENTED; 348 goto fail; 349 } 350 } 351 } else { 352 /* Only selected resources. */ 353 354 for (ii = 0; ; ii++) { 355 str = nv_get_string(nvin, "resource%u", ii); 356 if (str == NULL) 357 break; 358 switch (cmd) { 359 case HASTCTL_SET_ROLE: 360 control_set_role_common(cfg, nvout, role, NULL, 361 str, ii); 362 break; 363 case HASTCTL_STATUS: 364 control_status(cfg, nvout, NULL, str, ii); 365 break; 366 default: 367 pjdlog_error("Invalid command received (%hhu).", 368 cmd); 369 error = EHAST_UNIMPLEMENTED; 370 goto fail; 371 } 372 } 373 } 374 if (nv_error(nvout) != 0) 375 goto close; 376fail: 377 if (error != 0) 378 nv_add_int16(nvout, error, "error"); 379 380 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 381 pjdlog_errno(LOG_ERR, "Unable to send control response"); 382close: 383 if (nvin != NULL) 384 nv_free(nvin); 385 if (nvout != NULL) 386 nv_free(nvout); 387 proto_close(conn); 388 cfg->hc_controlin = NULL; 389} 390 391/* 392 * Thread handles control requests from the parent. 
393 */ 394void * 395ctrl_thread(void *arg) 396{ 397 struct hast_resource *res = arg; 398 struct nv *nvin, *nvout; 399 uint8_t cmd; 400 401 for (;;) { 402 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 403 if (sigexit_received) 404 pthread_exit(NULL); 405 pjdlog_errno(LOG_ERR, 406 "Unable to receive control message"); 407 kill(getpid(), SIGTERM); 408 pthread_exit(NULL); 409 } 410 cmd = nv_get_uint8(nvin, "cmd"); 411 if (cmd == 0) { 412 pjdlog_error("Control message is missing 'cmd' field."); 413 nv_free(nvin); 414 continue; 415 } 416 nvout = nv_alloc(); 417 switch (cmd) { 418 case HASTCTL_STATUS: 419 if (res->hr_remotein != NULL && 420 res->hr_remoteout != NULL) { 421 nv_add_string(nvout, "complete", "status"); 422 } else { 423 nv_add_string(nvout, "degraded", "status"); 424 } 425 nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 426 "extentsize"); 427 if (res->hr_role == HAST_ROLE_PRIMARY) { 428 nv_add_uint32(nvout, 429 (uint32_t)res->hr_keepdirty, "keepdirty"); 430 nv_add_uint64(nvout, 431 (uint64_t)(activemap_ndirty(res->hr_amp) * 432 res->hr_extentsize), "dirty"); 433 } else { 434 nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 435 nv_add_uint64(nvout, (uint64_t)0, "dirty"); 436 } 437 nv_add_int16(nvout, 0, "error"); 438 break; 439 case HASTCTL_RELOAD: 440 /* 441 * When parent receives SIGHUP and discovers that 442 * something related to us has changes, it sends reload 443 * message to us. 
444 */ 445 assert(res->hr_role == HAST_ROLE_PRIMARY); 446 primary_config_reload(res, nvin); 447 nv_add_int16(nvout, 0, "error"); 448 break; 449 default: 450 nv_add_int16(nvout, EINVAL, "error"); 451 break; 452 } 453 nv_free(nvin); 454 if (nv_error(nvout) != 0) { 455 pjdlog_error("Unable to create answer on control message."); 456 nv_free(nvout); 457 continue; 458 } 459 if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 460 pjdlog_errno(LOG_ERR, 461 "Unable to send reply to control message"); 462 } 463 nv_free(nvout); 464 } 465 /* NOTREACHED */ 466 return (NULL); 467} 468