control.c revision 217784
1/*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Pawel Jakub Dawidek under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sbin/hastd/control.c 217784 2011-01-24 15:04:15Z pjd $"); 32 33#include <sys/types.h> 34#include <sys/wait.h> 35 36#include <assert.h> 37#include <errno.h> 38#include <pthread.h> 39#include <signal.h> 40#include <stdio.h> 41#include <string.h> 42#include <unistd.h> 43 44#include "hast.h" 45#include "hastd.h" 46#include "hast_proto.h" 47#include "hooks.h" 48#include "nv.h" 49#include "pjdlog.h" 50#include "proto.h" 51#include "subr.h" 52 53#include "control.h" 54 55void 56child_cleanup(struct hast_resource *res) 57{ 58 59 proto_close(res->hr_ctrl); 60 res->hr_ctrl = NULL; 61 if (res->hr_event != NULL) { 62 proto_close(res->hr_event); 63 res->hr_event = NULL; 64 } 65 res->hr_workerpid = 0; 66} 67 68static void 69control_set_role_common(struct hastd_config *cfg, struct nv *nvout, 70 uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 71{ 72 int oldrole; 73 74 /* Name is always needed. */ 75 if (name != NULL) 76 nv_add_string(nvout, name, "resource%u", no); 77 78 if (res == NULL) { 79 assert(cfg != NULL); 80 assert(name != NULL); 81 82 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 83 if (strcmp(res->hr_name, name) == 0) 84 break; 85 } 86 if (res == NULL) { 87 nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 88 return; 89 } 90 } 91 assert(res != NULL); 92 93 /* Send previous role back. */ 94 nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 95 96 /* Nothing changed, return here. */ 97 if (role == res->hr_role) 98 return; 99 100 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 101 pjdlog_info("Role changed to %s.", role2str(role)); 102 103 /* Change role to the new one. */ 104 oldrole = res->hr_role; 105 res->hr_role = role; 106 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 107 108 /* 109 * If previous role was primary or secondary we have to kill process 110 * doing that work. 111 */ 112 if (res->hr_workerpid != 0) { 113 if (kill(res->hr_workerpid, SIGTERM) < 0) { 114 pjdlog_errno(LOG_WARNING, 115 "Unable to kill worker process %u", 116 (unsigned int)res->hr_workerpid); 117 } else if (waitpid(res->hr_workerpid, NULL, 0) != 118 res->hr_workerpid) { 119 pjdlog_errno(LOG_WARNING, 120 "Error while waiting for worker process %u", 121 (unsigned int)res->hr_workerpid); 122 } else { 123 pjdlog_debug(1, "Worker process %u stopped.", 124 (unsigned int)res->hr_workerpid); 125 } 126 child_cleanup(res); 127 } 128 129 /* Start worker process if we are changing to primary. */ 130 if (role == HAST_ROLE_PRIMARY) 131 hastd_primary(res); 132 pjdlog_prefix_set("%s", ""); 133 hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole), 134 role2str(res->hr_role), NULL); 135} 136 137void 138control_set_role(struct hast_resource *res, uint8_t role) 139{ 140 141 control_set_role_common(NULL, NULL, role, res, NULL, 0); 142} 143 144static void 145control_status_worker(struct hast_resource *res, struct nv *nvout, 146 unsigned int no) 147{ 148 struct nv *cnvin, *cnvout; 149 const char *str; 150 int error; 151 152 cnvin = cnvout = NULL; 153 error = 0; 154 155 /* 156 * Prepare and send command to worker process. 157 */ 158 cnvout = nv_alloc(); 159 nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd"); 160 error = nv_error(cnvout); 161 if (error != 0) { 162 pjdlog_common(LOG_ERR, 0, error, 163 "Unable to prepare control header"); 164 goto end; 165 } 166 if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 167 error = errno; 168 pjdlog_errno(LOG_ERR, "Unable to send control header"); 169 goto end; 170 } 171 172 /* 173 * Receive response. 174 */ 175 if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 176 error = errno; 177 pjdlog_errno(LOG_ERR, "Unable to receive control header"); 178 goto end; 179 } 180 181 error = nv_get_int16(cnvin, "error"); 182 if (error != 0) 183 goto end; 184 185 if ((str = nv_get_string(cnvin, "status")) == NULL) { 186 error = ENOENT; 187 pjdlog_errno(LOG_ERR, "Field 'status' is missing."); 188 goto end; 189 } 190 nv_add_string(nvout, str, "status%u", no); 191 nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 192 nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 193 "extentsize%u", no); 194 nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 195 "keepdirty%u", no); 196end: 197 if (cnvin != NULL) 198 nv_free(cnvin); 199 if (cnvout != NULL) 200 nv_free(cnvout); 201 if (error != 0) 202 nv_add_int16(nvout, error, "error"); 203} 204 205static void 206control_status(struct hastd_config *cfg, struct nv *nvout, 207 struct hast_resource *res, const char *name, unsigned int no) 208{ 209 210 assert(cfg != NULL); 211 assert(nvout != NULL); 212 assert(name != NULL); 213 214 /* Name is always needed. */ 215 nv_add_string(nvout, name, "resource%u", no); 216 217 if (res == NULL) { 218 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 219 if (strcmp(res->hr_name, name) == 0) 220 break; 221 } 222 if (res == NULL) { 223 nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 224 return; 225 } 226 } 227 assert(res != NULL); 228 nv_add_string(nvout, res->hr_provname, "provname%u", no); 229 nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 230 nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 231 switch (res->hr_replication) { 232 case HAST_REPLICATION_FULLSYNC: 233 nv_add_string(nvout, "fullsync", "replication%u", no); 234 break; 235 case HAST_REPLICATION_MEMSYNC: 236 nv_add_string(nvout, "memsync", "replication%u", no); 237 break; 238 case HAST_REPLICATION_ASYNC: 239 nv_add_string(nvout, "async", "replication%u", no); 240 break; 241 default: 242 nv_add_string(nvout, "unknown", "replication%u", no); 243 break; 244 } 245 nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 246 247 switch (res->hr_role) { 248 case HAST_ROLE_PRIMARY: 249 assert(res->hr_workerpid != 0); 250 /* FALLTHROUGH */ 251 case HAST_ROLE_SECONDARY: 252 if (res->hr_workerpid != 0) 253 break; 254 /* FALLTHROUGH */ 255 default: 256 return; 257 } 258 259 /* 260 * If we are here, it means that we have a worker process, which we 261 * want to ask some questions. 262 */ 263 control_status_worker(res, nvout, no); 264} 265 266void 267control_handle(struct hastd_config *cfg) 268{ 269 struct proto_conn *conn; 270 struct nv *nvin, *nvout; 271 unsigned int ii; 272 const char *str; 273 uint8_t cmd, role; 274 int error; 275 276 if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 277 pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 278 return; 279 } 280 281 nvin = nvout = NULL; 282 role = HAST_ROLE_UNDEF; 283 284 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 285 pjdlog_errno(LOG_ERR, "Unable to receive control header"); 286 nvin = NULL; 287 goto close; 288 } 289 290 /* Obtain command code. 0 means that nv_get_uint8() failed. */ 291 cmd = nv_get_uint8(nvin, "cmd"); 292 if (cmd == 0) { 293 pjdlog_error("Control header is missing 'cmd' field."); 294 error = EHAST_INVALID; 295 goto close; 296 } 297 298 /* Allocate outgoing nv structure. */ 299 nvout = nv_alloc(); 300 if (nvout == NULL) { 301 pjdlog_error("Unable to allocate header for control response."); 302 error = EHAST_NOMEMORY; 303 goto close; 304 } 305 306 error = 0; 307 308 str = nv_get_string(nvin, "resource0"); 309 if (str == NULL) { 310 pjdlog_error("Control header is missing 'resource0' field."); 311 error = EHAST_INVALID; 312 goto fail; 313 } 314 if (cmd == HASTCTL_SET_ROLE) { 315 role = nv_get_uint8(nvin, "role"); 316 switch (role) { 317 case HAST_ROLE_INIT: /* Is that valid to set, hmm? */ 318 case HAST_ROLE_PRIMARY: 319 case HAST_ROLE_SECONDARY: 320 break; 321 default: 322 pjdlog_error("Invalid role received (%hhu).", role); 323 error = EHAST_INVALID; 324 goto fail; 325 } 326 } 327 if (strcmp(str, "all") == 0) { 328 struct hast_resource *res; 329 330 /* All configured resources. */ 331 332 ii = 0; 333 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 334 switch (cmd) { 335 case HASTCTL_SET_ROLE: 336 control_set_role_common(cfg, nvout, role, res, 337 res->hr_name, ii++); 338 break; 339 case HASTCTL_STATUS: 340 control_status(cfg, nvout, res, res->hr_name, 341 ii++); 342 break; 343 default: 344 pjdlog_error("Invalid command received (%hhu).", 345 cmd); 346 error = EHAST_UNIMPLEMENTED; 347 goto fail; 348 } 349 } 350 } else { 351 /* Only selected resources. */ 352 353 for (ii = 0; ; ii++) { 354 str = nv_get_string(nvin, "resource%u", ii); 355 if (str == NULL) 356 break; 357 switch (cmd) { 358 case HASTCTL_SET_ROLE: 359 control_set_role_common(cfg, nvout, role, NULL, 360 str, ii); 361 break; 362 case HASTCTL_STATUS: 363 control_status(cfg, nvout, NULL, str, ii); 364 break; 365 default: 366 pjdlog_error("Invalid command received (%hhu).", 367 cmd); 368 error = EHAST_UNIMPLEMENTED; 369 goto fail; 370 } 371 } 372 } 373 if (nv_error(nvout) != 0) 374 goto close; 375fail: 376 if (error != 0) 377 nv_add_int16(nvout, error, "error"); 378 379 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 380 pjdlog_errno(LOG_ERR, "Unable to send control response"); 381close: 382 if (nvin != NULL) 383 nv_free(nvin); 384 if (nvout != NULL) 385 nv_free(nvout); 386 proto_close(conn); 387} 388 389/* 390 * Thread handles control requests from the parent. 391 */ 392void * 393ctrl_thread(void *arg) 394{ 395 struct hast_resource *res = arg; 396 struct nv *nvin, *nvout; 397 uint8_t cmd; 398 399 for (;;) { 400 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 401 if (sigexit_received) 402 pthread_exit(NULL); 403 pjdlog_errno(LOG_ERR, 404 "Unable to receive control message"); 405 kill(getpid(), SIGTERM); 406 pthread_exit(NULL); 407 } 408 cmd = nv_get_uint8(nvin, "cmd"); 409 if (cmd == 0) { 410 pjdlog_error("Control message is missing 'cmd' field."); 411 nv_free(nvin); 412 continue; 413 } 414 nvout = nv_alloc(); 415 switch (cmd) { 416 case HASTCTL_STATUS: 417 if (res->hr_remotein != NULL && 418 res->hr_remoteout != NULL) { 419 nv_add_string(nvout, "complete", "status"); 420 } else { 421 nv_add_string(nvout, "degraded", "status"); 422 } 423 nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 424 "extentsize"); 425 if (res->hr_role == HAST_ROLE_PRIMARY) { 426 nv_add_uint32(nvout, 427 (uint32_t)res->hr_keepdirty, "keepdirty"); 428 nv_add_uint64(nvout, 429 (uint64_t)(activemap_ndirty(res->hr_amp) * 430 res->hr_extentsize), "dirty"); 431 } else { 432 nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 433 nv_add_uint64(nvout, (uint64_t)0, "dirty"); 434 } 435 nv_add_int16(nvout, 0, "error"); 436 break; 437 case HASTCTL_RELOAD: 438 /* 439 * When parent receives SIGHUP and discovers that 440 * something related to us has changes, it sends reload 441 * message to us. 442 */ 443 assert(res->hr_role == HAST_ROLE_PRIMARY); 444 primary_config_reload(res, nvin); 445 nv_add_int16(nvout, 0, "error"); 446 break; 447 default: 448 nv_add_int16(nvout, EINVAL, "error"); 449 break; 450 } 451 nv_free(nvin); 452 if (nv_error(nvout) != 0) { 453 pjdlog_error("Unable to create answer on control message."); 454 nv_free(nvout); 455 continue; 456 } 457 if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 458 pjdlog_errno(LOG_ERR, 459 "Unable to send reply to control message"); 460 } 461 nv_free(nvout); 462 } 463 /* NOTREACHED */ 464 return (NULL); 465} 466