control.c revision 213006
1/*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Pawel Jakub Dawidek under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sbin/hastd/control.c 213006 2010-09-22 18:57:06Z pjd $"); 32 33#include <sys/types.h> 34#include <sys/wait.h> 35 36#include <assert.h> 37#include <errno.h> 38#include <pthread.h> 39#include <signal.h> 40#include <stdio.h> 41#include <string.h> 42#include <unistd.h> 43 44#include "hast.h" 45#include "hastd.h" 46#include "hast_proto.h" 47#include "hooks.h" 48#include "nv.h" 49#include "pjdlog.h" 50#include "proto.h" 51#include "subr.h" 52 53#include "control.h" 54 55void 56child_cleanup(struct hast_resource *res) 57{ 58 59 proto_close(res->hr_ctrl); 60 res->hr_ctrl = NULL; 61 proto_close(res->hr_event); 62 res->hr_event = NULL; 63 res->hr_workerpid = 0; 64} 65 66static void 67control_set_role_common(struct hastd_config *cfg, struct nv *nvout, 68 uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 69{ 70 int oldrole; 71 72 /* Name is always needed. */ 73 if (name != NULL) 74 nv_add_string(nvout, name, "resource%u", no); 75 76 if (res == NULL) { 77 assert(cfg != NULL); 78 assert(name != NULL); 79 80 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 81 if (strcmp(res->hr_name, name) == 0) 82 break; 83 } 84 if (res == NULL) { 85 nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 86 return; 87 } 88 } 89 assert(res != NULL); 90 91 /* Send previous role back. */ 92 nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 93 94 /* Nothing changed, return here. */ 95 if (role == res->hr_role) 96 return; 97 98 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 99 pjdlog_info("Role changed to %s.", role2str(role)); 100 101 /* Change role to the new one. */ 102 oldrole = res->hr_role; 103 res->hr_role = role; 104 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 105 106 /* 107 * If previous role was primary or secondary we have to kill process 108 * doing that work. 109 */ 110 if (res->hr_workerpid != 0) { 111 if (kill(res->hr_workerpid, SIGTERM) < 0) { 112 pjdlog_errno(LOG_WARNING, 113 "Unable to kill worker process %u", 114 (unsigned int)res->hr_workerpid); 115 } else if (waitpid(res->hr_workerpid, NULL, 0) != 116 res->hr_workerpid) { 117 pjdlog_errno(LOG_WARNING, 118 "Error while waiting for worker process %u", 119 (unsigned int)res->hr_workerpid); 120 } else { 121 pjdlog_debug(1, "Worker process %u stopped.", 122 (unsigned int)res->hr_workerpid); 123 } 124 child_cleanup(res); 125 } 126 127 /* Start worker process if we are changing to primary. */ 128 if (role == HAST_ROLE_PRIMARY) 129 hastd_primary(res); 130 pjdlog_prefix_set("%s", ""); 131 hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole), 132 role2str(res->hr_role), NULL); 133} 134 135void 136control_set_role(struct hast_resource *res, uint8_t role) 137{ 138 139 control_set_role_common(NULL, NULL, role, res, NULL, 0); 140} 141 142static void 143control_status_worker(struct hast_resource *res, struct nv *nvout, 144 unsigned int no) 145{ 146 struct nv *cnvin, *cnvout; 147 const char *str; 148 int error; 149 150 cnvin = cnvout = NULL; 151 error = 0; 152 153 /* 154 * Prepare and send command to worker process. 155 */ 156 cnvout = nv_alloc(); 157 nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd"); 158 error = nv_error(cnvout); 159 if (error != 0) { 160 /* LOG */ 161 goto end; 162 } 163 if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 164 error = errno; 165 /* LOG */ 166 goto end; 167 } 168 169 /* 170 * Receive response. 171 */ 172 if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 173 error = errno; 174 /* LOG */ 175 goto end; 176 } 177 178 error = nv_get_int64(cnvin, "error"); 179 if (error != 0) 180 goto end; 181 182 if ((str = nv_get_string(cnvin, "status")) == NULL) { 183 error = ENOENT; 184 /* LOG */ 185 goto end; 186 } 187 nv_add_string(nvout, str, "status%u", no); 188 nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 189 nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 190 "extentsize%u", no); 191 nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 192 "keepdirty%u", no); 193end: 194 if (cnvin != NULL) 195 nv_free(cnvin); 196 if (cnvout != NULL) 197 nv_free(cnvout); 198 if (error != 0) 199 nv_add_int16(nvout, error, "error"); 200} 201 202static void 203control_status(struct hastd_config *cfg, struct nv *nvout, 204 struct hast_resource *res, const char *name, unsigned int no) 205{ 206 207 assert(cfg != NULL); 208 assert(nvout != NULL); 209 assert(name != NULL); 210 211 /* Name is always needed. */ 212 nv_add_string(nvout, name, "resource%u", no); 213 214 if (res == NULL) { 215 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 216 if (strcmp(res->hr_name, name) == 0) 217 break; 218 } 219 if (res == NULL) { 220 nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 221 return; 222 } 223 } 224 assert(res != NULL); 225 nv_add_string(nvout, res->hr_provname, "provname%u", no); 226 nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 227 nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 228 switch (res->hr_replication) { 229 case HAST_REPLICATION_FULLSYNC: 230 nv_add_string(nvout, "fullsync", "replication%u", no); 231 break; 232 case HAST_REPLICATION_MEMSYNC: 233 nv_add_string(nvout, "memsync", "replication%u", no); 234 break; 235 case HAST_REPLICATION_ASYNC: 236 nv_add_string(nvout, "async", "replication%u", no); 237 break; 238 default: 239 nv_add_string(nvout, "unknown", "replication%u", no); 240 break; 241 } 242 nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 243 244 switch (res->hr_role) { 245 case HAST_ROLE_PRIMARY: 246 assert(res->hr_workerpid != 0); 247 /* FALLTHROUGH */ 248 case HAST_ROLE_SECONDARY: 249 if (res->hr_workerpid != 0) 250 break; 251 /* FALLTHROUGH */ 252 default: 253 return; 254 } 255 256 /* 257 * If we are here, it means that we have a worker process, which we 258 * want to ask some questions. 259 */ 260 control_status_worker(res, nvout, no); 261} 262 263void 264control_handle(struct hastd_config *cfg) 265{ 266 struct proto_conn *conn; 267 struct nv *nvin, *nvout; 268 unsigned int ii; 269 const char *str; 270 uint8_t cmd, role; 271 int error; 272 273 if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 274 pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 275 return; 276 } 277 278 nvin = nvout = NULL; 279 role = HAST_ROLE_UNDEF; 280 281 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 282 pjdlog_errno(LOG_ERR, "Unable to receive control header"); 283 nvin = NULL; 284 goto close; 285 } 286 287 /* Obtain command code. 0 means that nv_get_uint8() failed. */ 288 cmd = nv_get_uint8(nvin, "cmd"); 289 if (cmd == 0) { 290 pjdlog_error("Control header is missing 'cmd' field."); 291 error = EHAST_INVALID; 292 goto close; 293 } 294 295 /* Allocate outgoing nv structure. */ 296 nvout = nv_alloc(); 297 if (nvout == NULL) { 298 pjdlog_error("Unable to allocate header for control response."); 299 error = EHAST_NOMEMORY; 300 goto close; 301 } 302 303 error = 0; 304 305 str = nv_get_string(nvin, "resource0"); 306 if (str == NULL) { 307 pjdlog_error("Control header is missing 'resource0' field."); 308 error = EHAST_INVALID; 309 goto fail; 310 } 311 if (cmd == HASTCTL_SET_ROLE) { 312 role = nv_get_uint8(nvin, "role"); 313 switch (role) { 314 case HAST_ROLE_INIT: /* Is that valid to set, hmm? */ 315 case HAST_ROLE_PRIMARY: 316 case HAST_ROLE_SECONDARY: 317 break; 318 default: 319 pjdlog_error("Invalid role received (%hhu).", role); 320 error = EHAST_INVALID; 321 goto fail; 322 } 323 } 324 if (strcmp(str, "all") == 0) { 325 struct hast_resource *res; 326 327 /* All configured resources. */ 328 329 ii = 0; 330 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 331 switch (cmd) { 332 case HASTCTL_SET_ROLE: 333 control_set_role_common(cfg, nvout, role, res, 334 res->hr_name, ii++); 335 break; 336 case HASTCTL_STATUS: 337 control_status(cfg, nvout, res, res->hr_name, 338 ii++); 339 break; 340 default: 341 pjdlog_error("Invalid command received (%hhu).", 342 cmd); 343 error = EHAST_UNIMPLEMENTED; 344 goto fail; 345 } 346 } 347 } else { 348 /* Only selected resources. */ 349 350 for (ii = 0; ; ii++) { 351 str = nv_get_string(nvin, "resource%u", ii); 352 if (str == NULL) 353 break; 354 switch (cmd) { 355 case HASTCTL_SET_ROLE: 356 control_set_role_common(cfg, nvout, role, NULL, 357 str, ii); 358 break; 359 case HASTCTL_STATUS: 360 control_status(cfg, nvout, NULL, str, ii); 361 break; 362 default: 363 pjdlog_error("Invalid command received (%hhu).", 364 cmd); 365 error = EHAST_UNIMPLEMENTED; 366 goto fail; 367 } 368 } 369 } 370 if (nv_error(nvout) != 0) 371 goto close; 372fail: 373 if (error != 0) 374 nv_add_int16(nvout, error, "error"); 375 376 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 377 pjdlog_errno(LOG_ERR, "Unable to send control response"); 378close: 379 if (nvin != NULL) 380 nv_free(nvin); 381 if (nvout != NULL) 382 nv_free(nvout); 383 proto_close(conn); 384} 385 386/* 387 * Thread handles control requests from the parent. 388 */ 389void * 390ctrl_thread(void *arg) 391{ 392 struct hast_resource *res = arg; 393 struct nv *nvin, *nvout; 394 uint8_t cmd; 395 396 for (;;) { 397 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 398 if (sigexit_received) 399 pthread_exit(NULL); 400 pjdlog_errno(LOG_ERR, 401 "Unable to receive control message"); 402 kill(getpid(), SIGTERM); 403 pthread_exit(NULL); 404 } 405 cmd = nv_get_uint8(nvin, "cmd"); 406 if (cmd == 0) { 407 pjdlog_error("Control message is missing 'cmd' field."); 408 nv_free(nvin); 409 continue; 410 } 411 nv_free(nvin); 412 nvout = nv_alloc(); 413 switch (cmd) { 414 case HASTCTL_STATUS: 415 if (res->hr_remotein != NULL && 416 res->hr_remoteout != NULL) { 417 nv_add_string(nvout, "complete", "status"); 418 } else { 419 nv_add_string(nvout, "degraded", "status"); 420 } 421 nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 422 "extentsize"); 423 if (res->hr_role == HAST_ROLE_PRIMARY) { 424 nv_add_uint32(nvout, 425 (uint32_t)res->hr_keepdirty, "keepdirty"); 426 nv_add_uint64(nvout, 427 (uint64_t)(activemap_ndirty(res->hr_amp) * 428 res->hr_extentsize), "dirty"); 429 } else { 430 nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 431 nv_add_uint64(nvout, (uint64_t)0, "dirty"); 432 } 433 break; 434 default: 435 nv_add_int16(nvout, EINVAL, "error"); 436 break; 437 } 438 if (nv_error(nvout) != 0) { 439 pjdlog_error("Unable to create answer on control message."); 440 nv_free(nvout); 441 continue; 442 } 443 if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 444 pjdlog_errno(LOG_ERR, 445 "Unable to send reply to control message"); 446 } 447 nv_free(nvout); 448 } 449 /* NOTREACHED */ 450 return (NULL); 451} 452