/* control.c revision 217784 */
1204076Spjd/*- 2204076Spjd * Copyright (c) 2009-2010 The FreeBSD Foundation 3204076Spjd * All rights reserved. 4204076Spjd * 5204076Spjd * This software was developed by Pawel Jakub Dawidek under sponsorship from 6204076Spjd * the FreeBSD Foundation. 7204076Spjd * 8204076Spjd * Redistribution and use in source and binary forms, with or without 9204076Spjd * modification, are permitted provided that the following conditions 10204076Spjd * are met: 11204076Spjd * 1. Redistributions of source code must retain the above copyright 12204076Spjd * notice, this list of conditions and the following disclaimer. 13204076Spjd * 2. Redistributions in binary form must reproduce the above copyright 14204076Spjd * notice, this list of conditions and the following disclaimer in the 15204076Spjd * documentation and/or other materials provided with the distribution. 16204076Spjd * 17204076Spjd * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18204076Spjd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19204076Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20204076Spjd * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21204076Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22204076Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23204076Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24204076Spjd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25204076Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26204076Spjd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27204076Spjd * SUCH DAMAGE. 
28204076Spjd */ 29204076Spjd 30204076Spjd#include <sys/cdefs.h> 31204076Spjd__FBSDID("$FreeBSD: head/sbin/hastd/control.c 217784 2011-01-24 15:04:15Z pjd $"); 32204076Spjd 33204076Spjd#include <sys/types.h> 34204076Spjd#include <sys/wait.h> 35204076Spjd 36204076Spjd#include <assert.h> 37204076Spjd#include <errno.h> 38204076Spjd#include <pthread.h> 39213003Spjd#include <signal.h> 40204076Spjd#include <stdio.h> 41204076Spjd#include <string.h> 42213006Spjd#include <unistd.h> 43204076Spjd 44204076Spjd#include "hast.h" 45204076Spjd#include "hastd.h" 46204076Spjd#include "hast_proto.h" 47211978Spjd#include "hooks.h" 48204076Spjd#include "nv.h" 49204076Spjd#include "pjdlog.h" 50204076Spjd#include "proto.h" 51204076Spjd#include "subr.h" 52204076Spjd 53204076Spjd#include "control.h" 54204076Spjd 55213006Spjdvoid 56213006Spjdchild_cleanup(struct hast_resource *res) 57213006Spjd{ 58213006Spjd 59213006Spjd proto_close(res->hr_ctrl); 60213006Spjd res->hr_ctrl = NULL; 61213579Spjd if (res->hr_event != NULL) { 62213579Spjd proto_close(res->hr_event); 63213579Spjd res->hr_event = NULL; 64213579Spjd } 65213006Spjd res->hr_workerpid = 0; 66213006Spjd} 67213006Spjd 68204076Spjdstatic void 69210882Spjdcontrol_set_role_common(struct hastd_config *cfg, struct nv *nvout, 70210882Spjd uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 71204076Spjd{ 72211978Spjd int oldrole; 73204076Spjd 74204076Spjd /* Name is always needed. 
*/ 75210882Spjd if (name != NULL) 76210882Spjd nv_add_string(nvout, name, "resource%u", no); 77204076Spjd 78204076Spjd if (res == NULL) { 79210882Spjd assert(cfg != NULL); 80210882Spjd assert(name != NULL); 81210882Spjd 82204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 83204076Spjd if (strcmp(res->hr_name, name) == 0) 84204076Spjd break; 85204076Spjd } 86204076Spjd if (res == NULL) { 87204076Spjd nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 88204076Spjd return; 89204076Spjd } 90204076Spjd } 91204076Spjd assert(res != NULL); 92204076Spjd 93204076Spjd /* Send previous role back. */ 94204076Spjd nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 95204076Spjd 96204076Spjd /* Nothing changed, return here. */ 97204076Spjd if (role == res->hr_role) 98204076Spjd return; 99204076Spjd 100204076Spjd pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 101204076Spjd pjdlog_info("Role changed to %s.", role2str(role)); 102204076Spjd 103204076Spjd /* Change role to the new one. */ 104211978Spjd oldrole = res->hr_role; 105204076Spjd res->hr_role = role; 106204076Spjd pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 107204076Spjd 108204076Spjd /* 109204076Spjd * If previous role was primary or secondary we have to kill process 110204076Spjd * doing that work. 
111204076Spjd */ 112204076Spjd if (res->hr_workerpid != 0) { 113204076Spjd if (kill(res->hr_workerpid, SIGTERM) < 0) { 114204076Spjd pjdlog_errno(LOG_WARNING, 115204076Spjd "Unable to kill worker process %u", 116204076Spjd (unsigned int)res->hr_workerpid); 117204076Spjd } else if (waitpid(res->hr_workerpid, NULL, 0) != 118204076Spjd res->hr_workerpid) { 119204076Spjd pjdlog_errno(LOG_WARNING, 120204076Spjd "Error while waiting for worker process %u", 121204076Spjd (unsigned int)res->hr_workerpid); 122204076Spjd } else { 123204076Spjd pjdlog_debug(1, "Worker process %u stopped.", 124204076Spjd (unsigned int)res->hr_workerpid); 125204076Spjd } 126213006Spjd child_cleanup(res); 127204076Spjd } 128204076Spjd 129204076Spjd /* Start worker process if we are changing to primary. */ 130204076Spjd if (role == HAST_ROLE_PRIMARY) 131204076Spjd hastd_primary(res); 132204076Spjd pjdlog_prefix_set("%s", ""); 133211978Spjd hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole), 134211978Spjd role2str(res->hr_role), NULL); 135204076Spjd} 136204076Spjd 137210882Spjdvoid 138210882Spjdcontrol_set_role(struct hast_resource *res, uint8_t role) 139210882Spjd{ 140210882Spjd 141210882Spjd control_set_role_common(NULL, NULL, role, res, NULL, 0); 142210882Spjd} 143210882Spjd 144204076Spjdstatic void 145204076Spjdcontrol_status_worker(struct hast_resource *res, struct nv *nvout, 146204076Spjd unsigned int no) 147204076Spjd{ 148204076Spjd struct nv *cnvin, *cnvout; 149204076Spjd const char *str; 150204076Spjd int error; 151204076Spjd 152204076Spjd cnvin = cnvout = NULL; 153204076Spjd error = 0; 154204076Spjd 155204076Spjd /* 156204076Spjd * Prepare and send command to worker process. 
157204076Spjd */ 158204076Spjd cnvout = nv_alloc(); 159204076Spjd nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd"); 160204076Spjd error = nv_error(cnvout); 161204076Spjd if (error != 0) { 162217737Spjd pjdlog_common(LOG_ERR, 0, error, 163217737Spjd "Unable to prepare control header"); 164204076Spjd goto end; 165204076Spjd } 166204076Spjd if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 167204076Spjd error = errno; 168217737Spjd pjdlog_errno(LOG_ERR, "Unable to send control header"); 169204076Spjd goto end; 170204076Spjd } 171204076Spjd 172204076Spjd /* 173204076Spjd * Receive response. 174204076Spjd */ 175204076Spjd if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 176204076Spjd error = errno; 177217737Spjd pjdlog_errno(LOG_ERR, "Unable to receive control header"); 178204076Spjd goto end; 179204076Spjd } 180204076Spjd 181217730Spjd error = nv_get_int16(cnvin, "error"); 182204076Spjd if (error != 0) 183204076Spjd goto end; 184204076Spjd 185204076Spjd if ((str = nv_get_string(cnvin, "status")) == NULL) { 186204076Spjd error = ENOENT; 187217737Spjd pjdlog_errno(LOG_ERR, "Field 'status' is missing."); 188204076Spjd goto end; 189204076Spjd } 190204076Spjd nv_add_string(nvout, str, "status%u", no); 191204076Spjd nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 192204076Spjd nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 193204076Spjd "extentsize%u", no); 194204076Spjd nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 195204076Spjd "keepdirty%u", no); 196204076Spjdend: 197204076Spjd if (cnvin != NULL) 198204076Spjd nv_free(cnvin); 199204076Spjd if (cnvout != NULL) 200204076Spjd nv_free(cnvout); 201204076Spjd if (error != 0) 202204076Spjd nv_add_int16(nvout, error, "error"); 203204076Spjd} 204204076Spjd 205204076Spjdstatic void 206204076Spjdcontrol_status(struct hastd_config *cfg, struct nv *nvout, 207204076Spjd struct hast_resource *res, const char *name, unsigned int no) 208204076Spjd{ 209204076Spjd 210204076Spjd assert(cfg 
!= NULL); 211204076Spjd assert(nvout != NULL); 212204076Spjd assert(name != NULL); 213204076Spjd 214204076Spjd /* Name is always needed. */ 215204076Spjd nv_add_string(nvout, name, "resource%u", no); 216204076Spjd 217204076Spjd if (res == NULL) { 218204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 219204076Spjd if (strcmp(res->hr_name, name) == 0) 220204076Spjd break; 221204076Spjd } 222204076Spjd if (res == NULL) { 223204076Spjd nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 224204076Spjd return; 225204076Spjd } 226204076Spjd } 227204076Spjd assert(res != NULL); 228204076Spjd nv_add_string(nvout, res->hr_provname, "provname%u", no); 229204076Spjd nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 230204076Spjd nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 231204076Spjd switch (res->hr_replication) { 232204076Spjd case HAST_REPLICATION_FULLSYNC: 233204076Spjd nv_add_string(nvout, "fullsync", "replication%u", no); 234204076Spjd break; 235204076Spjd case HAST_REPLICATION_MEMSYNC: 236204076Spjd nv_add_string(nvout, "memsync", "replication%u", no); 237204076Spjd break; 238204076Spjd case HAST_REPLICATION_ASYNC: 239204076Spjd nv_add_string(nvout, "async", "replication%u", no); 240204076Spjd break; 241204076Spjd default: 242204076Spjd nv_add_string(nvout, "unknown", "replication%u", no); 243204076Spjd break; 244204076Spjd } 245204076Spjd nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 246204076Spjd 247204076Spjd switch (res->hr_role) { 248204076Spjd case HAST_ROLE_PRIMARY: 249204076Spjd assert(res->hr_workerpid != 0); 250204076Spjd /* FALLTHROUGH */ 251204076Spjd case HAST_ROLE_SECONDARY: 252204076Spjd if (res->hr_workerpid != 0) 253204076Spjd break; 254204076Spjd /* FALLTHROUGH */ 255204076Spjd default: 256204076Spjd return; 257204076Spjd } 258204076Spjd 259204076Spjd /* 260204076Spjd * If we are here, it means that we have a worker process, which we 261204076Spjd * want to ask some questions. 
262204076Spjd */ 263204076Spjd control_status_worker(res, nvout, no); 264204076Spjd} 265204076Spjd 266204076Spjdvoid 267204076Spjdcontrol_handle(struct hastd_config *cfg) 268204076Spjd{ 269204076Spjd struct proto_conn *conn; 270204076Spjd struct nv *nvin, *nvout; 271204076Spjd unsigned int ii; 272204076Spjd const char *str; 273204076Spjd uint8_t cmd, role; 274204076Spjd int error; 275204076Spjd 276204076Spjd if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 277204076Spjd pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 278204076Spjd return; 279204076Spjd } 280204076Spjd 281204076Spjd nvin = nvout = NULL; 282204076Spjd role = HAST_ROLE_UNDEF; 283204076Spjd 284204076Spjd if (hast_proto_recv_hdr(conn, &nvin) < 0) { 285204076Spjd pjdlog_errno(LOG_ERR, "Unable to receive control header"); 286204076Spjd nvin = NULL; 287204076Spjd goto close; 288204076Spjd } 289204076Spjd 290204076Spjd /* Obtain command code. 0 means that nv_get_uint8() failed. */ 291204076Spjd cmd = nv_get_uint8(nvin, "cmd"); 292204076Spjd if (cmd == 0) { 293204076Spjd pjdlog_error("Control header is missing 'cmd' field."); 294204076Spjd error = EHAST_INVALID; 295204076Spjd goto close; 296204076Spjd } 297204076Spjd 298204076Spjd /* Allocate outgoing nv structure. */ 299204076Spjd nvout = nv_alloc(); 300204076Spjd if (nvout == NULL) { 301204076Spjd pjdlog_error("Unable to allocate header for control response."); 302204076Spjd error = EHAST_NOMEMORY; 303204076Spjd goto close; 304204076Spjd } 305204076Spjd 306204076Spjd error = 0; 307204076Spjd 308204076Spjd str = nv_get_string(nvin, "resource0"); 309204076Spjd if (str == NULL) { 310204076Spjd pjdlog_error("Control header is missing 'resource0' field."); 311204076Spjd error = EHAST_INVALID; 312204076Spjd goto fail; 313204076Spjd } 314204076Spjd if (cmd == HASTCTL_SET_ROLE) { 315204076Spjd role = nv_get_uint8(nvin, "role"); 316204076Spjd switch (role) { 317204076Spjd case HAST_ROLE_INIT: /* Is that valid to set, hmm? 
*/ 318204076Spjd case HAST_ROLE_PRIMARY: 319204076Spjd case HAST_ROLE_SECONDARY: 320204076Spjd break; 321204076Spjd default: 322204076Spjd pjdlog_error("Invalid role received (%hhu).", role); 323204076Spjd error = EHAST_INVALID; 324204076Spjd goto fail; 325204076Spjd } 326204076Spjd } 327204076Spjd if (strcmp(str, "all") == 0) { 328204076Spjd struct hast_resource *res; 329204076Spjd 330204076Spjd /* All configured resources. */ 331204076Spjd 332204076Spjd ii = 0; 333204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 334204076Spjd switch (cmd) { 335204076Spjd case HASTCTL_SET_ROLE: 336210882Spjd control_set_role_common(cfg, nvout, role, res, 337204076Spjd res->hr_name, ii++); 338204076Spjd break; 339204076Spjd case HASTCTL_STATUS: 340204076Spjd control_status(cfg, nvout, res, res->hr_name, 341204076Spjd ii++); 342204076Spjd break; 343204076Spjd default: 344204076Spjd pjdlog_error("Invalid command received (%hhu).", 345204076Spjd cmd); 346204076Spjd error = EHAST_UNIMPLEMENTED; 347204076Spjd goto fail; 348204076Spjd } 349204076Spjd } 350204076Spjd } else { 351204076Spjd /* Only selected resources. 
*/ 352204076Spjd 353204076Spjd for (ii = 0; ; ii++) { 354204076Spjd str = nv_get_string(nvin, "resource%u", ii); 355204076Spjd if (str == NULL) 356204076Spjd break; 357204076Spjd switch (cmd) { 358204076Spjd case HASTCTL_SET_ROLE: 359210882Spjd control_set_role_common(cfg, nvout, role, NULL, 360210882Spjd str, ii); 361204076Spjd break; 362204076Spjd case HASTCTL_STATUS: 363204076Spjd control_status(cfg, nvout, NULL, str, ii); 364204076Spjd break; 365204076Spjd default: 366204076Spjd pjdlog_error("Invalid command received (%hhu).", 367204076Spjd cmd); 368204076Spjd error = EHAST_UNIMPLEMENTED; 369204076Spjd goto fail; 370204076Spjd } 371204076Spjd } 372204076Spjd } 373204076Spjd if (nv_error(nvout) != 0) 374204076Spjd goto close; 375204076Spjdfail: 376204076Spjd if (error != 0) 377204076Spjd nv_add_int16(nvout, error, "error"); 378204076Spjd 379204076Spjd if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 380204076Spjd pjdlog_errno(LOG_ERR, "Unable to send control response"); 381204076Spjdclose: 382204076Spjd if (nvin != NULL) 383204076Spjd nv_free(nvin); 384204076Spjd if (nvout != NULL) 385204076Spjd nv_free(nvout); 386204076Spjd proto_close(conn); 387204076Spjd} 388204076Spjd 389204076Spjd/* 390204076Spjd * Thread handles control requests from the parent. 
391204076Spjd */ 392204076Spjdvoid * 393204076Spjdctrl_thread(void *arg) 394204076Spjd{ 395204076Spjd struct hast_resource *res = arg; 396204076Spjd struct nv *nvin, *nvout; 397204076Spjd uint8_t cmd; 398204076Spjd 399204076Spjd for (;;) { 400204076Spjd if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 401204076Spjd if (sigexit_received) 402204076Spjd pthread_exit(NULL); 403204076Spjd pjdlog_errno(LOG_ERR, 404204076Spjd "Unable to receive control message"); 405213004Spjd kill(getpid(), SIGTERM); 406213004Spjd pthread_exit(NULL); 407204076Spjd } 408204076Spjd cmd = nv_get_uint8(nvin, "cmd"); 409204076Spjd if (cmd == 0) { 410204076Spjd pjdlog_error("Control message is missing 'cmd' field."); 411204076Spjd nv_free(nvin); 412204076Spjd continue; 413204076Spjd } 414204076Spjd nvout = nv_alloc(); 415204076Spjd switch (cmd) { 416204076Spjd case HASTCTL_STATUS: 417204076Spjd if (res->hr_remotein != NULL && 418204076Spjd res->hr_remoteout != NULL) { 419204076Spjd nv_add_string(nvout, "complete", "status"); 420204076Spjd } else { 421204076Spjd nv_add_string(nvout, "degraded", "status"); 422204076Spjd } 423204076Spjd nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 424204076Spjd "extentsize"); 425204076Spjd if (res->hr_role == HAST_ROLE_PRIMARY) { 426204076Spjd nv_add_uint32(nvout, 427204076Spjd (uint32_t)res->hr_keepdirty, "keepdirty"); 428204076Spjd nv_add_uint64(nvout, 429204076Spjd (uint64_t)(activemap_ndirty(res->hr_amp) * 430204076Spjd res->hr_extentsize), "dirty"); 431204076Spjd } else { 432204076Spjd nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 433204076Spjd nv_add_uint64(nvout, (uint64_t)0, "dirty"); 434204076Spjd } 435217784Spjd nv_add_int16(nvout, 0, "error"); 436204076Spjd break; 437217784Spjd case HASTCTL_RELOAD: 438217784Spjd /* 439217784Spjd * When parent receives SIGHUP and discovers that 440217784Spjd * something related to us has changes, it sends reload 441217784Spjd * message to us. 
442217784Spjd */ 443217784Spjd assert(res->hr_role == HAST_ROLE_PRIMARY); 444217784Spjd primary_config_reload(res, nvin); 445217784Spjd nv_add_int16(nvout, 0, "error"); 446217784Spjd break; 447204076Spjd default: 448204076Spjd nv_add_int16(nvout, EINVAL, "error"); 449204076Spjd break; 450204076Spjd } 451217784Spjd nv_free(nvin); 452204076Spjd if (nv_error(nvout) != 0) { 453204076Spjd pjdlog_error("Unable to create answer on control message."); 454204076Spjd nv_free(nvout); 455204076Spjd continue; 456204076Spjd } 457204076Spjd if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 458204076Spjd pjdlog_errno(LOG_ERR, 459204076Spjd "Unable to send reply to control message"); 460204076Spjd } 461204076Spjd nv_free(nvout); 462204076Spjd } 463204076Spjd /* NOTREACHED */ 464204076Spjd return (NULL); 465204076Spjd} 466