control.c revision 213579
1204076Spjd/*- 2204076Spjd * Copyright (c) 2009-2010 The FreeBSD Foundation 3204076Spjd * All rights reserved. 4204076Spjd * 5204076Spjd * This software was developed by Pawel Jakub Dawidek under sponsorship from 6204076Spjd * the FreeBSD Foundation. 7204076Spjd * 8204076Spjd * Redistribution and use in source and binary forms, with or without 9204076Spjd * modification, are permitted provided that the following conditions 10204076Spjd * are met: 11204076Spjd * 1. Redistributions of source code must retain the above copyright 12204076Spjd * notice, this list of conditions and the following disclaimer. 13204076Spjd * 2. Redistributions in binary form must reproduce the above copyright 14204076Spjd * notice, this list of conditions and the following disclaimer in the 15204076Spjd * documentation and/or other materials provided with the distribution. 16204076Spjd * 17204076Spjd * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18204076Spjd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19204076Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20204076Spjd * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21204076Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22204076Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23204076Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24204076Spjd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25204076Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26204076Spjd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27204076Spjd * SUCH DAMAGE. 28204076Spjd */ 29204076Spjd 30204076Spjd#include <sys/cdefs.h> 31204076Spjd__FBSDID("$FreeBSD: head/sbin/hastd/control.c 213579 2010-10-08 15:02:15Z pjd $"); 32204076Spjd 33204076Spjd#include <sys/types.h> 34204076Spjd#include <sys/wait.h> 35204076Spjd 36204076Spjd#include <assert.h> 37204076Spjd#include <errno.h> 38204076Spjd#include <pthread.h> 39213003Spjd#include <signal.h> 40204076Spjd#include <stdio.h> 41204076Spjd#include <string.h> 42213006Spjd#include <unistd.h> 43204076Spjd 44204076Spjd#include "hast.h" 45204076Spjd#include "hastd.h" 46204076Spjd#include "hast_proto.h" 47211978Spjd#include "hooks.h" 48204076Spjd#include "nv.h" 49204076Spjd#include "pjdlog.h" 50204076Spjd#include "proto.h" 51204076Spjd#include "subr.h" 52204076Spjd 53204076Spjd#include "control.h" 54204076Spjd 55213006Spjdvoid 56213006Spjdchild_cleanup(struct hast_resource *res) 57213006Spjd{ 58213006Spjd 59213006Spjd proto_close(res->hr_ctrl); 60213006Spjd res->hr_ctrl = NULL; 61213579Spjd if (res->hr_event != NULL) { 62213579Spjd proto_close(res->hr_event); 63213579Spjd res->hr_event = NULL; 64213579Spjd } 65213006Spjd res->hr_workerpid = 0; 66213006Spjd} 67213006Spjd 68204076Spjdstatic void 69210882Spjdcontrol_set_role_common(struct hastd_config *cfg, struct nv *nvout, 70210882Spjd uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 71204076Spjd{ 72211978Spjd int oldrole; 73204076Spjd 74204076Spjd /* Name is always needed. */ 75210882Spjd if (name != NULL) 76210882Spjd nv_add_string(nvout, name, "resource%u", no); 77204076Spjd 78204076Spjd if (res == NULL) { 79210882Spjd assert(cfg != NULL); 80210882Spjd assert(name != NULL); 81210882Spjd 82204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 83204076Spjd if (strcmp(res->hr_name, name) == 0) 84204076Spjd break; 85204076Spjd } 86204076Spjd if (res == NULL) { 87204076Spjd nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 88204076Spjd return; 89204076Spjd } 90204076Spjd } 91204076Spjd assert(res != NULL); 92204076Spjd 93204076Spjd /* Send previous role back. */ 94204076Spjd nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 95204076Spjd 96204076Spjd /* Nothing changed, return here. */ 97204076Spjd if (role == res->hr_role) 98204076Spjd return; 99204076Spjd 100204076Spjd pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 101204076Spjd pjdlog_info("Role changed to %s.", role2str(role)); 102204076Spjd 103204076Spjd /* Change role to the new one. */ 104211978Spjd oldrole = res->hr_role; 105204076Spjd res->hr_role = role; 106204076Spjd pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 107204076Spjd 108204076Spjd /* 109204076Spjd * If previous role was primary or secondary we have to kill process 110204076Spjd * doing that work. 111204076Spjd */ 112204076Spjd if (res->hr_workerpid != 0) { 113204076Spjd if (kill(res->hr_workerpid, SIGTERM) < 0) { 114204076Spjd pjdlog_errno(LOG_WARNING, 115204076Spjd "Unable to kill worker process %u", 116204076Spjd (unsigned int)res->hr_workerpid); 117204076Spjd } else if (waitpid(res->hr_workerpid, NULL, 0) != 118204076Spjd res->hr_workerpid) { 119204076Spjd pjdlog_errno(LOG_WARNING, 120204076Spjd "Error while waiting for worker process %u", 121204076Spjd (unsigned int)res->hr_workerpid); 122204076Spjd } else { 123204076Spjd pjdlog_debug(1, "Worker process %u stopped.", 124204076Spjd (unsigned int)res->hr_workerpid); 125204076Spjd } 126213006Spjd child_cleanup(res); 127204076Spjd } 128204076Spjd 129204076Spjd /* Start worker process if we are changing to primary. */ 130204076Spjd if (role == HAST_ROLE_PRIMARY) 131204076Spjd hastd_primary(res); 132204076Spjd pjdlog_prefix_set("%s", ""); 133211978Spjd hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole), 134211978Spjd role2str(res->hr_role), NULL); 135204076Spjd} 136204076Spjd 137210882Spjdvoid 138210882Spjdcontrol_set_role(struct hast_resource *res, uint8_t role) 139210882Spjd{ 140210882Spjd 141210882Spjd control_set_role_common(NULL, NULL, role, res, NULL, 0); 142210882Spjd} 143210882Spjd 144204076Spjdstatic void 145204076Spjdcontrol_status_worker(struct hast_resource *res, struct nv *nvout, 146204076Spjd unsigned int no) 147204076Spjd{ 148204076Spjd struct nv *cnvin, *cnvout; 149204076Spjd const char *str; 150204076Spjd int error; 151204076Spjd 152204076Spjd cnvin = cnvout = NULL; 153204076Spjd error = 0; 154204076Spjd 155204076Spjd /* 156204076Spjd * Prepare and send command to worker process. 157204076Spjd */ 158204076Spjd cnvout = nv_alloc(); 159204076Spjd nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd"); 160204076Spjd error = nv_error(cnvout); 161204076Spjd if (error != 0) { 162204076Spjd /* LOG */ 163204076Spjd goto end; 164204076Spjd } 165204076Spjd if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 166204076Spjd error = errno; 167204076Spjd /* LOG */ 168204076Spjd goto end; 169204076Spjd } 170204076Spjd 171204076Spjd /* 172204076Spjd * Receive response. 173204076Spjd */ 174204076Spjd if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 175204076Spjd error = errno; 176204076Spjd /* LOG */ 177204076Spjd goto end; 178204076Spjd } 179204076Spjd 180204076Spjd error = nv_get_int64(cnvin, "error"); 181204076Spjd if (error != 0) 182204076Spjd goto end; 183204076Spjd 184204076Spjd if ((str = nv_get_string(cnvin, "status")) == NULL) { 185204076Spjd error = ENOENT; 186204076Spjd /* LOG */ 187204076Spjd goto end; 188204076Spjd } 189204076Spjd nv_add_string(nvout, str, "status%u", no); 190204076Spjd nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 191204076Spjd nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 192204076Spjd "extentsize%u", no); 193204076Spjd nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 194204076Spjd "keepdirty%u", no); 195204076Spjdend: 196204076Spjd if (cnvin != NULL) 197204076Spjd nv_free(cnvin); 198204076Spjd if (cnvout != NULL) 199204076Spjd nv_free(cnvout); 200204076Spjd if (error != 0) 201204076Spjd nv_add_int16(nvout, error, "error"); 202204076Spjd} 203204076Spjd 204204076Spjdstatic void 205204076Spjdcontrol_status(struct hastd_config *cfg, struct nv *nvout, 206204076Spjd struct hast_resource *res, const char *name, unsigned int no) 207204076Spjd{ 208204076Spjd 209204076Spjd assert(cfg != NULL); 210204076Spjd assert(nvout != NULL); 211204076Spjd assert(name != NULL); 212204076Spjd 213204076Spjd /* Name is always needed. */ 214204076Spjd nv_add_string(nvout, name, "resource%u", no); 215204076Spjd 216204076Spjd if (res == NULL) { 217204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 218204076Spjd if (strcmp(res->hr_name, name) == 0) 219204076Spjd break; 220204076Spjd } 221204076Spjd if (res == NULL) { 222204076Spjd nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 223204076Spjd return; 224204076Spjd } 225204076Spjd } 226204076Spjd assert(res != NULL); 227204076Spjd nv_add_string(nvout, res->hr_provname, "provname%u", no); 228204076Spjd nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 229204076Spjd nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 230204076Spjd switch (res->hr_replication) { 231204076Spjd case HAST_REPLICATION_FULLSYNC: 232204076Spjd nv_add_string(nvout, "fullsync", "replication%u", no); 233204076Spjd break; 234204076Spjd case HAST_REPLICATION_MEMSYNC: 235204076Spjd nv_add_string(nvout, "memsync", "replication%u", no); 236204076Spjd break; 237204076Spjd case HAST_REPLICATION_ASYNC: 238204076Spjd nv_add_string(nvout, "async", "replication%u", no); 239204076Spjd break; 240204076Spjd default: 241204076Spjd nv_add_string(nvout, "unknown", "replication%u", no); 242204076Spjd break; 243204076Spjd } 244204076Spjd nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 245204076Spjd 246204076Spjd switch (res->hr_role) { 247204076Spjd case HAST_ROLE_PRIMARY: 248204076Spjd assert(res->hr_workerpid != 0); 249204076Spjd /* FALLTHROUGH */ 250204076Spjd case HAST_ROLE_SECONDARY: 251204076Spjd if (res->hr_workerpid != 0) 252204076Spjd break; 253204076Spjd /* FALLTHROUGH */ 254204076Spjd default: 255204076Spjd return; 256204076Spjd } 257204076Spjd 258204076Spjd /* 259204076Spjd * If we are here, it means that we have a worker process, which we 260204076Spjd * want to ask some questions. 261204076Spjd */ 262204076Spjd control_status_worker(res, nvout, no); 263204076Spjd} 264204076Spjd 265204076Spjdvoid 266204076Spjdcontrol_handle(struct hastd_config *cfg) 267204076Spjd{ 268204076Spjd struct proto_conn *conn; 269204076Spjd struct nv *nvin, *nvout; 270204076Spjd unsigned int ii; 271204076Spjd const char *str; 272204076Spjd uint8_t cmd, role; 273204076Spjd int error; 274204076Spjd 275204076Spjd if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 276204076Spjd pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 277204076Spjd return; 278204076Spjd } 279204076Spjd 280204076Spjd nvin = nvout = NULL; 281204076Spjd role = HAST_ROLE_UNDEF; 282204076Spjd 283204076Spjd if (hast_proto_recv_hdr(conn, &nvin) < 0) { 284204076Spjd pjdlog_errno(LOG_ERR, "Unable to receive control header"); 285204076Spjd nvin = NULL; 286204076Spjd goto close; 287204076Spjd } 288204076Spjd 289204076Spjd /* Obtain command code. 0 means that nv_get_uint8() failed. */ 290204076Spjd cmd = nv_get_uint8(nvin, "cmd"); 291204076Spjd if (cmd == 0) { 292204076Spjd pjdlog_error("Control header is missing 'cmd' field."); 293204076Spjd error = EHAST_INVALID; 294204076Spjd goto close; 295204076Spjd } 296204076Spjd 297204076Spjd /* Allocate outgoing nv structure. */ 298204076Spjd nvout = nv_alloc(); 299204076Spjd if (nvout == NULL) { 300204076Spjd pjdlog_error("Unable to allocate header for control response."); 301204076Spjd error = EHAST_NOMEMORY; 302204076Spjd goto close; 303204076Spjd } 304204076Spjd 305204076Spjd error = 0; 306204076Spjd 307204076Spjd str = nv_get_string(nvin, "resource0"); 308204076Spjd if (str == NULL) { 309204076Spjd pjdlog_error("Control header is missing 'resource0' field."); 310204076Spjd error = EHAST_INVALID; 311204076Spjd goto fail; 312204076Spjd } 313204076Spjd if (cmd == HASTCTL_SET_ROLE) { 314204076Spjd role = nv_get_uint8(nvin, "role"); 315204076Spjd switch (role) { 316204076Spjd case HAST_ROLE_INIT: /* Is that valid to set, hmm? */ 317204076Spjd case HAST_ROLE_PRIMARY: 318204076Spjd case HAST_ROLE_SECONDARY: 319204076Spjd break; 320204076Spjd default: 321204076Spjd pjdlog_error("Invalid role received (%hhu).", role); 322204076Spjd error = EHAST_INVALID; 323204076Spjd goto fail; 324204076Spjd } 325204076Spjd } 326204076Spjd if (strcmp(str, "all") == 0) { 327204076Spjd struct hast_resource *res; 328204076Spjd 329204076Spjd /* All configured resources. */ 330204076Spjd 331204076Spjd ii = 0; 332204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 333204076Spjd switch (cmd) { 334204076Spjd case HASTCTL_SET_ROLE: 335210882Spjd control_set_role_common(cfg, nvout, role, res, 336204076Spjd res->hr_name, ii++); 337204076Spjd break; 338204076Spjd case HASTCTL_STATUS: 339204076Spjd control_status(cfg, nvout, res, res->hr_name, 340204076Spjd ii++); 341204076Spjd break; 342204076Spjd default: 343204076Spjd pjdlog_error("Invalid command received (%hhu).", 344204076Spjd cmd); 345204076Spjd error = EHAST_UNIMPLEMENTED; 346204076Spjd goto fail; 347204076Spjd } 348204076Spjd } 349204076Spjd } else { 350204076Spjd /* Only selected resources. */ 351204076Spjd 352204076Spjd for (ii = 0; ; ii++) { 353204076Spjd str = nv_get_string(nvin, "resource%u", ii); 354204076Spjd if (str == NULL) 355204076Spjd break; 356204076Spjd switch (cmd) { 357204076Spjd case HASTCTL_SET_ROLE: 358210882Spjd control_set_role_common(cfg, nvout, role, NULL, 359210882Spjd str, ii); 360204076Spjd break; 361204076Spjd case HASTCTL_STATUS: 362204076Spjd control_status(cfg, nvout, NULL, str, ii); 363204076Spjd break; 364204076Spjd default: 365204076Spjd pjdlog_error("Invalid command received (%hhu).", 366204076Spjd cmd); 367204076Spjd error = EHAST_UNIMPLEMENTED; 368204076Spjd goto fail; 369204076Spjd } 370204076Spjd } 371204076Spjd } 372204076Spjd if (nv_error(nvout) != 0) 373204076Spjd goto close; 374204076Spjdfail: 375204076Spjd if (error != 0) 376204076Spjd nv_add_int16(nvout, error, "error"); 377204076Spjd 378204076Spjd if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 379204076Spjd pjdlog_errno(LOG_ERR, "Unable to send control response"); 380204076Spjdclose: 381204076Spjd if (nvin != NULL) 382204076Spjd nv_free(nvin); 383204076Spjd if (nvout != NULL) 384204076Spjd nv_free(nvout); 385204076Spjd proto_close(conn); 386204076Spjd} 387204076Spjd 388204076Spjd/* 389204076Spjd * Thread handles control requests from the parent. 390204076Spjd */ 391204076Spjdvoid * 392204076Spjdctrl_thread(void *arg) 393204076Spjd{ 394204076Spjd struct hast_resource *res = arg; 395204076Spjd struct nv *nvin, *nvout; 396204076Spjd uint8_t cmd; 397204076Spjd 398204076Spjd for (;;) { 399204076Spjd if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 400204076Spjd if (sigexit_received) 401204076Spjd pthread_exit(NULL); 402204076Spjd pjdlog_errno(LOG_ERR, 403204076Spjd "Unable to receive control message"); 404213004Spjd kill(getpid(), SIGTERM); 405213004Spjd pthread_exit(NULL); 406204076Spjd } 407204076Spjd cmd = nv_get_uint8(nvin, "cmd"); 408204076Spjd if (cmd == 0) { 409204076Spjd pjdlog_error("Control message is missing 'cmd' field."); 410204076Spjd nv_free(nvin); 411204076Spjd continue; 412204076Spjd } 413204076Spjd nv_free(nvin); 414204076Spjd nvout = nv_alloc(); 415204076Spjd switch (cmd) { 416204076Spjd case HASTCTL_STATUS: 417204076Spjd if (res->hr_remotein != NULL && 418204076Spjd res->hr_remoteout != NULL) { 419204076Spjd nv_add_string(nvout, "complete", "status"); 420204076Spjd } else { 421204076Spjd nv_add_string(nvout, "degraded", "status"); 422204076Spjd } 423204076Spjd nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 424204076Spjd "extentsize"); 425204076Spjd if (res->hr_role == HAST_ROLE_PRIMARY) { 426204076Spjd nv_add_uint32(nvout, 427204076Spjd (uint32_t)res->hr_keepdirty, "keepdirty"); 428204076Spjd nv_add_uint64(nvout, 429204076Spjd (uint64_t)(activemap_ndirty(res->hr_amp) * 430204076Spjd res->hr_extentsize), "dirty"); 431204076Spjd } else { 432204076Spjd nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 433204076Spjd nv_add_uint64(nvout, (uint64_t)0, "dirty"); 434204076Spjd } 435204076Spjd break; 436204076Spjd default: 437204076Spjd nv_add_int16(nvout, EINVAL, "error"); 438204076Spjd break; 439204076Spjd } 440204076Spjd if (nv_error(nvout) != 0) { 441204076Spjd pjdlog_error("Unable to create answer on control message."); 442204076Spjd nv_free(nvout); 443204076Spjd continue; 444204076Spjd } 445204076Spjd if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 446204076Spjd pjdlog_errno(LOG_ERR, 447204076Spjd "Unable to send reply to control message"); 448204076Spjd } 449204076Spjd nv_free(nvout); 450204076Spjd } 451204076Spjd /* NOTREACHED */ 452204076Spjd return (NULL); 453204076Spjd} 454