control.c revision 210882
1204076Spjd/*- 2204076Spjd * Copyright (c) 2009-2010 The FreeBSD Foundation 3204076Spjd * All rights reserved. 4204076Spjd * 5204076Spjd * This software was developed by Pawel Jakub Dawidek under sponsorship from 6204076Spjd * the FreeBSD Foundation. 7204076Spjd * 8204076Spjd * Redistribution and use in source and binary forms, with or without 9204076Spjd * modification, are permitted provided that the following conditions 10204076Spjd * are met: 11204076Spjd * 1. Redistributions of source code must retain the above copyright 12204076Spjd * notice, this list of conditions and the following disclaimer. 13204076Spjd * 2. Redistributions in binary form must reproduce the above copyright 14204076Spjd * notice, this list of conditions and the following disclaimer in the 15204076Spjd * documentation and/or other materials provided with the distribution. 16204076Spjd * 17204076Spjd * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18204076Spjd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19204076Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20204076Spjd * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21204076Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22204076Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23204076Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24204076Spjd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25204076Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26204076Spjd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27204076Spjd * SUCH DAMAGE. 28204076Spjd */ 29204076Spjd 30204076Spjd#include <sys/cdefs.h> 31204076Spjd__FBSDID("$FreeBSD: head/sbin/hastd/control.c 210882 2010-08-05 19:04:29Z pjd $"); 32204076Spjd 33204076Spjd#include <sys/types.h> 34204076Spjd#include <sys/wait.h> 35204076Spjd#include <signal.h> 36204076Spjd 37204076Spjd#include <assert.h> 38204076Spjd#include <errno.h> 39204076Spjd#include <pthread.h> 40204076Spjd#include <stdio.h> 41204076Spjd#include <string.h> 42204076Spjd 43204076Spjd#include "hast.h" 44204076Spjd#include "hastd.h" 45204076Spjd#include "hast_proto.h" 46204076Spjd#include "nv.h" 47204076Spjd#include "pjdlog.h" 48204076Spjd#include "proto.h" 49204076Spjd#include "subr.h" 50204076Spjd 51204076Spjd#include "control.h" 52204076Spjd 53204076Spjdstatic void 54210882Spjdcontrol_set_role_common(struct hastd_config *cfg, struct nv *nvout, 55210882Spjd uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 56204076Spjd{ 57204076Spjd 58204076Spjd /* Name is always needed. */ 59210882Spjd if (name != NULL) 60210882Spjd nv_add_string(nvout, name, "resource%u", no); 61204076Spjd 62204076Spjd if (res == NULL) { 63210882Spjd assert(cfg != NULL); 64210882Spjd assert(name != NULL); 65210882Spjd 66204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 67204076Spjd if (strcmp(res->hr_name, name) == 0) 68204076Spjd break; 69204076Spjd } 70204076Spjd if (res == NULL) { 71204076Spjd nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 72204076Spjd return; 73204076Spjd } 74204076Spjd } 75204076Spjd assert(res != NULL); 76204076Spjd 77204076Spjd /* Send previous role back. */ 78204076Spjd nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 79204076Spjd 80204076Spjd /* Nothing changed, return here. */ 81204076Spjd if (role == res->hr_role) 82204076Spjd return; 83204076Spjd 84204076Spjd pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 85204076Spjd pjdlog_info("Role changed to %s.", role2str(role)); 86204076Spjd 87204076Spjd /* Change role to the new one. */ 88204076Spjd res->hr_role = role; 89204076Spjd pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 90204076Spjd 91204076Spjd /* 92204076Spjd * If previous role was primary or secondary we have to kill process 93204076Spjd * doing that work. 94204076Spjd */ 95204076Spjd if (res->hr_workerpid != 0) { 96204076Spjd if (kill(res->hr_workerpid, SIGTERM) < 0) { 97204076Spjd pjdlog_errno(LOG_WARNING, 98204076Spjd "Unable to kill worker process %u", 99204076Spjd (unsigned int)res->hr_workerpid); 100204076Spjd } else if (waitpid(res->hr_workerpid, NULL, 0) != 101204076Spjd res->hr_workerpid) { 102204076Spjd pjdlog_errno(LOG_WARNING, 103204076Spjd "Error while waiting for worker process %u", 104204076Spjd (unsigned int)res->hr_workerpid); 105204076Spjd } else { 106204076Spjd pjdlog_debug(1, "Worker process %u stopped.", 107204076Spjd (unsigned int)res->hr_workerpid); 108204076Spjd } 109204076Spjd res->hr_workerpid = 0; 110204076Spjd } 111204076Spjd 112204076Spjd /* Start worker process if we are changing to primary. */ 113204076Spjd if (role == HAST_ROLE_PRIMARY) 114204076Spjd hastd_primary(res); 115204076Spjd pjdlog_prefix_set("%s", ""); 116204076Spjd} 117204076Spjd 118210882Spjdvoid 119210882Spjdcontrol_set_role(struct hast_resource *res, uint8_t role) 120210882Spjd{ 121210882Spjd 122210882Spjd control_set_role_common(NULL, NULL, role, res, NULL, 0); 123210882Spjd} 124210882Spjd 125204076Spjdstatic void 126204076Spjdcontrol_status_worker(struct hast_resource *res, struct nv *nvout, 127204076Spjd unsigned int no) 128204076Spjd{ 129204076Spjd struct nv *cnvin, *cnvout; 130204076Spjd const char *str; 131204076Spjd int error; 132204076Spjd 133204076Spjd cnvin = cnvout = NULL; 134204076Spjd error = 0; 135204076Spjd 136204076Spjd /* 137204076Spjd * Prepare and send command to worker process. 138204076Spjd */ 139204076Spjd cnvout = nv_alloc(); 140204076Spjd nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd"); 141204076Spjd error = nv_error(cnvout); 142204076Spjd if (error != 0) { 143204076Spjd /* LOG */ 144204076Spjd goto end; 145204076Spjd } 146204076Spjd if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 147204076Spjd error = errno; 148204076Spjd /* LOG */ 149204076Spjd goto end; 150204076Spjd } 151204076Spjd 152204076Spjd /* 153204076Spjd * Receive response. 154204076Spjd */ 155204076Spjd if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 156204076Spjd error = errno; 157204076Spjd /* LOG */ 158204076Spjd goto end; 159204076Spjd } 160204076Spjd 161204076Spjd error = nv_get_int64(cnvin, "error"); 162204076Spjd if (error != 0) 163204076Spjd goto end; 164204076Spjd 165204076Spjd if ((str = nv_get_string(cnvin, "status")) == NULL) { 166204076Spjd error = ENOENT; 167204076Spjd /* LOG */ 168204076Spjd goto end; 169204076Spjd } 170204076Spjd nv_add_string(nvout, str, "status%u", no); 171204076Spjd nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 172204076Spjd nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 173204076Spjd "extentsize%u", no); 174204076Spjd nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 175204076Spjd "keepdirty%u", no); 176204076Spjdend: 177204076Spjd if (cnvin != NULL) 178204076Spjd nv_free(cnvin); 179204076Spjd if (cnvout != NULL) 180204076Spjd nv_free(cnvout); 181204076Spjd if (error != 0) 182204076Spjd nv_add_int16(nvout, error, "error"); 183204076Spjd} 184204076Spjd 185204076Spjdstatic void 186204076Spjdcontrol_status(struct hastd_config *cfg, struct nv *nvout, 187204076Spjd struct hast_resource *res, const char *name, unsigned int no) 188204076Spjd{ 189204076Spjd 190204076Spjd assert(cfg != NULL); 191204076Spjd assert(nvout != NULL); 192204076Spjd assert(name != NULL); 193204076Spjd 194204076Spjd /* Name is always needed. */ 195204076Spjd nv_add_string(nvout, name, "resource%u", no); 196204076Spjd 197204076Spjd if (res == NULL) { 198204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 199204076Spjd if (strcmp(res->hr_name, name) == 0) 200204076Spjd break; 201204076Spjd } 202204076Spjd if (res == NULL) { 203204076Spjd nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 204204076Spjd return; 205204076Spjd } 206204076Spjd } 207204076Spjd assert(res != NULL); 208204076Spjd nv_add_string(nvout, res->hr_provname, "provname%u", no); 209204076Spjd nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 210204076Spjd nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 211204076Spjd switch (res->hr_replication) { 212204076Spjd case HAST_REPLICATION_FULLSYNC: 213204076Spjd nv_add_string(nvout, "fullsync", "replication%u", no); 214204076Spjd break; 215204076Spjd case HAST_REPLICATION_MEMSYNC: 216204076Spjd nv_add_string(nvout, "memsync", "replication%u", no); 217204076Spjd break; 218204076Spjd case HAST_REPLICATION_ASYNC: 219204076Spjd nv_add_string(nvout, "async", "replication%u", no); 220204076Spjd break; 221204076Spjd default: 222204076Spjd nv_add_string(nvout, "unknown", "replication%u", no); 223204076Spjd break; 224204076Spjd } 225204076Spjd nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 226204076Spjd 227204076Spjd switch (res->hr_role) { 228204076Spjd case HAST_ROLE_PRIMARY: 229204076Spjd assert(res->hr_workerpid != 0); 230204076Spjd /* FALLTHROUGH */ 231204076Spjd case HAST_ROLE_SECONDARY: 232204076Spjd if (res->hr_workerpid != 0) 233204076Spjd break; 234204076Spjd /* FALLTHROUGH */ 235204076Spjd default: 236204076Spjd return; 237204076Spjd } 238204076Spjd 239204076Spjd /* 240204076Spjd * If we are here, it means that we have a worker process, which we 241204076Spjd * want to ask some questions. 242204076Spjd */ 243204076Spjd control_status_worker(res, nvout, no); 244204076Spjd} 245204076Spjd 246204076Spjdvoid 247204076Spjdcontrol_handle(struct hastd_config *cfg) 248204076Spjd{ 249204076Spjd struct proto_conn *conn; 250204076Spjd struct nv *nvin, *nvout; 251204076Spjd unsigned int ii; 252204076Spjd const char *str; 253204076Spjd uint8_t cmd, role; 254204076Spjd int error; 255204076Spjd 256204076Spjd if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 257204076Spjd pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 258204076Spjd return; 259204076Spjd } 260204076Spjd 261204076Spjd nvin = nvout = NULL; 262204076Spjd role = HAST_ROLE_UNDEF; 263204076Spjd 264204076Spjd if (hast_proto_recv_hdr(conn, &nvin) < 0) { 265204076Spjd pjdlog_errno(LOG_ERR, "Unable to receive control header"); 266204076Spjd nvin = NULL; 267204076Spjd goto close; 268204076Spjd } 269204076Spjd 270204076Spjd /* Obtain command code. 0 means that nv_get_uint8() failed. */ 271204076Spjd cmd = nv_get_uint8(nvin, "cmd"); 272204076Spjd if (cmd == 0) { 273204076Spjd pjdlog_error("Control header is missing 'cmd' field."); 274204076Spjd error = EHAST_INVALID; 275204076Spjd goto close; 276204076Spjd } 277204076Spjd 278204076Spjd /* Allocate outgoing nv structure. */ 279204076Spjd nvout = nv_alloc(); 280204076Spjd if (nvout == NULL) { 281204076Spjd pjdlog_error("Unable to allocate header for control response."); 282204076Spjd error = EHAST_NOMEMORY; 283204076Spjd goto close; 284204076Spjd } 285204076Spjd 286204076Spjd error = 0; 287204076Spjd 288204076Spjd str = nv_get_string(nvin, "resource0"); 289204076Spjd if (str == NULL) { 290204076Spjd pjdlog_error("Control header is missing 'resource0' field."); 291204076Spjd error = EHAST_INVALID; 292204076Spjd goto fail; 293204076Spjd } 294204076Spjd if (cmd == HASTCTL_SET_ROLE) { 295204076Spjd role = nv_get_uint8(nvin, "role"); 296204076Spjd switch (role) { 297204076Spjd case HAST_ROLE_INIT: /* Is that valid to set, hmm? */ 298204076Spjd case HAST_ROLE_PRIMARY: 299204076Spjd case HAST_ROLE_SECONDARY: 300204076Spjd break; 301204076Spjd default: 302204076Spjd pjdlog_error("Invalid role received (%hhu).", role); 303204076Spjd error = EHAST_INVALID; 304204076Spjd goto fail; 305204076Spjd } 306204076Spjd } 307204076Spjd if (strcmp(str, "all") == 0) { 308204076Spjd struct hast_resource *res; 309204076Spjd 310204076Spjd /* All configured resources. */ 311204076Spjd 312204076Spjd ii = 0; 313204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 314204076Spjd switch (cmd) { 315204076Spjd case HASTCTL_SET_ROLE: 316210882Spjd control_set_role_common(cfg, nvout, role, res, 317204076Spjd res->hr_name, ii++); 318204076Spjd break; 319204076Spjd case HASTCTL_STATUS: 320204076Spjd control_status(cfg, nvout, res, res->hr_name, 321204076Spjd ii++); 322204076Spjd break; 323204076Spjd default: 324204076Spjd pjdlog_error("Invalid command received (%hhu).", 325204076Spjd cmd); 326204076Spjd error = EHAST_UNIMPLEMENTED; 327204076Spjd goto fail; 328204076Spjd } 329204076Spjd } 330204076Spjd } else { 331204076Spjd /* Only selected resources. */ 332204076Spjd 333204076Spjd for (ii = 0; ; ii++) { 334204076Spjd str = nv_get_string(nvin, "resource%u", ii); 335204076Spjd if (str == NULL) 336204076Spjd break; 337204076Spjd switch (cmd) { 338204076Spjd case HASTCTL_SET_ROLE: 339210882Spjd control_set_role_common(cfg, nvout, role, NULL, 340210882Spjd str, ii); 341204076Spjd break; 342204076Spjd case HASTCTL_STATUS: 343204076Spjd control_status(cfg, nvout, NULL, str, ii); 344204076Spjd break; 345204076Spjd default: 346204076Spjd pjdlog_error("Invalid command received (%hhu).", 347204076Spjd cmd); 348204076Spjd error = EHAST_UNIMPLEMENTED; 349204076Spjd goto fail; 350204076Spjd } 351204076Spjd } 352204076Spjd } 353204076Spjd if (nv_error(nvout) != 0) 354204076Spjd goto close; 355204076Spjdfail: 356204076Spjd if (error != 0) 357204076Spjd nv_add_int16(nvout, error, "error"); 358204076Spjd 359204076Spjd if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 360204076Spjd pjdlog_errno(LOG_ERR, "Unable to send control response"); 361204076Spjdclose: 362204076Spjd if (nvin != NULL) 363204076Spjd nv_free(nvin); 364204076Spjd if (nvout != NULL) 365204076Spjd nv_free(nvout); 366204076Spjd proto_close(conn); 367204076Spjd} 368204076Spjd 369204076Spjd/* 370204076Spjd * Thread handles control requests from the parent. 371204076Spjd */ 372204076Spjdvoid * 373204076Spjdctrl_thread(void *arg) 374204076Spjd{ 375204076Spjd struct hast_resource *res = arg; 376204076Spjd struct nv *nvin, *nvout; 377204076Spjd uint8_t cmd; 378204076Spjd 379204076Spjd for (;;) { 380204076Spjd if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 381204076Spjd if (sigexit_received) 382204076Spjd pthread_exit(NULL); 383204076Spjd pjdlog_errno(LOG_ERR, 384204076Spjd "Unable to receive control message"); 385204076Spjd continue; 386204076Spjd } 387204076Spjd cmd = nv_get_uint8(nvin, "cmd"); 388204076Spjd if (cmd == 0) { 389204076Spjd pjdlog_error("Control message is missing 'cmd' field."); 390204076Spjd nv_free(nvin); 391204076Spjd continue; 392204076Spjd } 393204076Spjd nv_free(nvin); 394204076Spjd nvout = nv_alloc(); 395204076Spjd switch (cmd) { 396204076Spjd case HASTCTL_STATUS: 397204076Spjd if (res->hr_remotein != NULL && 398204076Spjd res->hr_remoteout != NULL) { 399204076Spjd nv_add_string(nvout, "complete", "status"); 400204076Spjd } else { 401204076Spjd nv_add_string(nvout, "degraded", "status"); 402204076Spjd } 403204076Spjd nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 404204076Spjd "extentsize"); 405204076Spjd if (res->hr_role == HAST_ROLE_PRIMARY) { 406204076Spjd nv_add_uint32(nvout, 407204076Spjd (uint32_t)res->hr_keepdirty, "keepdirty"); 408204076Spjd nv_add_uint64(nvout, 409204076Spjd (uint64_t)(activemap_ndirty(res->hr_amp) * 410204076Spjd res->hr_extentsize), "dirty"); 411204076Spjd } else { 412204076Spjd nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 413204076Spjd nv_add_uint64(nvout, (uint64_t)0, "dirty"); 414204076Spjd } 415204076Spjd break; 416204076Spjd default: 417204076Spjd nv_add_int16(nvout, EINVAL, "error"); 418204076Spjd break; 419204076Spjd } 420204076Spjd if (nv_error(nvout) != 0) { 421204076Spjd pjdlog_error("Unable to create answer on control message."); 422204076Spjd nv_free(nvout); 423204076Spjd continue; 424204076Spjd } 425204076Spjd if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 426204076Spjd pjdlog_errno(LOG_ERR, 427204076Spjd "Unable to send reply to control message"); 428204076Spjd } 429204076Spjd nv_free(nvout); 430204076Spjd } 431204076Spjd /* NOTREACHED */ 432204076Spjd return (NULL); 433204076Spjd} 434