control.c revision 229509
1204076Spjd/*- 2204076Spjd * Copyright (c) 2009-2010 The FreeBSD Foundation 3204076Spjd * All rights reserved. 4204076Spjd * 5204076Spjd * This software was developed by Pawel Jakub Dawidek under sponsorship from 6204076Spjd * the FreeBSD Foundation. 7204076Spjd * 8204076Spjd * Redistribution and use in source and binary forms, with or without 9204076Spjd * modification, are permitted provided that the following conditions 10204076Spjd * are met: 11204076Spjd * 1. Redistributions of source code must retain the above copyright 12204076Spjd * notice, this list of conditions and the following disclaimer. 13204076Spjd * 2. Redistributions in binary form must reproduce the above copyright 14204076Spjd * notice, this list of conditions and the following disclaimer in the 15204076Spjd * documentation and/or other materials provided with the distribution. 16204076Spjd * 17204076Spjd * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18204076Spjd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19204076Spjd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20204076Spjd * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21204076Spjd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22204076Spjd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23204076Spjd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24204076Spjd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25204076Spjd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26204076Spjd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27204076Spjd * SUCH DAMAGE. 28204076Spjd */ 29204076Spjd 30204076Spjd#include <sys/cdefs.h> 31204076Spjd__FBSDID("$FreeBSD: stable/9/sbin/hastd/control.c 229509 2012-01-04 17:22:10Z trociny $"); 32204076Spjd 33204076Spjd#include <sys/types.h> 34204076Spjd#include <sys/wait.h> 35204076Spjd 36204076Spjd#include <errno.h> 37204076Spjd#include <pthread.h> 38213003Spjd#include <signal.h> 39204076Spjd#include <stdio.h> 40204076Spjd#include <string.h> 41213006Spjd#include <unistd.h> 42204076Spjd 43204076Spjd#include "hast.h" 44204076Spjd#include "hastd.h" 45219351Spjd#include "hast_checksum.h" 46219354Spjd#include "hast_compression.h" 47204076Spjd#include "hast_proto.h" 48211978Spjd#include "hooks.h" 49204076Spjd#include "nv.h" 50204076Spjd#include "pjdlog.h" 51204076Spjd#include "proto.h" 52204076Spjd#include "subr.h" 53204076Spjd 54204076Spjd#include "control.h" 55204076Spjd 56213006Spjdvoid 57213006Spjdchild_cleanup(struct hast_resource *res) 58213006Spjd{ 59213006Spjd 60213006Spjd proto_close(res->hr_ctrl); 61213006Spjd res->hr_ctrl = NULL; 62213579Spjd if (res->hr_event != NULL) { 63213579Spjd proto_close(res->hr_event); 64213579Spjd res->hr_event = NULL; 65213579Spjd } 66218218Spjd if (res->hr_conn != NULL) { 67218218Spjd proto_close(res->hr_conn); 68218218Spjd res->hr_conn = NULL; 69218218Spjd } 70213006Spjd res->hr_workerpid = 0; 71213006Spjd} 72213006Spjd 73204076Spjdstatic void 74210882Spjdcontrol_set_role_common(struct hastd_config *cfg, struct nv *nvout, 75210882Spjd uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 76204076Spjd{ 77211978Spjd int oldrole; 78204076Spjd 79204076Spjd /* Name is always needed. */ 80210882Spjd if (name != NULL) 81210882Spjd nv_add_string(nvout, name, "resource%u", no); 82204076Spjd 83204076Spjd if (res == NULL) { 84229509Strociny PJDLOG_ASSERT(cfg != NULL); 85229509Strociny PJDLOG_ASSERT(name != NULL); 86210882Spjd 87204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 88204076Spjd if (strcmp(res->hr_name, name) == 0) 89204076Spjd break; 90204076Spjd } 91204076Spjd if (res == NULL) { 92204076Spjd nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 93204076Spjd return; 94204076Spjd } 95204076Spjd } 96229509Strociny PJDLOG_ASSERT(res != NULL); 97204076Spjd 98204076Spjd /* Send previous role back. */ 99204076Spjd nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 100204076Spjd 101204076Spjd /* Nothing changed, return here. */ 102204076Spjd if (role == res->hr_role) 103204076Spjd return; 104204076Spjd 105204076Spjd pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 106204076Spjd pjdlog_info("Role changed to %s.", role2str(role)); 107204076Spjd 108204076Spjd /* Change role to the new one. */ 109211978Spjd oldrole = res->hr_role; 110204076Spjd res->hr_role = role; 111204076Spjd pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 112204076Spjd 113204076Spjd /* 114204076Spjd * If previous role was primary or secondary we have to kill process 115204076Spjd * doing that work. 116204076Spjd */ 117204076Spjd if (res->hr_workerpid != 0) { 118204076Spjd if (kill(res->hr_workerpid, SIGTERM) < 0) { 119204076Spjd pjdlog_errno(LOG_WARNING, 120204076Spjd "Unable to kill worker process %u", 121204076Spjd (unsigned int)res->hr_workerpid); 122204076Spjd } else if (waitpid(res->hr_workerpid, NULL, 0) != 123204076Spjd res->hr_workerpid) { 124204076Spjd pjdlog_errno(LOG_WARNING, 125204076Spjd "Error while waiting for worker process %u", 126204076Spjd (unsigned int)res->hr_workerpid); 127204076Spjd } else { 128204076Spjd pjdlog_debug(1, "Worker process %u stopped.", 129204076Spjd (unsigned int)res->hr_workerpid); 130204076Spjd } 131213006Spjd child_cleanup(res); 132204076Spjd } 133204076Spjd 134204076Spjd /* Start worker process if we are changing to primary. */ 135204076Spjd if (role == HAST_ROLE_PRIMARY) 136204076Spjd hastd_primary(res); 137204076Spjd pjdlog_prefix_set("%s", ""); 138211978Spjd hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole), 139211978Spjd role2str(res->hr_role), NULL); 140204076Spjd} 141204076Spjd 142210882Spjdvoid 143210882Spjdcontrol_set_role(struct hast_resource *res, uint8_t role) 144210882Spjd{ 145210882Spjd 146210882Spjd control_set_role_common(NULL, NULL, role, res, NULL, 0); 147210882Spjd} 148210882Spjd 149204076Spjdstatic void 150204076Spjdcontrol_status_worker(struct hast_resource *res, struct nv *nvout, 151204076Spjd unsigned int no) 152204076Spjd{ 153204076Spjd struct nv *cnvin, *cnvout; 154204076Spjd const char *str; 155204076Spjd int error; 156204076Spjd 157223780Strociny cnvin = NULL; 158204076Spjd 159204076Spjd /* 160204076Spjd * Prepare and send command to worker process. 161204076Spjd */ 162204076Spjd cnvout = nv_alloc(); 163221076Strociny nv_add_uint8(cnvout, CONTROL_STATUS, "cmd"); 164204076Spjd error = nv_error(cnvout); 165204076Spjd if (error != 0) { 166217737Spjd pjdlog_common(LOG_ERR, 0, error, 167217737Spjd "Unable to prepare control header"); 168204076Spjd goto end; 169204076Spjd } 170204076Spjd if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 171204076Spjd error = errno; 172217737Spjd pjdlog_errno(LOG_ERR, "Unable to send control header"); 173204076Spjd goto end; 174204076Spjd } 175204076Spjd 176204076Spjd /* 177204076Spjd * Receive response. 178204076Spjd */ 179204076Spjd if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 180204076Spjd error = errno; 181217737Spjd pjdlog_errno(LOG_ERR, "Unable to receive control header"); 182204076Spjd goto end; 183204076Spjd } 184204076Spjd 185217730Spjd error = nv_get_int16(cnvin, "error"); 186204076Spjd if (error != 0) 187204076Spjd goto end; 188204076Spjd 189204076Spjd if ((str = nv_get_string(cnvin, "status")) == NULL) { 190204076Spjd error = ENOENT; 191217737Spjd pjdlog_errno(LOG_ERR, "Field 'status' is missing."); 192204076Spjd goto end; 193204076Spjd } 194204076Spjd nv_add_string(nvout, str, "status%u", no); 195204076Spjd nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 196204076Spjd nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 197204076Spjd "extentsize%u", no); 198204076Spjd nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 199204076Spjd "keepdirty%u", no); 200222228Spjd nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_read"), 201222228Spjd "stat_read%u", no); 202222228Spjd nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_write"), 203222228Spjd "stat_write%u", no); 204222228Spjd nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_delete"), 205222228Spjd "stat_delete%u", no); 206222228Spjd nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_flush"), 207222228Spjd "stat_flush%u", no); 208222228Spjd nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_activemap_update"), 209222228Spjd "stat_activemap_update%u", no); 210204076Spjdend: 211204076Spjd if (cnvin != NULL) 212204076Spjd nv_free(cnvin); 213204076Spjd if (cnvout != NULL) 214204076Spjd nv_free(cnvout); 215204076Spjd if (error != 0) 216204076Spjd nv_add_int16(nvout, error, "error"); 217204076Spjd} 218204076Spjd 219204076Spjdstatic void 220204076Spjdcontrol_status(struct hastd_config *cfg, struct nv *nvout, 221204076Spjd struct hast_resource *res, const char *name, unsigned int no) 222204076Spjd{ 223204076Spjd 224229509Strociny PJDLOG_ASSERT(cfg != NULL); 225229509Strociny PJDLOG_ASSERT(nvout != NULL); 226229509Strociny PJDLOG_ASSERT(name != NULL); 227204076Spjd 228204076Spjd /* Name is always needed. */ 229204076Spjd nv_add_string(nvout, name, "resource%u", no); 230204076Spjd 231204076Spjd if (res == NULL) { 232204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 233204076Spjd if (strcmp(res->hr_name, name) == 0) 234204076Spjd break; 235204076Spjd } 236204076Spjd if (res == NULL) { 237204076Spjd nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 238204076Spjd return; 239204076Spjd } 240204076Spjd } 241229509Strociny PJDLOG_ASSERT(res != NULL); 242204076Spjd nv_add_string(nvout, res->hr_provname, "provname%u", no); 243204076Spjd nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 244204076Spjd nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 245219818Spjd if (res->hr_sourceaddr[0] != '\0') 246219818Spjd nv_add_string(nvout, res->hr_sourceaddr, "sourceaddr%u", no); 247204076Spjd switch (res->hr_replication) { 248204076Spjd case HAST_REPLICATION_FULLSYNC: 249204076Spjd nv_add_string(nvout, "fullsync", "replication%u", no); 250204076Spjd break; 251204076Spjd case HAST_REPLICATION_MEMSYNC: 252204076Spjd nv_add_string(nvout, "memsync", "replication%u", no); 253204076Spjd break; 254204076Spjd case HAST_REPLICATION_ASYNC: 255204076Spjd nv_add_string(nvout, "async", "replication%u", no); 256204076Spjd break; 257204076Spjd default: 258204076Spjd nv_add_string(nvout, "unknown", "replication%u", no); 259204076Spjd break; 260204076Spjd } 261219351Spjd nv_add_string(nvout, checksum_name(res->hr_checksum), 262219351Spjd "checksum%u", no); 263219354Spjd nv_add_string(nvout, compression_name(res->hr_compression), 264219354Spjd "compression%u", no); 265204076Spjd nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 266204076Spjd 267204076Spjd switch (res->hr_role) { 268204076Spjd case HAST_ROLE_PRIMARY: 269229509Strociny PJDLOG_ASSERT(res->hr_workerpid != 0); 270204076Spjd /* FALLTHROUGH */ 271204076Spjd case HAST_ROLE_SECONDARY: 272204076Spjd if (res->hr_workerpid != 0) 273204076Spjd break; 274204076Spjd /* FALLTHROUGH */ 275204076Spjd default: 276204076Spjd return; 277204076Spjd } 278204076Spjd 279204076Spjd /* 280204076Spjd * If we are here, it means that we have a worker process, which we 281204076Spjd * want to ask some questions. 282204076Spjd */ 283204076Spjd control_status_worker(res, nvout, no); 284204076Spjd} 285204076Spjd 286204076Spjdvoid 287204076Spjdcontrol_handle(struct hastd_config *cfg) 288204076Spjd{ 289204076Spjd struct proto_conn *conn; 290204076Spjd struct nv *nvin, *nvout; 291204076Spjd unsigned int ii; 292204076Spjd const char *str; 293204076Spjd uint8_t cmd, role; 294204076Spjd int error; 295204076Spjd 296204076Spjd if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 297204076Spjd pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 298204076Spjd return; 299204076Spjd } 300204076Spjd 301217969Spjd cfg->hc_controlin = conn; 302204076Spjd nvin = nvout = NULL; 303204076Spjd role = HAST_ROLE_UNDEF; 304204076Spjd 305204076Spjd if (hast_proto_recv_hdr(conn, &nvin) < 0) { 306204076Spjd pjdlog_errno(LOG_ERR, "Unable to receive control header"); 307204076Spjd nvin = NULL; 308204076Spjd goto close; 309204076Spjd } 310204076Spjd 311204076Spjd /* Obtain command code. 0 means that nv_get_uint8() failed. */ 312204076Spjd cmd = nv_get_uint8(nvin, "cmd"); 313204076Spjd if (cmd == 0) { 314204076Spjd pjdlog_error("Control header is missing 'cmd' field."); 315204076Spjd goto close; 316204076Spjd } 317204076Spjd 318204076Spjd /* Allocate outgoing nv structure. */ 319204076Spjd nvout = nv_alloc(); 320204076Spjd if (nvout == NULL) { 321204076Spjd pjdlog_error("Unable to allocate header for control response."); 322204076Spjd goto close; 323204076Spjd } 324204076Spjd 325204076Spjd error = 0; 326204076Spjd 327204076Spjd str = nv_get_string(nvin, "resource0"); 328204076Spjd if (str == NULL) { 329204076Spjd pjdlog_error("Control header is missing 'resource0' field."); 330204076Spjd error = EHAST_INVALID; 331204076Spjd goto fail; 332204076Spjd } 333221075Strociny if (cmd == HASTCTL_CMD_SETROLE) { 334204076Spjd role = nv_get_uint8(nvin, "role"); 335204076Spjd switch (role) { 336219833Spjd case HAST_ROLE_INIT: 337204076Spjd case HAST_ROLE_PRIMARY: 338204076Spjd case HAST_ROLE_SECONDARY: 339204076Spjd break; 340204076Spjd default: 341204076Spjd pjdlog_error("Invalid role received (%hhu).", role); 342204076Spjd error = EHAST_INVALID; 343204076Spjd goto fail; 344204076Spjd } 345204076Spjd } 346204076Spjd if (strcmp(str, "all") == 0) { 347204076Spjd struct hast_resource *res; 348204076Spjd 349204076Spjd /* All configured resources. */ 350204076Spjd 351204076Spjd ii = 0; 352204076Spjd TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 353204076Spjd switch (cmd) { 354221075Strociny case HASTCTL_CMD_SETROLE: 355210882Spjd control_set_role_common(cfg, nvout, role, res, 356204076Spjd res->hr_name, ii++); 357204076Spjd break; 358221075Strociny case HASTCTL_CMD_STATUS: 359204076Spjd control_status(cfg, nvout, res, res->hr_name, 360204076Spjd ii++); 361204076Spjd break; 362204076Spjd default: 363204076Spjd pjdlog_error("Invalid command received (%hhu).", 364204076Spjd cmd); 365204076Spjd error = EHAST_UNIMPLEMENTED; 366204076Spjd goto fail; 367204076Spjd } 368204076Spjd } 369204076Spjd } else { 370204076Spjd /* Only selected resources. */ 371204076Spjd 372204076Spjd for (ii = 0; ; ii++) { 373204076Spjd str = nv_get_string(nvin, "resource%u", ii); 374204076Spjd if (str == NULL) 375204076Spjd break; 376204076Spjd switch (cmd) { 377221075Strociny case HASTCTL_CMD_SETROLE: 378210882Spjd control_set_role_common(cfg, nvout, role, NULL, 379210882Spjd str, ii); 380204076Spjd break; 381221075Strociny case HASTCTL_CMD_STATUS: 382204076Spjd control_status(cfg, nvout, NULL, str, ii); 383204076Spjd break; 384204076Spjd default: 385204076Spjd pjdlog_error("Invalid command received (%hhu).", 386204076Spjd cmd); 387204076Spjd error = EHAST_UNIMPLEMENTED; 388204076Spjd goto fail; 389204076Spjd } 390204076Spjd } 391204076Spjd } 392204076Spjd if (nv_error(nvout) != 0) 393204076Spjd goto close; 394204076Spjdfail: 395204076Spjd if (error != 0) 396204076Spjd nv_add_int16(nvout, error, "error"); 397204076Spjd 398204076Spjd if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 399204076Spjd pjdlog_errno(LOG_ERR, "Unable to send control response"); 400204076Spjdclose: 401204076Spjd if (nvin != NULL) 402204076Spjd nv_free(nvin); 403204076Spjd if (nvout != NULL) 404204076Spjd nv_free(nvout); 405204076Spjd proto_close(conn); 406217969Spjd cfg->hc_controlin = NULL; 407204076Spjd} 408204076Spjd 409204076Spjd/* 410204076Spjd * Thread handles control requests from the parent. 411204076Spjd */ 412204076Spjdvoid * 413204076Spjdctrl_thread(void *arg) 414204076Spjd{ 415204076Spjd struct hast_resource *res = arg; 416204076Spjd struct nv *nvin, *nvout; 417204076Spjd uint8_t cmd; 418204076Spjd 419204076Spjd for (;;) { 420204076Spjd if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 421204076Spjd if (sigexit_received) 422204076Spjd pthread_exit(NULL); 423204076Spjd pjdlog_errno(LOG_ERR, 424204076Spjd "Unable to receive control message"); 425213004Spjd kill(getpid(), SIGTERM); 426213004Spjd pthread_exit(NULL); 427204076Spjd } 428204076Spjd cmd = nv_get_uint8(nvin, "cmd"); 429204076Spjd if (cmd == 0) { 430204076Spjd pjdlog_error("Control message is missing 'cmd' field."); 431204076Spjd nv_free(nvin); 432204076Spjd continue; 433204076Spjd } 434204076Spjd nvout = nv_alloc(); 435204076Spjd switch (cmd) { 436221076Strociny case CONTROL_STATUS: 437204076Spjd if (res->hr_remotein != NULL && 438204076Spjd res->hr_remoteout != NULL) { 439204076Spjd nv_add_string(nvout, "complete", "status"); 440204076Spjd } else { 441204076Spjd nv_add_string(nvout, "degraded", "status"); 442204076Spjd } 443204076Spjd nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 444204076Spjd "extentsize"); 445204076Spjd if (res->hr_role == HAST_ROLE_PRIMARY) { 446204076Spjd nv_add_uint32(nvout, 447204076Spjd (uint32_t)res->hr_keepdirty, "keepdirty"); 448204076Spjd nv_add_uint64(nvout, 449204076Spjd (uint64_t)(activemap_ndirty(res->hr_amp) * 450204076Spjd res->hr_extentsize), "dirty"); 451204076Spjd } else { 452204076Spjd nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 453204076Spjd nv_add_uint64(nvout, (uint64_t)0, "dirty"); 454204076Spjd } 455222228Spjd nv_add_uint64(nvout, res->hr_stat_read, "stat_read"); 456222228Spjd nv_add_uint64(nvout, res->hr_stat_write, "stat_write"); 457222228Spjd nv_add_uint64(nvout, res->hr_stat_delete, 458222228Spjd "stat_delete"); 459222228Spjd nv_add_uint64(nvout, res->hr_stat_flush, "stat_flush"); 460222228Spjd nv_add_uint64(nvout, res->hr_stat_activemap_update, 461222228Spjd "stat_activemap_update"); 462217784Spjd nv_add_int16(nvout, 0, "error"); 463204076Spjd break; 464221076Strociny case CONTROL_RELOAD: 465217784Spjd /* 466217784Spjd * When parent receives SIGHUP and discovers that 467217784Spjd * something related to us has changes, it sends reload 468217784Spjd * message to us. 469217784Spjd */ 470229509Strociny PJDLOG_ASSERT(res->hr_role == HAST_ROLE_PRIMARY); 471217784Spjd primary_config_reload(res, nvin); 472217784Spjd nv_add_int16(nvout, 0, "error"); 473217784Spjd break; 474204076Spjd default: 475204076Spjd nv_add_int16(nvout, EINVAL, "error"); 476204076Spjd break; 477204076Spjd } 478217784Spjd nv_free(nvin); 479204076Spjd if (nv_error(nvout) != 0) { 480204076Spjd pjdlog_error("Unable to create answer on control message."); 481204076Spjd nv_free(nvout); 482204076Spjd continue; 483204076Spjd } 484204076Spjd if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 485204076Spjd pjdlog_errno(LOG_ERR, 486204076Spjd "Unable to send reply to control message"); 487204076Spjd } 488204076Spjd nv_free(nvout); 489204076Spjd } 490204076Spjd /* NOTREACHED */ 491204076Spjd return (NULL); 492204076Spjd} 493