/* control.c revision 217730 */
1121992Sjhb/*- 2121992Sjhb * Copyright (c) 2009-2010 The FreeBSD Foundation 3121992Sjhb * All rights reserved. 4121992Sjhb * 5121992Sjhb * This software was developed by Pawel Jakub Dawidek under sponsorship from 6121992Sjhb * the FreeBSD Foundation. 7121992Sjhb * 8121992Sjhb * Redistribution and use in source and binary forms, with or without 9121992Sjhb * modification, are permitted provided that the following conditions 10121992Sjhb * are met: 11121992Sjhb * 1. Redistributions of source code must retain the above copyright 12121992Sjhb * notice, this list of conditions and the following disclaimer. 13121992Sjhb * 2. Redistributions in binary form must reproduce the above copyright 14121992Sjhb * notice, this list of conditions and the following disclaimer in the 15121992Sjhb * documentation and/or other materials provided with the distribution. 16121992Sjhb * 17121992Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18121992Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19121992Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20121992Sjhb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21121992Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22121992Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23121992Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24121992Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25121992Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26121992Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27121992Sjhb * SUCH DAMAGE. 
28121992Sjhb */ 29121992Sjhb 30121992Sjhb#include <sys/cdefs.h> 31121992Sjhb__FBSDID("$FreeBSD: head/sbin/hastd/control.c 217730 2011-01-22 22:33:27Z pjd $"); 32121992Sjhb 33121992Sjhb#include <sys/types.h> 34278749Skib#include <sys/wait.h> 35121992Sjhb 36121992Sjhb#include <assert.h> 37121992Sjhb#include <errno.h> 38121992Sjhb#include <pthread.h> 39121992Sjhb#include <signal.h> 40214631Sjhb#include <stdio.h> 41121992Sjhb#include <string.h> 42261087Sjhb#include <unistd.h> 43278473Skib 44278749Skib#include "hast.h" 45121992Sjhb#include "hastd.h" 46193530Sjkim#include "hast_proto.h" 47193530Sjkim#include "hooks.h" 48193530Sjkim#include "nv.h" 49121992Sjhb#include "pjdlog.h" 50121992Sjhb#include "proto.h" 51121992Sjhb#include "subr.h" 52121992Sjhb 53233623Sjhb#include "control.h" 54121992Sjhb 55121992Sjhbvoid 56233623Sjhbchild_cleanup(struct hast_resource *res) 57121992Sjhb{ 58233623Sjhb 59121992Sjhb proto_close(res->hr_ctrl); 60129960Sjhb res->hr_ctrl = NULL; 61233623Sjhb if (res->hr_event != NULL) { 62121992Sjhb proto_close(res->hr_event); 63269512Sroyger res->hr_event = NULL; 64167814Sjkim } 65121992Sjhb res->hr_workerpid = 0; 66121992Sjhb} 67121992Sjhb 68227293Sedstatic void 69121992Sjhbcontrol_set_role_common(struct hastd_config *cfg, struct nv *nvout, 70167814Sjkim uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 71167814Sjkim{ 72121992Sjhb int oldrole; 73121992Sjhb 74167814Sjkim /* Name is always needed. 
*/ 75167814Sjkim if (name != NULL) 76167814Sjkim nv_add_string(nvout, name, "resource%u", no); 77167814Sjkim 78167814Sjkim if (res == NULL) { 79167814Sjkim assert(cfg != NULL); 80167814Sjkim assert(name != NULL); 81121992Sjhb 82121992Sjhb TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 83167814Sjkim if (strcmp(res->hr_name, name) == 0) 84167814Sjkim break; 85121992Sjhb } 86121992Sjhb if (res == NULL) { 87121992Sjhb nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 88197439Sjhb return; 89121992Sjhb } 90121992Sjhb } 91121992Sjhb assert(res != NULL); 92121992Sjhb 93121992Sjhb /* Send previous role back. */ 94121992Sjhb nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 95121992Sjhb 96121992Sjhb /* Nothing changed, return here. */ 97121992Sjhb if (role == res->hr_role) 98121992Sjhb return; 99121992Sjhb 100121992Sjhb pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 101121992Sjhb pjdlog_info("Role changed to %s.", role2str(role)); 102121992Sjhb 103121992Sjhb /* Change role to the new one. */ 104121992Sjhb oldrole = res->hr_role; 105197439Sjhb res->hr_role = role; 106197439Sjhb pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 107121992Sjhb 108269511Sroyger /* 109121992Sjhb * If previous role was primary or secondary we have to kill process 110121992Sjhb * doing that work. 
111121992Sjhb */ 112121992Sjhb if (res->hr_workerpid != 0) { 113121992Sjhb if (kill(res->hr_workerpid, SIGTERM) < 0) { 114121992Sjhb pjdlog_errno(LOG_WARNING, 115121992Sjhb "Unable to kill worker process %u", 116121992Sjhb (unsigned int)res->hr_workerpid); 117121992Sjhb } else if (waitpid(res->hr_workerpid, NULL, 0) != 118197439Sjhb res->hr_workerpid) { 119197439Sjhb pjdlog_errno(LOG_WARNING, 120121992Sjhb "Error while waiting for worker process %u", 121121992Sjhb (unsigned int)res->hr_workerpid); 122197439Sjhb } else { 123121992Sjhb pjdlog_debug(1, "Worker process %u stopped.", 124121992Sjhb (unsigned int)res->hr_workerpid); 125121992Sjhb } 126121992Sjhb child_cleanup(res); 127121992Sjhb } 128121992Sjhb 129121992Sjhb /* Start worker process if we are changing to primary. */ 130121992Sjhb if (role == HAST_ROLE_PRIMARY) 131121992Sjhb hastd_primary(res); 132121992Sjhb pjdlog_prefix_set("%s", ""); 133278473Skib hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole), 134278473Skib role2str(res->hr_role), NULL); 135278749Skib} 136121992Sjhb 137161223Sjhbvoid 138278473Skibcontrol_set_role(struct hast_resource *res, uint8_t role) 139278473Skib{ 140278473Skib 141278473Skib control_set_role_common(NULL, NULL, role, res, NULL, 0); 142278473Skib} 143278473Skib 144278473Skibstatic void 145278473Skibcontrol_status_worker(struct hast_resource *res, struct nv *nvout, 146278473Skib unsigned int no) 147278473Skib{ 148278473Skib struct nv *cnvin, *cnvout; 149278473Skib const char *str; 150278473Skib int error; 151278473Skib 152278749Skib cnvin = cnvout = NULL; 153278749Skib error = 0; 154278749Skib 155278749Skib /* 156278749Skib * Prepare and send command to worker process. 
157278749Skib */ 158278749Skib cnvout = nv_alloc(); 159278749Skib nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd"); 160278749Skib error = nv_error(cnvout); 161279286Skib if (error != 0) { 162279286Skib /* LOG */ 163278749Skib goto end; 164278473Skib } 165278473Skib if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 166278473Skib error = errno; 167167814Sjkim /* LOG */ 168121992Sjhb goto end; 169167814Sjkim } 170167814Sjkim 171121992Sjhb /* 172121992Sjhb * Receive response. 173121992Sjhb */ 174121992Sjhb if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 175121992Sjhb error = errno; 176121992Sjhb /* LOG */ 177121992Sjhb goto end; 178121992Sjhb } 179121992Sjhb 180125048Sjhb error = nv_get_int16(cnvin, "error"); 181121992Sjhb if (error != 0) 182121992Sjhb goto end; 183121992Sjhb 184121992Sjhb if ((str = nv_get_string(cnvin, "status")) == NULL) { 185128930Sjhb error = ENOENT; 186128930Sjhb /* LOG */ 187121992Sjhb goto end; 188121992Sjhb } 189125048Sjhb nv_add_string(nvout, str, "status%u", no); 190125048Sjhb nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 191125048Sjhb nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 192125048Sjhb "extentsize%u", no); 193125048Sjhb nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 194125048Sjhb "keepdirty%u", no); 195125048Sjhbend: 196125048Sjhb if (cnvin != NULL) 197233623Sjhb nv_free(cnvin); 198233623Sjhb if (cnvout != NULL) 199233623Sjhb nv_free(cnvout); 200233623Sjhb if (error != 0) 201121992Sjhb nv_add_int16(nvout, error, "error"); 202121992Sjhb} 203121992Sjhb 204121992Sjhbstatic void 205121992Sjhbcontrol_status(struct hastd_config *cfg, struct nv *nvout, 206121992Sjhb struct hast_resource *res, const char *name, unsigned int no) 207128930Sjhb{ 208128930Sjhb 209128930Sjhb assert(cfg != NULL); 210128930Sjhb assert(nvout != NULL); 211128930Sjhb assert(name != NULL); 212167814Sjkim 213167814Sjkim /* Name is always needed. 
*/ 214167814Sjkim nv_add_string(nvout, name, "resource%u", no); 215167814Sjkim 216128930Sjhb if (res == NULL) { 217128930Sjhb TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 218128930Sjhb if (strcmp(res->hr_name, name) == 0) 219128930Sjhb break; 220128930Sjhb } 221128930Sjhb if (res == NULL) { 222128930Sjhb nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 223128930Sjhb return; 224121992Sjhb } 225169395Sjhb } 226121992Sjhb assert(res != NULL); 227121992Sjhb nv_add_string(nvout, res->hr_provname, "provname%u", no); 228121992Sjhb nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 229121992Sjhb nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 230121992Sjhb switch (res->hr_replication) { 231121992Sjhb case HAST_REPLICATION_FULLSYNC: 232233623Sjhb nv_add_string(nvout, "fullsync", "replication%u", no); 233233623Sjhb break; 234233623Sjhb case HAST_REPLICATION_MEMSYNC: 235121992Sjhb nv_add_string(nvout, "memsync", "replication%u", no); 236121992Sjhb break; 237121992Sjhb case HAST_REPLICATION_ASYNC: 238121992Sjhb nv_add_string(nvout, "async", "replication%u", no); 239121992Sjhb break; 240121992Sjhb default: 241121992Sjhb nv_add_string(nvout, "unknown", "replication%u", no); 242121992Sjhb break; 243121992Sjhb } 244215009Sjhb nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 245121992Sjhb 246121992Sjhb switch (res->hr_role) { 247121992Sjhb case HAST_ROLE_PRIMARY: 248121992Sjhb assert(res->hr_workerpid != 0); 249121992Sjhb /* FALLTHROUGH */ 250197439Sjhb case HAST_ROLE_SECONDARY: 251121992Sjhb if (res->hr_workerpid != 0) 252121992Sjhb break; 253197439Sjhb /* FALLTHROUGH */ 254197439Sjhb default: 255121992Sjhb return; 256121992Sjhb } 257121992Sjhb 258167814Sjkim /* 259121992Sjhb * If we are here, it means that we have a worker process, which we 260167814Sjkim * want to ask some questions. 
261121992Sjhb */ 262121992Sjhb control_status_worker(res, nvout, no); 263121992Sjhb} 264167814Sjkim 265121992Sjhbvoid 266121992Sjhbcontrol_handle(struct hastd_config *cfg) 267121992Sjhb{ 268121992Sjhb struct proto_conn *conn; 269121992Sjhb struct nv *nvin, *nvout; 270167814Sjkim unsigned int ii; 271121992Sjhb const char *str; 272167814Sjkim uint8_t cmd, role; 273167814Sjkim int error; 274167814Sjkim 275167814Sjkim if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 276167814Sjkim pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 277130310Sjhb return; 278169395Sjhb } 279167814Sjkim 280167814Sjkim nvin = nvout = NULL; 281130310Sjhb role = HAST_ROLE_UNDEF; 282167814Sjkim 283130310Sjhb if (hast_proto_recv_hdr(conn, &nvin) < 0) { 284129960Sjhb pjdlog_errno(LOG_ERR, "Unable to receive control header"); 285167814Sjkim nvin = NULL; 286121992Sjhb goto close; 287121992Sjhb } 288121992Sjhb 289121992Sjhb /* Obtain command code. 0 means that nv_get_uint8() failed. */ 290121992Sjhb cmd = nv_get_uint8(nvin, "cmd"); 291121992Sjhb if (cmd == 0) { 292121992Sjhb pjdlog_error("Control header is missing 'cmd' field."); 293121992Sjhb error = EHAST_INVALID; 294121992Sjhb goto close; 295167814Sjkim } 296121992Sjhb 297167814Sjkim /* Allocate outgoing nv structure. */ 298121992Sjhb nvout = nv_alloc(); 299121992Sjhb if (nvout == NULL) { 300167814Sjkim pjdlog_error("Unable to allocate header for control response."); 301167814Sjkim error = EHAST_NOMEMORY; 302121992Sjhb goto close; 303167814Sjkim } 304167814Sjkim 305167814Sjkim error = 0; 306123326Snjl 307169395Sjhb str = nv_get_string(nvin, "resource0"); 308167814Sjkim if (str == NULL) { 309167814Sjkim pjdlog_error("Control header is missing 'resource0' field."); 310167814Sjkim error = EHAST_INVALID; 311167814Sjkim goto fail; 312189404Sjhb } 313189404Sjhb if (cmd == HASTCTL_SET_ROLE) { 314189404Sjhb role = nv_get_uint8(nvin, "role"); 315189404Sjhb switch (role) { 316167814Sjkim case HAST_ROLE_INIT: /* Is that valid to set, hmm? 
*/ 317167814Sjkim case HAST_ROLE_PRIMARY: 318167814Sjkim case HAST_ROLE_SECONDARY: 319121992Sjhb break; 320121992Sjhb default: 321121992Sjhb pjdlog_error("Invalid role received (%hhu).", role); 322121992Sjhb error = EHAST_INVALID; 323121992Sjhb goto fail; 324121992Sjhb } 325121992Sjhb } 326129128Sjhb if (strcmp(str, "all") == 0) { 327129128Sjhb struct hast_resource *res; 328128930Sjhb 329129128Sjhb /* All configured resources. */ 330121992Sjhb 331128930Sjhb ii = 0; 332167814Sjkim TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 333121992Sjhb switch (cmd) { 334121992Sjhb case HASTCTL_SET_ROLE: 335167814Sjkim control_set_role_common(cfg, nvout, role, res, 336263859Stakawata res->hr_name, ii++); 337269184Sakiyama break; 338263859Stakawata case HASTCTL_STATUS: 339167814Sjkim control_status(cfg, nvout, res, res->hr_name, 340167814Sjkim ii++); 341128930Sjhb break; 342128930Sjhb default: 343128930Sjhb pjdlog_error("Invalid command received (%hhu).", 344167814Sjkim cmd); 345128930Sjhb error = EHAST_UNIMPLEMENTED; 346167814Sjkim goto fail; 347263859Stakawata } 348121992Sjhb } 349121992Sjhb } else { 350121992Sjhb /* Only selected resources. 
*/ 351128930Sjhb 352167814Sjkim for (ii = 0; ; ii++) { 353121992Sjhb str = nv_get_string(nvin, "resource%u", ii); 354121992Sjhb if (str == NULL) 355167814Sjkim break; 356263859Stakawata switch (cmd) { 357269184Sakiyama case HASTCTL_SET_ROLE: 358263859Stakawata control_set_role_common(cfg, nvout, role, NULL, 359167814Sjkim str, ii); 360167814Sjkim break; 361128930Sjhb case HASTCTL_STATUS: 362128930Sjhb control_status(cfg, nvout, NULL, str, ii); 363128930Sjhb break; 364167814Sjkim default: 365128930Sjhb pjdlog_error("Invalid command received (%hhu).", 366167814Sjkim cmd); 367263859Stakawata error = EHAST_UNIMPLEMENTED; 368121992Sjhb goto fail; 369121992Sjhb } 370121992Sjhb } 371121992Sjhb } 372121992Sjhb if (nv_error(nvout) != 0) 373121992Sjhb goto close; 374121992Sjhbfail: 375121992Sjhb if (error != 0) 376121992Sjhb nv_add_int16(nvout, error, "error"); 377129960Sjhb 378121992Sjhb if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 379169395Sjhb pjdlog_errno(LOG_ERR, "Unable to send control response"); 380130310Sjhbclose: 381129960Sjhb if (nvin != NULL) 382129960Sjhb nv_free(nvin); 383129960Sjhb if (nvout != NULL) 384129960Sjhb nv_free(nvout); 385130310Sjhb proto_close(conn); 386129960Sjhb} 387129960Sjhb 388121992Sjhb/* 389121992Sjhb * Thread handles control requests from the parent. 
390121992Sjhb */ 391121992Sjhbvoid * 392121992Sjhbctrl_thread(void *arg) 393121992Sjhb{ 394121992Sjhb struct hast_resource *res = arg; 395121992Sjhb struct nv *nvin, *nvout; 396121992Sjhb uint8_t cmd; 397121992Sjhb 398121992Sjhb for (;;) { 399121992Sjhb if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 400169395Sjhb if (sigexit_received) 401121992Sjhb pthread_exit(NULL); 402121992Sjhb pjdlog_errno(LOG_ERR, 403121992Sjhb "Unable to receive control message"); 404121992Sjhb kill(getpid(), SIGTERM); 405121992Sjhb pthread_exit(NULL); 406121992Sjhb } 407121992Sjhb cmd = nv_get_uint8(nvin, "cmd"); 408121992Sjhb if (cmd == 0) { 409121992Sjhb pjdlog_error("Control message is missing 'cmd' field."); 410121992Sjhb nv_free(nvin); 411121992Sjhb continue; 412121992Sjhb } 413121992Sjhb nv_free(nvin); 414121992Sjhb nvout = nv_alloc(); 415121992Sjhb switch (cmd) { 416121992Sjhb case HASTCTL_STATUS: 417121992Sjhb if (res->hr_remotein != NULL && 418269512Sroyger res->hr_remoteout != NULL) { 419269512Sroyger nv_add_string(nvout, "complete", "status"); 420269512Sroyger } else { 421121992Sjhb nv_add_string(nvout, "degraded", "status"); 422269512Sroyger } 423128930Sjhb nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 424121992Sjhb "extentsize"); 425269512Sroyger if (res->hr_role == HAST_ROLE_PRIMARY) { 426269512Sroyger nv_add_uint32(nvout, 427121992Sjhb (uint32_t)res->hr_keepdirty, "keepdirty"); 428142257Sjhb nv_add_uint64(nvout, 429167814Sjkim (uint64_t)(activemap_ndirty(res->hr_amp) * 430121992Sjhb res->hr_extentsize), "dirty"); 431121992Sjhb } else { 432125048Sjhb nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 433128930Sjhb nv_add_uint64(nvout, (uint64_t)0, "dirty"); 434128930Sjhb } 435128930Sjhb break; 436269512Sroyger default: 437269512Sroyger nv_add_int16(nvout, EINVAL, "error"); 438269512Sroyger break; 439128930Sjhb } 440125048Sjhb if (nv_error(nvout) != 0) { 441128329Sjhb pjdlog_error("Unable to create answer on control message."); 442128329Sjhb nv_free(nvout); 443125048Sjhb 
continue; 444167814Sjkim } 445128930Sjhb if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 446128930Sjhb pjdlog_errno(LOG_ERR, 447128930Sjhb "Unable to send reply to control message"); 448269512Sroyger } 449128930Sjhb nv_free(nvout); 450269512Sroyger } 451128930Sjhb /* NOTREACHED */ 452128930Sjhb return (NULL); 453128930Sjhb} 454128930Sjhb