control.c revision 217730
1121992Sjhb/*-
2121992Sjhb * Copyright (c) 2009-2010 The FreeBSD Foundation
3121992Sjhb * All rights reserved.
4121992Sjhb *
5121992Sjhb * This software was developed by Pawel Jakub Dawidek under sponsorship from
6121992Sjhb * the FreeBSD Foundation.
7121992Sjhb *
8121992Sjhb * Redistribution and use in source and binary forms, with or without
9121992Sjhb * modification, are permitted provided that the following conditions
10121992Sjhb * are met:
11121992Sjhb * 1. Redistributions of source code must retain the above copyright
12121992Sjhb *    notice, this list of conditions and the following disclaimer.
13121992Sjhb * 2. Redistributions in binary form must reproduce the above copyright
14121992Sjhb *    notice, this list of conditions and the following disclaimer in the
15121992Sjhb *    documentation and/or other materials provided with the distribution.
16121992Sjhb *
17121992Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18121992Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19121992Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20121992Sjhb * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21121992Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22121992Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23121992Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24121992Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25121992Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26121992Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27121992Sjhb * SUCH DAMAGE.
28121992Sjhb */
29121992Sjhb
30121992Sjhb#include <sys/cdefs.h>
31121992Sjhb__FBSDID("$FreeBSD: head/sbin/hastd/control.c 217730 2011-01-22 22:33:27Z pjd $");
32121992Sjhb
33121992Sjhb#include <sys/types.h>
34278749Skib#include <sys/wait.h>
35121992Sjhb
36121992Sjhb#include <assert.h>
37121992Sjhb#include <errno.h>
38121992Sjhb#include <pthread.h>
39121992Sjhb#include <signal.h>
40214631Sjhb#include <stdio.h>
41121992Sjhb#include <string.h>
42261087Sjhb#include <unistd.h>
43278473Skib
44278749Skib#include "hast.h"
45121992Sjhb#include "hastd.h"
46193530Sjkim#include "hast_proto.h"
47193530Sjkim#include "hooks.h"
48193530Sjkim#include "nv.h"
49121992Sjhb#include "pjdlog.h"
50121992Sjhb#include "proto.h"
51121992Sjhb#include "subr.h"
52121992Sjhb
53233623Sjhb#include "control.h"
54121992Sjhb
55121992Sjhbvoid
56233623Sjhbchild_cleanup(struct hast_resource *res)
57121992Sjhb{
58233623Sjhb
59121992Sjhb	proto_close(res->hr_ctrl);
60129960Sjhb	res->hr_ctrl = NULL;
61233623Sjhb	if (res->hr_event != NULL) {
62121992Sjhb		proto_close(res->hr_event);
63269512Sroyger		res->hr_event = NULL;
64167814Sjkim	}
65121992Sjhb	res->hr_workerpid = 0;
66121992Sjhb}
67121992Sjhb
68227293Sedstatic void
69121992Sjhbcontrol_set_role_common(struct hastd_config *cfg, struct nv *nvout,
70167814Sjkim    uint8_t role, struct hast_resource *res, const char *name, unsigned int no)
71167814Sjkim{
72121992Sjhb	int oldrole;
73121992Sjhb
74167814Sjkim	/* Name is always needed. */
75167814Sjkim	if (name != NULL)
76167814Sjkim		nv_add_string(nvout, name, "resource%u", no);
77167814Sjkim
78167814Sjkim	if (res == NULL) {
79167814Sjkim		assert(cfg != NULL);
80167814Sjkim		assert(name != NULL);
81121992Sjhb
82121992Sjhb		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
83167814Sjkim			if (strcmp(res->hr_name, name) == 0)
84167814Sjkim				break;
85121992Sjhb		}
86121992Sjhb		if (res == NULL) {
87121992Sjhb			nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
88197439Sjhb			return;
89121992Sjhb		}
90121992Sjhb	}
91121992Sjhb	assert(res != NULL);
92121992Sjhb
93121992Sjhb	/* Send previous role back. */
94121992Sjhb	nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
95121992Sjhb
96121992Sjhb	/* Nothing changed, return here. */
97121992Sjhb	if (role == res->hr_role)
98121992Sjhb		return;
99121992Sjhb
100121992Sjhb	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
101121992Sjhb	pjdlog_info("Role changed to %s.", role2str(role));
102121992Sjhb
103121992Sjhb	/* Change role to the new one. */
104121992Sjhb	oldrole = res->hr_role;
105197439Sjhb	res->hr_role = role;
106197439Sjhb	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
107121992Sjhb
108269511Sroyger	/*
109121992Sjhb	 * If previous role was primary or secondary we have to kill process
110121992Sjhb	 * doing that work.
111121992Sjhb	 */
112121992Sjhb	if (res->hr_workerpid != 0) {
113121992Sjhb		if (kill(res->hr_workerpid, SIGTERM) < 0) {
114121992Sjhb			pjdlog_errno(LOG_WARNING,
115121992Sjhb			    "Unable to kill worker process %u",
116121992Sjhb			    (unsigned int)res->hr_workerpid);
117121992Sjhb		} else if (waitpid(res->hr_workerpid, NULL, 0) !=
118197439Sjhb		    res->hr_workerpid) {
119197439Sjhb			pjdlog_errno(LOG_WARNING,
120121992Sjhb			    "Error while waiting for worker process %u",
121121992Sjhb			    (unsigned int)res->hr_workerpid);
122197439Sjhb		} else {
123121992Sjhb			pjdlog_debug(1, "Worker process %u stopped.",
124121992Sjhb			    (unsigned int)res->hr_workerpid);
125121992Sjhb		}
126121992Sjhb		child_cleanup(res);
127121992Sjhb	}
128121992Sjhb
129121992Sjhb	/* Start worker process if we are changing to primary. */
130121992Sjhb	if (role == HAST_ROLE_PRIMARY)
131121992Sjhb		hastd_primary(res);
132121992Sjhb	pjdlog_prefix_set("%s", "");
133278473Skib	hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole),
134278473Skib	    role2str(res->hr_role), NULL);
135278749Skib}
136121992Sjhb
/*
 * Change the role of an already known resource.
 *
 * Thin wrapper around control_set_role_common() for callers that hold the
 * resource itself: no configuration, no reply structure and no resource
 * name are passed, and the entry index is 0.
 */
void
control_set_role(struct hast_resource *res, uint8_t role)
{
	struct hastd_config *const nocfg = NULL;
	struct nv *const noreply = NULL;
	const char *const noname = NULL;

	control_set_role_common(nocfg, noreply, role, res, noname, 0);
}
143278473Skib
144278473Skibstatic void
145278473Skibcontrol_status_worker(struct hast_resource *res, struct nv *nvout,
146278473Skib    unsigned int no)
147278473Skib{
148278473Skib	struct nv *cnvin, *cnvout;
149278473Skib	const char *str;
150278473Skib	int error;
151278473Skib
152278749Skib	cnvin = cnvout = NULL;
153278749Skib	error = 0;
154278749Skib
155278749Skib	/*
156278749Skib	 * Prepare and send command to worker process.
157278749Skib	 */
158278749Skib	cnvout = nv_alloc();
159278749Skib	nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd");
160278749Skib	error = nv_error(cnvout);
161279286Skib	if (error != 0) {
162279286Skib		/* LOG */
163278749Skib		goto end;
164278473Skib	}
165278473Skib	if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) {
166278473Skib		error = errno;
167167814Sjkim		/* LOG */
168121992Sjhb		goto end;
169167814Sjkim	}
170167814Sjkim
171121992Sjhb	/*
172121992Sjhb	 * Receive response.
173121992Sjhb	 */
174121992Sjhb	if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) {
175121992Sjhb		error = errno;
176121992Sjhb		/* LOG */
177121992Sjhb		goto end;
178121992Sjhb	}
179121992Sjhb
180125048Sjhb	error = nv_get_int16(cnvin, "error");
181121992Sjhb	if (error != 0)
182121992Sjhb		goto end;
183121992Sjhb
184121992Sjhb	if ((str = nv_get_string(cnvin, "status")) == NULL) {
185128930Sjhb		error = ENOENT;
186128930Sjhb		/* LOG */
187121992Sjhb		goto end;
188121992Sjhb	}
189125048Sjhb	nv_add_string(nvout, str, "status%u", no);
190125048Sjhb	nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no);
191125048Sjhb	nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"),
192125048Sjhb	    "extentsize%u", no);
193125048Sjhb	nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"),
194125048Sjhb	    "keepdirty%u", no);
195125048Sjhbend:
196125048Sjhb	if (cnvin != NULL)
197233623Sjhb		nv_free(cnvin);
198233623Sjhb	if (cnvout != NULL)
199233623Sjhb		nv_free(cnvout);
200233623Sjhb	if (error != 0)
201121992Sjhb		nv_add_int16(nvout, error, "error");
202121992Sjhb}
203121992Sjhb
204121992Sjhbstatic void
205121992Sjhbcontrol_status(struct hastd_config *cfg, struct nv *nvout,
206121992Sjhb    struct hast_resource *res, const char *name, unsigned int no)
207128930Sjhb{
208128930Sjhb
209128930Sjhb	assert(cfg != NULL);
210128930Sjhb	assert(nvout != NULL);
211128930Sjhb	assert(name != NULL);
212167814Sjkim
213167814Sjkim	/* Name is always needed. */
214167814Sjkim	nv_add_string(nvout, name, "resource%u", no);
215167814Sjkim
216128930Sjhb	if (res == NULL) {
217128930Sjhb		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
218128930Sjhb			if (strcmp(res->hr_name, name) == 0)
219128930Sjhb				break;
220128930Sjhb		}
221128930Sjhb		if (res == NULL) {
222128930Sjhb			nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
223128930Sjhb			return;
224121992Sjhb		}
225169395Sjhb	}
226121992Sjhb	assert(res != NULL);
227121992Sjhb	nv_add_string(nvout, res->hr_provname, "provname%u", no);
228121992Sjhb	nv_add_string(nvout, res->hr_localpath, "localpath%u", no);
229121992Sjhb	nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no);
230121992Sjhb	switch (res->hr_replication) {
231121992Sjhb	case HAST_REPLICATION_FULLSYNC:
232233623Sjhb		nv_add_string(nvout, "fullsync", "replication%u", no);
233233623Sjhb		break;
234233623Sjhb	case HAST_REPLICATION_MEMSYNC:
235121992Sjhb		nv_add_string(nvout, "memsync", "replication%u", no);
236121992Sjhb		break;
237121992Sjhb	case HAST_REPLICATION_ASYNC:
238121992Sjhb		nv_add_string(nvout, "async", "replication%u", no);
239121992Sjhb		break;
240121992Sjhb	default:
241121992Sjhb		nv_add_string(nvout, "unknown", "replication%u", no);
242121992Sjhb		break;
243121992Sjhb	}
244215009Sjhb	nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
245121992Sjhb
246121992Sjhb	switch (res->hr_role) {
247121992Sjhb	case HAST_ROLE_PRIMARY:
248121992Sjhb		assert(res->hr_workerpid != 0);
249121992Sjhb		/* FALLTHROUGH */
250197439Sjhb	case HAST_ROLE_SECONDARY:
251121992Sjhb		if (res->hr_workerpid != 0)
252121992Sjhb			break;
253197439Sjhb		/* FALLTHROUGH */
254197439Sjhb	default:
255121992Sjhb		return;
256121992Sjhb	}
257121992Sjhb
258167814Sjkim	/*
259121992Sjhb	 * If we are here, it means that we have a worker process, which we
260167814Sjkim	 * want to ask some questions.
261121992Sjhb	 */
262121992Sjhb	control_status_worker(res, nvout, no);
263121992Sjhb}
264167814Sjkim
265121992Sjhbvoid
266121992Sjhbcontrol_handle(struct hastd_config *cfg)
267121992Sjhb{
268121992Sjhb	struct proto_conn *conn;
269121992Sjhb	struct nv *nvin, *nvout;
270167814Sjkim	unsigned int ii;
271121992Sjhb	const char *str;
272167814Sjkim	uint8_t cmd, role;
273167814Sjkim	int error;
274167814Sjkim
275167814Sjkim	if (proto_accept(cfg->hc_controlconn, &conn) < 0) {
276167814Sjkim		pjdlog_errno(LOG_ERR, "Unable to accept control connection");
277130310Sjhb		return;
278169395Sjhb	}
279167814Sjkim
280167814Sjkim	nvin = nvout = NULL;
281130310Sjhb	role = HAST_ROLE_UNDEF;
282167814Sjkim
283130310Sjhb	if (hast_proto_recv_hdr(conn, &nvin) < 0) {
284129960Sjhb		pjdlog_errno(LOG_ERR, "Unable to receive control header");
285167814Sjkim		nvin = NULL;
286121992Sjhb		goto close;
287121992Sjhb	}
288121992Sjhb
289121992Sjhb	/* Obtain command code. 0 means that nv_get_uint8() failed. */
290121992Sjhb	cmd = nv_get_uint8(nvin, "cmd");
291121992Sjhb	if (cmd == 0) {
292121992Sjhb		pjdlog_error("Control header is missing 'cmd' field.");
293121992Sjhb		error = EHAST_INVALID;
294121992Sjhb		goto close;
295167814Sjkim	}
296121992Sjhb
297167814Sjkim	/* Allocate outgoing nv structure. */
298121992Sjhb	nvout = nv_alloc();
299121992Sjhb	if (nvout == NULL) {
300167814Sjkim		pjdlog_error("Unable to allocate header for control response.");
301167814Sjkim		error = EHAST_NOMEMORY;
302121992Sjhb		goto close;
303167814Sjkim	}
304167814Sjkim
305167814Sjkim	error = 0;
306123326Snjl
307169395Sjhb	str = nv_get_string(nvin, "resource0");
308167814Sjkim	if (str == NULL) {
309167814Sjkim		pjdlog_error("Control header is missing 'resource0' field.");
310167814Sjkim		error = EHAST_INVALID;
311167814Sjkim		goto fail;
312189404Sjhb	}
313189404Sjhb	if (cmd == HASTCTL_SET_ROLE) {
314189404Sjhb		role = nv_get_uint8(nvin, "role");
315189404Sjhb		switch (role) {
316167814Sjkim		case HAST_ROLE_INIT:	/* Is that valid to set, hmm? */
317167814Sjkim		case HAST_ROLE_PRIMARY:
318167814Sjkim		case HAST_ROLE_SECONDARY:
319121992Sjhb			break;
320121992Sjhb		default:
321121992Sjhb			pjdlog_error("Invalid role received (%hhu).", role);
322121992Sjhb			error = EHAST_INVALID;
323121992Sjhb			goto fail;
324121992Sjhb		}
325121992Sjhb	}
326129128Sjhb	if (strcmp(str, "all") == 0) {
327129128Sjhb		struct hast_resource *res;
328128930Sjhb
329129128Sjhb		/* All configured resources. */
330121992Sjhb
331128930Sjhb		ii = 0;
332167814Sjkim		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
333121992Sjhb			switch (cmd) {
334121992Sjhb			case HASTCTL_SET_ROLE:
335167814Sjkim				control_set_role_common(cfg, nvout, role, res,
336263859Stakawata				    res->hr_name, ii++);
337269184Sakiyama				break;
338263859Stakawata			case HASTCTL_STATUS:
339167814Sjkim				control_status(cfg, nvout, res, res->hr_name,
340167814Sjkim				    ii++);
341128930Sjhb				break;
342128930Sjhb			default:
343128930Sjhb				pjdlog_error("Invalid command received (%hhu).",
344167814Sjkim				    cmd);
345128930Sjhb				error = EHAST_UNIMPLEMENTED;
346167814Sjkim				goto fail;
347263859Stakawata			}
348121992Sjhb		}
349121992Sjhb	} else {
350121992Sjhb		/* Only selected resources. */
351128930Sjhb
352167814Sjkim		for (ii = 0; ; ii++) {
353121992Sjhb			str = nv_get_string(nvin, "resource%u", ii);
354121992Sjhb			if (str == NULL)
355167814Sjkim				break;
356263859Stakawata			switch (cmd) {
357269184Sakiyama			case HASTCTL_SET_ROLE:
358263859Stakawata				control_set_role_common(cfg, nvout, role, NULL,
359167814Sjkim				    str, ii);
360167814Sjkim				break;
361128930Sjhb			case HASTCTL_STATUS:
362128930Sjhb				control_status(cfg, nvout, NULL, str, ii);
363128930Sjhb				break;
364167814Sjkim			default:
365128930Sjhb				pjdlog_error("Invalid command received (%hhu).",
366167814Sjkim				    cmd);
367263859Stakawata				error = EHAST_UNIMPLEMENTED;
368121992Sjhb				goto fail;
369121992Sjhb			}
370121992Sjhb		}
371121992Sjhb	}
372121992Sjhb	if (nv_error(nvout) != 0)
373121992Sjhb		goto close;
374121992Sjhbfail:
375121992Sjhb	if (error != 0)
376121992Sjhb		nv_add_int16(nvout, error, "error");
377129960Sjhb
378121992Sjhb	if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0)
379169395Sjhb		pjdlog_errno(LOG_ERR, "Unable to send control response");
380130310Sjhbclose:
381129960Sjhb	if (nvin != NULL)
382129960Sjhb		nv_free(nvin);
383129960Sjhb	if (nvout != NULL)
384129960Sjhb		nv_free(nvout);
385130310Sjhb	proto_close(conn);
386129960Sjhb}
387129960Sjhb
388121992Sjhb/*
389121992Sjhb * Thread handles control requests from the parent.
390121992Sjhb */
391121992Sjhbvoid *
392121992Sjhbctrl_thread(void *arg)
393121992Sjhb{
394121992Sjhb	struct hast_resource *res = arg;
395121992Sjhb	struct nv *nvin, *nvout;
396121992Sjhb	uint8_t cmd;
397121992Sjhb
398121992Sjhb	for (;;) {
399121992Sjhb		if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) {
400169395Sjhb			if (sigexit_received)
401121992Sjhb				pthread_exit(NULL);
402121992Sjhb			pjdlog_errno(LOG_ERR,
403121992Sjhb			    "Unable to receive control message");
404121992Sjhb			kill(getpid(), SIGTERM);
405121992Sjhb			pthread_exit(NULL);
406121992Sjhb		}
407121992Sjhb		cmd = nv_get_uint8(nvin, "cmd");
408121992Sjhb		if (cmd == 0) {
409121992Sjhb			pjdlog_error("Control message is missing 'cmd' field.");
410121992Sjhb			nv_free(nvin);
411121992Sjhb			continue;
412121992Sjhb		}
413121992Sjhb		nv_free(nvin);
414121992Sjhb		nvout = nv_alloc();
415121992Sjhb		switch (cmd) {
416121992Sjhb		case HASTCTL_STATUS:
417121992Sjhb			if (res->hr_remotein != NULL &&
418269512Sroyger			    res->hr_remoteout != NULL) {
419269512Sroyger				nv_add_string(nvout, "complete", "status");
420269512Sroyger			} else {
421121992Sjhb				nv_add_string(nvout, "degraded", "status");
422269512Sroyger			}
423128930Sjhb			nv_add_uint32(nvout, (uint32_t)res->hr_extentsize,
424121992Sjhb			    "extentsize");
425269512Sroyger			if (res->hr_role == HAST_ROLE_PRIMARY) {
426269512Sroyger				nv_add_uint32(nvout,
427121992Sjhb				    (uint32_t)res->hr_keepdirty, "keepdirty");
428142257Sjhb				nv_add_uint64(nvout,
429167814Sjkim				    (uint64_t)(activemap_ndirty(res->hr_amp) *
430121992Sjhb				    res->hr_extentsize), "dirty");
431121992Sjhb			} else {
432125048Sjhb				nv_add_uint32(nvout, (uint32_t)0, "keepdirty");
433128930Sjhb				nv_add_uint64(nvout, (uint64_t)0, "dirty");
434128930Sjhb			}
435128930Sjhb			break;
436269512Sroyger		default:
437269512Sroyger			nv_add_int16(nvout, EINVAL, "error");
438269512Sroyger			break;
439128930Sjhb		}
440125048Sjhb		if (nv_error(nvout) != 0) {
441128329Sjhb			pjdlog_error("Unable to create answer on control message.");
442128329Sjhb			nv_free(nvout);
443125048Sjhb			continue;
444167814Sjkim		}
445128930Sjhb		if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) {
446128930Sjhb			pjdlog_errno(LOG_ERR,
447128930Sjhb			    "Unable to send reply to control message");
448269512Sroyger		}
449128930Sjhb		nv_free(nvout);
450269512Sroyger	}
451128930Sjhb	/* NOTREACHED */
452128930Sjhb	return (NULL);
453128930Sjhb}
454128930Sjhb