control.c revision 219351
1/*-
2 * Copyright (c) 2009-2010 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Pawel Jakub Dawidek under sponsorship from
6 * the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sbin/hastd/control.c 219351 2011-03-06 22:56:14Z pjd $");
32
33#include <sys/types.h>
34#include <sys/wait.h>
35
36#include <assert.h>
37#include <errno.h>
38#include <pthread.h>
39#include <signal.h>
40#include <stdio.h>
41#include <string.h>
42#include <unistd.h>
43
44#include "hast.h"
45#include "hastd.h"
46#include "hast_checksum.h"
47#include "hast_proto.h"
48#include "hooks.h"
49#include "nv.h"
50#include "pjdlog.h"
51#include "proto.h"
52#include "subr.h"
53
54#include "control.h"
55
56void
57child_cleanup(struct hast_resource *res)
58{
59
60	proto_close(res->hr_ctrl);
61	res->hr_ctrl = NULL;
62	if (res->hr_event != NULL) {
63		proto_close(res->hr_event);
64		res->hr_event = NULL;
65	}
66	if (res->hr_conn != NULL) {
67		proto_close(res->hr_conn);
68		res->hr_conn = NULL;
69	}
70	res->hr_workerpid = 0;
71}
72
73static void
74control_set_role_common(struct hastd_config *cfg, struct nv *nvout,
75    uint8_t role, struct hast_resource *res, const char *name, unsigned int no)
76{
77	int oldrole;
78
79	/* Name is always needed. */
80	if (name != NULL)
81		nv_add_string(nvout, name, "resource%u", no);
82
83	if (res == NULL) {
84		assert(cfg != NULL);
85		assert(name != NULL);
86
87		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
88			if (strcmp(res->hr_name, name) == 0)
89				break;
90		}
91		if (res == NULL) {
92			nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
93			return;
94		}
95	}
96	assert(res != NULL);
97
98	/* Send previous role back. */
99	nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
100
101	/* Nothing changed, return here. */
102	if (role == res->hr_role)
103		return;
104
105	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
106	pjdlog_info("Role changed to %s.", role2str(role));
107
108	/* Change role to the new one. */
109	oldrole = res->hr_role;
110	res->hr_role = role;
111	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
112
113	/*
114	 * If previous role was primary or secondary we have to kill process
115	 * doing that work.
116	 */
117	if (res->hr_workerpid != 0) {
118		if (kill(res->hr_workerpid, SIGTERM) < 0) {
119			pjdlog_errno(LOG_WARNING,
120			    "Unable to kill worker process %u",
121			    (unsigned int)res->hr_workerpid);
122		} else if (waitpid(res->hr_workerpid, NULL, 0) !=
123		    res->hr_workerpid) {
124			pjdlog_errno(LOG_WARNING,
125			    "Error while waiting for worker process %u",
126			    (unsigned int)res->hr_workerpid);
127		} else {
128			pjdlog_debug(1, "Worker process %u stopped.",
129			    (unsigned int)res->hr_workerpid);
130		}
131		child_cleanup(res);
132	}
133
134	/* Start worker process if we are changing to primary. */
135	if (role == HAST_ROLE_PRIMARY)
136		hastd_primary(res);
137	pjdlog_prefix_set("%s", "");
138	hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole),
139	    role2str(res->hr_role), NULL);
140}
141
/*
 * Change the role of a resource that the caller already holds a pointer to.
 *
 * This is a thin wrapper around control_set_role_common() that passes no
 * configuration, no output nv and no resource name, so no lookup by name
 * takes place.  res must not be NULL: with res == NULL the common routine
 * asserts that cfg and name are non-NULL, which they are not here.
 *
 * NOTE(review): control_set_role_common() still calls nv_add_string() on
 * the NULL nvout to report the previous role; this presumably relies on
 * the nv_add_*() functions tolerating a NULL nv -- confirm in nv.c.
 */
void
control_set_role(struct hast_resource *res, uint8_t role)
{

	control_set_role_common(NULL, NULL, role, res, NULL, 0);
}
148
149static void
150control_status_worker(struct hast_resource *res, struct nv *nvout,
151    unsigned int no)
152{
153	struct nv *cnvin, *cnvout;
154	const char *str;
155	int error;
156
157	cnvin = cnvout = NULL;
158	error = 0;
159
160	/*
161	 * Prepare and send command to worker process.
162	 */
163	cnvout = nv_alloc();
164	nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd");
165	error = nv_error(cnvout);
166	if (error != 0) {
167		pjdlog_common(LOG_ERR, 0, error,
168		    "Unable to prepare control header");
169		goto end;
170	}
171	if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) {
172		error = errno;
173		pjdlog_errno(LOG_ERR, "Unable to send control header");
174		goto end;
175	}
176
177	/*
178	 * Receive response.
179	 */
180	if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) {
181		error = errno;
182		pjdlog_errno(LOG_ERR, "Unable to receive control header");
183		goto end;
184	}
185
186	error = nv_get_int16(cnvin, "error");
187	if (error != 0)
188		goto end;
189
190	if ((str = nv_get_string(cnvin, "status")) == NULL) {
191		error = ENOENT;
192		pjdlog_errno(LOG_ERR, "Field 'status' is missing.");
193		goto end;
194	}
195	nv_add_string(nvout, str, "status%u", no);
196	nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no);
197	nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"),
198	    "extentsize%u", no);
199	nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"),
200	    "keepdirty%u", no);
201end:
202	if (cnvin != NULL)
203		nv_free(cnvin);
204	if (cnvout != NULL)
205		nv_free(cnvout);
206	if (error != 0)
207		nv_add_int16(nvout, error, "error");
208}
209
210static void
211control_status(struct hastd_config *cfg, struct nv *nvout,
212    struct hast_resource *res, const char *name, unsigned int no)
213{
214
215	assert(cfg != NULL);
216	assert(nvout != NULL);
217	assert(name != NULL);
218
219	/* Name is always needed. */
220	nv_add_string(nvout, name, "resource%u", no);
221
222	if (res == NULL) {
223		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
224			if (strcmp(res->hr_name, name) == 0)
225				break;
226		}
227		if (res == NULL) {
228			nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
229			return;
230		}
231	}
232	assert(res != NULL);
233	nv_add_string(nvout, res->hr_provname, "provname%u", no);
234	nv_add_string(nvout, res->hr_localpath, "localpath%u", no);
235	nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no);
236	switch (res->hr_replication) {
237	case HAST_REPLICATION_FULLSYNC:
238		nv_add_string(nvout, "fullsync", "replication%u", no);
239		break;
240	case HAST_REPLICATION_MEMSYNC:
241		nv_add_string(nvout, "memsync", "replication%u", no);
242		break;
243	case HAST_REPLICATION_ASYNC:
244		nv_add_string(nvout, "async", "replication%u", no);
245		break;
246	default:
247		nv_add_string(nvout, "unknown", "replication%u", no);
248		break;
249	}
250	nv_add_string(nvout, checksum_name(res->hr_checksum),
251	    "checksum%u", no);
252	nv_add_string(nvout, role2str(res->hr_role), "role%u", no);
253
254	switch (res->hr_role) {
255	case HAST_ROLE_PRIMARY:
256		assert(res->hr_workerpid != 0);
257		/* FALLTHROUGH */
258	case HAST_ROLE_SECONDARY:
259		if (res->hr_workerpid != 0)
260			break;
261		/* FALLTHROUGH */
262	default:
263		return;
264	}
265
266	/*
267	 * If we are here, it means that we have a worker process, which we
268	 * want to ask some questions.
269	 */
270	control_status_worker(res, nvout, no);
271}
272
273void
274control_handle(struct hastd_config *cfg)
275{
276	struct proto_conn *conn;
277	struct nv *nvin, *nvout;
278	unsigned int ii;
279	const char *str;
280	uint8_t cmd, role;
281	int error;
282
283	if (proto_accept(cfg->hc_controlconn, &conn) < 0) {
284		pjdlog_errno(LOG_ERR, "Unable to accept control connection");
285		return;
286	}
287
288	cfg->hc_controlin = conn;
289	nvin = nvout = NULL;
290	role = HAST_ROLE_UNDEF;
291
292	if (hast_proto_recv_hdr(conn, &nvin) < 0) {
293		pjdlog_errno(LOG_ERR, "Unable to receive control header");
294		nvin = NULL;
295		goto close;
296	}
297
298	/* Obtain command code. 0 means that nv_get_uint8() failed. */
299	cmd = nv_get_uint8(nvin, "cmd");
300	if (cmd == 0) {
301		pjdlog_error("Control header is missing 'cmd' field.");
302		error = EHAST_INVALID;
303		goto close;
304	}
305
306	/* Allocate outgoing nv structure. */
307	nvout = nv_alloc();
308	if (nvout == NULL) {
309		pjdlog_error("Unable to allocate header for control response.");
310		error = EHAST_NOMEMORY;
311		goto close;
312	}
313
314	error = 0;
315
316	str = nv_get_string(nvin, "resource0");
317	if (str == NULL) {
318		pjdlog_error("Control header is missing 'resource0' field.");
319		error = EHAST_INVALID;
320		goto fail;
321	}
322	if (cmd == HASTCTL_SET_ROLE) {
323		role = nv_get_uint8(nvin, "role");
324		switch (role) {
325		case HAST_ROLE_INIT:	/* Is that valid to set, hmm? */
326		case HAST_ROLE_PRIMARY:
327		case HAST_ROLE_SECONDARY:
328			break;
329		default:
330			pjdlog_error("Invalid role received (%hhu).", role);
331			error = EHAST_INVALID;
332			goto fail;
333		}
334	}
335	if (strcmp(str, "all") == 0) {
336		struct hast_resource *res;
337
338		/* All configured resources. */
339
340		ii = 0;
341		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
342			switch (cmd) {
343			case HASTCTL_SET_ROLE:
344				control_set_role_common(cfg, nvout, role, res,
345				    res->hr_name, ii++);
346				break;
347			case HASTCTL_STATUS:
348				control_status(cfg, nvout, res, res->hr_name,
349				    ii++);
350				break;
351			default:
352				pjdlog_error("Invalid command received (%hhu).",
353				    cmd);
354				error = EHAST_UNIMPLEMENTED;
355				goto fail;
356			}
357		}
358	} else {
359		/* Only selected resources. */
360
361		for (ii = 0; ; ii++) {
362			str = nv_get_string(nvin, "resource%u", ii);
363			if (str == NULL)
364				break;
365			switch (cmd) {
366			case HASTCTL_SET_ROLE:
367				control_set_role_common(cfg, nvout, role, NULL,
368				    str, ii);
369				break;
370			case HASTCTL_STATUS:
371				control_status(cfg, nvout, NULL, str, ii);
372				break;
373			default:
374				pjdlog_error("Invalid command received (%hhu).",
375				    cmd);
376				error = EHAST_UNIMPLEMENTED;
377				goto fail;
378			}
379		}
380	}
381	if (nv_error(nvout) != 0)
382		goto close;
383fail:
384	if (error != 0)
385		nv_add_int16(nvout, error, "error");
386
387	if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0)
388		pjdlog_errno(LOG_ERR, "Unable to send control response");
389close:
390	if (nvin != NULL)
391		nv_free(nvin);
392	if (nvout != NULL)
393		nv_free(nvout);
394	proto_close(conn);
395	cfg->hc_controlin = NULL;
396}
397
398/*
399 * Thread handles control requests from the parent.
400 */
401void *
402ctrl_thread(void *arg)
403{
404	struct hast_resource *res = arg;
405	struct nv *nvin, *nvout;
406	uint8_t cmd;
407
408	for (;;) {
409		if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) {
410			if (sigexit_received)
411				pthread_exit(NULL);
412			pjdlog_errno(LOG_ERR,
413			    "Unable to receive control message");
414			kill(getpid(), SIGTERM);
415			pthread_exit(NULL);
416		}
417		cmd = nv_get_uint8(nvin, "cmd");
418		if (cmd == 0) {
419			pjdlog_error("Control message is missing 'cmd' field.");
420			nv_free(nvin);
421			continue;
422		}
423		nvout = nv_alloc();
424		switch (cmd) {
425		case HASTCTL_STATUS:
426			if (res->hr_remotein != NULL &&
427			    res->hr_remoteout != NULL) {
428				nv_add_string(nvout, "complete", "status");
429			} else {
430				nv_add_string(nvout, "degraded", "status");
431			}
432			nv_add_uint32(nvout, (uint32_t)res->hr_extentsize,
433			    "extentsize");
434			if (res->hr_role == HAST_ROLE_PRIMARY) {
435				nv_add_uint32(nvout,
436				    (uint32_t)res->hr_keepdirty, "keepdirty");
437				nv_add_uint64(nvout,
438				    (uint64_t)(activemap_ndirty(res->hr_amp) *
439				    res->hr_extentsize), "dirty");
440			} else {
441				nv_add_uint32(nvout, (uint32_t)0, "keepdirty");
442				nv_add_uint64(nvout, (uint64_t)0, "dirty");
443			}
444			nv_add_int16(nvout, 0, "error");
445			break;
446		case HASTCTL_RELOAD:
447			/*
448			 * When parent receives SIGHUP and discovers that
449			 * something related to us has changes, it sends reload
450			 * message to us.
451			 */
452			assert(res->hr_role == HAST_ROLE_PRIMARY);
453			primary_config_reload(res, nvin);
454			nv_add_int16(nvout, 0, "error");
455			break;
456		default:
457			nv_add_int16(nvout, EINVAL, "error");
458			break;
459		}
460		nv_free(nvin);
461		if (nv_error(nvout) != 0) {
462			pjdlog_error("Unable to create answer on control message.");
463			nv_free(nvout);
464			continue;
465		}
466		if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) {
467			pjdlog_errno(LOG_ERR,
468			    "Unable to send reply to control message");
469		}
470		nv_free(nvout);
471	}
472	/* NOTREACHED */
473	return (NULL);
474}
475