1/*	$NetBSD: cluster_locking.c,v 1.1.1.3 2009/12/02 00:26:24 haad Exp $	*/
2
3/*
4 * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
6 *
7 * This file is part of LVM2.
8 *
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17
18/*
19 * Locking functions for LVM.
20 * The main purpose of this part of the library is to serialise LVM
21 * management operations across a cluster.
22 */
23
24#include "lib.h"
25#include "clvm.h"
26#include "lvm-string.h"
27#include "locking.h"
28#include "locking_types.h"
29#include "toolcontext.h"
30
31#include <assert.h>
32#include <stddef.h>
33#include <sys/socket.h>
34#include <sys/un.h>
35#include <unistd.h>
36
#if !defined(CLUSTER_LOCKING_INTERNAL)
/*
 * Without CLUSTER_LOCKING_INTERNAL the entry points defined later in this
 * file have external linkage, so declare them before their definitions.
 */

/* Take or release the lock described by flags on the named resource */
int lock_resource(struct cmd_context *cmd, const char *resource,
		  uint32_t flags);

/* Report through *mode the lock mode held on the named resource */
int query_resource(const char *resource, int *mode);

/* Close the connection to clvmd */
void locking_end(void);

/* Open the connection to clvmd and add this module's flags to *flags */
int locking_init(int type, struct config_tree *cf, uint32_t *flags);
#endif /* !CLUSTER_LOCKING_INTERNAL */
43
/*
 * One node's reply to a cluster request, as unpacked by _cluster_request().
 */
struct lvm_response {
	char node[255];		/* Name of the node that sent this reply */
	char *response;		/* Reply text, dm_malloc'd per entry */
	int status;		/* Status reported by the node; 0 means no error */
	int len;		/* strlen() of response */
};

typedef struct lvm_response lvm_response_t;
50
51/*
52 * This gets stuck at the start of memory we allocate so we
53 * can sanity-check it at deallocation time
54 */
55#define LVM_SIGNATURE 0x434C564D
56
/*
 * NOTE: the LVMD uses the socket FD as the client ID; this means
 * that any client that calls fork() will inherit the context of
 * its parent.
 */
62static int _clvmd_sock = -1;
63
64/* FIXME Install SIGPIPE handler? */
65
66/* Open connection to the Cluster Manager daemon */
67static int _open_local_sock(void)
68{
69	int local_socket;
70	struct sockaddr_un sockaddr;
71
72	/* Open local socket */
73	if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
74		log_error("Local socket creation failed: %s", strerror(errno));
75		return -1;
76	}
77
78	memset(&sockaddr, 0, sizeof(sockaddr));
79	memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));
80
81	sockaddr.sun_family = AF_UNIX;
82
83	if (connect(local_socket,(struct sockaddr *) &sockaddr,
84		    sizeof(sockaddr))) {
85		int saved_errno = errno;
86
87		log_error("connect() failed on local socket: %s",
88			  strerror(errno));
89		if (close(local_socket))
90			stack;
91
92		errno = saved_errno;
93		return -1;
94	}
95
96	return local_socket;
97}
98
99/* Send a request and return the status */
100static int _send_request(char *inbuf, int inlen, char **retbuf)
101{
102	char outbuf[PIPE_BUF] __attribute((aligned(8)));
103	struct clvm_header *outheader = (struct clvm_header *) outbuf;
104	int len;
105	int off;
106	int buflen;
107	int err;
108
109	/* Send it to CLVMD */
110 rewrite:
111	if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) {
112		if (err == -1 && errno == EINTR)
113			goto rewrite;
114		log_error("Error writing data to clvmd: %s", strerror(errno));
115		return 0;
116	}
117
118	/* Get the response */
119 reread:
120	if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) {
121		if (errno == EINTR)
122			goto reread;
123		log_error("Error reading data from clvmd: %s", strerror(errno));
124		return 0;
125	}
126
127	if (len == 0) {
128		log_error("EOF reading CLVMD");
129		errno = ENOTCONN;
130		return 0;
131	}
132
133	/* Allocate buffer */
134	buflen = len + outheader->arglen;
135	*retbuf = dm_malloc(buflen);
136	if (!*retbuf) {
137		errno = ENOMEM;
138		return 0;
139	}
140
141	/* Copy the header */
142	memcpy(*retbuf, outbuf, len);
143	outheader = (struct clvm_header *) *retbuf;
144
145	/* Read the returned values */
146	off = 1;		/* we've already read the first byte */
147	while (off <= outheader->arglen && len > 0) {
148		len = read(_clvmd_sock, outheader->args + off,
149			   buflen - off - offsetof(struct clvm_header, args));
150		if (len > 0)
151			off += len;
152	}
153
154	/* Was it an error ? */
155	if (outheader->status != 0) {
156		errno = outheader->status;
157
158		/* Only return an error here if there are no node-specific
159		   errors present in the message that might have more detail */
160		if (!(outheader->flags & CLVMD_FLAG_NODEERRS)) {
161			log_error("cluster request failed: %s", strerror(errno));
162			return 0;
163		}
164
165	}
166
167	return 1;
168}
169
170/* Build the structure header and parse-out wildcard node names */
171/* FIXME: Cleanup implicit casts of clvmd_cmd (int, char, uint8_t, etc). */
172static void _build_header(struct clvm_header *head, int clvmd_cmd, const char *node,
173			  int len)
174{
175	head->cmd = clvmd_cmd;
176	head->status = 0;
177	head->flags = 0;
178	head->clientid = 0;
179	head->arglen = len;
180
181	if (node) {
182		/*
183		 * Allow a couple of special node names:
184		 * "*" for all nodes,
185		 * "." for the local node only
186		 */
187		if (strcmp(node, "*") == 0) {
188			head->node[0] = '\0';
189		} else if (strcmp(node, ".") == 0) {
190			head->node[0] = '\0';
191			head->flags = CLVMD_FLAG_LOCAL;
192		} else
193			strcpy(head->node, node);
194	} else
195		head->node[0] = '\0';
196}
197
198/*
199 * Send a message to a(or all) node(s) in the cluster and wait for replies
200 */
201static int _cluster_request(char clvmd_cmd, const char *node, void *data, int len,
202			   lvm_response_t ** response, int *num)
203{
204	char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1] __attribute((aligned(8)));
205	char *inptr;
206	char *retbuf = NULL;
207	int status;
208	int i;
209	int num_responses = 0;
210	struct clvm_header *head = (struct clvm_header *) outbuf;
211	lvm_response_t *rarray;
212
213	*num = 0;
214
215	if (_clvmd_sock == -1)
216		_clvmd_sock = _open_local_sock();
217
218	if (_clvmd_sock == -1)
219		return 0;
220
221	_build_header(head, clvmd_cmd, node, len);
222	memcpy(head->node + strlen(head->node) + 1, data, len);
223
224	status = _send_request(outbuf, sizeof(struct clvm_header) +
225			      strlen(head->node) + len, &retbuf);
226	if (!status)
227		goto out;
228
229	/* Count the number of responses we got */
230	head = (struct clvm_header *) retbuf;
231	inptr = head->args;
232	while (inptr[0]) {
233		num_responses++;
234		inptr += strlen(inptr) + 1;
235		inptr += sizeof(int);
236		inptr += strlen(inptr) + 1;
237	}
238
239	/*
240	 * Allocate response array.
241	 * With an extra pair of INTs on the front to sanity
242	 * check the pointer when we are given it back to free
243	 */
244	*response = dm_malloc(sizeof(lvm_response_t) * num_responses);
245	if (!*response) {
246		errno = ENOMEM;
247		status = 0;
248		goto out;
249	}
250
251	rarray = *response;
252
253	/* Unpack the response into an lvm_response_t array */
254	inptr = head->args;
255	i = 0;
256	while (inptr[0]) {
257		strcpy(rarray[i].node, inptr);
258		inptr += strlen(inptr) + 1;
259
260		memcpy(&rarray[i].status, inptr, sizeof(int));
261		inptr += sizeof(int);
262
263		rarray[i].response = dm_malloc(strlen(inptr) + 1);
264		if (rarray[i].response == NULL) {
265			/* Free up everything else and return error */
266			int j;
267			for (j = 0; j < i; j++)
268				dm_free(rarray[i].response);
269			free(*response);
270			errno = ENOMEM;
271			status = -1;
272			goto out;
273		}
274
275		strcpy(rarray[i].response, inptr);
276		rarray[i].len = strlen(inptr);
277		inptr += strlen(inptr) + 1;
278		i++;
279	}
280	*num = num_responses;
281	*response = rarray;
282
283      out:
284	if (retbuf)
285		dm_free(retbuf);
286
287	return status;
288}
289
290/* Free reply array */
291static int _cluster_free_request(lvm_response_t * response, int num)
292{
293	int i;
294
295	for (i = 0; i < num; i++) {
296		dm_free(response[i].response);
297	}
298
299	dm_free(response);
300
301	return 1;
302}
303
304static int _lock_for_cluster(struct cmd_context *cmd, unsigned char clvmd_cmd,
305			     uint32_t flags, const char *name)
306{
307	int status;
308	int i;
309	char *args;
310	const char *node = "";
311	int len;
312	int saved_errno = errno;
313	lvm_response_t *response = NULL;
314	int num_responses;
315
316	assert(name);
317
318	len = strlen(name) + 3;
319	args = alloca(len);
320	strcpy(args + 2, name);
321
322	args[0] = flags & 0x7F; /* Maskoff lock flags */
323	args[1] = flags & 0xC0; /* Bitmap flags */
324
325	if (mirror_in_sync())
326		args[1] |= LCK_MIRROR_NOSYNC_MODE;
327
328	if (dmeventd_monitor_mode())
329		args[1] |= LCK_DMEVENTD_MONITOR_MODE;
330
331	if (cmd->partial_activation)
332		args[1] |= LCK_PARTIAL_MODE;
333
334	/*
335	 * VG locks are just that: locks, and have no side effects
336	 * so we only need to do them on the local node because all
337	 * locks are cluster-wide.
338	 * Also, if the lock is exclusive it makes no sense to try to
339	 * acquire it on all nodes, so just do that on the local node too.
340	 * One exception, is that P_ locks /do/ get distributed across
341	 * the cluster because they might have side-effects.
342	 */
343	if (strncmp(name, "P_", 2) &&
344	    (clvmd_cmd == CLVMD_CMD_LOCK_VG ||
345	     (flags & LCK_TYPE_MASK) == LCK_EXCL ||
346	     (flags & LCK_LOCAL) ||
347	     !(flags & LCK_CLUSTER_VG)))
348		node = ".";
349
350	status = _cluster_request(clvmd_cmd, node, args, len,
351				  &response, &num_responses);
352
353	/* If any nodes were down then display them and return an error */
354	for (i = 0; i < num_responses; i++) {
355		if (response[i].status == EHOSTDOWN) {
356			log_error("clvmd not running on node %s",
357				  response[i].node);
358			status = 0;
359			errno = response[i].status;
360		} else if (response[i].status) {
361			log_error("Error locking on node %s: %s",
362				  response[i].node,
363				  response[i].response[0] ?
364				  	response[i].response :
365				  	strerror(response[i].status));
366			status = 0;
367			errno = response[i].status;
368		}
369	}
370
371	saved_errno = errno;
372	_cluster_free_request(response, num_responses);
373	errno = saved_errno;
374
375	return status;
376}
377
378/* API entry point for LVM */
379#ifdef CLUSTER_LOCKING_INTERNAL
380static int _lock_resource(struct cmd_context *cmd, const char *resource,
381			  uint32_t flags)
382#else
383int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags)
384#endif
385{
386	char lockname[PATH_MAX];
387	int clvmd_cmd = 0;
388	const char *lock_scope;
389	const char *lock_type = "";
390
391	assert(strlen(resource) < sizeof(lockname));
392	assert(resource);
393
394	switch (flags & LCK_SCOPE_MASK) {
395	case LCK_VG:
396		if (flags == LCK_VG_BACKUP) {
397			log_very_verbose("Requesting backup of VG metadata for %s",
398					 resource);
399			return _lock_for_cluster(cmd, CLVMD_CMD_VG_BACKUP,
400						 LCK_CLUSTER_VG, resource);
401		}
402
403		/* If the VG name is empty then lock the unused PVs */
404		if (*resource == '#' || (flags & LCK_CACHE))
405			dm_snprintf(lockname, sizeof(lockname), "P_%s",
406				    resource);
407		else
408			dm_snprintf(lockname, sizeof(lockname), "V_%s",
409				    resource);
410
411		lock_scope = "VG";
412		clvmd_cmd = CLVMD_CMD_LOCK_VG;
413		flags &= LCK_TYPE_MASK;
414		break;
415
416	case LCK_LV:
417		clvmd_cmd = CLVMD_CMD_LOCK_LV;
418		strcpy(lockname, resource);
419		lock_scope = "LV";
420		flags &= 0xffdf;	/* Mask off HOLD flag */
421		break;
422
423	default:
424		log_error("Unrecognised lock scope: %d",
425			  flags & LCK_SCOPE_MASK);
426		return 0;
427	}
428
429	switch(flags & LCK_TYPE_MASK) {
430	case LCK_UNLOCK:
431		lock_type = "UN";
432		break;
433	case LCK_NULL:
434		lock_type = "NL";
435		break;
436	case LCK_READ:
437		lock_type = "CR";
438		break;
439	case LCK_PREAD:
440		lock_type = "PR";
441		break;
442	case LCK_WRITE:
443		lock_type = "PW";
444		break;
445	case LCK_EXCL:
446		lock_type = "EX";
447		break;
448	default:
449		log_error("Unrecognised lock type: %u",
450			  flags & LCK_TYPE_MASK);
451		return 0;
452	}
453
454	log_very_verbose("Locking %s %s %s %s%s%s%s (0x%x)", lock_scope, lockname,
455			 lock_type,
456			 flags & LCK_NONBLOCK ? "" : "B",
457			 flags & LCK_HOLD ? "H" : "",
458			 flags & LCK_LOCAL ? "L" : "",
459			 flags & LCK_CLUSTER_VG ? "C" : "",
460			 flags);
461
462	/* Send a message to the cluster manager */
463	return _lock_for_cluster(cmd, clvmd_cmd, flags, lockname);
464}
465
466static int decode_lock_type(const char *response)
467{
468	if (!response)
469		return LCK_NULL;
470	else if (strcmp(response, "EX"))
471		return LCK_EXCL;
472	else if (strcmp(response, "CR"))
473		return LCK_READ;
474	else if (strcmp(response, "PR"))
475		return LCK_PREAD;
476
477	stack;
478	return 0;
479}
480
481#ifdef CLUSTER_LOCKING_INTERNAL
482static int _query_resource(const char *resource, int *mode)
483#else
484int query_resource(const char *resource, int *mode)
485#endif
486{
487	int i, status, len, num_responses, saved_errno;
488	const char *node = "";
489	char *args;
490	lvm_response_t *response = NULL;
491
492	saved_errno = errno;
493	len = strlen(resource) + 3;
494	args = alloca(len);
495	strcpy(args + 2, resource);
496
497	args[0] = 0;
498	args[1] = LCK_CLUSTER_VG;
499
500	status = _cluster_request(CLVMD_CMD_LOCK_QUERY, node, args, len,
501				  &response, &num_responses);
502	*mode = LCK_NULL;
503	for (i = 0; i < num_responses; i++) {
504		if (response[i].status == EHOSTDOWN)
505			continue;
506
507		if (!response[i].response[0])
508			continue;
509
510		/*
511		 * All nodes should use CR, or exactly one node
512		 * should held EX. (PR is obsolete)
513		 * If two nodes node reports different locks,
514		 * something is broken - just return more important mode.
515		 */
516		if (decode_lock_type(response[i].response) > *mode)
517			*mode = decode_lock_type(response[i].response);
518
519		log_debug("Lock held for %s, node %s : %s", resource,
520			  response[i].node, response[i].response);
521	}
522
523	_cluster_free_request(response, num_responses);
524	errno = saved_errno;
525
526	return status;
527}
528
529#ifdef CLUSTER_LOCKING_INTERNAL
530static void _locking_end(void)
531#else
532void locking_end(void)
533#endif
534{
535	if (_clvmd_sock != -1 && close(_clvmd_sock))
536		stack;
537
538	_clvmd_sock = -1;
539}
540
541#ifdef CLUSTER_LOCKING_INTERNAL
542static void _reset_locking(void)
543#else
544void reset_locking(void)
545#endif
546{
547	if (close(_clvmd_sock))
548		stack;
549
550	_clvmd_sock = _open_local_sock();
551	if (_clvmd_sock == -1)
552		stack;
553}
554
555#ifdef CLUSTER_LOCKING_INTERNAL
556int init_cluster_locking(struct locking_type *locking, struct cmd_context *cmd)
557{
558	locking->lock_resource = _lock_resource;
559	locking->query_resource = _query_resource;
560	locking->fin_locking = _locking_end;
561	locking->reset_locking = _reset_locking;
562	locking->flags = LCK_PRE_MEMLOCK | LCK_CLUSTERED;
563
564	_clvmd_sock = _open_local_sock();
565	if (_clvmd_sock == -1)
566		return 0;
567
568	return 1;
569}
570#else
571int locking_init(int type, struct config_tree *cf, uint32_t *flags)
572{
573	_clvmd_sock = _open_local_sock();
574	if (_clvmd_sock == -1)
575		return 0;
576
577	/* Ask LVM to lock memory before calling us */
578	*flags |= LCK_PRE_MEMLOCK;
579	*flags |= LCK_CLUSTERED;
580
581	return 1;
582}
583#endif
584