1/*	$NetBSD$	*/
2
3/*
4 * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
6 *
7 * This file is part of LVM2.
8 *
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU General Public License v.2.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17
18/*
19 * CMAN communication layer for clvmd.
20 */
21
22#define _GNU_SOURCE
23#define _FILE_OFFSET_BITS 64
24
25#include <configure.h>
26#include <pthread.h>
27#include <sys/types.h>
28#include <sys/stat.h>
29#include <sys/socket.h>
30#include <sys/uio.h>
31#include <sys/un.h>
32#include <sys/time.h>
33#include <sys/ioctl.h>
34#include <sys/utsname.h>
35#include <syslog.h>
36#include <netinet/in.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <stddef.h>
40#include <signal.h>
41#include <unistd.h>
42#include <fcntl.h>
43#include <getopt.h>
44#include <errno.h>
45#include <libdevmapper.h>
46#include <libdlm.h>
47
48#include "clvmd-comms.h"
49#include "clvm.h"
50#include "lvm-logging.h"
51#include "clvmd.h"
52#include "lvm-functions.h"
53
/* Name passed to the DLM when creating/opening/releasing clvmd's lockspace */
#define LOCKSPACE_NAME "clvmd"

/*
 * A CMAN node record together with a clvmd state flag.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; the up/down state is tracked in node_updown_hash instead.
 */
struct clvmd_node {
	struct cman_node *node;
	int clvmd_up;
};
61
62static int num_nodes;
63static struct cman_node *nodes = NULL;
64static struct cman_node this_node;
65static int count_nodes; /* size of allocated nodes array */
66static struct dm_hash_table *node_updown_hash;
67static dlm_lshandle_t *lockspace;
68static cman_handle_t c_handle;
69
70static void count_clvmds_running(void);
71static void get_members(void);
72static int nodeid_from_csid(const char *csid);
73static int name_from_nodeid(int nodeid, char *name);
74static void event_callback(cman_handle_t handle, void *private, int reason, int arg);
75static void data_callback(cman_handle_t handle, void *private,
76			  char *buf, int len, uint8_t port, int nodeid);
77
78struct lock_wait {
79	pthread_cond_t cond;
80	pthread_mutex_t mutex;
81	struct dlm_lksb lksb;
82};
83
84static int _init_cluster(void)
85{
86	node_updown_hash = dm_hash_create(100);
87
88	/* Open the cluster communication socket */
89	c_handle = cman_init(NULL);
90	if (!c_handle) {
91		syslog(LOG_ERR, "Can't open cluster manager socket: %m");
92		return -1;
93	}
94	DEBUGLOG("Connected to CMAN\n");
95
96	if (cman_start_recv_data(c_handle, data_callback, CLUSTER_PORT_CLVMD)) {
97		syslog(LOG_ERR, "Can't bind cluster socket: %m");
98		return -1;
99	}
100
101	if (cman_start_notification(c_handle, event_callback)) {
102		syslog(LOG_ERR, "Can't start cluster event listening");
103		return -1;
104	}
105
106	/* Get the cluster members list */
107	get_members();
108	count_clvmds_running();
109
110	DEBUGLOG("CMAN initialisation complete\n");
111
112	/* Create a lockspace for LV & VG locks to live in */
113	lockspace = dlm_create_lockspace(LOCKSPACE_NAME, 0600);
114	if (!lockspace) {
115		if (errno == EEXIST) {
116			lockspace = dlm_open_lockspace(LOCKSPACE_NAME);
117		}
118		if (!lockspace) {
119			syslog(LOG_ERR, "Unable to create lockspace for CLVM: %m");
120			return -1;
121		}
122	}
123	dlm_ls_pthread_init(lockspace);
124	DEBUGLOG("DLM initialisation complete\n");
125	return 0;
126}
127
/*
 * cluster_ops hook: forwards straight to the generic clvmd routine
 * clvmd_cluster_init_completed(); nothing CMAN-specific is done here.
 */
static void _cluster_init_completed(void)
{
	clvmd_cluster_init_completed();
	return;
}
132
133static int _get_main_cluster_fd()
134{
135	return cman_get_fd(c_handle);
136}
137
138static int _get_num_nodes()
139{
140	int i;
141	int nnodes = 0;
142
143	/* return number of ACTIVE nodes */
144	for (i=0; i<num_nodes; i++) {
145		if (nodes[i].cn_member && nodes[i].cn_nodeid)
146			nnodes++;
147	}
148	return nnodes;
149}
150
151/* send_message with the fd check removed */
152static int _cluster_send_message(const void *buf, int msglen, const char *csid,
153				 const char *errtext)
154{
155	int nodeid = 0;
156
157	if (csid)
158		memcpy(&nodeid, csid, CMAN_MAX_CSID_LEN);
159
160	if (cman_send_data(c_handle, buf, msglen, 0, CLUSTER_PORT_CLVMD, nodeid) <= 0)
161	{
162		log_error("%s", errtext);
163	}
164	return msglen;
165}
166
167static void _get_our_csid(char *csid)
168{
169	if (this_node.cn_nodeid == 0) {
170		cman_get_node(c_handle, 0, &this_node);
171	}
172	memcpy(csid, &this_node.cn_nodeid, CMAN_MAX_CSID_LEN);
173}
174
175/* Call a callback routine for each node is that known (down means not running a clvmd) */
176static int _cluster_do_node_callback(struct local_client *client,
177				     void (*callback) (struct local_client *,
178						       const char *,
179						       int))
180{
181	int i;
182	int somedown = 0;
183
184	for (i = 0; i < _get_num_nodes(); i++) {
185		if (nodes[i].cn_member && nodes[i].cn_nodeid) {
186			int up = (int)(long)dm_hash_lookup_binary(node_updown_hash, (char *)&nodes[i].cn_nodeid, sizeof(int));
187
188			callback(client, (char *)&nodes[i].cn_nodeid, up);
189			if (!up)
190				somedown = -1;
191		}
192	}
193	return somedown;
194}
195
196/* Process OOB messages from the cluster socket */
197static void event_callback(cman_handle_t handle, void *private, int reason, int arg)
198{
199	char namebuf[MAX_CLUSTER_MEMBER_NAME_LEN];
200
201	switch (reason) {
202        case CMAN_REASON_PORTCLOSED:
203		name_from_nodeid(arg, namebuf);
204		log_notice("clvmd on node %s has died\n", namebuf);
205		DEBUGLOG("Got port closed message, removing node %s\n", namebuf);
206
207		dm_hash_insert_binary(node_updown_hash, (char *)&arg, sizeof(int), (void *)0);
208		break;
209
210	case CMAN_REASON_STATECHANGE:
211		DEBUGLOG("Got state change message, re-reading members list\n");
212		get_members();
213		break;
214
215#if defined(LIBCMAN_VERSION) && LIBCMAN_VERSION >= 2
216	case CMAN_REASON_PORTOPENED:
217		/* Ignore this, wait for startup message from clvmd itself */
218		break;
219
220	case CMAN_REASON_TRY_SHUTDOWN:
221		DEBUGLOG("Got try shutdown, sending OK\n");
222		cman_replyto_shutdown(c_handle, 1);
223		break;
224#endif
225	default:
226		/* ERROR */
227		DEBUGLOG("Got unknown event callback message: %d\n", reason);
228		break;
229	}
230}
231
232static struct local_client *cman_client;
233static int _cluster_fd_callback(struct local_client *fd, char *buf, int len,
234				const char *csid,
235				struct local_client **new_client)
236{
237
238	/* Save this for data_callback */
239	cman_client = fd;
240
241	/* We never return a new client */
242	*new_client = NULL;
243
244	return cman_dispatch(c_handle, 0);
245}
246
247
248static void data_callback(cman_handle_t handle, void *private,
249			  char *buf, int len, uint8_t port, int nodeid)
250{
251	/* Ignore looped back messages */
252	if (nodeid == this_node.cn_nodeid)
253		return;
254	process_message(cman_client, buf, len, (char *)&nodeid);
255}
256
257static void _add_up_node(const char *csid)
258{
259	/* It's up ! */
260	int nodeid = nodeid_from_csid(csid);
261
262	dm_hash_insert_binary(node_updown_hash, (char *)&nodeid, sizeof(int), (void *)1);
263	DEBUGLOG("Added new node %d to updown list\n", nodeid);
264}
265
266static void _cluster_closedown()
267{
268	destroy_lvhash();
269	dlm_release_lockspace(LOCKSPACE_NAME, lockspace, 1);
270	cman_finish(c_handle);
271}
272
273static int is_listening(int nodeid)
274{
275	int status;
276
277	do {
278		status = cman_is_listening(c_handle, nodeid, CLUSTER_PORT_CLVMD);
279		if (status < 0 && errno == EBUSY) {	/* Don't busywait */
280			sleep(1);
281			errno = EBUSY;	/* In case sleep trashes it */
282		}
283	}
284	while (status < 0 && errno == EBUSY);
285
286	return status;
287}
288
289/* Populate the list of CLVMDs running.
290   called only at startup time */
291static void count_clvmds_running(void)
292{
293	int i;
294
295	for (i = 0; i < num_nodes; i++) {
296		int nodeid = nodes[i].cn_nodeid;
297
298		if (is_listening(nodeid) == 1)
299			dm_hash_insert_binary(node_updown_hash, (void *)&nodeid, sizeof(int), (void*)1);
300		else
301			dm_hash_insert_binary(node_updown_hash, (void *)&nodeid, sizeof(int), (void*)0);
302	}
303}
304
305/* Get a list of active cluster members */
306static void get_members()
307{
308	int retnodes;
309	int status;
310	int i;
311	int high_nodeid = 0;
312
313	num_nodes = cman_get_node_count(c_handle);
314	if (num_nodes == -1) {
315		log_error("Unable to get node count");
316		return;
317	}
318
319	/* Not enough room for new nodes list ? */
320	if (num_nodes > count_nodes && nodes) {
321		free(nodes);
322		nodes = NULL;
323	}
324
325	if (nodes == NULL) {
326		count_nodes = num_nodes + 10; /* Overallocate a little */
327		nodes = malloc(count_nodes * sizeof(struct cman_node));
328		if (!nodes) {
329			log_error("Unable to allocate nodes array\n");
330			exit(5);
331		}
332	}
333
334	status = cman_get_nodes(c_handle, count_nodes, &retnodes, nodes);
335	if (status < 0) {
336		log_error("Unable to get node details");
337		exit(6);
338	}
339
340	/* Get the highest nodeid */
341	for (i=0; i<retnodes; i++) {
342		if (nodes[i].cn_nodeid > high_nodeid)
343			high_nodeid = nodes[i].cn_nodeid;
344	}
345}
346
347
348/* Convert a node name to a CSID */
349static int _csid_from_name(char *csid, const char *name)
350{
351	int i;
352
353	for (i = 0; i < num_nodes; i++) {
354		if (strcmp(name, nodes[i].cn_name) == 0) {
355			memcpy(csid, &nodes[i].cn_nodeid, CMAN_MAX_CSID_LEN);
356			return 0;
357		}
358	}
359	return -1;
360}
361
362/* Convert a CSID to a node name */
363static int _name_from_csid(const char *csid, char *name)
364{
365	int i;
366
367	for (i = 0; i < num_nodes; i++) {
368		if (memcmp(csid, &nodes[i].cn_nodeid, CMAN_MAX_CSID_LEN) == 0) {
369			strcpy(name, nodes[i].cn_name);
370			return 0;
371		}
372	}
373	/* Who?? */
374	strcpy(name, "Unknown");
375	return -1;
376}
377
378/* Convert a node ID to a node name */
379static int name_from_nodeid(int nodeid, char *name)
380{
381	int i;
382
383	for (i = 0; i < num_nodes; i++) {
384		if (nodeid == nodes[i].cn_nodeid) {
385			strcpy(name, nodes[i].cn_name);
386			return 0;
387		}
388	}
389	/* Who?? */
390	strcpy(name, "Unknown");
391	return -1;
392}
393
394/* Convert a CSID to a node ID */
395static int nodeid_from_csid(const char *csid)
396{
397        int nodeid;
398
399	memcpy(&nodeid, csid, CMAN_MAX_CSID_LEN);
400
401	return nodeid;
402}
403
404static int _is_quorate()
405{
406	return cman_is_quorate(c_handle);
407}
408
409static void sync_ast_routine(void *arg)
410{
411	struct lock_wait *lwait = arg;
412
413	pthread_mutex_lock(&lwait->mutex);
414	pthread_cond_signal(&lwait->cond);
415	pthread_mutex_unlock(&lwait->mutex);
416}
417
418static int _sync_lock(const char *resource, int mode, int flags, int *lockid)
419{
420	int status;
421	struct lock_wait lwait;
422
423	if (!lockid) {
424		errno = EINVAL;
425		return -1;
426	}
427
428	DEBUGLOG("sync_lock: '%s' mode:%d flags=%d\n", resource,mode,flags);
429	/* Conversions need the lockid in the LKSB */
430	if (flags & LKF_CONVERT)
431		lwait.lksb.sb_lkid = *lockid;
432
433	pthread_cond_init(&lwait.cond, NULL);
434	pthread_mutex_init(&lwait.mutex, NULL);
435	pthread_mutex_lock(&lwait.mutex);
436
437	status = dlm_ls_lock(lockspace,
438			     mode,
439			     &lwait.lksb,
440			     flags,
441			     resource,
442			     strlen(resource),
443			     0, sync_ast_routine, &lwait, NULL, NULL);
444	if (status)
445		return status;
446
447	/* Wait for it to complete */
448	pthread_cond_wait(&lwait.cond, &lwait.mutex);
449	pthread_mutex_unlock(&lwait.mutex);
450
451	*lockid = lwait.lksb.sb_lkid;
452
453	errno = lwait.lksb.sb_status;
454	DEBUGLOG("sync_lock: returning lkid %x\n", *lockid);
455	if (lwait.lksb.sb_status)
456		return -1;
457	else
458		return 0;
459}
460
461static int _sync_unlock(const char *resource /* UNUSED */, int lockid)
462{
463	int status;
464	struct lock_wait lwait;
465
466	DEBUGLOG("sync_unlock: '%s' lkid:%x\n", resource, lockid);
467
468	pthread_cond_init(&lwait.cond, NULL);
469	pthread_mutex_init(&lwait.mutex, NULL);
470	pthread_mutex_lock(&lwait.mutex);
471
472	status = dlm_ls_unlock(lockspace, lockid, 0, &lwait.lksb, &lwait);
473
474	if (status)
475		return status;
476
477	/* Wait for it to complete */
478	pthread_cond_wait(&lwait.cond, &lwait.mutex);
479	pthread_mutex_unlock(&lwait.mutex);
480
481	errno = lwait.lksb.sb_status;
482	if (lwait.lksb.sb_status != EUNLOCK)
483		return -1;
484	else
485		return 0;
486
487}
488
489static int _get_cluster_name(char *buf, int buflen)
490{
491	cman_cluster_t cluster_info;
492	int status;
493
494	status = cman_get_cluster(c_handle, &cluster_info);
495	if (!status) {
496		strncpy(buf, cluster_info.ci_name, buflen);
497	}
498	return status;
499}
500
501static struct cluster_ops _cluster_cman_ops = {
502	.cluster_init_completed   = _cluster_init_completed,
503	.cluster_send_message     = _cluster_send_message,
504	.name_from_csid           = _name_from_csid,
505	.csid_from_name           = _csid_from_name,
506	.get_num_nodes            = _get_num_nodes,
507	.cluster_fd_callback      = _cluster_fd_callback,
508	.get_main_cluster_fd      = _get_main_cluster_fd,
509	.cluster_do_node_callback = _cluster_do_node_callback,
510	.is_quorate               = _is_quorate,
511	.get_our_csid             = _get_our_csid,
512	.add_up_node              = _add_up_node,
513	.cluster_closedown        = _cluster_closedown,
514	.get_cluster_name         = _get_cluster_name,
515	.sync_lock                = _sync_lock,
516	.sync_unlock              = _sync_unlock,
517};
518
519struct cluster_ops *init_cman_cluster(void)
520{
521	if (!_init_cluster())
522		return &_cluster_cman_ops;
523	else
524		return NULL;
525}
526