1/*	$NetBSD$	*/
2
3/*
4 *  Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved.
5 *  Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
6 *
7 * This file is part of LVM2.
8 *
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
12 *
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17
18/*
19 * This provides the inter-clvmd communications for a system without CMAN.
20 * There is a listening TCP socket which accepts new connections in the
21 * normal way.
 * It can also make outgoing connections to the other clvmd nodes.
23 */
24
25#define _GNU_SOURCE
26#define _FILE_OFFSET_BITS 64
27
28#include <configure.h>
29#include <pthread.h>
30#include <sys/types.h>
31#include <sys/utsname.h>
32#include <sys/ioctl.h>
33#include <sys/socket.h>
34#include <sys/stat.h>
35#include <sys/socket.h>
36#include <netinet/in.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <stdint.h>
40#include <fcntl.h>
41#include <string.h>
42#include <stddef.h>
43#include <stdint.h>
44#include <unistd.h>
45#include <errno.h>
46#include <syslog.h>
47#include <netdb.h>
48#include <assert.h>
49#include <libdevmapper.h>
50
51#include "clvm.h"
52#include "clvmd-comms.h"
53#include "clvmd.h"
54#include "clvmd-gulm.h"
55
/* Port used by init_comms() when it is called with a port of 0 */
#define DEFAULT_TCP_PORT 21064

/* Listening socket opened by init_comms(); -1 until it has been created */
static int listen_fd = -1;
/* Port we listen on and also the port used for outgoing connections */
static int tcp_port;
/* Maps a binary CSID (GULM_MAX_CSID_LEN bytes) to its struct local_client */
struct dm_hash_table *sock_hash;

/* Forward declarations for helpers defined later in this file */
static int get_our_ip_address(char *addr, int *family);
static int read_from_tcpsock(struct local_client *fd, char *buf, int len, char *csid,
			     struct local_client **new_client);
65
66/* Called by init_cluster() to open up the listening socket */
67int init_comms(unsigned short port)
68{
69    struct sockaddr_in6 addr;
70
71    sock_hash = dm_hash_create(100);
72    tcp_port = port ? : DEFAULT_TCP_PORT;
73
74    listen_fd = socket(AF_INET6, SOCK_STREAM, 0);
75
76    if (listen_fd < 0)
77    {
78	return -1;
79    }
80    else
81    {
82	int one = 1;
83	setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int));
84	setsockopt(listen_fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
85    }
86
87    memset(&addr, 0, sizeof(addr)); // Bind to INADDR_ANY
88    addr.sin6_family = AF_INET6;
89    addr.sin6_port = htons(tcp_port);
90
91    if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
92    {
93	DEBUGLOG("Can't bind to port: %s\n", strerror(errno));
94	syslog(LOG_ERR, "Can't bind to port %d, is clvmd already running ?", tcp_port);
95	close(listen_fd);
96	return -1;
97    }
98
99    listen(listen_fd, 5);
100
101    /* Set Close-on-exec */
102    fcntl(listen_fd, F_SETFD, 1);
103
104    return 0;
105}
106
107void tcp_remove_client(const char *c_csid)
108{
109    struct local_client *client;
110    char csid[GULM_MAX_CSID_LEN];
111    unsigned int i;
112    memcpy(csid, c_csid, sizeof csid);
113    DEBUGLOG("tcp_remove_client\n");
114
115    /* Don't actually close the socket here - that's the
116       job of clvmd.c whch will do the job when it notices the
117       other end has gone. We just need to remove the client(s) from
118       the hash table so we don't try to use it for sending any more */
119    for (i = 0; i < 2; i++)
120    {
121	client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
122	if (client)
123	{
124	    dm_hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
125	    client->removeme = 1;
126	    close(client->fd);
127	}
128	/* Look for a mangled one too, on the 2nd iteration. */
129	csid[0] ^= 0x80;
130    }
131}
132
133int alloc_client(int fd, const char *c_csid, struct local_client **new_client)
134{
135    struct local_client *client;
136    char csid[GULM_MAX_CSID_LEN];
137    memcpy(csid, c_csid, sizeof csid);
138
139    DEBUGLOG("alloc_client %d csid = %s\n", fd, print_csid(csid));
140
141    /* Create a local_client and return it */
142    client = malloc(sizeof(struct local_client));
143    if (!client)
144    {
145	DEBUGLOG("malloc failed\n");
146	return -1;
147    }
148
149    memset(client, 0, sizeof(struct local_client));
150    client->fd = fd;
151    client->type = CLUSTER_DATA_SOCK;
152    client->callback = read_from_tcpsock;
153    if (new_client)
154	*new_client = client;
155
156    /* Add to our list of node sockets */
157    if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
158    {
159	DEBUGLOG("alloc_client mangling CSID for second connection\n");
160	/* This is a duplicate connection but we can't close it because
161	   the other end may already have started sending.
162	   So, we mangle the IP address and keep it, all sending will
163	   go out of the main FD
164	*/
165	csid[0] ^= 0x80;
166	client->bits.net.flags = 1; /* indicate mangled CSID */
167
168        /* If it still exists then kill the connection as we should only
169           ever have one incoming connection from each node */
170        if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
171        {
172	    DEBUGLOG("Multiple incoming connections from node\n");
173            syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", csid[0],csid[1],csid[2],csid[3]);
174
175	    free(client);
176            errno = ECONNREFUSED;
177            return -1;
178        }
179    }
180    dm_hash_insert_binary(sock_hash, csid, GULM_MAX_CSID_LEN, client);
181
182    return 0;
183}
184
185int get_main_gulm_cluster_fd()
186{
187    return listen_fd;
188}
189
190
191/* Read on main comms (listen) socket, accept it */
192int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid,
193			struct local_client **new_client)
194{
195    int newfd;
196    struct sockaddr_in6 addr;
197    socklen_t addrlen = sizeof(addr);
198    int status;
199    char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
200
201    DEBUGLOG("cluster_fd_callback\n");
202    *new_client = NULL;
203    newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
204
205    DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno);
206    if (!newfd)
207    {
208	syslog(LOG_ERR, "error in accept: %m");
209	errno = EAGAIN;
210	return -1; /* Don't return an error or clvmd will close the listening FD */
211    }
212
213    /* Check that the client is a member of the cluster
214       and reject if not.
215    */
216    if (gulm_name_from_csid((char *)&addr.sin6_addr, name) < 0)
217    {
218	syslog(LOG_ERR, "Got connect from non-cluster node %s\n",
219	       print_csid((char *)&addr.sin6_addr));
220	DEBUGLOG("Got connect from non-cluster node %s\n",
221		 print_csid((char *)&addr.sin6_addr));
222	close(newfd);
223
224	errno = EAGAIN;
225	return -1;
226    }
227
228    status = alloc_client(newfd, (char *)&addr.sin6_addr, new_client);
229    if (status)
230    {
231	DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status);
232	close(newfd);
233	/* See above... */
234	errno = EAGAIN;
235	return -1;
236    }
237    DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client);
238    return newfd;
239}
240
/*
 * Try to get 'len' bytes from the socket, looping over short reads.
 *
 * Returns the number of bytes read, which is less than 'len' only if the
 * peer closed the connection first (0 if nothing was read at all), or the
 * negative result of read() on error with errno describing that error.
 *
 * An EINTR that arrives after part of the data has already been consumed
 * is retried rather than reported: returning -1 at that point would throw
 * away the bytes read so far and leave the stream out of step. An EINTR
 * before any data has been read is still returned to the caller.
 */
static int really_read(int fd, char *buf, int len)
{
	int got;
	int offset = 0;
	int read_errno;

	for (;;) {
		got = (int)read(fd, buf + offset, len - offset);
		read_errno = errno;	/* DEBUGLOG may overwrite errno */
		DEBUGLOG("really_read. got %d bytes\n", got);

		if (got > 0) {
			offset += got;
			if (offset >= len)
				break;
			continue;
		}

		/* Interrupted part-way through: carry on where we left off */
		if (got < 0 && read_errno == EINTR && offset > 0)
			continue;

		/* EOF or error */
		break;
	}

	if (got < 0) {
		errno = read_errno;
		return got;
	}

	return offset;
}
259
260
261static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid,
262			     struct local_client **new_client)
263{
264    struct sockaddr_in6 addr;
265    socklen_t slen = sizeof(addr);
266    struct clvm_header *header = (struct clvm_header *)buf;
267    int status;
268    uint32_t arglen;
269
270    DEBUGLOG("read_from_tcpsock fd %d\n", client->fd);
271    *new_client = NULL;
272
273    /* Get "csid" */
274    getpeername(client->fd, (struct sockaddr *)&addr, &slen);
275    memcpy(csid, &addr.sin6_addr, GULM_MAX_CSID_LEN);
276
277    /* Read just the header first, then get the rest if there is any.
278     * Stream sockets, sigh.
279     */
280    status = really_read(client->fd, buf, sizeof(struct clvm_header));
281    if (status > 0)
282    {
283	    int status2;
284
285	    arglen = ntohl(header->arglen);
286
287	    /* Get the rest */
288	    if (arglen && arglen < GULM_MAX_CLUSTER_MESSAGE)
289	    {
290		    status2 = really_read(client->fd, buf+status, arglen);
291		    if (status2 > 0)
292			    status += status2;
293		    else
294			    status = status2;
295	    }
296    }
297
298    DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno);
299
300    /* Remove it from the hash table if there's an error, clvmd will
301       remove the socket from its lists and free the client struct */
302    if (status == 0 ||
303	(status < 0 && errno != EAGAIN && errno != EINTR))
304    {
305	char remcsid[GULM_MAX_CSID_LEN];
306
307	memcpy(remcsid, csid, GULM_MAX_CSID_LEN);
308	close(client->fd);
309
310	/* If the csid was mangled, then make sure we remove the right entry */
311	if (client->bits.net.flags)
312	    remcsid[0] ^= 0x80;
313	dm_hash_remove_binary(sock_hash, remcsid, GULM_MAX_CSID_LEN);
314
315	/* Tell cluster manager layer */
316	add_down_node(remcsid);
317    }
318    else {
319	    gulm_add_up_node(csid);
320	    /* Send it back to clvmd */
321	    process_message(client, buf, status, csid);
322    }
323    return status;
324}
325
326int gulm_connect_csid(const char *csid, struct local_client **newclient)
327{
328    int fd;
329    struct sockaddr_in6 addr;
330    int status;
331    int one = 1;
332
333    DEBUGLOG("Connecting socket\n");
334    fd = socket(PF_INET6, SOCK_STREAM, 0);
335
336    if (fd < 0)
337    {
338	syslog(LOG_ERR, "Unable to create new socket: %m");
339	return -1;
340    }
341
342    addr.sin6_family = AF_INET6;
343    memcpy(&addr.sin6_addr, csid, GULM_MAX_CSID_LEN);
344    addr.sin6_port = htons(tcp_port);
345
346    DEBUGLOG("Connecting socket %d\n", fd);
347    if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in6)) < 0)
348    {
349	/* "Connection refused" is "normal" because clvmd may not yet be running
350	 * on that node.
351	 */
352	if (errno != ECONNREFUSED)
353	{
354	    syslog(LOG_ERR, "Unable to connect to remote node: %m");
355	}
356	DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno));
357	close(fd);
358	return -1;
359    }
360
361    /* Set Close-on-exec */
362    fcntl(fd, F_SETFD, 1);
363    setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
364
365    status = alloc_client(fd, csid, newclient);
366    if (status)
367	close(fd);
368    else
369	add_client(*newclient);
370
371    /* If we can connect to it, it must be running a clvmd */
372    gulm_add_up_node(csid);
373    return status;
374}
375
376/* Send a message to a known CSID */
377static int tcp_send_message(void *buf, int msglen, const char *csid, const char *errtext)
378{
379    int status;
380    struct local_client *client;
381    char ourcsid[GULM_MAX_CSID_LEN];
382
383    assert(csid);
384
385    DEBUGLOG("tcp_send_message, csid = %s, msglen = %d\n", print_csid(csid), msglen);
386
387    /* Don't connect to ourself */
388    get_our_gulm_csid(ourcsid);
389    if (memcmp(csid, ourcsid, GULM_MAX_CSID_LEN) == 0)
390	return msglen;
391
392    client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
393    if (!client)
394    {
395	status = gulm_connect_csid(csid, &client);
396	if (status)
397	    return -1;
398    }
399    DEBUGLOG("tcp_send_message, fd = %d\n", client->fd);
400
401    return write(client->fd, buf, msglen);
402}
403
404
405int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext)
406{
407    int status=0;
408
409    DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen);
410
411    /* If csid is NULL then send to all known (not just connected) nodes */
412    if (!csid)
413    {
414	void *context = NULL;
415	char loop_csid[GULM_MAX_CSID_LEN];
416
417	/* Loop round all gulm-known nodes */
418	while (get_next_node_csid(&context, loop_csid))
419	{
420	    status = tcp_send_message(buf, msglen, loop_csid, errtext);
421	    if (status == 0 ||
422		(status < 0 && (errno == EAGAIN || errno == EINTR)))
423		break;
424	}
425    }
426    else
427    {
428
429	status = tcp_send_message(buf, msglen, csid, errtext);
430    }
431    return status;
432}
433
434/* To get our own IP address we get the locally bound address of the
435   socket that's talking to GULM in the assumption(eek) that it will
436   be on the "right" network in a multi-homed system */
437static int get_our_ip_address(char *addr, int *family)
438{
439	struct utsname info;
440
441	uname(&info);
442	get_ip_address(info.nodename, addr);
443
444	return 0;
445}
446
447/* Public version of above for those that don't care what protocol
448   we're using */
449void get_our_gulm_csid(char *csid)
450{
451    static char our_csid[GULM_MAX_CSID_LEN];
452    static int got_csid = 0;
453
454    if (!got_csid)
455    {
456	int family;
457
458	memset(our_csid, 0, sizeof(our_csid));
459	if (get_our_ip_address(our_csid, &family))
460	{
461	    got_csid = 1;
462	}
463    }
464    memcpy(csid, our_csid, GULM_MAX_CSID_LEN);
465}
466
/*
 * Build the IPv4-mapped IPv6 address (::ffff:a.b.c.d) for an IPv4 address.
 *
 * ip4 - IPv4 address in network byte order.
 * ip6 - output; all 16 bytes are overwritten.
 *
 * The address is assembled byte by byte through s6_addr, so it does not
 * rely on the non-standard s6_addr32 member and performs no 32-bit stores
 * through the caller's pointer.
 */
static void map_v4_to_v6(struct in_addr *ip4, struct in6_addr *ip6)
{
    /* 80 zero bits followed by 16 one bits */
    static const unsigned char v4mapped_prefix[12] =
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };

    memcpy(&ip6->s6_addr[0], v4mapped_prefix, sizeof(v4mapped_prefix));
    memcpy(&ip6->s6_addr[12], &ip4->s_addr, sizeof(ip4->s_addr));
}
474
/*
 * Get someone else's IP address from the resolver.
 *
 * node - host name to look up.
 * addr - output buffer; the first GULM_MAX_CSID_LEN bytes are zeroed, then
 *        a 16-byte IPv6 address is stored. An IPv6 address is preferred;
 *        if the name only has an IPv4 address it is stored in IPv4-mapped
 *        form.
 *
 * Returns 0 on success, -1 if the name cannot be resolved.
 *
 * getaddrinfo() is used instead of gethostbyname2(): the latter hands back
 * a pointer to static storage that is shared between callers. The result
 * is copied into local, correctly aligned structures before use, so 'addr'
 * does not need any particular alignment.
 */
int get_ip_address(const char *node, char *addr)
{
    struct addrinfo hints;
    struct addrinfo *res = NULL;
    struct in6_addr result;

    memset(addr, 0, GULM_MAX_CSID_LEN);

    if (!node)
	return -1;

    // TODO: what do we do about multi-homed hosts ???
    // CCSs ip_interfaces solved this but some bugger removed it.

    memset(&hints, 0, sizeof(hints));
    hints.ai_socktype = SOCK_STREAM;

    /* Try IPv6 first */
    hints.ai_family = AF_INET6;
    if (getaddrinfo(node, NULL, &hints, &res) == 0 && res &&
	res->ai_addrlen >= sizeof(struct sockaddr_in6))
    {
	struct sockaddr_in6 sin6;

	memcpy(&sin6, res->ai_addr, sizeof(sin6));
	result = sin6.sin6_addr;
	freeaddrinfo(res);
    }
    else
    {
	struct sockaddr_in sin;

	if (res)
	    freeaddrinfo(res);
	res = NULL;

	/* Fall back to IPv4 and store it as an IPv4-mapped address */
	hints.ai_family = AF_INET;
	if (getaddrinfo(node, NULL, &hints, &res) != 0 || !res)
	    return -1;

	if (res->ai_addrlen < sizeof(struct sockaddr_in))
	{
	    freeaddrinfo(res);
	    return -1;
	}

	memcpy(&sin, res->ai_addr, sizeof(sin));
	freeaddrinfo(res);
	map_v4_to_v6(&sin.sin_addr, &result);
    }

    memcpy(addr, &result, sizeof(result));

    return 0;
}
503
/*
 * Format a CSID for log messages.
 *
 * Reads 16 bytes from 'csid' and prints them as four 32-bit words in host
 * byte order, in hex, as "[w0.w1.w2.w3]".
 *
 * The bytes are copied into a local array first, so 'csid' does not have
 * to be aligned for 32-bit access.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.
 */
char *print_csid(const char *csid)
{
    static char buf[128];
    uint32_t words[4];

    memcpy(words, csid, sizeof(words));

    snprintf(buf, sizeof(buf), "[%x.%x.%x.%x]",
	     (unsigned int)words[0], (unsigned int)words[1],
	     (unsigned int)words[2], (unsigned int)words[3]);

    return buf;
}
514