mdmn_subr.c revision 11053:f33a1c7f3155
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <unistd.h>
28#include <sys/types.h>
29#include <sys/socket.h>
30#include <netinet/in.h>
31#include <arpa/inet.h>
32#include <thread.h>
33#include "meta.h"
34#include "mdmn_subr.h"
35
/* Defined outside this file; used below to set up the MCT of a set. */
extern int mdmn_init_set(set_t setno, int todo);

/*
 * Per set/class state word. The routines below keep the MDMN_BUSY,
 * MDMN_LOCKED, MDMN_SUSPEND_1 and MDMN_SUSPEND_ALL bits in here.
 * One mutex and one condition variable exist per set.
 */
uint_t mdmn_busy[MD_MAXSETS][MD_MN_NCLASSES];
mutex_t	mdmn_busy_mutex[MD_MAXSETS];
cond_t	mdmn_busy_cv[MD_MAXSETS];


/* the wakeup table for the initiator's side */
mdmn_wti_t mdmn_initiator_table[MD_MAXSETS][MD_MN_NCLASSES];

/* the wakeup table for the master */
mdmn_wtm_t mdmn_master_table[MD_MAXSETS][MD_MN_NCLASSES];

/* List of licensed ip addresses, indexed by node ID */
licensed_ip_t   licensed_nodes[NNODES];

/*
 * speed up the search for licensed ip addresses:
 * highest node ID that add_license() has entered so far
 */
md_mn_nodeid_t maxlicnodes = 0; /* 0 is not a valid node ID */
54
55/*
56 * Check if a given set/class combination is currently in use
57 * If in use, returns TRUE
58 * Otherwise returns FALSE
59 *
60 * Must be called with mdmn_busy_mutex held
61 */
62bool_t
63mdmn_is_class_busy(set_t setno, md_mn_msgclass_t class)
64{
65	if (mdmn_busy[setno][class] & MDMN_BUSY) {
66		return (TRUE);
67	} else {
68		return (FALSE);
69	}
70}
71
72/*
73 * Mark a given set/class combination as currently in use
74 * If the class was already in use, returns FALSE
75 * Otherwise returns TRUE
76 *
77 * So mdmn_mark_class_busy can be used like
78 * if (mdmn_mark_class_busy(setno, class) == FALSE)
79 * 	failure;
80 * else
81 *	success;
82 *
83 * Must be called with mdmn_busy_mutex held
84 */
85bool_t
86mdmn_mark_class_busy(set_t setno, md_mn_msgclass_t class)
87{
88	if (mdmn_busy[setno][class] & MDMN_BUSY) {
89		return (FALSE);
90	} else {
91		mdmn_busy[setno][class] |= MDMN_BUSY;
92		commd_debug(MD_MMV_MISC, "busy: set=%d, class=%d\n",
93		    setno, class);
94		return (TRUE);
95	}
96}
97
98/*
99 * Mark a given set/class combination as currently available
100 * Always succeeds, thus void.
101 *
102 * If this class is marked MDMN_SUSPEND_ALL, we are in the middle of
103 * draining all classes of this set.
104 * We have to mark class+1 as MDMN_SUSPEND_ALL too.
105 * If class+2 wasn't busy, we proceed with class+2, and so on
106 * If any class is busy, we return.
107 * Then the drain process will be continued by the mdmn_mark_class_unbusy() of
108 * that busy class
109 */
110void
111mdmn_mark_class_unbusy(set_t setno, md_mn_msgclass_t class)
112{
113	commd_debug(MD_MMV_MISC, "unbusy: set=%d, class=%d\n", setno, class);
114	mdmn_busy[setno][class] &= ~MDMN_BUSY;
115	/* something changed, inform threads waiting for that */
116	(void) cond_signal(&mdmn_busy_cv[setno]);
117
118	if ((mdmn_busy[setno][class] & MDMN_SUSPEND_ALL) == 0) {
119		return;
120	}
121
122	while (++class < MD_MN_NCLASSES) {
123		commd_debug(MD_MMV_MISC,
124		    "unbusy: suspending set=%d, class=%d\n", setno, class);
125		if (mdmn_mark_class_suspended(setno, class, MDMN_SUSPEND_ALL)
126		    == MDMNE_SET_NOT_DRAINED) {
127			break;
128		}
129	}
130
131}
132
133
134/*
135 * Check if a given set/class combination is locked.
136 */
137bool_t
138mdmn_is_class_locked(set_t setno, md_mn_msgclass_t class)
139{
140	if (mdmn_busy[setno][class] & MDMN_LOCKED) {
141		return (TRUE);
142	} else {
143		return (FALSE);
144	}
145}
146
147/*
148 * Mark a given set/class combination as locked.
149 * No checking is done here, so routine can be void.
150 * Locking a locked set/class is ok.
151 *
152 * Must be called with mdmn_busy_mutex held
153 */
154void
155mdmn_mark_class_locked(set_t setno, md_mn_msgclass_t class)
156{
157	mdmn_busy[setno][class] |= MDMN_LOCKED;
158}
159
160/*
161 * Mark a given set/class combination as unlocked.
162 * No checking is done here, so routine can be void.
163 * Unlocking a unlocked set/class is ok.
164 *
165 * Must be called with mdmn_busy_mutex held
166 */
167void
168mdmn_mark_class_unlocked(set_t setno, md_mn_msgclass_t class)
169{
170	mdmn_busy[setno][class] &= ~MDMN_LOCKED;
171}
172
173/*
174 * Suspend a set/class combination
175 *
176 * If called during draining all classes of a set susptype is MDMN_SUSPEND_ALL.
177 * If only one class is about to be drained susptype is MDMN_SUSPEND_1.
178 *
179 * Returns:
180 *	MDMNE_ACK if there are no outstanding messages
181 *	MDMNE_SET_NOT_DRAINED otherwise
182 *
183 * Must be called with mdmn_busy_mutex held for this set.
184 */
185int
186mdmn_mark_class_suspended(set_t setno, md_mn_msgclass_t class, uint_t susptype)
187{
188	/*
189	 * We use the mdmn_busy array to mark this set is suspended.
190	 */
191	mdmn_busy[setno][class] |= susptype;
192
193	/*
194	 * If there are outstanding messages for this set/class we
195	 * return MDMNE_SET_NOT_DRAINED, otherwise we return MDMNE_ACK
196	 */
197	if (mdmn_is_class_busy(setno, class) == TRUE) {
198		return (MDMNE_SET_NOT_DRAINED);
199	}
200	return (MDMNE_ACK);
201}
202
203/*
204 * Resume operation for a set/class combination after it was
205 * previously suspended
206 *
207 * If called from mdmn_comm_resume_svc_1 to resume _one_ specific class
208 * then susptype will be MDMN_SUSPEND_1
209 * Otherwise to resume all classes of one set,
210 * then susptype equals (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1)
211 *
212 * Always succeeds, thus void.
213 *
214 * Must be called with mdmn_busy_mutex held for this set.
215 */
216void
217mdmn_mark_class_resumed(set_t setno, md_mn_msgclass_t class, uint_t susptype)
218{
219	/* simply the reverse operation to mdmn_mark_set_drained() */
220	mdmn_busy[setno][class] &= ~susptype;
221}
222
223/*
224 * Check if a drain command was issued for this set/class combination.
225 *
226 * Must be called with mdmn_busy_mutex held for this set.
227 */
228bool_t
229mdmn_is_class_suspended(set_t setno, md_mn_msgclass_t class)
230{
231	if (mdmn_busy[setno][class] & (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1)) {
232		return (TRUE);
233	} else {
234		return (FALSE);
235	}
236}
237
238/*
239 * Put a result into the wakeup table for the master
240 * It's ensured that the msg id from the master_table entry and from
241 * result are matching
242 */
243void
244mdmn_set_master_table_res(set_t setno, md_mn_msgclass_t class,
245				md_mn_result_t  *res)
246{
247	mdmn_master_table[setno][class].wtm_result = res;
248}
249void
250mdmn_set_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id)
251{
252	MSGID_COPY(id, &(mdmn_master_table[setno][class].wtm_id));
253}
254
255void
256mdmn_set_master_table_addr(set_t setno, md_mn_msgclass_t class,
257    md_mn_nodeid_t nid)
258{
259	mdmn_master_table[setno][class].wtm_addr = nid;
260}
261
262
263md_mn_result_t *
264mdmn_get_master_table_res(set_t setno, md_mn_msgclass_t class)
265{
266	return (mdmn_master_table[setno][class].wtm_result);
267}
268
269void
270mdmn_get_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id)
271{
272	MSGID_COPY(&(mdmn_master_table[setno][class].wtm_id), id);
273}
274
275cond_t *
276mdmn_get_master_table_cv(set_t setno, md_mn_msgclass_t class)
277{
278	return (&(mdmn_master_table[setno][class].wtm_cv));
279}
280
281mutex_t *
282mdmn_get_master_table_mx(set_t setno, md_mn_msgclass_t class)
283{
284	return (&(mdmn_master_table[setno][class].wtm_mx));
285}
286
287md_mn_nodeid_t
288mdmn_get_master_table_addr(set_t setno, md_mn_msgclass_t class)
289{
290	return (mdmn_master_table[setno][class].wtm_addr);
291}
292
293
294
295/* here come the functions dealing with the wakeup table for the initiators */
296
297
298void
299mdmn_register_initiator_table(set_t setno, md_mn_msgclass_t class,
300    md_mn_msg_t *msg, SVCXPRT *transp)
301{
302	uint_t nnodes	= set_descriptor[setno]->sd_mn_numnodes;
303	time_t timeout	= mdmn_get_timeout(msg->msg_type);
304
305
306	MSGID_COPY(&(msg->msg_msgid),
307	    &(mdmn_initiator_table[setno][class].wti_id));
308	mdmn_initiator_table[setno][class].wti_transp = transp;
309	mdmn_initiator_table[setno][class].wti_args = (char *)msg;
310
311	/*
312	 * as the point in time where we want to be guaranteed to be woken up
313	 * again, we chose the
314	 * current time + nnodes times the timeout value for the message type
315	 */
316	mdmn_initiator_table[setno][class].wti_time =
317	    time((time_t *)NULL) + (nnodes * timeout);
318}
319
320/*
321 * If the set/class combination is currently busy, return MDMNE_CLASS_BUSY
322 * Otherwise return MDMNE_ACK
323 */
324int
325mdmn_check_initiator_table(set_t setno, md_mn_msgclass_t class)
326{
327	if ((mdmn_initiator_table[setno][class].wti_id.mid_nid == ~0u) &&
328	    (mdmn_initiator_table[setno][class].wti_transp == (SVCXPRT *)NULL))
329		return (MDMNE_ACK);
330	return (MDMNE_CLASS_BUSY);
331}
332
333/*
334 * Remove an entry from the initiator table entirely,
335 * This must be done with mutex held.
336 */
337void
338mdmn_unregister_initiator_table(set_t setno, md_mn_msgclass_t class)
339{
340	mdmn_initiator_table[setno][class].wti_id.mid_nid = ~0u;
341	mdmn_initiator_table[setno][class].wti_id.mid_time = 0LL;
342	mdmn_initiator_table[setno][class].wti_transp = (SVCXPRT *)NULL;
343	mdmn_initiator_table[setno][class].wti_args = (char *)0;
344	mdmn_initiator_table[setno][class].wti_time = (time_t)0;
345}
346
347void
348mdmn_get_initiator_table_id(set_t setno, md_mn_msgclass_t class,
349				md_mn_msgid_t *mid)
350{
351	MSGID_COPY(&(mdmn_initiator_table[setno][class].wti_id), mid);
352}
353
354SVCXPRT *
355mdmn_get_initiator_table_transp(set_t setno, md_mn_msgclass_t class)
356{
357	return (mdmn_initiator_table[setno][class].wti_transp);
358}
359
360char *
361mdmn_get_initiator_table_args(set_t setno, md_mn_msgclass_t class)
362{
363	return (mdmn_initiator_table[setno][class].wti_args);
364}
365
366mutex_t *
367mdmn_get_initiator_table_mx(set_t setno, md_mn_msgclass_t class)
368{
369	return (&(mdmn_initiator_table[setno][class].wti_mx));
370}
371
372time_t
373mdmn_get_initiator_table_time(set_t setno, md_mn_msgclass_t class)
374{
375	return (mdmn_initiator_table[setno][class].wti_time);
376}
377
/* The three objects below are defined outside this file. */
extern uint_t	md_commd_global_verb;	/* global bitmask for debug classes */
extern FILE	*commdout;		/* debug output file for the commd */
/* reference time; timestamps print gethrtime() minus this value */
extern hrtime_t __savetime;
381
382
383/*
384 * Print debug messages to the terminal or to syslog
385 * commd_debug(MD_MMV_SYSLOG,....) is always printed (and always via syslog),
386 * even if md_commd_global_verb is zero.
387 *
388 * Otherwise the correct bit must be set in the bitmask md_commd_global_verb
389 */
390void
391commd_debug(uint_t debug_class, const char *message, ...)
392{
393	va_list ap;
394
395	/* Is this a message for syslog? */
396	if (debug_class == MD_MMV_SYSLOG) {
397
398		va_start(ap, message);
399		(void) vsyslog(LOG_WARNING, message, ap);
400		va_end(ap);
401	} else {
402		/* Is this debug_class set in the global verbosity state?  */
403		if ((md_commd_global_verb & debug_class) == 0) {
404			return;
405		}
406		/* Is our output file already functioning? */
407		if (commdout == NULL) {
408			return;
409		}
410		/* Are timestamps activated ? */
411		if (md_commd_global_verb & MD_MMV_TIMESTAMP) {
412			/* print time since last TRESET in usecs */
413			(void) fprintf(commdout, "[%s]",
414			    meta_print_hrtime(gethrtime() - __savetime));
415		}
416		/* Now print the real message */
417		va_start(ap, message);
418		(void) vfprintf(commdout, message, ap);
419		va_end(ap);
420	}
421}
422
423
424void
425dump_hex(uint_t debug_class, unsigned int *x, int cnt)
426{
427	cnt /= sizeof (unsigned int);
428	while (cnt--) {
429		commd_debug(debug_class, "0x%8x ", *x++);
430		if (cnt % 4)
431			continue;
432		commd_debug(debug_class, "\n");
433	}
434	commd_debug(debug_class, "\n");
435}
436
437/* debug output: dump a message */
438void
439dump_msg(uint_t dbc, char *prefix, md_mn_msg_t *msg)
440{
441	commd_debug(dbc, "%s &msg	= 0x%x\n", prefix, (int)msg);
442	commd_debug(dbc, "%s ID	= (%d, 0x%llx-%d)\n", prefix,
443	    MSGID_ELEMS(msg->msg_msgid));
444	commd_debug(dbc, "%s sender	= %d\n", prefix, msg->msg_sender);
445	commd_debug(dbc, "%s flags	= 0x%x\n", prefix, msg->msg_flags);
446	commd_debug(dbc, "%s setno	= %d\n", prefix, msg->msg_setno);
447	commd_debug(dbc, "%s recipient  = %d\n", prefix, msg->msg_recipient);
448	commd_debug(dbc, "%s type	= %d\n", prefix, msg->msg_type);
449	commd_debug(dbc, "%s size	= %d\n", prefix, msg->msg_event_size);
450	if (msg->msg_event_size) {
451		commd_debug(dbc, "%s data	=\n", prefix);
452		dump_hex(dbc, (unsigned int *)(void *)msg->msg_event_data,
453		    msg->msg_event_size);
454	}
455}
456
457/* debug output: dump a result structure */
458void
459dump_result(uint_t dbc, char *prefix, md_mn_result_t *res)
460{
461	commd_debug(dbc, "%s &res	= 0x%x\n", prefix, (int)res);
462	commd_debug(dbc, "%s ID	= (%d, 0x%llx-%d)\n", prefix,
463	    MSGID_ELEMS(res->mmr_msgid));
464	commd_debug(dbc, "%s setno	= %d\n", prefix, res->mmr_setno);
465	commd_debug(dbc, "%s type	= %d\n", prefix, res->mmr_msgtype);
466	commd_debug(dbc, "%s flags	= 0x%x\n", prefix, res->mmr_flags);
467	commd_debug(dbc, "%s comm_state= %d\n", prefix, res->mmr_comm_state);
468	commd_debug(dbc, "%s exitval	= %d\n", prefix, res->mmr_exitval);
469	commd_debug(dbc, "%s out_size	= %d\n", prefix, res->mmr_out_size);
470	if (res->mmr_out_size)
471		commd_debug(dbc, "%s out	= %s\n", prefix, res->mmr_out);
472	commd_debug(dbc, "%s err_size	= %d\n", prefix, res->mmr_err_size);
473	if (res->mmr_err_size)
474		commd_debug(dbc, "%s err	= %s\n", prefix, res->mmr_err);
475}
476
477
478/*
479 * Here we find out, where to store or find the results for a given msg.
480 *
481 * Per set we have a pointer to a three dimensional array:
482 * mct[set] -> mct_mce[NNODES][MD_MN_NCLASSES][MAX_SUBMESSAGES]
483 * So, for every possible node and for every possible class we can store
484 * MAX_SUBMESSAGES results.
485 * the way to find the correct index is
486 *	submessage +
487 *	class * MAX_SUBMESSAGES +
488 *	nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES.
489 *
490 * To find the correct address the index has to be multiplied
491 * by the size of one entry.
492 */
493static md_mn_mce_t *
494mdmn_get_mce_by_msg(md_mn_msg_t *msg)
495{
496	set_t	setno = msg->msg_setno;
497	int	nodeid = msg->msg_msgid.mid_nid;
498	int	submsg = msg->msg_msgid.mid_smid;
499	int	mct_index;
500	off_t	mct_offset;
501	md_mn_msgclass_t class;
502
503	if (mct[setno] != NULL) {
504		if (mdmn_init_set(setno, MDMN_SET_MCT) != 0) {
505			return ((md_mn_mce_t *)MDMN_MCT_ERROR);
506		}
507	}
508
509	if (submsg == 0) {
510		class = mdmn_get_message_class(msg->msg_type);
511	} else {
512		class = msg->msg_msgid.mid_oclass;
513	}
514
515	mct_index = submsg + class * MAX_SUBMESSAGES +
516	    nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES;
517
518	mct_offset = mct_index * sizeof (md_mn_mce_t);
519
520	/* LINTED Pointer alignment */
521	return ((md_mn_mce_t *)((caddr_t)(mct[setno]) + mct_offset));
522
523	/*
524	 * the lint clean version would be:
525	 * return (&(mct[setno]->mct_mce[0][0][0]) + mct_index);
526	 * :-)
527	 */
528}
529
530/*
531 * mdmn_mark_completion(msg, result, flag)
532 * Stores the result of this message into the mmaped memory MCT[setno]
533 * In case the same message comes along a second time we will know that
534 * this message has already been processed and we can deliver the
535 * results immediately.
536 *
537 * Before a message handler is called, the message in the MCT is flagged
538 * as currently being processed (flag == MDMN_MCT_IN_PROGRESS).
539 * This we need so we don't start a second handler for the same message.
540 *
541 * After a message handler is completed, this routine is called with
542 * flag == MDMN_MCT_DONE and the appropriate result that we store in the MCT.
543 * As MCT[setno] is memory mapped to disks, this information is persistent
544 * even across a crash of the commd.
545 * It doesn't have to be persistent across a reboot, though.
546 *
547 * Returns MDMN_MCT_DONE in case of success
548 * Returns MDMN_MCT_ERROR in case of error creating the mct
549 */
550int
551mdmn_mark_completion(md_mn_msg_t *msg, md_mn_result_t *result, uint_t flag)
552{
553	md_mn_mce_t	*mce;
554	uint_t		offset_in_page;
555
556	mce = mdmn_get_mce_by_msg(msg);
557	if (mce == (md_mn_mce_t *)-1) {
558		return (MDMN_MCT_ERROR);
559	}
560	offset_in_page = (uint_t)(caddr_t)mce % sysconf(_SC_PAGESIZE);
561
562	(void) memset(mce, 0, sizeof (md_mn_mce_t));
563
564	MSGID_COPY(&msg->msg_msgid, &mce->mce_result.mmr_msgid);
565	if (flag == MDMN_MCT_IN_PROGRESS) {
566		mce->mce_flags = MDMN_MCT_IN_PROGRESS;
567		goto mmc_out;
568	}
569
570	/*
571	 * In case the message flags indicate that the result should not be
572	 * stored in the MCT, we return a MDMN_MCT_NOT_DONE,
573	 * so the message will be processed at any rate,
574	 * even if we process this message twice.
575	 * this makes sense if the result of the message is a dynamic status
576	 * and might have changed meanwhile.
577	 */
578	if (msg->msg_flags & MD_MSGF_NO_MCT) {
579		return (MDMN_MCT_DONE);
580	}
581
582	/* This msg is no longer in progress */
583	mce->mce_flags = MDMN_MCT_DONE;
584
585	mce->mce_result.mmr_msgtype	    = result->mmr_msgtype;
586	mce->mce_result.mmr_setno	    = result->mmr_setno;
587	mce->mce_result.mmr_flags	    = result->mmr_flags;
588	mce->mce_result.mmr_sender	    = result->mmr_sender;
589	mce->mce_result.mmr_failing_node    = result->mmr_failing_node;
590	mce->mce_result.mmr_comm_state	    = result->mmr_comm_state;
591	mce->mce_result.mmr_exitval	    = result->mmr_exitval;
592
593	/* if mmr_exitval is zero, we store stdout, otherwise stderr */
594	if (result->mmr_exitval == 0) {
595		if (result->mmr_out_size > 0) {
596			(void) memcpy(mce->mce_data, result->mmr_out,
597			    result->mmr_out_size);
598			mce->mce_result.mmr_out_size = result->mmr_out_size;
599		}
600	} else {
601		if (result->mmr_err_size > 0) {
602			mce->mce_result.mmr_err_size = result->mmr_err_size;
603			(void) memcpy(mce->mce_data, result->mmr_err,
604			    result->mmr_err_size);
605		}
606	}
607
608	dump_result(MD_MMV_PROC_S, "mdmn_mark_completion1", result);
609
610mmc_out:
611	/* now flush this entry to disk */
612	(void) msync((caddr_t)mce - offset_in_page,
613	    sizeof (md_mn_mce_t) + offset_in_page, MS_SYNC);
614	return (MDMN_MCT_DONE);
615}
616
617/*
618 * mdmn_check_completion(msg, resultp)
619 * checks if msg has already been processed on this node, and if so copies
620 * the stored result to resultp.
621 *
622 * returns MDMN_MCT_DONE and the result filled out acurately in case the
623 *		msg has already been processed before
624 * returns MDMN_MCT_NOT_DONE if the message has not been processed before
625 * returns MDMN_MCT_IN_PROGRESS if the message is currently being processed
626 *	This can only occur on a slave node.
627 * return MDMN_MCT_ERROR in case of error creating the mct
628 */
629int
630mdmn_check_completion(md_mn_msg_t *msg, md_mn_result_t *result)
631{
632	md_mn_mce_t	*mce;
633	size_t		outsize;
634	size_t		errsize;
635
636	mce = mdmn_get_mce_by_msg(msg);
637	if (mce == (md_mn_mce_t *)MDMN_MCT_ERROR) {
638		return (MDMN_MCT_ERROR); /* what to do in that case ? */
639	}
640	if (MSGID_CMP(&(msg->msg_msgid), &(mce->mce_result.mmr_msgid))) {
641		/* is the message completed, or in progress? */
642		if (mce->mce_flags & MDMN_MCT_IN_PROGRESS) {
643			return (MDMN_MCT_IN_PROGRESS);
644		}
645		/*
646		 * See comment on MD_MSGF_NO_MCT above, if this flag is set
647		 * for a message no result was stored and so the message has
648		 * to be processed no matter if this is the 2nd time then.
649		 */
650		if (msg->msg_flags & MD_MSGF_NO_MCT) {
651			return (MDMN_MCT_NOT_DONE);
652		}
653
654		/* Paranoia check: mce_flags must be MDMN_MCT_DONE here */
655		if ((mce->mce_flags & MDMN_MCT_DONE) == 0) {
656			commd_debug(MD_MMV_ALL,
657			    "mdmn_check_completion: msg not done and not in "
658			    "progress! ID = (%d, 0x%llx-%d)\n",
659			    MSGID_ELEMS(msg->msg_msgid));
660			return (MDMN_MCT_NOT_DONE);
661		}
662		/*
663		 * Already processed.
664		 * Copy saved results data;
665		 * return only a pointer to any output.
666		 */
667		MSGID_COPY(&(mce->mce_result.mmr_msgid), &result->mmr_msgid);
668		result->mmr_msgtype	    = mce->mce_result.mmr_msgtype;
669		result->mmr_setno	    = mce->mce_result.mmr_setno;
670		result->mmr_flags	    = mce->mce_result.mmr_flags;
671		result->mmr_sender	    = mce->mce_result.mmr_sender;
672		result->mmr_failing_node    = mce->mce_result.mmr_failing_node;
673		result->mmr_comm_state	    = mce->mce_result.mmr_comm_state;
674		result->mmr_exitval	    = mce->mce_result.mmr_exitval;
675		result->mmr_err		    = NULL;
676		result->mmr_out		    = NULL;
677		outsize = result->mmr_out_size = mce->mce_result.mmr_out_size;
678		errsize = result->mmr_err_size = mce->mce_result.mmr_err_size;
679		/*
680		 * if the exit val is zero only stdout was stored (if any)
681		 * otherwise only stderr was stored (if any)
682		 */
683		if (result->mmr_exitval == 0) {
684			if (outsize != 0) {
685				result->mmr_out = Zalloc(outsize);
686				(void) memcpy(result->mmr_out, mce->mce_data,
687				    outsize);
688			}
689		} else {
690			if (errsize != 0) {
691				result->mmr_err = Zalloc(errsize);
692				(void) memcpy(result->mmr_err, mce->mce_data,
693				    errsize);
694			}
695		}
696		commd_debug(MD_MMV_MISC,
697		    "mdmn_check_completion: msg already processed \n");
698		dump_result(MD_MMV_MISC, "mdmn_check_completion", result);
699		return (MDMN_MCT_DONE);
700	}
701	commd_debug(MD_MMV_MISC,
702	    "mdmn_check_completion: msg not yet processed\n");
703	return (MDMN_MCT_NOT_DONE);
704}
705
706
707
708/*
709 * check_license(rqstp, chknid)
710 *
711 * Is this RPC request sent from a licensed host?
712 *
713 * If chknid is non-zero, the caller of check_license() knows the ID of
714 * the sender. Then we check just the one entry of licensed_nodes[]
715 *
716 * If chknid is zero, the sender is not known. In that case the sender must be
717 * the local node.
718 *
719 * If the host is licensed, return TRUE, else return FALSE
720 */
721bool_t
722check_license(struct svc_req *rqstp, md_mn_nodeid_t chknid)
723{
724	char		buf[INET6_ADDRSTRLEN];
725	void		*caller = NULL;
726	in_addr_t	caller_ipv4;
727	in6_addr_t	caller_ipv6;
728	struct sockaddr	*ca;
729
730
731	ca = (struct sockaddr *)(void *)svc_getrpccaller(rqstp->rq_xprt)->buf;
732
733	if (ca->sa_family == AF_INET) {
734		caller_ipv4 =
735		    ((struct sockaddr_in *)(void *)ca)->sin_addr.s_addr;
736		caller = (void *)&caller_ipv4;
737
738		if (chknid == 0) {
739			/* check against local node */
740			if (caller_ipv4 == htonl(INADDR_LOOPBACK)) {
741				return (TRUE);
742
743			}
744		} else {
745			/* check against one specific node */
746			if ((caller_ipv4 == licensed_nodes[chknid].lip_ipv4) &&
747			    (licensed_nodes[chknid].lip_family == AF_INET)) {
748				return (TRUE);
749			} else {
750				commd_debug(MD_MMV_MISC,
751				    "Bad attempt from %x ln[%d]=%x\n",
752				    caller_ipv4, chknid,
753				    licensed_nodes[chknid].lip_ipv4);
754			}
755		}
756	} else if (ca->sa_family == AF_INET6) {
757		caller_ipv6 = ((struct sockaddr_in6 *)(void *)ca)->sin6_addr;
758		caller = (void *)&caller_ipv6;
759
760		if (chknid == 0) {
761			/* check against local node */
762			if (IN6_IS_ADDR_LOOPBACK(&caller_ipv6)) {
763				return (TRUE);
764
765			}
766		} else {
767			/* check against one specific node */
768			if (IN6_ARE_ADDR_EQUAL(&caller_ipv6,
769			    &(licensed_nodes[chknid].lip_ipv6)) &&
770			    (licensed_nodes[chknid].lip_family == AF_INET6)) {
771				return (TRUE);
772			}
773		}
774	}
775	/* if  we are here, we were contacted by an unlicensed node */
776	commd_debug(MD_MMV_SYSLOG,
777	    "Bad attempt to contact rpc.mdcommd from %s\n",
778	    caller ?
779	    inet_ntop(ca->sa_family, caller, buf, INET6_ADDRSTRLEN) :
780	    "unknown");
781
782	return (FALSE);
783}
784
785/*
786 * Add a node to the list of licensed nodes.
787 *
788 * Only IPv4 is currently supported.
789 * for IPv6, we need to change md_mnnode_desc.
790 */
791void
792add_license(md_mnnode_desc *node)
793{
794	md_mn_nodeid_t nid = node->nd_nodeid;
795	char		buf[INET6_ADDRSTRLEN];
796
797	/*
798	 * If this node is not yet licensed, do it now.
799	 * For now only IPv4 addresses are supported.
800	 */
801	commd_debug(MD_MMV_MISC, "add_lic(%s): ln[%d]=%s, lnc[%d]=%d\n",
802	    node->nd_priv_ic, nid,
803	    inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4,
804	    buf, INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt);
805
806	if (licensed_nodes[nid].lip_ipv4 == (in_addr_t)0) {
807		licensed_nodes[nid].lip_family = AF_INET; /* IPv4 */
808		licensed_nodes[nid].lip_ipv4 = inet_addr(node->nd_priv_ic);
809		/* keep track of the last entry for faster search */
810		if (nid > maxlicnodes)
811			maxlicnodes = nid;
812
813	}
814	/* in any case bump up the reference count */
815	licensed_nodes[nid].lip_cnt++;
816}
817
818/*
819 * lower the reference count for one node.
820 * If that drops to zero, remove the node from the list of licensed nodes
821 *
822 * Only IPv4 is currently supported.
823 * for IPv6, we need to change md_mnnode_desc.
824 */
825void
826rem_license(md_mnnode_desc *node)
827{
828	md_mn_nodeid_t nid = node->nd_nodeid;
829	char		buf[INET6_ADDRSTRLEN];
830
831	commd_debug(MD_MMV_MISC, "rem_lic(%s): ln[%d]=%s, lnc[%d]=%d\n",
832	    node->nd_priv_ic, nid,
833	    inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4, buf,
834	    INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt);
835
836	assert(licensed_nodes[nid].lip_cnt > 0);
837
838	/*
839	 * If this was the last reference to that node, it's license expires
840	 * For now only IPv4 addresses are supported.
841	 */
842	if (--licensed_nodes[nid].lip_cnt == 0) {
843		licensed_nodes[nid].lip_ipv4 = (in_addr_t)0;
844	}
845}
846