mdmn_subr.c revision 11053:f33a1c7f3155
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27#include <unistd.h> 28#include <sys/types.h> 29#include <sys/socket.h> 30#include <netinet/in.h> 31#include <arpa/inet.h> 32#include <thread.h> 33#include "meta.h" 34#include "mdmn_subr.h" 35 36extern int mdmn_init_set(set_t setno, int todo); 37 38uint_t mdmn_busy[MD_MAXSETS][MD_MN_NCLASSES]; 39mutex_t mdmn_busy_mutex[MD_MAXSETS]; 40cond_t mdmn_busy_cv[MD_MAXSETS]; 41 42 43/* the wakeup table for the initiator's side */ 44mdmn_wti_t mdmn_initiator_table[MD_MAXSETS][MD_MN_NCLASSES]; 45 46/* the wakeup table for the master */ 47mdmn_wtm_t mdmn_master_table[MD_MAXSETS][MD_MN_NCLASSES]; 48 49/* List of licensed ip addresses */ 50licensed_ip_t licensed_nodes[NNODES]; 51 52/* speed up the search for licensed ip addresses */ 53md_mn_nodeid_t maxlicnodes = 0; /* 0 is not a valid node ID */ 54 55/* 56 * Check if a given set/class combination is currently in use 57 * If in use, returns TRUE 58 * Otherwise returns FALSE 59 * 60 * Must be called with mdmn_busy_mutex held 61 */ 62bool_t 63mdmn_is_class_busy(set_t setno, md_mn_msgclass_t class) 64{ 65 if (mdmn_busy[setno][class] & MDMN_BUSY) { 66 return (TRUE); 67 } else { 68 return (FALSE); 69 } 70} 71 72/* 73 * Mark a given set/class combination as currently in use 74 * If the class was already in use, returns FALSE 75 * Otherwise returns TRUE 76 * 77 * So mdmn_mark_class_busy can be used like 78 * if (mdmn_mark_class_busy(setno, class) == FALSE) 79 * failure; 80 * else 81 * success; 82 * 83 * Must be called with mdmn_busy_mutex held 84 */ 85bool_t 86mdmn_mark_class_busy(set_t setno, md_mn_msgclass_t class) 87{ 88 if (mdmn_busy[setno][class] & MDMN_BUSY) { 89 return (FALSE); 90 } else { 91 mdmn_busy[setno][class] |= MDMN_BUSY; 92 commd_debug(MD_MMV_MISC, "busy: set=%d, class=%d\n", 93 setno, class); 94 return (TRUE); 95 } 96} 97 98/* 99 * Mark a given set/class combination as currently available 100 * Always succeeds, thus void. 101 * 102 * If this class is marked MDMN_SUSPEND_ALL, we are in the middle of 103 * draining all classes of this set. 104 * We have to mark class+1 as MDMN_SUSPEND_ALL too. 105 * If class+2 wasn't busy, we proceed with class+2, and so on 106 * If any class is busy, we return. 107 * Then the drain process will be continued by the mdmn_mark_class_unbusy() of 108 * that busy class 109 */ 110void 111mdmn_mark_class_unbusy(set_t setno, md_mn_msgclass_t class) 112{ 113 commd_debug(MD_MMV_MISC, "unbusy: set=%d, class=%d\n", setno, class); 114 mdmn_busy[setno][class] &= ~MDMN_BUSY; 115 /* something changed, inform threads waiting for that */ 116 (void) cond_signal(&mdmn_busy_cv[setno]); 117 118 if ((mdmn_busy[setno][class] & MDMN_SUSPEND_ALL) == 0) { 119 return; 120 } 121 122 while (++class < MD_MN_NCLASSES) { 123 commd_debug(MD_MMV_MISC, 124 "unbusy: suspending set=%d, class=%d\n", setno, class); 125 if (mdmn_mark_class_suspended(setno, class, MDMN_SUSPEND_ALL) 126 == MDMNE_SET_NOT_DRAINED) { 127 break; 128 } 129 } 130 131} 132 133 134/* 135 * Check if a given set/class combination is locked. 136 */ 137bool_t 138mdmn_is_class_locked(set_t setno, md_mn_msgclass_t class) 139{ 140 if (mdmn_busy[setno][class] & MDMN_LOCKED) { 141 return (TRUE); 142 } else { 143 return (FALSE); 144 } 145} 146 147/* 148 * Mark a given set/class combination as locked. 149 * No checking is done here, so routine can be void. 150 * Locking a locked set/class is ok. 151 * 152 * Must be called with mdmn_busy_mutex held 153 */ 154void 155mdmn_mark_class_locked(set_t setno, md_mn_msgclass_t class) 156{ 157 mdmn_busy[setno][class] |= MDMN_LOCKED; 158} 159 160/* 161 * Mark a given set/class combination as unlocked. 162 * No checking is done here, so routine can be void. 163 * Unlocking a unlocked set/class is ok. 164 * 165 * Must be called with mdmn_busy_mutex held 166 */ 167void 168mdmn_mark_class_unlocked(set_t setno, md_mn_msgclass_t class) 169{ 170 mdmn_busy[setno][class] &= ~MDMN_LOCKED; 171} 172 173/* 174 * Suspend a set/class combination 175 * 176 * If called during draining all classes of a set susptype is MDMN_SUSPEND_ALL. 177 * If only one class is about to be drained susptype is MDMN_SUSPEND_1. 178 * 179 * Returns: 180 * MDMNE_ACK if there are no outstanding messages 181 * MDMNE_SET_NOT_DRAINED otherwise 182 * 183 * Must be called with mdmn_busy_mutex held for this set. 184 */ 185int 186mdmn_mark_class_suspended(set_t setno, md_mn_msgclass_t class, uint_t susptype) 187{ 188 /* 189 * We use the mdmn_busy array to mark this set is suspended. 190 */ 191 mdmn_busy[setno][class] |= susptype; 192 193 /* 194 * If there are outstanding messages for this set/class we 195 * return MDMNE_SET_NOT_DRAINED, otherwise we return MDMNE_ACK 196 */ 197 if (mdmn_is_class_busy(setno, class) == TRUE) { 198 return (MDMNE_SET_NOT_DRAINED); 199 } 200 return (MDMNE_ACK); 201} 202 203/* 204 * Resume operation for a set/class combination after it was 205 * previously suspended 206 * 207 * If called from mdmn_comm_resume_svc_1 to resume _one_ specific class 208 * then susptype will be MDMN_SUSPEND_1 209 * Otherwise to resume all classes of one set, 210 * then susptype equals (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1) 211 * 212 * Always succeeds, thus void. 213 * 214 * Must be called with mdmn_busy_mutex held for this set. 215 */ 216void 217mdmn_mark_class_resumed(set_t setno, md_mn_msgclass_t class, uint_t susptype) 218{ 219 /* simply the reverse operation to mdmn_mark_set_drained() */ 220 mdmn_busy[setno][class] &= ~susptype; 221} 222 223/* 224 * Check if a drain command was issued for this set/class combination. 225 * 226 * Must be called with mdmn_busy_mutex held for this set. 227 */ 228bool_t 229mdmn_is_class_suspended(set_t setno, md_mn_msgclass_t class) 230{ 231 if (mdmn_busy[setno][class] & (MDMN_SUSPEND_ALL | MDMN_SUSPEND_1)) { 232 return (TRUE); 233 } else { 234 return (FALSE); 235 } 236} 237 238/* 239 * Put a result into the wakeup table for the master 240 * It's ensured that the msg id from the master_table entry and from 241 * result are matching 242 */ 243void 244mdmn_set_master_table_res(set_t setno, md_mn_msgclass_t class, 245 md_mn_result_t *res) 246{ 247 mdmn_master_table[setno][class].wtm_result = res; 248} 249void 250mdmn_set_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id) 251{ 252 MSGID_COPY(id, &(mdmn_master_table[setno][class].wtm_id)); 253} 254 255void 256mdmn_set_master_table_addr(set_t setno, md_mn_msgclass_t class, 257 md_mn_nodeid_t nid) 258{ 259 mdmn_master_table[setno][class].wtm_addr = nid; 260} 261 262 263md_mn_result_t * 264mdmn_get_master_table_res(set_t setno, md_mn_msgclass_t class) 265{ 266 return (mdmn_master_table[setno][class].wtm_result); 267} 268 269void 270mdmn_get_master_table_id(set_t setno, md_mn_msgclass_t class, md_mn_msgid_t *id) 271{ 272 MSGID_COPY(&(mdmn_master_table[setno][class].wtm_id), id); 273} 274 275cond_t * 276mdmn_get_master_table_cv(set_t setno, md_mn_msgclass_t class) 277{ 278 return (&(mdmn_master_table[setno][class].wtm_cv)); 279} 280 281mutex_t * 282mdmn_get_master_table_mx(set_t setno, md_mn_msgclass_t class) 283{ 284 return (&(mdmn_master_table[setno][class].wtm_mx)); 285} 286 287md_mn_nodeid_t 288mdmn_get_master_table_addr(set_t setno, md_mn_msgclass_t class) 289{ 290 return (mdmn_master_table[setno][class].wtm_addr); 291} 292 293 294 295/* here come the functions dealing with the wakeup table for the initiators */ 296 297 298void 299mdmn_register_initiator_table(set_t setno, md_mn_msgclass_t class, 300 md_mn_msg_t *msg, SVCXPRT *transp) 301{ 302 uint_t nnodes = set_descriptor[setno]->sd_mn_numnodes; 303 time_t timeout = mdmn_get_timeout(msg->msg_type); 304 305 306 MSGID_COPY(&(msg->msg_msgid), 307 &(mdmn_initiator_table[setno][class].wti_id)); 308 mdmn_initiator_table[setno][class].wti_transp = transp; 309 mdmn_initiator_table[setno][class].wti_args = (char *)msg; 310 311 /* 312 * as the point in time where we want to be guaranteed to be woken up 313 * again, we chose the 314 * current time + nnodes times the timeout value for the message type 315 */ 316 mdmn_initiator_table[setno][class].wti_time = 317 time((time_t *)NULL) + (nnodes * timeout); 318} 319 320/* 321 * If the set/class combination is currently busy, return MDMNE_CLASS_BUSY 322 * Otherwise return MDMNE_ACK 323 */ 324int 325mdmn_check_initiator_table(set_t setno, md_mn_msgclass_t class) 326{ 327 if ((mdmn_initiator_table[setno][class].wti_id.mid_nid == ~0u) && 328 (mdmn_initiator_table[setno][class].wti_transp == (SVCXPRT *)NULL)) 329 return (MDMNE_ACK); 330 return (MDMNE_CLASS_BUSY); 331} 332 333/* 334 * Remove an entry from the initiator table entirely, 335 * This must be done with mutex held. 336 */ 337void 338mdmn_unregister_initiator_table(set_t setno, md_mn_msgclass_t class) 339{ 340 mdmn_initiator_table[setno][class].wti_id.mid_nid = ~0u; 341 mdmn_initiator_table[setno][class].wti_id.mid_time = 0LL; 342 mdmn_initiator_table[setno][class].wti_transp = (SVCXPRT *)NULL; 343 mdmn_initiator_table[setno][class].wti_args = (char *)0; 344 mdmn_initiator_table[setno][class].wti_time = (time_t)0; 345} 346 347void 348mdmn_get_initiator_table_id(set_t setno, md_mn_msgclass_t class, 349 md_mn_msgid_t *mid) 350{ 351 MSGID_COPY(&(mdmn_initiator_table[setno][class].wti_id), mid); 352} 353 354SVCXPRT * 355mdmn_get_initiator_table_transp(set_t setno, md_mn_msgclass_t class) 356{ 357 return (mdmn_initiator_table[setno][class].wti_transp); 358} 359 360char * 361mdmn_get_initiator_table_args(set_t setno, md_mn_msgclass_t class) 362{ 363 return (mdmn_initiator_table[setno][class].wti_args); 364} 365 366mutex_t * 367mdmn_get_initiator_table_mx(set_t setno, md_mn_msgclass_t class) 368{ 369 return (&(mdmn_initiator_table[setno][class].wti_mx)); 370} 371 372time_t 373mdmn_get_initiator_table_time(set_t setno, md_mn_msgclass_t class) 374{ 375 return (mdmn_initiator_table[setno][class].wti_time); 376} 377 378extern uint_t md_commd_global_verb; /* global bitmask for debug classes */ 379extern FILE *commdout; /* debug output file for the commd */ 380extern hrtime_t __savetime; 381 382 383/* 384 * Print debug messages to the terminal or to syslog 385 * commd_debug(MD_MMV_SYSLOG,....) is always printed (and always via syslog), 386 * even if md_commd_global_verb is zero. 387 * 388 * Otherwise the correct bit must be set in the bitmask md_commd_global_verb 389 */ 390void 391commd_debug(uint_t debug_class, const char *message, ...) 392{ 393 va_list ap; 394 395 /* Is this a message for syslog? */ 396 if (debug_class == MD_MMV_SYSLOG) { 397 398 va_start(ap, message); 399 (void) vsyslog(LOG_WARNING, message, ap); 400 va_end(ap); 401 } else { 402 /* Is this debug_class set in the global verbosity state? */ 403 if ((md_commd_global_verb & debug_class) == 0) { 404 return; 405 } 406 /* Is our output file already functioning? */ 407 if (commdout == NULL) { 408 return; 409 } 410 /* Are timestamps activated ? */ 411 if (md_commd_global_verb & MD_MMV_TIMESTAMP) { 412 /* print time since last TRESET in usecs */ 413 (void) fprintf(commdout, "[%s]", 414 meta_print_hrtime(gethrtime() - __savetime)); 415 } 416 /* Now print the real message */ 417 va_start(ap, message); 418 (void) vfprintf(commdout, message, ap); 419 va_end(ap); 420 } 421} 422 423 424void 425dump_hex(uint_t debug_class, unsigned int *x, int cnt) 426{ 427 cnt /= sizeof (unsigned int); 428 while (cnt--) { 429 commd_debug(debug_class, "0x%8x ", *x++); 430 if (cnt % 4) 431 continue; 432 commd_debug(debug_class, "\n"); 433 } 434 commd_debug(debug_class, "\n"); 435} 436 437/* debug output: dump a message */ 438void 439dump_msg(uint_t dbc, char *prefix, md_mn_msg_t *msg) 440{ 441 commd_debug(dbc, "%s &msg = 0x%x\n", prefix, (int)msg); 442 commd_debug(dbc, "%s ID = (%d, 0x%llx-%d)\n", prefix, 443 MSGID_ELEMS(msg->msg_msgid)); 444 commd_debug(dbc, "%s sender = %d\n", prefix, msg->msg_sender); 445 commd_debug(dbc, "%s flags = 0x%x\n", prefix, msg->msg_flags); 446 commd_debug(dbc, "%s setno = %d\n", prefix, msg->msg_setno); 447 commd_debug(dbc, "%s recipient = %d\n", prefix, msg->msg_recipient); 448 commd_debug(dbc, "%s type = %d\n", prefix, msg->msg_type); 449 commd_debug(dbc, "%s size = %d\n", prefix, msg->msg_event_size); 450 if (msg->msg_event_size) { 451 commd_debug(dbc, "%s data =\n", prefix); 452 dump_hex(dbc, (unsigned int *)(void *)msg->msg_event_data, 453 msg->msg_event_size); 454 } 455} 456 457/* debug output: dump a result structure */ 458void 459dump_result(uint_t dbc, char *prefix, md_mn_result_t *res) 460{ 461 commd_debug(dbc, "%s &res = 0x%x\n", prefix, (int)res); 462 commd_debug(dbc, "%s ID = (%d, 0x%llx-%d)\n", prefix, 463 MSGID_ELEMS(res->mmr_msgid)); 464 commd_debug(dbc, "%s setno = %d\n", prefix, res->mmr_setno); 465 commd_debug(dbc, "%s type = %d\n", prefix, res->mmr_msgtype); 466 commd_debug(dbc, "%s flags = 0x%x\n", prefix, res->mmr_flags); 467 commd_debug(dbc, "%s comm_state= %d\n", prefix, res->mmr_comm_state); 468 commd_debug(dbc, "%s exitval = %d\n", prefix, res->mmr_exitval); 469 commd_debug(dbc, "%s out_size = %d\n", prefix, res->mmr_out_size); 470 if (res->mmr_out_size) 471 commd_debug(dbc, "%s out = %s\n", prefix, res->mmr_out); 472 commd_debug(dbc, "%s err_size = %d\n", prefix, res->mmr_err_size); 473 if (res->mmr_err_size) 474 commd_debug(dbc, "%s err = %s\n", prefix, res->mmr_err); 475} 476 477 478/* 479 * Here we find out, where to store or find the results for a given msg. 480 * 481 * Per set we have a pointer to a three dimensional array: 482 * mct[set] -> mct_mce[NNODES][MD_MN_NCLASSES][MAX_SUBMESSAGES] 483 * So, for every possible node and for every possible class we can store 484 * MAX_SUBMESSAGES results. 485 * the way to find the correct index is 486 * submessage + 487 * class * MAX_SUBMESSAGES + 488 * nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES. 489 * 490 * To find the correct address the index has to be multiplied 491 * by the size of one entry. 492 */ 493static md_mn_mce_t * 494mdmn_get_mce_by_msg(md_mn_msg_t *msg) 495{ 496 set_t setno = msg->msg_setno; 497 int nodeid = msg->msg_msgid.mid_nid; 498 int submsg = msg->msg_msgid.mid_smid; 499 int mct_index; 500 off_t mct_offset; 501 md_mn_msgclass_t class; 502 503 if (mct[setno] != NULL) { 504 if (mdmn_init_set(setno, MDMN_SET_MCT) != 0) { 505 return ((md_mn_mce_t *)MDMN_MCT_ERROR); 506 } 507 } 508 509 if (submsg == 0) { 510 class = mdmn_get_message_class(msg->msg_type); 511 } else { 512 class = msg->msg_msgid.mid_oclass; 513 } 514 515 mct_index = submsg + class * MAX_SUBMESSAGES + 516 nodeid * MAX_SUBMESSAGES * MD_MN_NCLASSES; 517 518 mct_offset = mct_index * sizeof (md_mn_mce_t); 519 520 /* LINTED Pointer alignment */ 521 return ((md_mn_mce_t *)((caddr_t)(mct[setno]) + mct_offset)); 522 523 /* 524 * the lint clean version would be: 525 * return (&(mct[setno]->mct_mce[0][0][0]) + mct_index); 526 * :-) 527 */ 528} 529 530/* 531 * mdmn_mark_completion(msg, result, flag) 532 * Stores the result of this message into the mmaped memory MCT[setno] 533 * In case the same message comes along a second time we will know that 534 * this message has already been processed and we can deliver the 535 * results immediately. 536 * 537 * Before a message handler is called, the message in the MCT is flagged 538 * as currently being processed (flag == MDMN_MCT_IN_PROGRESS). 539 * This we need so we don't start a second handler for the same message. 540 * 541 * After a message handler is completed, this routine is called with 542 * flag == MDMN_MCT_DONE and the appropriate result that we store in the MCT. 543 * As MCT[setno] is memory mapped to disks, this information is persistent 544 * even across a crash of the commd. 545 * It doesn't have to be persistent across a reboot, though. 546 * 547 * Returns MDMN_MCT_DONE in case of success 548 * Returns MDMN_MCT_ERROR in case of error creating the mct 549 */ 550int 551mdmn_mark_completion(md_mn_msg_t *msg, md_mn_result_t *result, uint_t flag) 552{ 553 md_mn_mce_t *mce; 554 uint_t offset_in_page; 555 556 mce = mdmn_get_mce_by_msg(msg); 557 if (mce == (md_mn_mce_t *)-1) { 558 return (MDMN_MCT_ERROR); 559 } 560 offset_in_page = (uint_t)(caddr_t)mce % sysconf(_SC_PAGESIZE); 561 562 (void) memset(mce, 0, sizeof (md_mn_mce_t)); 563 564 MSGID_COPY(&msg->msg_msgid, &mce->mce_result.mmr_msgid); 565 if (flag == MDMN_MCT_IN_PROGRESS) { 566 mce->mce_flags = MDMN_MCT_IN_PROGRESS; 567 goto mmc_out; 568 } 569 570 /* 571 * In case the message flags indicate that the result should not be 572 * stored in the MCT, we return a MDMN_MCT_NOT_DONE, 573 * so the message will be processed at any rate, 574 * even if we process this message twice. 575 * this makes sense if the result of the message is a dynamic status 576 * and might have changed meanwhile. 577 */ 578 if (msg->msg_flags & MD_MSGF_NO_MCT) { 579 return (MDMN_MCT_DONE); 580 } 581 582 /* This msg is no longer in progress */ 583 mce->mce_flags = MDMN_MCT_DONE; 584 585 mce->mce_result.mmr_msgtype = result->mmr_msgtype; 586 mce->mce_result.mmr_setno = result->mmr_setno; 587 mce->mce_result.mmr_flags = result->mmr_flags; 588 mce->mce_result.mmr_sender = result->mmr_sender; 589 mce->mce_result.mmr_failing_node = result->mmr_failing_node; 590 mce->mce_result.mmr_comm_state = result->mmr_comm_state; 591 mce->mce_result.mmr_exitval = result->mmr_exitval; 592 593 /* if mmr_exitval is zero, we store stdout, otherwise stderr */ 594 if (result->mmr_exitval == 0) { 595 if (result->mmr_out_size > 0) { 596 (void) memcpy(mce->mce_data, result->mmr_out, 597 result->mmr_out_size); 598 mce->mce_result.mmr_out_size = result->mmr_out_size; 599 } 600 } else { 601 if (result->mmr_err_size > 0) { 602 mce->mce_result.mmr_err_size = result->mmr_err_size; 603 (void) memcpy(mce->mce_data, result->mmr_err, 604 result->mmr_err_size); 605 } 606 } 607 608 dump_result(MD_MMV_PROC_S, "mdmn_mark_completion1", result); 609 610mmc_out: 611 /* now flush this entry to disk */ 612 (void) msync((caddr_t)mce - offset_in_page, 613 sizeof (md_mn_mce_t) + offset_in_page, MS_SYNC); 614 return (MDMN_MCT_DONE); 615} 616 617/* 618 * mdmn_check_completion(msg, resultp) 619 * checks if msg has already been processed on this node, and if so copies 620 * the stored result to resultp. 621 * 622 * returns MDMN_MCT_DONE and the result filled out acurately in case the 623 * msg has already been processed before 624 * returns MDMN_MCT_NOT_DONE if the message has not been processed before 625 * returns MDMN_MCT_IN_PROGRESS if the message is currently being processed 626 * This can only occur on a slave node. 627 * return MDMN_MCT_ERROR in case of error creating the mct 628 */ 629int 630mdmn_check_completion(md_mn_msg_t *msg, md_mn_result_t *result) 631{ 632 md_mn_mce_t *mce; 633 size_t outsize; 634 size_t errsize; 635 636 mce = mdmn_get_mce_by_msg(msg); 637 if (mce == (md_mn_mce_t *)MDMN_MCT_ERROR) { 638 return (MDMN_MCT_ERROR); /* what to do in that case ? */ 639 } 640 if (MSGID_CMP(&(msg->msg_msgid), &(mce->mce_result.mmr_msgid))) { 641 /* is the message completed, or in progress? */ 642 if (mce->mce_flags & MDMN_MCT_IN_PROGRESS) { 643 return (MDMN_MCT_IN_PROGRESS); 644 } 645 /* 646 * See comment on MD_MSGF_NO_MCT above, if this flag is set 647 * for a message no result was stored and so the message has 648 * to be processed no matter if this is the 2nd time then. 649 */ 650 if (msg->msg_flags & MD_MSGF_NO_MCT) { 651 return (MDMN_MCT_NOT_DONE); 652 } 653 654 /* Paranoia check: mce_flags must be MDMN_MCT_DONE here */ 655 if ((mce->mce_flags & MDMN_MCT_DONE) == 0) { 656 commd_debug(MD_MMV_ALL, 657 "mdmn_check_completion: msg not done and not in " 658 "progress! ID = (%d, 0x%llx-%d)\n", 659 MSGID_ELEMS(msg->msg_msgid)); 660 return (MDMN_MCT_NOT_DONE); 661 } 662 /* 663 * Already processed. 664 * Copy saved results data; 665 * return only a pointer to any output. 666 */ 667 MSGID_COPY(&(mce->mce_result.mmr_msgid), &result->mmr_msgid); 668 result->mmr_msgtype = mce->mce_result.mmr_msgtype; 669 result->mmr_setno = mce->mce_result.mmr_setno; 670 result->mmr_flags = mce->mce_result.mmr_flags; 671 result->mmr_sender = mce->mce_result.mmr_sender; 672 result->mmr_failing_node = mce->mce_result.mmr_failing_node; 673 result->mmr_comm_state = mce->mce_result.mmr_comm_state; 674 result->mmr_exitval = mce->mce_result.mmr_exitval; 675 result->mmr_err = NULL; 676 result->mmr_out = NULL; 677 outsize = result->mmr_out_size = mce->mce_result.mmr_out_size; 678 errsize = result->mmr_err_size = mce->mce_result.mmr_err_size; 679 /* 680 * if the exit val is zero only stdout was stored (if any) 681 * otherwise only stderr was stored (if any) 682 */ 683 if (result->mmr_exitval == 0) { 684 if (outsize != 0) { 685 result->mmr_out = Zalloc(outsize); 686 (void) memcpy(result->mmr_out, mce->mce_data, 687 outsize); 688 } 689 } else { 690 if (errsize != 0) { 691 result->mmr_err = Zalloc(errsize); 692 (void) memcpy(result->mmr_err, mce->mce_data, 693 errsize); 694 } 695 } 696 commd_debug(MD_MMV_MISC, 697 "mdmn_check_completion: msg already processed \n"); 698 dump_result(MD_MMV_MISC, "mdmn_check_completion", result); 699 return (MDMN_MCT_DONE); 700 } 701 commd_debug(MD_MMV_MISC, 702 "mdmn_check_completion: msg not yet processed\n"); 703 return (MDMN_MCT_NOT_DONE); 704} 705 706 707 708/* 709 * check_license(rqstp, chknid) 710 * 711 * Is this RPC request sent from a licensed host? 712 * 713 * If chknid is non-zero, the caller of check_license() knows the ID of 714 * the sender. Then we check just the one entry of licensed_nodes[] 715 * 716 * If chknid is zero, the sender is not known. In that case the sender must be 717 * the local node. 718 * 719 * If the host is licensed, return TRUE, else return FALSE 720 */ 721bool_t 722check_license(struct svc_req *rqstp, md_mn_nodeid_t chknid) 723{ 724 char buf[INET6_ADDRSTRLEN]; 725 void *caller = NULL; 726 in_addr_t caller_ipv4; 727 in6_addr_t caller_ipv6; 728 struct sockaddr *ca; 729 730 731 ca = (struct sockaddr *)(void *)svc_getrpccaller(rqstp->rq_xprt)->buf; 732 733 if (ca->sa_family == AF_INET) { 734 caller_ipv4 = 735 ((struct sockaddr_in *)(void *)ca)->sin_addr.s_addr; 736 caller = (void *)&caller_ipv4; 737 738 if (chknid == 0) { 739 /* check against local node */ 740 if (caller_ipv4 == htonl(INADDR_LOOPBACK)) { 741 return (TRUE); 742 743 } 744 } else { 745 /* check against one specific node */ 746 if ((caller_ipv4 == licensed_nodes[chknid].lip_ipv4) && 747 (licensed_nodes[chknid].lip_family == AF_INET)) { 748 return (TRUE); 749 } else { 750 commd_debug(MD_MMV_MISC, 751 "Bad attempt from %x ln[%d]=%x\n", 752 caller_ipv4, chknid, 753 licensed_nodes[chknid].lip_ipv4); 754 } 755 } 756 } else if (ca->sa_family == AF_INET6) { 757 caller_ipv6 = ((struct sockaddr_in6 *)(void *)ca)->sin6_addr; 758 caller = (void *)&caller_ipv6; 759 760 if (chknid == 0) { 761 /* check against local node */ 762 if (IN6_IS_ADDR_LOOPBACK(&caller_ipv6)) { 763 return (TRUE); 764 765 } 766 } else { 767 /* check against one specific node */ 768 if (IN6_ARE_ADDR_EQUAL(&caller_ipv6, 769 &(licensed_nodes[chknid].lip_ipv6)) && 770 (licensed_nodes[chknid].lip_family == AF_INET6)) { 771 return (TRUE); 772 } 773 } 774 } 775 /* if we are here, we were contacted by an unlicensed node */ 776 commd_debug(MD_MMV_SYSLOG, 777 "Bad attempt to contact rpc.mdcommd from %s\n", 778 caller ? 779 inet_ntop(ca->sa_family, caller, buf, INET6_ADDRSTRLEN) : 780 "unknown"); 781 782 return (FALSE); 783} 784 785/* 786 * Add a node to the list of licensed nodes. 787 * 788 * Only IPv4 is currently supported. 789 * for IPv6, we need to change md_mnnode_desc. 790 */ 791void 792add_license(md_mnnode_desc *node) 793{ 794 md_mn_nodeid_t nid = node->nd_nodeid; 795 char buf[INET6_ADDRSTRLEN]; 796 797 /* 798 * If this node is not yet licensed, do it now. 799 * For now only IPv4 addresses are supported. 800 */ 801 commd_debug(MD_MMV_MISC, "add_lic(%s): ln[%d]=%s, lnc[%d]=%d\n", 802 node->nd_priv_ic, nid, 803 inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4, 804 buf, INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt); 805 806 if (licensed_nodes[nid].lip_ipv4 == (in_addr_t)0) { 807 licensed_nodes[nid].lip_family = AF_INET; /* IPv4 */ 808 licensed_nodes[nid].lip_ipv4 = inet_addr(node->nd_priv_ic); 809 /* keep track of the last entry for faster search */ 810 if (nid > maxlicnodes) 811 maxlicnodes = nid; 812 813 } 814 /* in any case bump up the reference count */ 815 licensed_nodes[nid].lip_cnt++; 816} 817 818/* 819 * lower the reference count for one node. 820 * If that drops to zero, remove the node from the list of licensed nodes 821 * 822 * Only IPv4 is currently supported. 823 * for IPv6, we need to change md_mnnode_desc. 824 */ 825void 826rem_license(md_mnnode_desc *node) 827{ 828 md_mn_nodeid_t nid = node->nd_nodeid; 829 char buf[INET6_ADDRSTRLEN]; 830 831 commd_debug(MD_MMV_MISC, "rem_lic(%s): ln[%d]=%s, lnc[%d]=%d\n", 832 node->nd_priv_ic, nid, 833 inet_ntop(AF_INET, (void *)&licensed_nodes[nid].lip_ipv4, buf, 834 INET6_ADDRSTRLEN), nid, licensed_nodes[nid].lip_cnt); 835 836 assert(licensed_nodes[nid].lip_cnt > 0); 837 838 /* 839 * If this was the last reference to that node, it's license expires 840 * For now only IPv4 addresses are supported. 841 */ 842 if (--licensed_nodes[nid].lip_cnt == 0) { 843 licensed_nodes[nid].lip_ipv4 = (in_addr_t)0; 844 } 845} 846