1/* $NetBSD: rf_driver.c,v 1.129 2011/05/27 22:48:24 yamt Exp $ */ 2/*- 3 * Copyright (c) 1999 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31/* 32 * Copyright (c) 1995 Carnegie-Mellon University. 33 * All rights reserved. 34 * 35 * Author: Mark Holland, Khalil Amiri, Claudson Bornstein, William V. Courtright II, 36 * Robby Findler, Daniel Stodolsky, Rachad Youssef, Jim Zelenka 37 * 38 * Permission to use, copy, modify and distribute this software and 39 * its documentation is hereby granted, provided that both the copyright 40 * notice and this permission notice appear in all copies of the 41 * software, derivative works or modified versions, and any portions 42 * thereof, and that both notices appear in supporting documentation. 43 * 44 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 45 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 46 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 47 * 48 * Carnegie Mellon requests users of this software to return to 49 * 50 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 51 * School of Computer Science 52 * Carnegie Mellon University 53 * Pittsburgh PA 15213-3890 54 * 55 * any improvements or extensions that they make and grant Carnegie the 56 * rights to redistribute these changes. 57 */ 58 59/****************************************************************************** 60 * 61 * rf_driver.c -- main setup, teardown, and access routines for the RAID driver 62 * 63 * all routines are prefixed with rf_ (raidframe), to avoid conficts. 64 * 65 ******************************************************************************/ 66 67 68#include <sys/cdefs.h> 69__KERNEL_RCSID(0, "$NetBSD: rf_driver.c,v 1.129 2011/05/27 22:48:24 yamt Exp $"); 70 71#ifdef _KERNEL_OPT 72#include "opt_raid_diagnostic.h" 73#endif 74 75#include <sys/param.h> 76#include <sys/systm.h> 77#include <sys/ioctl.h> 78#include <sys/fcntl.h> 79#include <sys/vnode.h> 80 81 82#include "rf_archs.h" 83#include "rf_threadstuff.h" 84 85#include <sys/errno.h> 86 87#include "rf_raid.h" 88#include "rf_dag.h" 89#include "rf_aselect.h" 90#include "rf_diskqueue.h" 91#include "rf_parityscan.h" 92#include "rf_alloclist.h" 93#include "rf_dagutils.h" 94#include "rf_utils.h" 95#include "rf_etimer.h" 96#include "rf_acctrace.h" 97#include "rf_general.h" 98#include "rf_desc.h" 99#include "rf_states.h" 100#include "rf_decluster.h" 101#include "rf_map.h" 102#include "rf_revent.h" 103#include "rf_callback.h" 104#include "rf_engine.h" 105#include "rf_mcpair.h" 106#include "rf_nwayxor.h" 107#include "rf_copyback.h" 108#include "rf_driver.h" 109#include "rf_options.h" 110#include "rf_shutdown.h" 111#include "rf_kintf.h" 112#include "rf_paritymap.h" 113 114#include <sys/buf.h> 115 116#ifndef RF_ACCESS_DEBUG 117#define RF_ACCESS_DEBUG 0 118#endif 119 120/* rad == RF_RaidAccessDesc_t */ 121#define RF_MAX_FREE_RAD 128 122#define RF_MIN_FREE_RAD 32 123 124/* debug variables */ 125char rf_panicbuf[2048]; /* a buffer to hold an error msg when we panic */ 126 127/* main configuration routines */ 128static int raidframe_booted = 0; 129 130static void rf_ConfigureDebug(RF_Config_t * cfgPtr); 131static void set_debug_option(char *name, long val); 132static void rf_UnconfigureArray(void); 133static void rf_ShutdownRDFreeList(void *); 134static int rf_ConfigureRDFreeList(RF_ShutdownList_t **); 135 136rf_declare_mutex2(rf_printf_mutex); /* debug only: avoids interleaved 137 * printfs by different stripes */ 138 139#define SIGNAL_QUIESCENT_COND(_raid_) \ 140 rf_broadcast_cond2((_raid_)->access_suspend_cv) 141#define WAIT_FOR_QUIESCENCE(_raid_) \ 142 rf_wait_cond2((_raid_)->access_suspend_cv, \ 143 (_raid_)->access_suspend_mutex) 144 145static int configureCount = 0; /* number of active configurations */ 146static int isconfigged = 0; /* is basic raidframe (non per-array) 147 * stuff configured */ 148static rf_declare_mutex2(configureMutex); /* used to lock the configuration 149 * stuff */ 150static RF_ShutdownList_t *globalShutdown; /* non array-specific 151 * stuff */ 152 153static int rf_ConfigureRDFreeList(RF_ShutdownList_t ** listp); 154static int rf_AllocEmergBuffers(RF_Raid_t *); 155static void rf_FreeEmergBuffers(RF_Raid_t *); 156 157/* called at system boot time */ 158int 159rf_BootRaidframe(void) 160{ 161 162 if (raidframe_booted) 163 return (EBUSY); 164 raidframe_booted = 1; 165 rf_init_mutex2(configureMutex, IPL_NONE); 166 configureCount = 0; 167 isconfigged = 0; 168 globalShutdown = NULL; 169 return (0); 170} 171 172/* 173 * Called whenever an array is shutdown 174 */ 175static void 176rf_UnconfigureArray(void) 177{ 178 179 rf_lock_mutex2(configureMutex); 180 if (--configureCount == 0) { /* if no active configurations, shut 181 * everything down */ 182 rf_destroy_mutex2(rf_printf_mutex); 183 isconfigged = 0; 184 rf_ShutdownList(&globalShutdown); 185 186 /* 187 * We must wait until now, because the AllocList module 188 * uses the DebugMem module. 189 */ 190#if RF_DEBUG_MEM 191 if (rf_memDebug) 192 rf_print_unfreed(); 193#endif 194 } 195 rf_unlock_mutex2(configureMutex); 196} 197 198/* 199 * Called to shut down an array. 200 */ 201int 202rf_Shutdown(RF_Raid_t *raidPtr) 203{ 204 205 if (!raidPtr->valid) { 206 RF_ERRORMSG("Attempt to shut down unconfigured RAIDframe driver. Aborting shutdown\n"); 207 return (EINVAL); 208 } 209 /* 210 * wait for outstanding IOs to land 211 * As described in rf_raid.h, we use the rad_freelist lock 212 * to protect the per-array info about outstanding descs 213 * since we need to do freelist locking anyway, and this 214 * cuts down on the amount of serialization we've got going 215 * on. 216 */ 217 rf_lock_mutex2(raidPtr->rad_lock); 218 if (raidPtr->waitShutdown) { 219 rf_unlock_mutex2(raidPtr->rad_lock); 220 return (EBUSY); 221 } 222 raidPtr->waitShutdown = 1; 223 while (raidPtr->nAccOutstanding) { 224 rf_wait_cond2(raidPtr->outstandingCond, raidPtr->rad_lock); 225 } 226 rf_unlock_mutex2(raidPtr->rad_lock); 227 228 /* Wait for any parity re-writes to stop... */ 229 while (raidPtr->parity_rewrite_in_progress) { 230 printf("raid%d: Waiting for parity re-write to exit...\n", 231 raidPtr->raidid); 232 tsleep(&raidPtr->parity_rewrite_in_progress, PRIBIO, 233 "rfprwshutdown", 0); 234 } 235 236 /* Wait for any reconstruction to stop... */ 237 rf_lock_mutex2(raidPtr->mutex); 238 while (raidPtr->reconInProgress) { 239 printf("raid%d: Waiting for reconstruction to stop...\n", 240 raidPtr->raidid); 241 rf_wait_cond2(raidPtr->waitForReconCond, raidPtr->mutex); 242 } 243 rf_unlock_mutex2(raidPtr->mutex); 244 245 raidPtr->valid = 0; 246 247 if (raidPtr->parity_map != NULL) 248 rf_paritymap_detach(raidPtr); 249 250 rf_update_component_labels(raidPtr, RF_FINAL_COMPONENT_UPDATE); 251 252 rf_UnconfigureVnodes(raidPtr); 253 254 rf_FreeEmergBuffers(raidPtr); 255 256 rf_ShutdownList(&raidPtr->shutdownList); 257 258 rf_destroy_cond2(raidPtr->waitForReconCond); 259 rf_destroy_cond2(raidPtr->adding_hot_spare_cv); 260 261 rf_destroy_mutex2(raidPtr->access_suspend_mutex); 262 rf_destroy_cond2(raidPtr->access_suspend_cv); 263 264 rf_destroy_cond2(raidPtr->outstandingCond); 265 rf_destroy_mutex2(raidPtr->rad_lock); 266 267 rf_destroy_mutex2(raidPtr->mutex); 268 269 rf_UnconfigureArray(); 270 271 return (0); 272} 273 274 275#define DO_INIT_CONFIGURE(f) { \ 276 rc = f (&globalShutdown); \ 277 if (rc) { \ 278 RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ 279 rf_ShutdownList(&globalShutdown); \ 280 configureCount--; \ 281 rf_unlock_mutex2(configureMutex); \ 282 rf_destroy_mutex2(rf_printf_mutex); \ 283 return(rc); \ 284 } \ 285} 286 287#define DO_RAID_FAIL() { \ 288 rf_UnconfigureVnodes(raidPtr); \ 289 rf_FreeEmergBuffers(raidPtr); \ 290 rf_ShutdownList(&raidPtr->shutdownList); \ 291 rf_UnconfigureArray(); \ 292} 293 294#define DO_RAID_INIT_CONFIGURE(f) { \ 295 rc = f (&raidPtr->shutdownList, raidPtr, cfgPtr); \ 296 if (rc) { \ 297 RF_ERRORMSG2("RAIDFRAME: failed %s with %d\n", RF_STRING(f), rc); \ 298 DO_RAID_FAIL(); \ 299 return(rc); \ 300 } \ 301} 302 303int 304rf_Configure(RF_Raid_t *raidPtr, RF_Config_t *cfgPtr, RF_AutoConfig_t *ac) 305{ 306 RF_RowCol_t col; 307 int rc; 308 309 rf_lock_mutex2(configureMutex); 310 configureCount++; 311 if (isconfigged == 0) { 312 rf_init_mutex2(rf_printf_mutex, IPL_VM); 313 314 /* initialize globals */ 315 316 DO_INIT_CONFIGURE(rf_ConfigureAllocList); 317 318 /* 319 * Yes, this does make debugging general to the whole 320 * system instead of being array specific. Bummer, drag. 321 */ 322 rf_ConfigureDebug(cfgPtr); 323 DO_INIT_CONFIGURE(rf_ConfigureDebugMem); 324#if RF_ACC_TRACE > 0 325 DO_INIT_CONFIGURE(rf_ConfigureAccessTrace); 326#endif 327 DO_INIT_CONFIGURE(rf_ConfigureMapModule); 328 DO_INIT_CONFIGURE(rf_ConfigureReconEvent); 329 DO_INIT_CONFIGURE(rf_ConfigureCallback); 330 DO_INIT_CONFIGURE(rf_ConfigureRDFreeList); 331 DO_INIT_CONFIGURE(rf_ConfigureNWayXor); 332 DO_INIT_CONFIGURE(rf_ConfigureStripeLockFreeList); 333 DO_INIT_CONFIGURE(rf_ConfigureMCPair); 334 DO_INIT_CONFIGURE(rf_ConfigureDAGs); 335 DO_INIT_CONFIGURE(rf_ConfigureDAGFuncs); 336 DO_INIT_CONFIGURE(rf_ConfigureReconstruction); 337 DO_INIT_CONFIGURE(rf_ConfigureCopyback); 338 DO_INIT_CONFIGURE(rf_ConfigureDiskQueueSystem); 339 DO_INIT_CONFIGURE(rf_ConfigurePSStatus); 340 isconfigged = 1; 341 } 342 rf_unlock_mutex2(configureMutex); 343 344 rf_init_mutex2(raidPtr->mutex, IPL_VM); 345 /* set up the cleanup list. Do this after ConfigureDebug so that 346 * value of memDebug will be set */ 347 348 rf_MakeAllocList(raidPtr->cleanupList); 349 if (raidPtr->cleanupList == NULL) { 350 DO_RAID_FAIL(); 351 return (ENOMEM); 352 } 353 rf_ShutdownCreate(&raidPtr->shutdownList, 354 (void (*) (void *)) rf_FreeAllocList, 355 raidPtr->cleanupList); 356 357 raidPtr->numCol = cfgPtr->numCol; 358 raidPtr->numSpare = cfgPtr->numSpare; 359 360 raidPtr->status = rf_rs_optimal; 361 raidPtr->reconControl = NULL; 362 363 DO_RAID_INIT_CONFIGURE(rf_ConfigureEngine); 364 DO_RAID_INIT_CONFIGURE(rf_ConfigureStripeLocks); 365 366 rf_init_cond2(raidPtr->outstandingCond, "rfocond"); 367 rf_init_mutex2(raidPtr->rad_lock, IPL_VM); 368 369 raidPtr->nAccOutstanding = 0; 370 raidPtr->waitShutdown = 0; 371 372 rf_init_mutex2(raidPtr->access_suspend_mutex, IPL_VM); 373 rf_init_cond2(raidPtr->access_suspend_cv, "rfquiesce"); 374 375 rf_init_cond2(raidPtr->waitForReconCond, "rfrcnw"); 376 377 if (ac!=NULL) { 378 /* We have an AutoConfig structure.. Don't do the 379 normal disk configuration... call the auto config 380 stuff */ 381 rf_AutoConfigureDisks(raidPtr, cfgPtr, ac); 382 } else { 383 DO_RAID_INIT_CONFIGURE(rf_ConfigureDisks); 384 DO_RAID_INIT_CONFIGURE(rf_ConfigureSpareDisks); 385 } 386 /* do this after ConfigureDisks & ConfigureSpareDisks to be sure dev 387 * no. is set */ 388 DO_RAID_INIT_CONFIGURE(rf_ConfigureDiskQueues); 389 390 DO_RAID_INIT_CONFIGURE(rf_ConfigureLayout); 391 392 /* Initialize per-RAID PSS bits */ 393 rf_InitPSStatus(raidPtr); 394 395#if RF_INCLUDE_CHAINDECLUSTER > 0 396 for (col = 0; col < raidPtr->numCol; col++) { 397 /* 398 * XXX better distribution 399 */ 400 raidPtr->hist_diskreq[col] = 0; 401 } 402#endif 403 raidPtr->numNewFailures = 0; 404 raidPtr->copyback_in_progress = 0; 405 raidPtr->parity_rewrite_in_progress = 0; 406 raidPtr->adding_hot_spare = 0; 407 raidPtr->recon_in_progress = 0; 408 409 rf_init_cond2(raidPtr->adding_hot_spare_cv, "raidhs"); 410 411 raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs; 412 413 /* autoconfigure and root_partition will actually get filled in 414 after the config is done */ 415 raidPtr->autoconfigure = 0; 416 raidPtr->root_partition = 0; 417 raidPtr->last_unit = raidPtr->raidid; 418 raidPtr->config_order = 0; 419 420 if (rf_keepAccTotals) { 421 raidPtr->keep_acc_totals = 1; 422 } 423 424 /* Allocate a bunch of buffers to be used in low-memory conditions */ 425 raidPtr->iobuf = NULL; 426 427 rc = rf_AllocEmergBuffers(raidPtr); 428 if (rc) { 429 printf("raid%d: Unable to allocate emergency buffers.\n", 430 raidPtr->raidid); 431 DO_RAID_FAIL(); 432 return(rc); 433 } 434 435 /* Set up parity map stuff, if applicable. */ 436#ifndef RF_NO_PARITY_MAP 437 rf_paritymap_attach(raidPtr, cfgPtr->force); 438#endif 439 440 raidPtr->valid = 1; 441 442 printf("raid%d: %s\n", raidPtr->raidid, 443 raidPtr->Layout.map->configName); 444 printf("raid%d: Components:", raidPtr->raidid); 445 446 for (col = 0; col < raidPtr->numCol; col++) { 447 printf(" %s", raidPtr->Disks[col].devname); 448 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) { 449 printf("[**FAILED**]"); 450 } 451 } 452 printf("\n"); 453 printf("raid%d: Total Sectors: %" PRIu64 " (%" PRIu64 " MB)\n", 454 raidPtr->raidid, 455 raidPtr->totalSectors, 456 (raidPtr->totalSectors / 1024 * 457 (1 << raidPtr->logBytesPerSector) / 1024)); 458 459 return (0); 460} 461 462 463/* 464 465 Routines to allocate and free the "emergency buffers" for a given 466 RAID set. These emergency buffers will be used when the kernel runs 467 out of kernel memory. 468 469 */ 470 471static int 472rf_AllocEmergBuffers(RF_Raid_t *raidPtr) 473{ 474 void *tmpbuf; 475 RF_VoidPointerListElem_t *vple; 476 int i; 477 478 /* XXX next line needs tuning... */ 479 raidPtr->numEmergencyBuffers = 10 * raidPtr->numCol; 480#if DEBUG 481 printf("raid%d: allocating %d buffers of %d bytes.\n", 482 raidPtr->raidid, 483 raidPtr->numEmergencyBuffers, 484 (int)(raidPtr->Layout.sectorsPerStripeUnit << 485 raidPtr->logBytesPerSector)); 486#endif 487 for (i = 0; i < raidPtr->numEmergencyBuffers; i++) { 488 tmpbuf = malloc( raidPtr->Layout.sectorsPerStripeUnit << 489 raidPtr->logBytesPerSector, 490 M_RAIDFRAME, M_WAITOK); 491 if (tmpbuf) { 492 vple = rf_AllocVPListElem(); 493 vple->p= tmpbuf; 494 vple->next = raidPtr->iobuf; 495 raidPtr->iobuf = vple; 496 raidPtr->iobuf_count++; 497 } else { 498 printf("raid%d: failed to allocate emergency buffer!\n", 499 raidPtr->raidid); 500 return 1; 501 } 502 } 503 504 /* XXX next line needs tuning too... */ 505 raidPtr->numEmergencyStripeBuffers = 10; 506 for (i = 0; i < raidPtr->numEmergencyStripeBuffers; i++) { 507 tmpbuf = malloc( raidPtr->numCol * (raidPtr->Layout.sectorsPerStripeUnit << 508 raidPtr->logBytesPerSector), 509 M_RAIDFRAME, M_WAITOK); 510 if (tmpbuf) { 511 vple = rf_AllocVPListElem(); 512 vple->p= tmpbuf; 513 vple->next = raidPtr->stripebuf; 514 raidPtr->stripebuf = vple; 515 raidPtr->stripebuf_count++; 516 } else { 517 printf("raid%d: failed to allocate emergency stripe buffer!\n", 518 raidPtr->raidid); 519 return 1; 520 } 521 } 522 523 return (0); 524} 525 526static void 527rf_FreeEmergBuffers(RF_Raid_t *raidPtr) 528{ 529 RF_VoidPointerListElem_t *tmp; 530 531 /* Free the emergency IO buffers */ 532 while (raidPtr->iobuf != NULL) { 533 tmp = raidPtr->iobuf; 534 raidPtr->iobuf = raidPtr->iobuf->next; 535 free(tmp->p, M_RAIDFRAME); 536 rf_FreeVPListElem(tmp); 537 } 538 539 /* Free the emergency stripe buffers */ 540 while (raidPtr->stripebuf != NULL) { 541 tmp = raidPtr->stripebuf; 542 raidPtr->stripebuf = raidPtr->stripebuf->next; 543 free(tmp->p, M_RAIDFRAME); 544 rf_FreeVPListElem(tmp); 545 } 546} 547 548 549static void 550rf_ShutdownRDFreeList(void *ignored) 551{ 552 pool_destroy(&rf_pools.rad); 553} 554 555static int 556rf_ConfigureRDFreeList(RF_ShutdownList_t **listp) 557{ 558 559 rf_pool_init(&rf_pools.rad, sizeof(RF_RaidAccessDesc_t), 560 "rf_rad_pl", RF_MIN_FREE_RAD, RF_MAX_FREE_RAD); 561 rf_ShutdownCreate(listp, rf_ShutdownRDFreeList, NULL); 562 return (0); 563} 564 565RF_RaidAccessDesc_t * 566rf_AllocRaidAccDesc(RF_Raid_t *raidPtr, RF_IoType_t type, 567 RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, 568 void *bufPtr, void *bp, RF_RaidAccessFlags_t flags, 569 const RF_AccessState_t *states) 570{ 571 RF_RaidAccessDesc_t *desc; 572 573 desc = pool_get(&rf_pools.rad, PR_WAITOK); 574 575 rf_lock_mutex2(raidPtr->rad_lock); 576 if (raidPtr->waitShutdown) { 577 /* 578 * Actually, we're shutting the array down. Free the desc 579 * and return NULL. 580 */ 581 582 rf_unlock_mutex2(raidPtr->rad_lock); 583 pool_put(&rf_pools.rad, desc); 584 return (NULL); 585 } 586 raidPtr->nAccOutstanding++; 587 588 rf_unlock_mutex2(raidPtr->rad_lock); 589 590 desc->raidPtr = (void *) raidPtr; 591 desc->type = type; 592 desc->raidAddress = raidAddress; 593 desc->numBlocks = numBlocks; 594 desc->bufPtr = bufPtr; 595 desc->bp = bp; 596 desc->flags = flags; 597 desc->states = states; 598 desc->state = 0; 599 desc->dagList = NULL; 600 601 desc->status = 0; 602 desc->numRetries = 0; 603#if RF_ACC_TRACE > 0 604 memset((char *) &desc->tracerec, 0, sizeof(RF_AccTraceEntry_t)); 605#endif 606 desc->callbackFunc = NULL; 607 desc->callbackArg = NULL; 608 desc->next = NULL; 609 desc->iobufs = NULL; 610 desc->stripebufs = NULL; 611 612 return (desc); 613} 614 615void 616rf_FreeRaidAccDesc(RF_RaidAccessDesc_t *desc) 617{ 618 RF_Raid_t *raidPtr = desc->raidPtr; 619 RF_DagList_t *dagList, *temp; 620 RF_VoidPointerListElem_t *tmp; 621 622 RF_ASSERT(desc); 623 624 /* Cleanup the dagList(s) */ 625 dagList = desc->dagList; 626 while(dagList != NULL) { 627 temp = dagList; 628 dagList = dagList->next; 629 rf_FreeDAGList(temp); 630 } 631 632 while (desc->iobufs) { 633 tmp = desc->iobufs; 634 desc->iobufs = desc->iobufs->next; 635 rf_FreeIOBuffer(raidPtr, tmp); 636 } 637 638 while (desc->stripebufs) { 639 tmp = desc->stripebufs; 640 desc->stripebufs = desc->stripebufs->next; 641 rf_FreeStripeBuffer(raidPtr, tmp); 642 } 643 644 pool_put(&rf_pools.rad, desc); 645 rf_lock_mutex2(raidPtr->rad_lock); 646 raidPtr->nAccOutstanding--; 647 if (raidPtr->waitShutdown) { 648 rf_signal_cond2(raidPtr->outstandingCond); 649 } 650 rf_unlock_mutex2(raidPtr->rad_lock); 651} 652/********************************************************************* 653 * Main routine for performing an access. 654 * Accesses are retried until a DAG can not be selected. This occurs 655 * when either the DAG library is incomplete or there are too many 656 * failures in a parity group. 657 * 658 * type should be read or write async_flag should be RF_TRUE or 659 * RF_FALSE bp_in is a buf pointer. void *to facilitate ignoring it 660 * outside the kernel 661 ********************************************************************/ 662int 663rf_DoAccess(RF_Raid_t * raidPtr, RF_IoType_t type, int async_flag, 664 RF_RaidAddr_t raidAddress, RF_SectorCount_t numBlocks, 665 void *bufPtr, struct buf *bp, RF_RaidAccessFlags_t flags) 666{ 667 RF_RaidAccessDesc_t *desc; 668 void *lbufPtr = bufPtr; 669 670 raidAddress += rf_raidSectorOffset; 671 672#if RF_ACCESS_DEBUG 673 if (rf_accessDebug) { 674 675 printf("logBytes is: %d %d %d\n", raidPtr->raidid, 676 raidPtr->logBytesPerSector, 677 (int) rf_RaidAddressToByte(raidPtr, numBlocks)); 678 printf("raid%d: %s raidAddr %d (stripeid %d-%d) numBlocks %d (%d bytes) buf 0x%lx\n", raidPtr->raidid, 679 (type == RF_IO_TYPE_READ) ? "READ" : "WRITE", (int) raidAddress, 680 (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress), 681 (int) rf_RaidAddressToStripeID(&raidPtr->Layout, raidAddress + numBlocks - 1), 682 (int) numBlocks, 683 (int) rf_RaidAddressToByte(raidPtr, numBlocks), 684 (long) bufPtr); 685 } 686#endif 687 688 desc = rf_AllocRaidAccDesc(raidPtr, type, raidAddress, 689 numBlocks, lbufPtr, bp, flags, raidPtr->Layout.map->states); 690 691 if (desc == NULL) { 692 return (ENOMEM); 693 } 694#if RF_ACC_TRACE > 0 695 RF_ETIMER_START(desc->tracerec.tot_timer); 696#endif 697 desc->async_flag = async_flag; 698 699 if (raidPtr->parity_map != NULL && 700 type == RF_IO_TYPE_WRITE) 701 rf_paritymap_begin(raidPtr->parity_map, raidAddress, 702 numBlocks); 703 704 rf_ContinueRaidAccess(desc); 705 706 return (0); 707} 708#if 0 709/* force the array into reconfigured mode without doing reconstruction */ 710int 711rf_SetReconfiguredMode(RF_Raid_t *raidPtr, int col) 712{ 713 if (!(raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE)) { 714 printf("Can't set reconfigured mode in dedicated-spare array\n"); 715 RF_PANIC(); 716 } 717 rf_lock_mutex2(raidPtr->mutex); 718 raidPtr->numFailures++; 719 raidPtr->Disks[col].status = rf_ds_dist_spared; 720 raidPtr->status = rf_rs_reconfigured; 721 rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); 722 /* install spare table only if declustering + distributed sparing 723 * architecture. */ 724 if (raidPtr->Layout.map->flags & RF_BD_DECLUSTERED) 725 rf_InstallSpareTable(raidPtr, col); 726 rf_unlock_mutex2(raidPtr->mutex); 727 return (0); 728} 729#endif 730 731int 732rf_FailDisk(RF_Raid_t *raidPtr, int fcol, int initRecon) 733{ 734 735 /* need to suspend IO's here -- if there are DAGs in flight 736 and we pull the rug out from under ci_vp, Bad Things 737 can happen. */ 738 739 rf_SuspendNewRequestsAndWait(raidPtr); 740 741 rf_lock_mutex2(raidPtr->mutex); 742 if (raidPtr->Disks[fcol].status != rf_ds_failed) { 743 /* must be failing something that is valid, or else it's 744 already marked as failed (in which case we don't 745 want to mark it failed again!) */ 746 raidPtr->numFailures++; 747 raidPtr->Disks[fcol].status = rf_ds_failed; 748 raidPtr->status = rf_rs_degraded; 749 } 750 rf_unlock_mutex2(raidPtr->mutex); 751 752 rf_update_component_labels(raidPtr, RF_NORMAL_COMPONENT_UPDATE); 753 754 /* Close the component, so that it's not "locked" if someone 755 else want's to use it! */ 756 757 rf_close_component(raidPtr, raidPtr->raid_cinfo[fcol].ci_vp, 758 raidPtr->Disks[fcol].auto_configured); 759 760 rf_lock_mutex2(raidPtr->mutex); 761 raidPtr->raid_cinfo[fcol].ci_vp = NULL; 762 763 /* Need to mark the component as not being auto_configured 764 (in case it was previously). */ 765 766 raidPtr->Disks[fcol].auto_configured = 0; 767 rf_unlock_mutex2(raidPtr->mutex); 768 /* now we can allow IO to continue -- we'll be suspending it 769 again in rf_ReconstructFailedDisk() if we have to.. */ 770 771 rf_ResumeNewRequests(raidPtr); 772 773 if (initRecon) 774 rf_ReconstructFailedDisk(raidPtr, fcol); 775 return (0); 776} 777/* releases a thread that is waiting for the array to become quiesced. 778 * access_suspend_mutex should be locked upon calling this 779 */ 780void 781rf_SignalQuiescenceLock(RF_Raid_t *raidPtr) 782{ 783#if RF_DEBUG_QUIESCE 784 if (rf_quiesceDebug) { 785 printf("raid%d: Signalling quiescence lock\n", 786 raidPtr->raidid); 787 } 788#endif 789 raidPtr->access_suspend_release = 1; 790 791 if (raidPtr->waiting_for_quiescence) { 792 SIGNAL_QUIESCENT_COND(raidPtr); 793 } 794} 795/* suspends all new requests to the array. No effect on accesses that are in flight. */ 796int 797rf_SuspendNewRequestsAndWait(RF_Raid_t *raidPtr) 798{ 799#if RF_DEBUG_QUIESCE 800 if (rf_quiesceDebug) 801 printf("raid%d: Suspending new reqs\n", raidPtr->raidid); 802#endif 803 rf_lock_mutex2(raidPtr->access_suspend_mutex); 804 raidPtr->accesses_suspended++; 805 raidPtr->waiting_for_quiescence = (raidPtr->accs_in_flight == 0) ? 0 : 1; 806 807 if (raidPtr->waiting_for_quiescence) { 808 raidPtr->access_suspend_release = 0; 809 while (!raidPtr->access_suspend_release) { 810#if RF_DEBUG_QUIESCE 811 printf("raid%d: Suspending: Waiting for Quiescence\n", 812 raidPtr->raidid); 813#endif 814 WAIT_FOR_QUIESCENCE(raidPtr); 815 raidPtr->waiting_for_quiescence = 0; 816 } 817 } 818#if RF_DEBUG_QUIESCE 819 printf("raid%d: Quiescence reached..\n", raidPtr->raidid); 820#endif 821 822 rf_unlock_mutex2(raidPtr->access_suspend_mutex); 823 return (raidPtr->waiting_for_quiescence); 824} 825/* wake up everyone waiting for quiescence to be released */ 826void 827rf_ResumeNewRequests(RF_Raid_t *raidPtr) 828{ 829 RF_CallbackDesc_t *t, *cb; 830 831#if RF_DEBUG_QUIESCE 832 if (rf_quiesceDebug) 833 printf("raid%d: Resuming new requests\n", raidPtr->raidid); 834#endif 835 836 rf_lock_mutex2(raidPtr->access_suspend_mutex); 837 raidPtr->accesses_suspended--; 838 if (raidPtr->accesses_suspended == 0) 839 cb = raidPtr->quiesce_wait_list; 840 else 841 cb = NULL; 842 raidPtr->quiesce_wait_list = NULL; 843 rf_unlock_mutex2(raidPtr->access_suspend_mutex); 844 845 while (cb) { 846 t = cb; 847 cb = cb->next; 848 (t->callbackFunc) (t->callbackArg); 849 rf_FreeCallbackDesc(t); 850 } 851} 852/***************************************************************************************** 853 * 854 * debug routines 855 * 856 ****************************************************************************************/ 857 858static void 859set_debug_option(char *name, long val) 860{ 861 RF_DebugName_t *p; 862 863 for (p = rf_debugNames; p->name; p++) { 864 if (!strcmp(p->name, name)) { 865 *(p->ptr) = val; 866 printf("[Set debug variable %s to %ld]\n", name, val); 867 return; 868 } 869 } 870 RF_ERRORMSG1("Unknown debug string \"%s\"\n", name); 871} 872 873 874/* would like to use sscanf here, but apparently not available in kernel */ 875/*ARGSUSED*/ 876static void 877rf_ConfigureDebug(RF_Config_t *cfgPtr) 878{ 879 char *val_p, *name_p, *white_p; 880 long val; 881 int i; 882 883 rf_ResetDebugOptions(); 884 for (i = 0; i < RF_MAXDBGV && cfgPtr->debugVars[i][0]; i++) { 885 name_p = rf_find_non_white(&cfgPtr->debugVars[i][0]); 886 white_p = rf_find_white(name_p); /* skip to start of 2nd 887 * word */ 888 val_p = rf_find_non_white(white_p); 889 if (*val_p == '0' && *(val_p + 1) == 'x') 890 val = rf_htoi(val_p + 2); 891 else 892 val = rf_atoi(val_p); 893 *white_p = '\0'; 894 set_debug_option(name_p, val); 895 } 896} 897 898void 899rf_print_panic_message(int line, const char *file) 900{ 901 snprintf(rf_panicbuf, sizeof(rf_panicbuf), 902 "raidframe error at line %d file %s", line, file); 903} 904 905#ifdef RAID_DIAGNOSTIC 906void 907rf_print_assert_panic_message(int line, const char *file, const char *condition) 908{ 909 snprintf(rf_panicbuf, sizeof(rf_panicbuf), 910 "raidframe error at line %d file %s (failed asserting %s)\n", 911 line, file, condition); 912} 913#endif 914 915void 916rf_print_unable_to_init_mutex(const char *file, int line, int rc) 917{ 918 RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", 919 file, line, rc); 920} 921 922void 923rf_print_unable_to_add_shutdown(const char *file, int line, int rc) 924{ 925 RF_ERRORMSG3("Unable to add to shutdown list file %s line %d rc=%d\n", 926 file, line, rc); 927} 928